intel_bufmgr_gem.c revision fd815b59
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"
#include "uthash.h"

#if HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MAX2(A, B) ((A) > (B) ? (A) : (B))

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
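 *
 * For example, upper_32_bits(0x123456789ULL) evaluates to 0x1, while
 * lower_32_bits() (defined below) yields 0x23456789 for the same value.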
 */
#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((__u32)(n))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	atomic_t refcount;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead managers;

	drm_intel_bo_gem *name_table;
	drm_intel_bo_gem *handle_table;

	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	unsigned int has_exec_async : 1;
	bool fenced_relocs;

	struct {
		void *ptr;
		uint32_t handle;
	} userptr_active;

} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;

	UT_hash_handle handle_hh;
	UT_hash_handle name_hh;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	unsigned long kflags;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Array of BOs that are referenced by this buffer and will be softpinned */
	drm_intel_bo **softpin_target;
	/** Number of softpinned BOs that are referenced by this buffer */
	int softpin_target_count;
	/** Maximum number of softpinned BOs that are referenced by this buffer */
	int softpin_target_size;

	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	/** WC CPU address for the buffer, saved across map/unmap cycles */
	void *wc_virtual;
	/**
	 * Virtual address of the buffer allocated by user, used for userptr
	 * objects only.
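	 *
	 * Unlike mem_virtual/gtt_virtual/wc_virtual above, this mapping is
	 * owned by the caller and is never unmapped by the buffer manager.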
223 */ 224 void *user_virtual; 225 int map_count; 226 drmMMListHead vma_list; 227 228 /** BO cache list */ 229 drmMMListHead head; 230 231 /** 232 * Boolean of whether this BO and its children have been included in 233 * the current drm_intel_bufmgr_check_aperture_space() total. 234 */ 235 bool included_in_check_aperture; 236 237 /** 238 * Boolean of whether this buffer has been used as a relocation 239 * target and had its size accounted for, and thus can't have any 240 * further relocations added to it. 241 */ 242 bool used_as_reloc_target; 243 244 /** 245 * Boolean of whether we have encountered an error whilst building the relocation tree. 246 */ 247 bool has_error; 248 249 /** 250 * Boolean of whether this buffer can be re-used 251 */ 252 bool reusable; 253 254 /** 255 * Boolean of whether the GPU is definitely not accessing the buffer. 256 * 257 * This is only valid when reusable, since non-reusable 258 * buffers are those that have been shared with other 259 * processes, so we don't know their state. 260 */ 261 bool idle; 262 263 /** 264 * Boolean of whether this buffer was allocated with userptr 265 */ 266 bool is_userptr; 267 268 /** 269 * Size in bytes of this buffer and its relocation descendents. 270 * 271 * Used to avoid costly tree walking in 272 * drm_intel_bufmgr_check_aperture in the common case. 273 */ 274 int reloc_tree_size; 275 276 /** 277 * Number of potential fence registers required by this buffer and its 278 * relocations. 279 */ 280 int reloc_tree_fences; 281 282 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ 283 bool mapped_cpu_write; 284}; 285 286static unsigned int 287drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 288 289static unsigned int 290drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 291 292static int 293drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 294 uint32_t * swizzle_mode); 295 296static int 297drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 298 uint32_t tiling_mode, 299 uint32_t stride); 300 301static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 302 time_t time); 303 304static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 305 306static void drm_intel_gem_bo_free(drm_intel_bo *bo); 307 308static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) 309{ 310 return (drm_intel_bo_gem *)bo; 311} 312 313static unsigned long 314drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 315 uint32_t *tiling_mode) 316{ 317 unsigned long min_size, max_size; 318 unsigned long i; 319 320 if (*tiling_mode == I915_TILING_NONE) 321 return size; 322 323 /* 965+ just need multiples of page size for tiling */ 324 if (bufmgr_gem->gen >= 4) 325 return ROUND_UP_TO(size, 4096); 326 327 /* Older chips need powers of two, of at least 512k or 1M */ 328 if (bufmgr_gem->gen == 3) { 329 min_size = 1024*1024; 330 max_size = 128*1024*1024; 331 } else { 332 min_size = 512*1024; 333 max_size = 64*1024*1024; 334 } 335 336 if (size > max_size) { 337 *tiling_mode = I915_TILING_NONE; 338 return size; 339 } 340 341 /* Do we need to allocate every page for the fence? */ 342 if (bufmgr_gem->has_relaxed_fencing) 343 return ROUND_UP_TO(size, 4096); 344 345 for (i = min_size; i < size; i <<= 1) 346 ; 347 348 return i; 349} 350 351/* 352 * Round a given pitch up to the minimum required for X tiling on a 353 * given chip. We use 512 as the minimum to allow for a later tiling 354 * change. 
355 */ 356static unsigned long 357drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 358 unsigned long pitch, uint32_t *tiling_mode) 359{ 360 unsigned long tile_width; 361 unsigned long i; 362 363 /* If untiled, then just align it so that we can do rendering 364 * to it with the 3D engine. 365 */ 366 if (*tiling_mode == I915_TILING_NONE) 367 return ALIGN(pitch, 64); 368 369 if (*tiling_mode == I915_TILING_X 370 || (IS_915(bufmgr_gem->pci_device) 371 && *tiling_mode == I915_TILING_Y)) 372 tile_width = 512; 373 else 374 tile_width = 128; 375 376 /* 965 is flexible */ 377 if (bufmgr_gem->gen >= 4) 378 return ROUND_UP_TO(pitch, tile_width); 379 380 /* The older hardware has a maximum pitch of 8192 with tiled 381 * surfaces, so fallback to untiled if it's too large. 382 */ 383 if (pitch > 8192) { 384 *tiling_mode = I915_TILING_NONE; 385 return ALIGN(pitch, 64); 386 } 387 388 /* Pre-965 needs power of two tile width */ 389 for (i = tile_width; i < pitch; i <<= 1) 390 ; 391 392 return i; 393} 394 395static struct drm_intel_gem_bo_bucket * 396drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 397 unsigned long size) 398{ 399 int i; 400 401 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 402 struct drm_intel_gem_bo_bucket *bucket = 403 &bufmgr_gem->cache_bucket[i]; 404 if (bucket->size >= size) { 405 return bucket; 406 } 407 } 408 409 return NULL; 410} 411 412static void 413drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 414{ 415 int i, j; 416 417 for (i = 0; i < bufmgr_gem->exec_count; i++) { 418 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 419 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 420 421 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { 422 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, 423 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 424 bo_gem->name); 425 continue; 426 } 427 428 for (j = 0; j < bo_gem->reloc_count; j++) { 429 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 430 drm_intel_bo_gem *target_gem = 431 (drm_intel_bo_gem *) target_bo; 432 433 DBG("%2d: %d %s(%s)@0x%08x %08x -> " 434 "%d (%s)@0x%08x %08x + 0x%08x\n", 435 i, 436 bo_gem->gem_handle, 437 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 438 bo_gem->name, 439 upper_32_bits(bo_gem->relocs[j].offset), 440 lower_32_bits(bo_gem->relocs[j].offset), 441 target_gem->gem_handle, 442 target_gem->name, 443 upper_32_bits(target_bo->offset64), 444 lower_32_bits(target_bo->offset64), 445 bo_gem->relocs[j].delta); 446 } 447 448 for (j = 0; j < bo_gem->softpin_target_count; j++) { 449 drm_intel_bo *target_bo = bo_gem->softpin_target[j]; 450 drm_intel_bo_gem *target_gem = 451 (drm_intel_bo_gem *) target_bo; 452 DBG("%2d: %d %s(%s) -> " 453 "%d *(%s)@0x%08x %08x\n", 454 i, 455 bo_gem->gem_handle, 456 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 457 bo_gem->name, 458 target_gem->gem_handle, 459 target_gem->name, 460 upper_32_bits(target_bo->offset64), 461 lower_32_bits(target_bo->offset64)); 462 } 463 } 464} 465 466static inline void 467drm_intel_gem_bo_reference(drm_intel_bo *bo) 468{ 469 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 470 471 atomic_inc(&bo_gem->refcount); 472} 473 474/** 475 * Adds the given buffer to the list of buffers to be validated (moved into the 476 * appropriate memory type) with the next batch submission. 477 * 478 * If a buffer is validated multiple times in a batch submission, it ends up 479 * with the intersection of the memory type flags and the union of the 480 * access flags. 
481 */ 482static void 483drm_intel_add_validate_buffer(drm_intel_bo *bo) 484{ 485 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 486 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 487 int index; 488 489 if (bo_gem->validate_index != -1) 490 return; 491 492 /* Extend the array of validation entries as necessary. */ 493 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 494 int new_size = bufmgr_gem->exec_size * 2; 495 496 if (new_size == 0) 497 new_size = 5; 498 499 bufmgr_gem->exec_objects = 500 realloc(bufmgr_gem->exec_objects, 501 sizeof(*bufmgr_gem->exec_objects) * new_size); 502 bufmgr_gem->exec_bos = 503 realloc(bufmgr_gem->exec_bos, 504 sizeof(*bufmgr_gem->exec_bos) * new_size); 505 bufmgr_gem->exec_size = new_size; 506 } 507 508 index = bufmgr_gem->exec_count; 509 bo_gem->validate_index = index; 510 /* Fill in array entry */ 511 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 512 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 513 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 514 bufmgr_gem->exec_objects[index].alignment = bo->align; 515 bufmgr_gem->exec_objects[index].offset = 0; 516 bufmgr_gem->exec_bos[index] = bo; 517 bufmgr_gem->exec_count++; 518} 519 520static void 521drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 522{ 523 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 524 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 525 int index; 526 unsigned long flags; 527 528 flags = 0; 529 if (need_fence) 530 flags |= EXEC_OBJECT_NEEDS_FENCE; 531 532 if (bo_gem->validate_index != -1) { 533 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; 534 return; 535 } 536 537 /* Extend the array of validation entries as necessary. */ 538 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 539 int new_size = bufmgr_gem->exec_size * 2; 540 541 if (new_size == 0) 542 new_size = 5; 543 544 bufmgr_gem->exec2_objects = 545 realloc(bufmgr_gem->exec2_objects, 546 sizeof(*bufmgr_gem->exec2_objects) * new_size); 547 bufmgr_gem->exec_bos = 548 realloc(bufmgr_gem->exec_bos, 549 sizeof(*bufmgr_gem->exec_bos) * new_size); 550 bufmgr_gem->exec_size = new_size; 551 } 552 553 index = bufmgr_gem->exec_count; 554 bo_gem->validate_index = index; 555 /* Fill in array entry */ 556 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 557 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 558 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 559 bufmgr_gem->exec2_objects[index].alignment = bo->align; 560 bufmgr_gem->exec2_objects[index].offset = bo->offset64; 561 bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; 562 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 563 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 564 bufmgr_gem->exec_bos[index] = bo; 565 bufmgr_gem->exec_count++; 566} 567 568#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 569 sizeof(uint32_t)) 570 571static void 572drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 573 drm_intel_bo_gem *bo_gem, 574 unsigned int alignment) 575{ 576 unsigned int size; 577 578 assert(!bo_gem->used_as_reloc_target); 579 580 /* The older chipsets are far-less flexible in terms of tiling, 581 * and require tiled buffer to be size aligned in the aperture. 582 * This means that in the worst possible case we will need a hole 583 * twice as large as the object in order for it to fit into the 584 * aperture. 
Optimal packing is for wimps. 585 */ 586 size = bo_gem->bo.size; 587 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 588 unsigned int min_size; 589 590 if (bufmgr_gem->has_relaxed_fencing) { 591 if (bufmgr_gem->gen == 3) 592 min_size = 1024*1024; 593 else 594 min_size = 512*1024; 595 596 while (min_size < size) 597 min_size *= 2; 598 } else 599 min_size = size; 600 601 /* Account for worst-case alignment. */ 602 alignment = MAX2(alignment, min_size); 603 } 604 605 bo_gem->reloc_tree_size = size + alignment; 606} 607 608static int 609drm_intel_setup_reloc_list(drm_intel_bo *bo) 610{ 611 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 612 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 613 unsigned int max_relocs = bufmgr_gem->max_relocs; 614 615 if (bo->size / 4 < max_relocs) 616 max_relocs = bo->size / 4; 617 618 bo_gem->relocs = malloc(max_relocs * 619 sizeof(struct drm_i915_gem_relocation_entry)); 620 bo_gem->reloc_target_info = malloc(max_relocs * 621 sizeof(drm_intel_reloc_target)); 622 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 623 bo_gem->has_error = true; 624 625 free (bo_gem->relocs); 626 bo_gem->relocs = NULL; 627 628 free (bo_gem->reloc_target_info); 629 bo_gem->reloc_target_info = NULL; 630 631 return 1; 632 } 633 634 return 0; 635} 636 637static int 638drm_intel_gem_bo_busy(drm_intel_bo *bo) 639{ 640 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 641 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 642 struct drm_i915_gem_busy busy; 643 int ret; 644 645 if (bo_gem->reusable && bo_gem->idle) 646 return false; 647 648 memclear(busy); 649 busy.handle = bo_gem->gem_handle; 650 651 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 652 if (ret == 0) { 653 bo_gem->idle = !busy.busy; 654 return busy.busy; 655 } else { 656 return false; 657 } 658} 659 660static int 661drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 662 drm_intel_bo_gem *bo_gem, int state) 663{ 664 struct drm_i915_gem_madvise madv; 665 666 memclear(madv); 667 madv.handle = bo_gem->gem_handle; 668 madv.madv = state; 669 madv.retained = 1; 670 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 671 672 return madv.retained; 673} 674 675static int 676drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 677{ 678 return drm_intel_gem_bo_madvise_internal 679 ((drm_intel_bufmgr_gem *) bo->bufmgr, 680 (drm_intel_bo_gem *) bo, 681 madv); 682} 683 684/* drop the oldest entries that have been purged by the kernel */ 685static void 686drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 687 struct drm_intel_gem_bo_bucket *bucket) 688{ 689 while (!DRMLISTEMPTY(&bucket->head)) { 690 drm_intel_bo_gem *bo_gem; 691 692 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 693 bucket->head.next, head); 694 if (drm_intel_gem_bo_madvise_internal 695 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 696 break; 697 698 DRMLISTDEL(&bo_gem->head); 699 drm_intel_gem_bo_free(&bo_gem->bo); 700 } 701} 702 703static drm_intel_bo * 704drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 705 const char *name, 706 unsigned long size, 707 unsigned long flags, 708 uint32_t tiling_mode, 709 unsigned long stride, 710 unsigned int alignment) 711{ 712 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 713 drm_intel_bo_gem *bo_gem; 714 unsigned int page_size = getpagesize(); 715 int ret; 716 struct drm_intel_gem_bo_bucket *bucket; 717 bool alloc_from_cache; 718 unsigned long bo_size; 719 bool 
for_render = false; 720 721 if (flags & BO_ALLOC_FOR_RENDER) 722 for_render = true; 723 724 /* Round the allocated size up to a power of two number of pages. */ 725 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 726 727 /* If we don't have caching at this size, don't actually round the 728 * allocation up. 729 */ 730 if (bucket == NULL) { 731 bo_size = size; 732 if (bo_size < page_size) 733 bo_size = page_size; 734 } else { 735 bo_size = bucket->size; 736 } 737 738 pthread_mutex_lock(&bufmgr_gem->lock); 739 /* Get a buffer out of the cache if available */ 740retry: 741 alloc_from_cache = false; 742 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 743 if (for_render) { 744 /* Allocate new render-target BOs from the tail (MRU) 745 * of the list, as it will likely be hot in the GPU 746 * cache and in the aperture for us. 747 */ 748 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 749 bucket->head.prev, head); 750 DRMLISTDEL(&bo_gem->head); 751 alloc_from_cache = true; 752 bo_gem->bo.align = alignment; 753 } else { 754 assert(alignment == 0); 755 /* For non-render-target BOs (where we're probably 756 * going to map it first thing in order to fill it 757 * with data), check if the last BO in the cache is 758 * unbusy, and only reuse in that case. Otherwise, 759 * allocating a new buffer is probably faster than 760 * waiting for the GPU to finish. 761 */ 762 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 763 bucket->head.next, head); 764 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 765 alloc_from_cache = true; 766 DRMLISTDEL(&bo_gem->head); 767 } 768 } 769 770 if (alloc_from_cache) { 771 if (!drm_intel_gem_bo_madvise_internal 772 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 773 drm_intel_gem_bo_free(&bo_gem->bo); 774 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 775 bucket); 776 goto retry; 777 } 778 779 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 780 tiling_mode, 781 stride)) { 782 drm_intel_gem_bo_free(&bo_gem->bo); 783 goto retry; 784 } 785 } 786 } 787 788 if (!alloc_from_cache) { 789 struct drm_i915_gem_create create; 790 791 bo_gem = calloc(1, sizeof(*bo_gem)); 792 if (!bo_gem) 793 goto err; 794 795 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 796 list (vma_list), so better set the list head here */ 797 DRMINITLISTHEAD(&bo_gem->vma_list); 798 799 bo_gem->bo.size = bo_size; 800 801 memclear(create); 802 create.size = bo_size; 803 804 ret = drmIoctl(bufmgr_gem->fd, 805 DRM_IOCTL_I915_GEM_CREATE, 806 &create); 807 if (ret != 0) { 808 free(bo_gem); 809 goto err; 810 } 811 812 bo_gem->gem_handle = create.handle; 813 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 814 gem_handle, sizeof(bo_gem->gem_handle), 815 bo_gem); 816 817 bo_gem->bo.handle = bo_gem->gem_handle; 818 bo_gem->bo.bufmgr = bufmgr; 819 bo_gem->bo.align = alignment; 820 821 bo_gem->tiling_mode = I915_TILING_NONE; 822 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 823 bo_gem->stride = 0; 824 825 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 826 tiling_mode, 827 stride)) 828 goto err_free; 829 } 830 831 bo_gem->name = name; 832 atomic_set(&bo_gem->refcount, 1); 833 bo_gem->validate_index = -1; 834 bo_gem->reloc_tree_fences = 0; 835 bo_gem->used_as_reloc_target = false; 836 bo_gem->has_error = false; 837 bo_gem->reusable = true; 838 839 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); 840 pthread_mutex_unlock(&bufmgr_gem->lock); 841 842 DBG("bo_create: buf %d (%s) %ldb\n", 843 bo_gem->gem_handle, bo_gem->name, size); 844 845 return &bo_gem->bo; 846 847err_free: 848 
drm_intel_gem_bo_free(&bo_gem->bo); 849err: 850 pthread_mutex_unlock(&bufmgr_gem->lock); 851 return NULL; 852} 853 854static drm_intel_bo * 855drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 856 const char *name, 857 unsigned long size, 858 unsigned int alignment) 859{ 860 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 861 BO_ALLOC_FOR_RENDER, 862 I915_TILING_NONE, 0, 863 alignment); 864} 865 866static drm_intel_bo * 867drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 868 const char *name, 869 unsigned long size, 870 unsigned int alignment) 871{ 872 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 873 I915_TILING_NONE, 0, 0); 874} 875 876static drm_intel_bo * 877drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 878 int x, int y, int cpp, uint32_t *tiling_mode, 879 unsigned long *pitch, unsigned long flags) 880{ 881 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 882 unsigned long size, stride; 883 uint32_t tiling; 884 885 do { 886 unsigned long aligned_y, height_alignment; 887 888 tiling = *tiling_mode; 889 890 /* If we're tiled, our allocations are in 8 or 32-row blocks, 891 * so failure to align our height means that we won't allocate 892 * enough pages. 893 * 894 * If we're untiled, we still have to align to 2 rows high 895 * because the data port accesses 2x2 blocks even if the 896 * bottom row isn't to be rendered, so failure to align means 897 * we could walk off the end of the GTT and fault. This is 898 * documented on 965, and may be the case on older chipsets 899 * too so we try to be careful. 900 */ 901 aligned_y = y; 902 height_alignment = 2; 903 904 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 905 height_alignment = 16; 906 else if (tiling == I915_TILING_X 907 || (IS_915(bufmgr_gem->pci_device) 908 && tiling == I915_TILING_Y)) 909 height_alignment = 8; 910 else if (tiling == I915_TILING_Y) 911 height_alignment = 32; 912 aligned_y = ALIGN(y, height_alignment); 913 914 stride = x * cpp; 915 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 916 size = stride * aligned_y; 917 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 918 } while (*tiling_mode != tiling); 919 *pitch = stride; 920 921 if (tiling == I915_TILING_NONE) 922 stride = 0; 923 924 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 925 tiling, stride, 0); 926} 927 928static drm_intel_bo * 929drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 930 const char *name, 931 void *addr, 932 uint32_t tiling_mode, 933 uint32_t stride, 934 unsigned long size, 935 unsigned long flags) 936{ 937 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 938 drm_intel_bo_gem *bo_gem; 939 int ret; 940 struct drm_i915_gem_userptr userptr; 941 942 /* Tiling with userptr surfaces is not supported 943 * on all hardware so refuse it for time being. 
944 */ 945 if (tiling_mode != I915_TILING_NONE) 946 return NULL; 947 948 bo_gem = calloc(1, sizeof(*bo_gem)); 949 if (!bo_gem) 950 return NULL; 951 952 atomic_set(&bo_gem->refcount, 1); 953 DRMINITLISTHEAD(&bo_gem->vma_list); 954 955 bo_gem->bo.size = size; 956 957 memclear(userptr); 958 userptr.user_ptr = (__u64)((unsigned long)addr); 959 userptr.user_size = size; 960 userptr.flags = flags; 961 962 ret = drmIoctl(bufmgr_gem->fd, 963 DRM_IOCTL_I915_GEM_USERPTR, 964 &userptr); 965 if (ret != 0) { 966 DBG("bo_create_userptr: " 967 "ioctl failed with user ptr %p size 0x%lx, " 968 "user flags 0x%lx\n", addr, size, flags); 969 free(bo_gem); 970 return NULL; 971 } 972 973 pthread_mutex_lock(&bufmgr_gem->lock); 974 975 bo_gem->gem_handle = userptr.handle; 976 bo_gem->bo.handle = bo_gem->gem_handle; 977 bo_gem->bo.bufmgr = bufmgr; 978 bo_gem->is_userptr = true; 979 bo_gem->bo.virtual = addr; 980 /* Save the address provided by user */ 981 bo_gem->user_virtual = addr; 982 bo_gem->tiling_mode = I915_TILING_NONE; 983 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 984 bo_gem->stride = 0; 985 986 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 987 gem_handle, sizeof(bo_gem->gem_handle), 988 bo_gem); 989 990 bo_gem->name = name; 991 bo_gem->validate_index = -1; 992 bo_gem->reloc_tree_fences = 0; 993 bo_gem->used_as_reloc_target = false; 994 bo_gem->has_error = false; 995 bo_gem->reusable = false; 996 997 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 998 pthread_mutex_unlock(&bufmgr_gem->lock); 999 1000 DBG("bo_create_userptr: " 1001 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 1002 addr, bo_gem->gem_handle, bo_gem->name, 1003 size, stride, tiling_mode); 1004 1005 return &bo_gem->bo; 1006} 1007 1008static bool 1009has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 1010{ 1011 int ret; 1012 void *ptr; 1013 long pgsz; 1014 struct drm_i915_gem_userptr userptr; 1015 1016 pgsz = sysconf(_SC_PAGESIZE); 1017 assert(pgsz > 0); 1018 1019 ret = posix_memalign(&ptr, pgsz, pgsz); 1020 if (ret) { 1021 DBG("Failed to get a page (%ld) for userptr detection!\n", 1022 pgsz); 1023 return false; 1024 } 1025 1026 memclear(userptr); 1027 userptr.user_ptr = (__u64)(unsigned long)ptr; 1028 userptr.user_size = pgsz; 1029 1030retry: 1031 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 1032 if (ret) { 1033 if (errno == ENODEV && userptr.flags == 0) { 1034 userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 1035 goto retry; 1036 } 1037 free(ptr); 1038 return false; 1039 } 1040 1041 /* We don't release the userptr bo here as we want to keep the 1042 * kernel mm tracking alive for our lifetime. The first time we 1043 * create a userptr object the kernel has to install a mmu_notifer 1044 * which is a heavyweight operation (e.g. it requires taking all 1045 * mm_locks and stop_machine()). 
1046 */ 1047 1048 bufmgr_gem->userptr_active.ptr = ptr; 1049 bufmgr_gem->userptr_active.handle = userptr.handle; 1050 1051 return true; 1052} 1053 1054static drm_intel_bo * 1055check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 1056 const char *name, 1057 void *addr, 1058 uint32_t tiling_mode, 1059 uint32_t stride, 1060 unsigned long size, 1061 unsigned long flags) 1062{ 1063 if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1064 bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1065 else 1066 bufmgr->bo_alloc_userptr = NULL; 1067 1068 return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1069 tiling_mode, stride, size, flags); 1070} 1071 1072/** 1073 * Returns a drm_intel_bo wrapping the given buffer object handle. 1074 * 1075 * This can be used when one application needs to pass a buffer object 1076 * to another. 1077 */ 1078drm_public drm_intel_bo * 1079drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 1080 const char *name, 1081 unsigned int handle) 1082{ 1083 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1084 drm_intel_bo_gem *bo_gem; 1085 int ret; 1086 struct drm_gem_open open_arg; 1087 struct drm_i915_gem_get_tiling get_tiling; 1088 1089 /* At the moment most applications only have a few named bo. 1090 * For instance, in a DRI client only the render buffers passed 1091 * between X and the client are named. And since X returns the 1092 * alternating names for the front/back buffer a linear search 1093 * provides a sufficiently fast match. 1094 */ 1095 pthread_mutex_lock(&bufmgr_gem->lock); 1096 HASH_FIND(name_hh, bufmgr_gem->name_table, 1097 &handle, sizeof(handle), bo_gem); 1098 if (bo_gem) { 1099 drm_intel_gem_bo_reference(&bo_gem->bo); 1100 goto out; 1101 } 1102 1103 memclear(open_arg); 1104 open_arg.name = handle; 1105 ret = drmIoctl(bufmgr_gem->fd, 1106 DRM_IOCTL_GEM_OPEN, 1107 &open_arg); 1108 if (ret != 0) { 1109 DBG("Couldn't reference %s handle 0x%08x: %s\n", 1110 name, handle, strerror(errno)); 1111 bo_gem = NULL; 1112 goto out; 1113 } 1114 /* Now see if someone has used a prime handle to get this 1115 * object from the kernel before by looking through the list 1116 * again for a matching gem_handle 1117 */ 1118 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 1119 &open_arg.handle, sizeof(open_arg.handle), bo_gem); 1120 if (bo_gem) { 1121 drm_intel_gem_bo_reference(&bo_gem->bo); 1122 goto out; 1123 } 1124 1125 bo_gem = calloc(1, sizeof(*bo_gem)); 1126 if (!bo_gem) 1127 goto out; 1128 1129 atomic_set(&bo_gem->refcount, 1); 1130 DRMINITLISTHEAD(&bo_gem->vma_list); 1131 1132 bo_gem->bo.size = open_arg.size; 1133 bo_gem->bo.offset = 0; 1134 bo_gem->bo.offset64 = 0; 1135 bo_gem->bo.virtual = NULL; 1136 bo_gem->bo.bufmgr = bufmgr; 1137 bo_gem->name = name; 1138 bo_gem->validate_index = -1; 1139 bo_gem->gem_handle = open_arg.handle; 1140 bo_gem->bo.handle = open_arg.handle; 1141 bo_gem->global_name = handle; 1142 bo_gem->reusable = false; 1143 1144 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 1145 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 1146 HASH_ADD(name_hh, bufmgr_gem->name_table, 1147 global_name, sizeof(bo_gem->global_name), bo_gem); 1148 1149 memclear(get_tiling); 1150 get_tiling.handle = bo_gem->gem_handle; 1151 ret = drmIoctl(bufmgr_gem->fd, 1152 DRM_IOCTL_I915_GEM_GET_TILING, 1153 &get_tiling); 1154 if (ret != 0) 1155 goto err_unref; 1156 1157 bo_gem->tiling_mode = get_tiling.tiling_mode; 1158 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 1159 /* XXX stride is unknown */ 1160 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, 
bo_gem, 0); 1161 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 1162 1163out: 1164 pthread_mutex_unlock(&bufmgr_gem->lock); 1165 return &bo_gem->bo; 1166 1167err_unref: 1168 drm_intel_gem_bo_free(&bo_gem->bo); 1169 pthread_mutex_unlock(&bufmgr_gem->lock); 1170 return NULL; 1171} 1172 1173static void 1174drm_intel_gem_bo_free(drm_intel_bo *bo) 1175{ 1176 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1177 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1178 struct drm_gem_close close; 1179 int ret; 1180 1181 DRMLISTDEL(&bo_gem->vma_list); 1182 if (bo_gem->mem_virtual) { 1183 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1184 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1185 bufmgr_gem->vma_count--; 1186 } 1187 if (bo_gem->wc_virtual) { 1188 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); 1189 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1190 bufmgr_gem->vma_count--; 1191 } 1192 if (bo_gem->gtt_virtual) { 1193 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1194 bufmgr_gem->vma_count--; 1195 } 1196 1197 if (bo_gem->global_name) 1198 HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); 1199 HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); 1200 1201 /* Close this object */ 1202 memclear(close); 1203 close.handle = bo_gem->gem_handle; 1204 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 1205 if (ret != 0) { 1206 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 1207 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 1208 } 1209 free(bo); 1210} 1211 1212static void 1213drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 1214{ 1215#if HAVE_VALGRIND 1216 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1217 1218 if (bo_gem->mem_virtual) 1219 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 1220 1221 if (bo_gem->wc_virtual) 1222 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); 1223 1224 if (bo_gem->gtt_virtual) 1225 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 1226#endif 1227} 1228 1229/** Frees all cached buffers significantly older than @time. 
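 * (In practice this means buffers whose free_time is more than about one
 * second before @time, per the check in the loop below.)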
*/ 1230static void 1231drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 1232{ 1233 int i; 1234 1235 if (bufmgr_gem->time == time) 1236 return; 1237 1238 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1239 struct drm_intel_gem_bo_bucket *bucket = 1240 &bufmgr_gem->cache_bucket[i]; 1241 1242 while (!DRMLISTEMPTY(&bucket->head)) { 1243 drm_intel_bo_gem *bo_gem; 1244 1245 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1246 bucket->head.next, head); 1247 if (time - bo_gem->free_time <= 1) 1248 break; 1249 1250 DRMLISTDEL(&bo_gem->head); 1251 1252 drm_intel_gem_bo_free(&bo_gem->bo); 1253 } 1254 } 1255 1256 bufmgr_gem->time = time; 1257} 1258 1259static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1260{ 1261 int limit; 1262 1263 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1264 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1265 1266 if (bufmgr_gem->vma_max < 0) 1267 return; 1268 1269 /* We may need to evict a few entries in order to create new mmaps */ 1270 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1271 if (limit < 0) 1272 limit = 0; 1273 1274 while (bufmgr_gem->vma_count > limit) { 1275 drm_intel_bo_gem *bo_gem; 1276 1277 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1278 bufmgr_gem->vma_cache.next, 1279 vma_list); 1280 assert(bo_gem->map_count == 0); 1281 DRMLISTDELINIT(&bo_gem->vma_list); 1282 1283 if (bo_gem->mem_virtual) { 1284 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1285 bo_gem->mem_virtual = NULL; 1286 bufmgr_gem->vma_count--; 1287 } 1288 if (bo_gem->wc_virtual) { 1289 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1290 bo_gem->wc_virtual = NULL; 1291 bufmgr_gem->vma_count--; 1292 } 1293 if (bo_gem->gtt_virtual) { 1294 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1295 bo_gem->gtt_virtual = NULL; 1296 bufmgr_gem->vma_count--; 1297 } 1298 } 1299} 1300 1301static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1302 drm_intel_bo_gem *bo_gem) 1303{ 1304 bufmgr_gem->vma_open--; 1305 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1306 if (bo_gem->mem_virtual) 1307 bufmgr_gem->vma_count++; 1308 if (bo_gem->wc_virtual) 1309 bufmgr_gem->vma_count++; 1310 if (bo_gem->gtt_virtual) 1311 bufmgr_gem->vma_count++; 1312 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1313} 1314 1315static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1316 drm_intel_bo_gem *bo_gem) 1317{ 1318 bufmgr_gem->vma_open++; 1319 DRMLISTDEL(&bo_gem->vma_list); 1320 if (bo_gem->mem_virtual) 1321 bufmgr_gem->vma_count--; 1322 if (bo_gem->wc_virtual) 1323 bufmgr_gem->vma_count--; 1324 if (bo_gem->gtt_virtual) 1325 bufmgr_gem->vma_count--; 1326 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1327} 1328 1329static void 1330drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1331{ 1332 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1333 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1334 struct drm_intel_gem_bo_bucket *bucket; 1335 int i; 1336 1337 /* Unreference all the target buffers */ 1338 for (i = 0; i < bo_gem->reloc_count; i++) { 1339 if (bo_gem->reloc_target_info[i].bo != bo) { 1340 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1341 reloc_target_info[i].bo, 1342 time); 1343 } 1344 } 1345 for (i = 0; i < bo_gem->softpin_target_count; i++) 1346 drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], 1347 time); 1348 bo_gem->kflags = 0; 1349 bo_gem->reloc_count = 0; 1350 bo_gem->used_as_reloc_target = false; 1351 
bo_gem->softpin_target_count = 0; 1352 1353 DBG("bo_unreference final: %d (%s)\n", 1354 bo_gem->gem_handle, bo_gem->name); 1355 1356 /* release memory associated with this object */ 1357 if (bo_gem->reloc_target_info) { 1358 free(bo_gem->reloc_target_info); 1359 bo_gem->reloc_target_info = NULL; 1360 } 1361 if (bo_gem->relocs) { 1362 free(bo_gem->relocs); 1363 bo_gem->relocs = NULL; 1364 } 1365 if (bo_gem->softpin_target) { 1366 free(bo_gem->softpin_target); 1367 bo_gem->softpin_target = NULL; 1368 bo_gem->softpin_target_size = 0; 1369 } 1370 1371 /* Clear any left-over mappings */ 1372 if (bo_gem->map_count) { 1373 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 1374 bo_gem->map_count = 0; 1375 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1376 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1377 } 1378 1379 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1380 /* Put the buffer into our internal cache for reuse if we can. */ 1381 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1382 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1383 I915_MADV_DONTNEED)) { 1384 bo_gem->free_time = time; 1385 1386 bo_gem->name = NULL; 1387 bo_gem->validate_index = -1; 1388 1389 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1390 } else { 1391 drm_intel_gem_bo_free(bo); 1392 } 1393} 1394 1395static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1396 time_t time) 1397{ 1398 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1399 1400 assert(atomic_read(&bo_gem->refcount) > 0); 1401 if (atomic_dec_and_test(&bo_gem->refcount)) 1402 drm_intel_gem_bo_unreference_final(bo, time); 1403} 1404 1405static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1406{ 1407 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1408 1409 assert(atomic_read(&bo_gem->refcount) > 0); 1410 1411 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 1412 drm_intel_bufmgr_gem *bufmgr_gem = 1413 (drm_intel_bufmgr_gem *) bo->bufmgr; 1414 struct timespec time; 1415 1416 clock_gettime(CLOCK_MONOTONIC, &time); 1417 1418 pthread_mutex_lock(&bufmgr_gem->lock); 1419 1420 if (atomic_dec_and_test(&bo_gem->refcount)) { 1421 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1422 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1423 } 1424 1425 pthread_mutex_unlock(&bufmgr_gem->lock); 1426 } 1427} 1428 1429static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1430{ 1431 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1432 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1433 struct drm_i915_gem_set_domain set_domain; 1434 int ret; 1435 1436 if (bo_gem->is_userptr) { 1437 /* Return the same user ptr */ 1438 bo->virtual = bo_gem->user_virtual; 1439 return 0; 1440 } 1441 1442 pthread_mutex_lock(&bufmgr_gem->lock); 1443 1444 if (bo_gem->map_count++ == 0) 1445 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1446 1447 if (!bo_gem->mem_virtual) { 1448 struct drm_i915_gem_mmap mmap_arg; 1449 1450 DBG("bo_map: %d (%s), map_count=%d\n", 1451 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1452 1453 memclear(mmap_arg); 1454 mmap_arg.handle = bo_gem->gem_handle; 1455 mmap_arg.size = bo->size; 1456 ret = drmIoctl(bufmgr_gem->fd, 1457 DRM_IOCTL_I915_GEM_MMAP, 1458 &mmap_arg); 1459 if (ret != 0) { 1460 ret = -errno; 1461 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1462 __FILE__, __LINE__, bo_gem->gem_handle, 1463 bo_gem->name, strerror(errno)); 1464 if (--bo_gem->map_count == 0) 1465 drm_intel_gem_bo_close_vma(bufmgr_gem, 
bo_gem); 1466 pthread_mutex_unlock(&bufmgr_gem->lock); 1467 return ret; 1468 } 1469 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1470 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1471 } 1472 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1473 bo_gem->mem_virtual); 1474 bo->virtual = bo_gem->mem_virtual; 1475 1476 memclear(set_domain); 1477 set_domain.handle = bo_gem->gem_handle; 1478 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1479 if (write_enable) 1480 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1481 else 1482 set_domain.write_domain = 0; 1483 ret = drmIoctl(bufmgr_gem->fd, 1484 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1485 &set_domain); 1486 if (ret != 0) { 1487 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1488 __FILE__, __LINE__, bo_gem->gem_handle, 1489 strerror(errno)); 1490 } 1491 1492 if (write_enable) 1493 bo_gem->mapped_cpu_write = true; 1494 1495 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1496 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1497 pthread_mutex_unlock(&bufmgr_gem->lock); 1498 1499 return 0; 1500} 1501 1502static int 1503map_gtt(drm_intel_bo *bo) 1504{ 1505 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1506 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1507 int ret; 1508 1509 if (bo_gem->is_userptr) 1510 return -EINVAL; 1511 1512 if (bo_gem->map_count++ == 0) 1513 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1514 1515 /* Get a mapping of the buffer if we haven't before. */ 1516 if (bo_gem->gtt_virtual == NULL) { 1517 struct drm_i915_gem_mmap_gtt mmap_arg; 1518 1519 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1520 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1521 1522 memclear(mmap_arg); 1523 mmap_arg.handle = bo_gem->gem_handle; 1524 1525 /* Get the fake offset back... */ 1526 ret = drmIoctl(bufmgr_gem->fd, 1527 DRM_IOCTL_I915_GEM_MMAP_GTT, 1528 &mmap_arg); 1529 if (ret != 0) { 1530 ret = -errno; 1531 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1532 __FILE__, __LINE__, 1533 bo_gem->gem_handle, bo_gem->name, 1534 strerror(errno)); 1535 if (--bo_gem->map_count == 0) 1536 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1537 return ret; 1538 } 1539 1540 /* and mmap it */ 1541 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, 1542 MAP_SHARED, bufmgr_gem->fd, 1543 mmap_arg.offset); 1544 if (bo_gem->gtt_virtual == MAP_FAILED) { 1545 bo_gem->gtt_virtual = NULL; 1546 ret = -errno; 1547 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1548 __FILE__, __LINE__, 1549 bo_gem->gem_handle, bo_gem->name, 1550 strerror(errno)); 1551 if (--bo_gem->map_count == 0) 1552 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1553 return ret; 1554 } 1555 } 1556 1557 bo->virtual = bo_gem->gtt_virtual; 1558 1559 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1560 bo_gem->gtt_virtual); 1561 1562 return 0; 1563} 1564 1565drm_public int 1566drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1567{ 1568 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1569 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1570 struct drm_i915_gem_set_domain set_domain; 1571 int ret; 1572 1573 pthread_mutex_lock(&bufmgr_gem->lock); 1574 1575 ret = map_gtt(bo); 1576 if (ret) { 1577 pthread_mutex_unlock(&bufmgr_gem->lock); 1578 return ret; 1579 } 1580 1581 /* Now move it to the GTT domain so that the GPU and CPU 1582 * caches are flushed and the GPU isn't actively using the 1583 * buffer. 
1584 * 1585 * The pagefault handler does this domain change for us when 1586 * it has unbound the BO from the GTT, but it's up to us to 1587 * tell it when we're about to use things if we had done 1588 * rendering and it still happens to be bound to the GTT. 1589 */ 1590 memclear(set_domain); 1591 set_domain.handle = bo_gem->gem_handle; 1592 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1593 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1594 ret = drmIoctl(bufmgr_gem->fd, 1595 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1596 &set_domain); 1597 if (ret != 0) { 1598 DBG("%s:%d: Error setting domain %d: %s\n", 1599 __FILE__, __LINE__, bo_gem->gem_handle, 1600 strerror(errno)); 1601 } 1602 1603 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1604 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1605 pthread_mutex_unlock(&bufmgr_gem->lock); 1606 1607 return 0; 1608} 1609 1610/** 1611 * Performs a mapping of the buffer object like the normal GTT 1612 * mapping, but avoids waiting for the GPU to be done reading from or 1613 * rendering to the buffer. 1614 * 1615 * This is used in the implementation of GL_ARB_map_buffer_range: The 1616 * user asks to create a buffer, then does a mapping, fills some 1617 * space, runs a drawing command, then asks to map it again without 1618 * synchronizing because it guarantees that it won't write over the 1619 * data that the GPU is busy using (or, more specifically, that if it 1620 * does write over the data, it acknowledges that rendering is 1621 * undefined). 1622 */ 1623 1624drm_public int 1625drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1626{ 1627 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1628#if HAVE_VALGRIND 1629 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1630#endif 1631 int ret; 1632 1633 /* If the CPU cache isn't coherent with the GTT, then use a 1634 * regular synchronized mapping. The problem is that we don't 1635 * track where the buffer was last used on the CPU side in 1636 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 1637 * we would potentially corrupt the buffer even when the user 1638 * does reasonable things. 1639 */ 1640 if (!bufmgr_gem->has_llc) 1641 return drm_intel_gem_bo_map_gtt(bo); 1642 1643 pthread_mutex_lock(&bufmgr_gem->lock); 1644 1645 ret = map_gtt(bo); 1646 if (ret == 0) { 1647 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1648 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1649 } 1650 1651 pthread_mutex_unlock(&bufmgr_gem->lock); 1652 1653 return ret; 1654} 1655 1656static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1657{ 1658 drm_intel_bufmgr_gem *bufmgr_gem; 1659 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1660 int ret = 0; 1661 1662 if (bo == NULL) 1663 return 0; 1664 1665 if (bo_gem->is_userptr) 1666 return 0; 1667 1668 bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1669 1670 pthread_mutex_lock(&bufmgr_gem->lock); 1671 1672 if (bo_gem->map_count <= 0) { 1673 DBG("attempted to unmap an unmapped bo\n"); 1674 pthread_mutex_unlock(&bufmgr_gem->lock); 1675 /* Preserve the old behaviour of just treating this as a 1676 * no-op rather than reporting the error. 1677 */ 1678 return 0; 1679 } 1680 1681 if (bo_gem->mapped_cpu_write) { 1682 struct drm_i915_gem_sw_finish sw_finish; 1683 1684 /* Cause a flush to happen if the buffer's pinned for 1685 * scanout, so the results show up in a timely manner. 1686 * Unlike GTT set domains, this only does work if the 1687 * buffer should be scanout-related. 
1688 */ 1689 memclear(sw_finish); 1690 sw_finish.handle = bo_gem->gem_handle; 1691 ret = drmIoctl(bufmgr_gem->fd, 1692 DRM_IOCTL_I915_GEM_SW_FINISH, 1693 &sw_finish); 1694 ret = ret == -1 ? -errno : 0; 1695 1696 bo_gem->mapped_cpu_write = false; 1697 } 1698 1699 /* We need to unmap after every innovation as we cannot track 1700 * an open vma for every bo as that will exhaust the system 1701 * limits and cause later failures. 1702 */ 1703 if (--bo_gem->map_count == 0) { 1704 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1705 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1706 bo->virtual = NULL; 1707 } 1708 pthread_mutex_unlock(&bufmgr_gem->lock); 1709 1710 return ret; 1711} 1712 1713drm_public int 1714drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1715{ 1716 return drm_intel_gem_bo_unmap(bo); 1717} 1718 1719static int 1720drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1721 unsigned long size, const void *data) 1722{ 1723 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1724 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1725 struct drm_i915_gem_pwrite pwrite; 1726 int ret; 1727 1728 if (bo_gem->is_userptr) 1729 return -EINVAL; 1730 1731 memclear(pwrite); 1732 pwrite.handle = bo_gem->gem_handle; 1733 pwrite.offset = offset; 1734 pwrite.size = size; 1735 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1736 ret = drmIoctl(bufmgr_gem->fd, 1737 DRM_IOCTL_I915_GEM_PWRITE, 1738 &pwrite); 1739 if (ret != 0) { 1740 ret = -errno; 1741 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1742 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1743 (int)size, strerror(errno)); 1744 } 1745 1746 return ret; 1747} 1748 1749static int 1750drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1751{ 1752 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1753 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1754 int ret; 1755 1756 memclear(get_pipe_from_crtc_id); 1757 get_pipe_from_crtc_id.crtc_id = crtc_id; 1758 ret = drmIoctl(bufmgr_gem->fd, 1759 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1760 &get_pipe_from_crtc_id); 1761 if (ret != 0) { 1762 /* We return -1 here to signal that we don't 1763 * know which pipe is associated with this crtc. 1764 * This lets the caller know that this information 1765 * isn't available; using the wrong pipe for 1766 * vblank waiting can cause the chipset to lock up 1767 */ 1768 return -1; 1769 } 1770 1771 return get_pipe_from_crtc_id.pipe; 1772} 1773 1774static int 1775drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1776 unsigned long size, void *data) 1777{ 1778 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1779 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1780 struct drm_i915_gem_pread pread; 1781 int ret; 1782 1783 if (bo_gem->is_userptr) 1784 return -EINVAL; 1785 1786 memclear(pread); 1787 pread.handle = bo_gem->gem_handle; 1788 pread.offset = offset; 1789 pread.size = size; 1790 pread.data_ptr = (uint64_t) (uintptr_t) data; 1791 ret = drmIoctl(bufmgr_gem->fd, 1792 DRM_IOCTL_I915_GEM_PREAD, 1793 &pread); 1794 if (ret != 0) { 1795 ret = -errno; 1796 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1797 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1798 (int)size, strerror(errno)); 1799 } 1800 1801 return ret; 1802} 1803 1804/** Waits for all GPU rendering with the object to have completed. 
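 * (Implemented via drm_intel_gem_bo_start_gtt_access(), i.e. a set-domain
 * to the GTT domain, which the kernel does not complete until outstanding
 * rendering on the object has finished.)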
 */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If the value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time. Otherwise some negative return
 * value describes the error. Of particular interest is -ETIME when the wait has
 * failed to yield the desired result.
 *
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time. Another subtle
 * difference is that the internal locking semantics differ (this variant does
 * not hold the lock for the duration of the wait). This makes the wait subject
 * to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait will
 * not guarantee that the buffer is not re-issued via another thread or a
 * flinked handle. Userspace must make sure this race does not occur if such
 * precision is important.
 *
 * Note that some kernels have broken the promise of an infinite wait for
 * negative values; upgrade to the latest stable kernel if this is the case.
 */
drm_public int
drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
drm_public void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1886 ret = drmIoctl(bufmgr_gem->fd, 1887 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1888 &set_domain); 1889 if (ret != 0) { 1890 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1891 __FILE__, __LINE__, bo_gem->gem_handle, 1892 set_domain.read_domains, set_domain.write_domain, 1893 strerror(errno)); 1894 } 1895} 1896 1897static void 1898drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1899{ 1900 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1901 struct drm_gem_close close_bo; 1902 int i, ret; 1903 1904 free(bufmgr_gem->exec2_objects); 1905 free(bufmgr_gem->exec_objects); 1906 free(bufmgr_gem->exec_bos); 1907 1908 pthread_mutex_destroy(&bufmgr_gem->lock); 1909 1910 /* Free any cached buffer objects we were going to reuse */ 1911 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1912 struct drm_intel_gem_bo_bucket *bucket = 1913 &bufmgr_gem->cache_bucket[i]; 1914 drm_intel_bo_gem *bo_gem; 1915 1916 while (!DRMLISTEMPTY(&bucket->head)) { 1917 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1918 bucket->head.next, head); 1919 DRMLISTDEL(&bo_gem->head); 1920 1921 drm_intel_gem_bo_free(&bo_gem->bo); 1922 } 1923 } 1924 1925 /* Release userptr bo kept hanging around for optimisation. */ 1926 if (bufmgr_gem->userptr_active.ptr) { 1927 memclear(close_bo); 1928 close_bo.handle = bufmgr_gem->userptr_active.handle; 1929 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1930 free(bufmgr_gem->userptr_active.ptr); 1931 if (ret) 1932 fprintf(stderr, 1933 "Failed to release test userptr object! (%d) " 1934 "i915 kernel driver may not be sane!\n", errno); 1935 } 1936 1937 free(bufmgr); 1938} 1939 1940/** 1941 * Adds the target buffer to the validation list and adds the relocation 1942 * to the reloc_buffer's relocation list. 1943 * 1944 * The relocation entry at the given offset must already contain the 1945 * precomputed relocation value, because the kernel will optimize out 1946 * the relocation entry write when the buffer hasn't moved from the 1947 * last known offset in target_bo. 1948 */ 1949static int 1950do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1951 drm_intel_bo *target_bo, uint32_t target_offset, 1952 uint32_t read_domains, uint32_t write_domain, 1953 bool need_fence) 1954{ 1955 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1956 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1957 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1958 bool fenced_command; 1959 1960 if (bo_gem->has_error) 1961 return -ENOMEM; 1962 1963 if (target_bo_gem->has_error) { 1964 bo_gem->has_error = true; 1965 return -ENOMEM; 1966 } 1967 1968 /* We never use HW fences for rendering on 965+ */ 1969 if (bufmgr_gem->gen >= 4) 1970 need_fence = false; 1971 1972 fenced_command = need_fence; 1973 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1974 need_fence = false; 1975 1976 /* Create a new relocation list if needed */ 1977 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1978 return -ENOMEM; 1979 1980 /* Check overflow */ 1981 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1982 1983 /* Check args */ 1984 assert(offset <= bo->size - 4); 1985 assert((write_domain & (write_domain - 1)) == 0); 1986 1987 /* An object needing a fence is a tiled buffer, so it won't have 1988 * relocs to other buffers. 
1989 */ 1990 if (need_fence) { 1991 assert(target_bo_gem->reloc_count == 0); 1992 target_bo_gem->reloc_tree_fences = 1; 1993 } 1994 1995 /* Make sure that we're not adding a reloc to something whose size has 1996 * already been accounted for. 1997 */ 1998 assert(!bo_gem->used_as_reloc_target); 1999 if (target_bo_gem != bo_gem) { 2000 target_bo_gem->used_as_reloc_target = true; 2001 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 2002 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 2003 } 2004 2005 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 2006 if (target_bo != bo) 2007 drm_intel_gem_bo_reference(target_bo); 2008 if (fenced_command) 2009 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 2010 DRM_INTEL_RELOC_FENCE; 2011 else 2012 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 2013 2014 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 2015 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 2016 bo_gem->relocs[bo_gem->reloc_count].target_handle = 2017 target_bo_gem->gem_handle; 2018 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 2019 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 2020 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 2021 bo_gem->reloc_count++; 2022 2023 return 0; 2024} 2025 2026static void 2027drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) 2028{ 2029 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2030 2031 if (enable) 2032 bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2033 else 2034 bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2035} 2036 2037static int 2038drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) 2039{ 2040 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2041 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2042 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2043 if (bo_gem->has_error) 2044 return -ENOMEM; 2045 2046 if (target_bo_gem->has_error) { 2047 bo_gem->has_error = true; 2048 return -ENOMEM; 2049 } 2050 2051 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) 2052 return -EINVAL; 2053 if (target_bo_gem == bo_gem) 2054 return -EINVAL; 2055 2056 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { 2057 int new_size = bo_gem->softpin_target_size * 2; 2058 if (new_size == 0) 2059 new_size = bufmgr_gem->max_relocs; 2060 2061 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * 2062 sizeof(drm_intel_bo *)); 2063 if (!bo_gem->softpin_target) 2064 return -ENOMEM; 2065 2066 bo_gem->softpin_target_size = new_size; 2067 } 2068 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; 2069 drm_intel_gem_bo_reference(target_bo); 2070 bo_gem->softpin_target_count++; 2071 2072 return 0; 2073} 2074 2075static int 2076drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 2077 drm_intel_bo *target_bo, uint32_t target_offset, 2078 uint32_t read_domains, uint32_t write_domain) 2079{ 2080 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2081 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; 2082 2083 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) 2084 return drm_intel_gem_bo_add_softpin_target(bo, target_bo); 2085 else 2086 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2087 read_domains, write_domain, 2088 !bufmgr_gem->fenced_relocs); 2089} 2090 2091static int 2092drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 
2093 drm_intel_bo *target_bo, 2094 uint32_t target_offset, 2095 uint32_t read_domains, uint32_t write_domain) 2096{ 2097 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2098 read_domains, write_domain, true); 2099} 2100 2101drm_public int 2102drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 2103{ 2104 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2105 2106 return bo_gem->reloc_count; 2107} 2108 2109/** 2110 * Removes existing relocation entries in the BO after "start". 2111 * 2112 * This allows a user to avoid a two-step process for state setup with 2113 * counting up all the buffer objects and doing a 2114 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 2115 * relocations for the state setup. Instead, save the state of the 2116 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 2117 * state, and then check if it still fits in the aperture. 2118 * 2119 * Any further drm_intel_bufmgr_check_aperture_space() queries 2120 * involving this buffer in the tree are undefined after this call. 2121 * 2122 * This also removes all softpinned targets being referenced by the BO. 2123 */ 2124drm_public void 2125drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 2126{ 2127 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2128 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2129 int i; 2130 struct timespec time; 2131 2132 clock_gettime(CLOCK_MONOTONIC, &time); 2133 2134 assert(bo_gem->reloc_count >= start); 2135 2136 /* Unreference the cleared target buffers */ 2137 pthread_mutex_lock(&bufmgr_gem->lock); 2138 2139 for (i = start; i < bo_gem->reloc_count; i++) { 2140 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 2141 if (&target_bo_gem->bo != bo) { 2142 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 2143 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 2144 time.tv_sec); 2145 } 2146 } 2147 bo_gem->reloc_count = start; 2148 2149 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2150 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; 2151 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); 2152 } 2153 bo_gem->softpin_target_count = 0; 2154 2155 pthread_mutex_unlock(&bufmgr_gem->lock); 2156 2157} 2158 2159/** 2160 * Walk the tree of relocations rooted at BO and accumulate the list of 2161 * validations to be performed and update the relocation buffers with 2162 * index values into the validation list. 2163 */ 2164static void 2165drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 2166{ 2167 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2168 int i; 2169 2170 if (bo_gem->relocs == NULL) 2171 return; 2172 2173 for (i = 0; i < bo_gem->reloc_count; i++) { 2174 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2175 2176 if (target_bo == bo) 2177 continue; 2178 2179 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2180 2181 /* Continue walking the tree depth-first. 
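 * (each target's own relocation tree is processed before the target itself
 * is added to the validate list)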
*/ 2182 drm_intel_gem_bo_process_reloc(target_bo); 2183 2184 /* Add the target to the validate list */ 2185 drm_intel_add_validate_buffer(target_bo); 2186 } 2187} 2188 2189static void 2190drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 2191{ 2192 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2193 int i; 2194 2195 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) 2196 return; 2197 2198 for (i = 0; i < bo_gem->reloc_count; i++) { 2199 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2200 int need_fence; 2201 2202 if (target_bo == bo) 2203 continue; 2204 2205 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2206 2207 /* Continue walking the tree depth-first. */ 2208 drm_intel_gem_bo_process_reloc2(target_bo); 2209 2210 need_fence = (bo_gem->reloc_target_info[i].flags & 2211 DRM_INTEL_RELOC_FENCE); 2212 2213 /* Add the target to the validate list */ 2214 drm_intel_add_validate_buffer2(target_bo, need_fence); 2215 } 2216 2217 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2218 drm_intel_bo *target_bo = bo_gem->softpin_target[i]; 2219 2220 if (target_bo == bo) 2221 continue; 2222 2223 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2224 drm_intel_gem_bo_process_reloc2(target_bo); 2225 drm_intel_add_validate_buffer2(target_bo, false); 2226 } 2227} 2228 2229 2230static void 2231drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 2232{ 2233 int i; 2234 2235 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2236 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2237 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2238 2239 /* Update the buffer offset */ 2240 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2241 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2242 bo_gem->gem_handle, bo_gem->name, 2243 upper_32_bits(bo->offset64), 2244 lower_32_bits(bo->offset64), 2245 upper_32_bits(bufmgr_gem->exec_objects[i].offset), 2246 lower_32_bits(bufmgr_gem->exec_objects[i].offset)); 2247 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 2248 bo->offset = bufmgr_gem->exec_objects[i].offset; 2249 } 2250 } 2251} 2252 2253static void 2254drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 2255{ 2256 int i; 2257 2258 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2259 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2260 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2261 2262 /* Update the buffer offset */ 2263 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2264 /* If we're seeing softpinned object here it means that the kernel 2265 * has relocated our object... 
Indicating a programming error 2266 */ 2267 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); 2268 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2269 bo_gem->gem_handle, bo_gem->name, 2270 upper_32_bits(bo->offset64), 2271 lower_32_bits(bo->offset64), 2272 upper_32_bits(bufmgr_gem->exec2_objects[i].offset), 2273 lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); 2274 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 2275 bo->offset = bufmgr_gem->exec2_objects[i].offset; 2276 } 2277 } 2278} 2279 2280drm_public void 2281drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2282 int x1, int y1, int width, int height, 2283 enum aub_dump_bmp_format format, 2284 int pitch, int offset) 2285{ 2286} 2287 2288static int 2289drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2290 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2291{ 2292 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2293 struct drm_i915_gem_execbuffer execbuf; 2294 int ret, i; 2295 2296 if (to_bo_gem(bo)->has_error) 2297 return -ENOMEM; 2298 2299 pthread_mutex_lock(&bufmgr_gem->lock); 2300 /* Update indices and set up the validate list. */ 2301 drm_intel_gem_bo_process_reloc(bo); 2302 2303 /* Add the batch buffer to the validation list. There are no 2304 * relocations pointing to it. 2305 */ 2306 drm_intel_add_validate_buffer(bo); 2307 2308 memclear(execbuf); 2309 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2310 execbuf.buffer_count = bufmgr_gem->exec_count; 2311 execbuf.batch_start_offset = 0; 2312 execbuf.batch_len = used; 2313 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2314 execbuf.num_cliprects = num_cliprects; 2315 execbuf.DR1 = 0; 2316 execbuf.DR4 = DR4; 2317 2318 ret = drmIoctl(bufmgr_gem->fd, 2319 DRM_IOCTL_I915_GEM_EXECBUFFER, 2320 &execbuf); 2321 if (ret != 0) { 2322 ret = -errno; 2323 if (errno == ENOSPC) { 2324 DBG("Execbuffer fails to pin. " 2325 "Estimate: %u. Actual: %u. 
Available: %u\n", 2326 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2327 bufmgr_gem-> 2328 exec_count), 2329 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2330 bufmgr_gem-> 2331 exec_count), 2332 (unsigned int)bufmgr_gem->gtt_size); 2333 } 2334 } 2335 drm_intel_update_buffer_offsets(bufmgr_gem); 2336 2337 if (bufmgr_gem->bufmgr.debug) 2338 drm_intel_gem_dump_validation_list(bufmgr_gem); 2339 2340 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2341 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2342 2343 bo_gem->idle = false; 2344 2345 /* Disconnect the buffer from the validate list */ 2346 bo_gem->validate_index = -1; 2347 bufmgr_gem->exec_bos[i] = NULL; 2348 } 2349 bufmgr_gem->exec_count = 0; 2350 pthread_mutex_unlock(&bufmgr_gem->lock); 2351 2352 return ret; 2353} 2354 2355static int 2356do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2357 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2358 int in_fence, int *out_fence, 2359 unsigned int flags) 2360{ 2361 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2362 struct drm_i915_gem_execbuffer2 execbuf; 2363 int ret = 0; 2364 int i; 2365 2366 if (to_bo_gem(bo)->has_error) 2367 return -ENOMEM; 2368 2369 switch (flags & 0x7) { 2370 default: 2371 return -EINVAL; 2372 case I915_EXEC_BLT: 2373 if (!bufmgr_gem->has_blt) 2374 return -EINVAL; 2375 break; 2376 case I915_EXEC_BSD: 2377 if (!bufmgr_gem->has_bsd) 2378 return -EINVAL; 2379 break; 2380 case I915_EXEC_VEBOX: 2381 if (!bufmgr_gem->has_vebox) 2382 return -EINVAL; 2383 break; 2384 case I915_EXEC_RENDER: 2385 case I915_EXEC_DEFAULT: 2386 break; 2387 } 2388 2389 pthread_mutex_lock(&bufmgr_gem->lock); 2390 /* Update indices and set up the validate list. */ 2391 drm_intel_gem_bo_process_reloc2(bo); 2392 2393 /* Add the batch buffer to the validation list. There are no relocations 2394 * pointing to it. 2395 */ 2396 drm_intel_add_validate_buffer2(bo, 0); 2397 2398 memclear(execbuf); 2399 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2400 execbuf.buffer_count = bufmgr_gem->exec_count; 2401 execbuf.batch_start_offset = 0; 2402 execbuf.batch_len = used; 2403 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2404 execbuf.num_cliprects = num_cliprects; 2405 execbuf.DR1 = 0; 2406 execbuf.DR4 = DR4; 2407 execbuf.flags = flags; 2408 if (ctx == NULL) 2409 i915_execbuffer2_set_context_id(execbuf, 0); 2410 else 2411 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2412 execbuf.rsvd2 = 0; 2413 if (in_fence != -1) { 2414 execbuf.rsvd2 = in_fence; 2415 execbuf.flags |= I915_EXEC_FENCE_IN; 2416 } 2417 if (out_fence != NULL) { 2418 *out_fence = -1; 2419 execbuf.flags |= I915_EXEC_FENCE_OUT; 2420 } 2421 2422 if (bufmgr_gem->no_exec) 2423 goto skip_execution; 2424 2425 ret = drmIoctl(bufmgr_gem->fd, 2426 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, 2427 &execbuf); 2428 if (ret != 0) { 2429 ret = -errno; 2430 if (ret == -ENOSPC) { 2431 DBG("Execbuffer fails to pin. " 2432 "Estimate: %u. Actual: %u. 
Available: %u\n", 2433 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2434 bufmgr_gem->exec_count), 2435 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2436 bufmgr_gem->exec_count), 2437 (unsigned int) bufmgr_gem->gtt_size); 2438 } 2439 } 2440 drm_intel_update_buffer_offsets2(bufmgr_gem); 2441 2442 if (ret == 0 && out_fence != NULL) 2443 *out_fence = execbuf.rsvd2 >> 32; 2444 2445skip_execution: 2446 if (bufmgr_gem->bufmgr.debug) 2447 drm_intel_gem_dump_validation_list(bufmgr_gem); 2448 2449 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2450 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2451 2452 bo_gem->idle = false; 2453 2454 /* Disconnect the buffer from the validate list */ 2455 bo_gem->validate_index = -1; 2456 bufmgr_gem->exec_bos[i] = NULL; 2457 } 2458 bufmgr_gem->exec_count = 0; 2459 pthread_mutex_unlock(&bufmgr_gem->lock); 2460 2461 return ret; 2462} 2463 2464static int 2465drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2466 drm_clip_rect_t *cliprects, int num_cliprects, 2467 int DR4) 2468{ 2469 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2470 -1, NULL, I915_EXEC_RENDER); 2471} 2472 2473static int 2474drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2475 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2476 unsigned int flags) 2477{ 2478 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2479 -1, NULL, flags); 2480} 2481 2482drm_public int 2483drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2484 int used, unsigned int flags) 2485{ 2486 return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); 2487} 2488 2489drm_public int 2490drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, 2491 drm_intel_context *ctx, 2492 int used, 2493 int in_fence, 2494 int *out_fence, 2495 unsigned int flags) 2496{ 2497 return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); 2498} 2499 2500static int 2501drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2502{ 2503 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2504 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2505 struct drm_i915_gem_pin pin; 2506 int ret; 2507 2508 memclear(pin); 2509 pin.handle = bo_gem->gem_handle; 2510 pin.alignment = alignment; 2511 2512 ret = drmIoctl(bufmgr_gem->fd, 2513 DRM_IOCTL_I915_GEM_PIN, 2514 &pin); 2515 if (ret != 0) 2516 return -errno; 2517 2518 bo->offset64 = pin.offset; 2519 bo->offset = pin.offset; 2520 return 0; 2521} 2522 2523static int 2524drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2525{ 2526 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2527 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2528 struct drm_i915_gem_unpin unpin; 2529 int ret; 2530 2531 memclear(unpin); 2532 unpin.handle = bo_gem->gem_handle; 2533 2534 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2535 if (ret != 0) 2536 return -errno; 2537 2538 return 0; 2539} 2540 2541static int 2542drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2543 uint32_t tiling_mode, 2544 uint32_t stride) 2545{ 2546 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2547 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2548 struct drm_i915_gem_set_tiling set_tiling; 2549 int ret; 2550 2551 if (bo_gem->global_name == 0 && 2552 tiling_mode == bo_gem->tiling_mode && 2553 stride == bo_gem->stride) 2554 return 0; 2555 2556 memset(&set_tiling, 0, sizeof(set_tiling)); 2557 do { 2558 /* set_tiling is slightly broken and overwrites the 2559 * input on 
the error path, so we have to open code 2560 * rmIoctl. 2561 */ 2562 set_tiling.handle = bo_gem->gem_handle; 2563 set_tiling.tiling_mode = tiling_mode; 2564 set_tiling.stride = stride; 2565 2566 ret = ioctl(bufmgr_gem->fd, 2567 DRM_IOCTL_I915_GEM_SET_TILING, 2568 &set_tiling); 2569 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2570 if (ret == -1) 2571 return -errno; 2572 2573 bo_gem->tiling_mode = set_tiling.tiling_mode; 2574 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2575 bo_gem->stride = set_tiling.stride; 2576 return 0; 2577} 2578 2579static int 2580drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2581 uint32_t stride) 2582{ 2583 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2584 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2585 int ret; 2586 2587 /* Tiling with userptr surfaces is not supported 2588 * on all hardware so refuse it for time being. 2589 */ 2590 if (bo_gem->is_userptr) 2591 return -EINVAL; 2592 2593 /* Linear buffers have no stride. By ensuring that we only ever use 2594 * stride 0 with linear buffers, we simplify our code. 2595 */ 2596 if (*tiling_mode == I915_TILING_NONE) 2597 stride = 0; 2598 2599 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2600 if (ret == 0) 2601 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2602 2603 *tiling_mode = bo_gem->tiling_mode; 2604 return ret; 2605} 2606 2607static int 2608drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2609 uint32_t * swizzle_mode) 2610{ 2611 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2612 2613 *tiling_mode = bo_gem->tiling_mode; 2614 *swizzle_mode = bo_gem->swizzle_mode; 2615 return 0; 2616} 2617 2618static int 2619drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) 2620{ 2621 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2622 2623 bo->offset64 = offset; 2624 bo->offset = offset; 2625 bo_gem->kflags |= EXEC_OBJECT_PINNED; 2626 2627 return 0; 2628} 2629 2630drm_public drm_intel_bo * 2631drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2632{ 2633 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2634 int ret; 2635 uint32_t handle; 2636 drm_intel_bo_gem *bo_gem; 2637 struct drm_i915_gem_get_tiling get_tiling; 2638 2639 pthread_mutex_lock(&bufmgr_gem->lock); 2640 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2641 if (ret) { 2642 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); 2643 pthread_mutex_unlock(&bufmgr_gem->lock); 2644 return NULL; 2645 } 2646 2647 /* 2648 * See if the kernel has already returned this buffer to us. Just as 2649 * for named buffers, we must not create two bo's pointing at the same 2650 * kernel object 2651 */ 2652 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 2653 &handle, sizeof(handle), bo_gem); 2654 if (bo_gem) { 2655 drm_intel_gem_bo_reference(&bo_gem->bo); 2656 goto out; 2657 } 2658 2659 bo_gem = calloc(1, sizeof(*bo_gem)); 2660 if (!bo_gem) 2661 goto out; 2662 2663 atomic_set(&bo_gem->refcount, 1); 2664 DRMINITLISTHEAD(&bo_gem->vma_list); 2665 2666 /* Determine size of bo. The fd-to-handle ioctl really should 2667 * return the size, but it doesn't. If we have kernel 3.12 or 2668 * later, we can lseek on the prime fd to get the size. Older 2669 * kernels will just fail, in which case we fall back to the 2670 * provided (estimated or guess size). 
*/ 2671 ret = lseek(prime_fd, 0, SEEK_END); 2672 if (ret != -1) 2673 bo_gem->bo.size = ret; 2674 else 2675 bo_gem->bo.size = size; 2676 2677 bo_gem->bo.handle = handle; 2678 bo_gem->bo.bufmgr = bufmgr; 2679 2680 bo_gem->gem_handle = handle; 2681 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 2682 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 2683 2684 bo_gem->name = "prime"; 2685 bo_gem->validate_index = -1; 2686 bo_gem->reloc_tree_fences = 0; 2687 bo_gem->used_as_reloc_target = false; 2688 bo_gem->has_error = false; 2689 bo_gem->reusable = false; 2690 2691 memclear(get_tiling); 2692 get_tiling.handle = bo_gem->gem_handle; 2693 if (drmIoctl(bufmgr_gem->fd, 2694 DRM_IOCTL_I915_GEM_GET_TILING, 2695 &get_tiling)) 2696 goto err; 2697 2698 bo_gem->tiling_mode = get_tiling.tiling_mode; 2699 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2700 /* XXX stride is unknown */ 2701 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2702 2703out: 2704 pthread_mutex_unlock(&bufmgr_gem->lock); 2705 return &bo_gem->bo; 2706 2707err: 2708 drm_intel_gem_bo_free(&bo_gem->bo); 2709 pthread_mutex_unlock(&bufmgr_gem->lock); 2710 return NULL; 2711} 2712 2713drm_public int 2714drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2715{ 2716 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2717 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2718 2719 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2720 DRM_CLOEXEC, prime_fd) != 0) 2721 return -errno; 2722 2723 bo_gem->reusable = false; 2724 2725 return 0; 2726} 2727 2728static int 2729drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2730{ 2731 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2732 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2733 2734 if (!bo_gem->global_name) { 2735 struct drm_gem_flink flink; 2736 2737 memclear(flink); 2738 flink.handle = bo_gem->gem_handle; 2739 if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) 2740 return -errno; 2741 2742 pthread_mutex_lock(&bufmgr_gem->lock); 2743 if (!bo_gem->global_name) { 2744 bo_gem->global_name = flink.name; 2745 bo_gem->reusable = false; 2746 2747 HASH_ADD(name_hh, bufmgr_gem->name_table, 2748 global_name, sizeof(bo_gem->global_name), 2749 bo_gem); 2750 } 2751 pthread_mutex_unlock(&bufmgr_gem->lock); 2752 } 2753 2754 *name = bo_gem->global_name; 2755 return 0; 2756} 2757 2758/** 2759 * Enables unlimited caching of buffer objects for reuse. 2760 * 2761 * This is potentially very memory expensive, as the cache at each bucket 2762 * size is only bounded by how many buffers of that size we've managed to have 2763 * in flight at once. 2764 */ 2765drm_public void 2766drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2767{ 2768 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2769 2770 bufmgr_gem->bo_reuse = true; 2771} 2772 2773/** 2774 * Disables implicit synchronisation before executing the bo 2775 * 2776 * This will cause rendering corruption unless you correctly manage explicit 2777 * fences for all rendering involving this buffer - including use by others. 2778 * Disabling the implicit serialisation is only required if that serialisation 2779 * is too coarse (for example, you have split the buffer into many 2780 * non-overlapping regions and are sharing the whole buffer between concurrent 2781 * independent command streams). 
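 *
 * A hedged usage sketch (illustrative only; "bufmgr" and "bo" are assumed to
 * have been created through this API):
 *
 *	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_intel_gem_bo_disable_implicit_sync(bo);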
2782 * 2783 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, 2784 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, 2785 * or subsequent execbufs involving the bo will generate EINVAL. 2786 */ 2787drm_public void 2788drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) 2789{ 2790 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2791 2792 bo_gem->kflags |= EXEC_OBJECT_ASYNC; 2793} 2794 2795/** 2796 * Enables implicit synchronisation before executing the bo 2797 * 2798 * This is the default behaviour of the kernel, to wait upon prior writes 2799 * completing on the object before rendering with it, or to wait for prior 2800 * reads to complete before writing into the object. 2801 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling 2802 * the kernel never to insert a stall before using the object. Then this 2803 * function can be used to restore the implicit sync before subsequent 2804 * rendering. 2805 */ 2806drm_public void 2807drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) 2808{ 2809 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2810 2811 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; 2812} 2813 2814/** 2815 * Query whether the kernel supports disabling of its implicit synchronisation 2816 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() 2817 */ 2818drm_public int 2819drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) 2820{ 2821 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2822 2823 return bufmgr_gem->has_exec_async; 2824} 2825 2826/** 2827 * Enable use of fenced reloc type. 2828 * 2829 * New code should enable this to avoid unnecessary fence register 2830 * allocation. If this option is not enabled, all relocs will have fence 2831 * register allocated. 2832 */ 2833drm_public void 2834drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2835{ 2836 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2837 2838 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2839 bufmgr_gem->fenced_relocs = true; 2840} 2841 2842/** 2843 * Return the additional aperture space required by the tree of buffer objects 2844 * rooted at bo. 2845 */ 2846static int 2847drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2848{ 2849 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2850 int i; 2851 int total = 0; 2852 2853 if (bo == NULL || bo_gem->included_in_check_aperture) 2854 return 0; 2855 2856 total += bo->size; 2857 bo_gem->included_in_check_aperture = true; 2858 2859 for (i = 0; i < bo_gem->reloc_count; i++) 2860 total += 2861 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2862 reloc_target_info[i].bo); 2863 2864 return total; 2865} 2866 2867/** 2868 * Count the number of buffers in this list that need a fence reg 2869 * 2870 * If the count is greater than the number of available regs, we'll have 2871 * to ask the caller to resubmit a batch with fewer tiled buffers. 2872 * 2873 * This function over-counts if the same buffer is used multiple times. 
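 * (for example, two batch-level buffers whose relocation trees both pull in
 * the same tiled buffer will count that buffer's fence twice)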
2874 */ 2875static unsigned int 2876drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2877{ 2878 int i; 2879 unsigned int total = 0; 2880 2881 for (i = 0; i < count; i++) { 2882 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2883 2884 if (bo_gem == NULL) 2885 continue; 2886 2887 total += bo_gem->reloc_tree_fences; 2888 } 2889 return total; 2890} 2891 2892/** 2893 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2894 * for the next drm_intel_bufmgr_check_aperture_space() call. 2895 */ 2896static void 2897drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2898{ 2899 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2900 int i; 2901 2902 if (bo == NULL || !bo_gem->included_in_check_aperture) 2903 return; 2904 2905 bo_gem->included_in_check_aperture = false; 2906 2907 for (i = 0; i < bo_gem->reloc_count; i++) 2908 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2909 reloc_target_info[i].bo); 2910} 2911 2912/** 2913 * Return a conservative estimate for the amount of aperture required 2914 * for a collection of buffers. This may double-count some buffers. 2915 */ 2916static unsigned int 2917drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2918{ 2919 int i; 2920 unsigned int total = 0; 2921 2922 for (i = 0; i < count; i++) { 2923 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2924 if (bo_gem != NULL) 2925 total += bo_gem->reloc_tree_size; 2926 } 2927 return total; 2928} 2929 2930/** 2931 * Return the amount of aperture needed for a collection of buffers. 2932 * This avoids double counting any buffers, at the cost of looking 2933 * at every buffer in the set. 2934 */ 2935static unsigned int 2936drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2937{ 2938 int i; 2939 unsigned int total = 0; 2940 2941 for (i = 0; i < count; i++) { 2942 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2943 /* For the first buffer object in the array, we get an 2944 * accurate count back for its reloc_tree size (since nothing 2945 * had been flagged as being counted yet). We can save that 2946 * value out as a more conservative reloc_tree_size that 2947 * avoids double-counting target buffers. Since the first 2948 * buffer happens to usually be the batch buffer in our 2949 * callers, this can pull us back from doing the tree 2950 * walk on every new batch emit. 2951 */ 2952 if (i == 0) { 2953 drm_intel_bo_gem *bo_gem = 2954 (drm_intel_bo_gem *) bo_array[i]; 2955 bo_gem->reloc_tree_size = total; 2956 } 2957 } 2958 2959 for (i = 0; i < count; i++) 2960 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2961 return total; 2962} 2963 2964/** 2965 * Return -1 if the batchbuffer should be flushed before attempting to 2966 * emit rendering referencing the buffers pointed to by bo_array. 2967 * 2968 * This is required because if we try to emit a batchbuffer with relocations 2969 * to a tree of buffers that won't simultaneously fit in the aperture, 2970 * the rendering will return an error at a point where the software is not 2971 * prepared to recover from it. 2972 * 2973 * However, we also want to emit the batchbuffer significantly before we reach 2974 * the limit, as a series of batchbuffers each of which references buffers 2975 * covering almost all of the aperture means that at each emit we end up 2976 * waiting to evict a buffer from the last rendering, and we get synchronous 2977 * performance. 
By emitting smaller batchbuffers, we eat some CPU overhead to 2978 * get better parallelism. 2979 */ 2980static int 2981drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2982{ 2983 drm_intel_bufmgr_gem *bufmgr_gem = 2984 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2985 unsigned int total = 0; 2986 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2987 int total_fences; 2988 2989 /* Check for fence reg constraints if necessary */ 2990 if (bufmgr_gem->available_fences) { 2991 total_fences = drm_intel_gem_total_fences(bo_array, count); 2992 if (total_fences > bufmgr_gem->available_fences) 2993 return -ENOSPC; 2994 } 2995 2996 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2997 2998 if (total > threshold) 2999 total = drm_intel_gem_compute_batch_space(bo_array, count); 3000 3001 if (total > threshold) { 3002 DBG("check_space: overflowed available aperture, " 3003 "%dkb vs %dkb\n", 3004 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 3005 return -ENOSPC; 3006 } else { 3007 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 3008 (int)bufmgr_gem->gtt_size / 1024); 3009 return 0; 3010 } 3011} 3012 3013/* 3014 * Disable buffer reuse for objects which are shared with the kernel 3015 * as scanout buffers 3016 */ 3017static int 3018drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 3019{ 3020 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3021 3022 bo_gem->reusable = false; 3023 return 0; 3024} 3025 3026static int 3027drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 3028{ 3029 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3030 3031 return bo_gem->reusable; 3032} 3033 3034static int 3035_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3036{ 3037 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3038 int i; 3039 3040 for (i = 0; i < bo_gem->reloc_count; i++) { 3041 if (bo_gem->reloc_target_info[i].bo == target_bo) 3042 return 1; 3043 if (bo == bo_gem->reloc_target_info[i].bo) 3044 continue; 3045 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 3046 target_bo)) 3047 return 1; 3048 } 3049 3050 for (i = 0; i< bo_gem->softpin_target_count; i++) { 3051 if (bo_gem->softpin_target[i] == target_bo) 3052 return 1; 3053 if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) 3054 return 1; 3055 } 3056 3057 return 0; 3058} 3059 3060/** Return true if target_bo is referenced by bo's relocation tree. */ 3061static int 3062drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3063{ 3064 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 3065 3066 if (bo == NULL || target_bo == NULL) 3067 return 0; 3068 if (target_bo_gem->used_as_reloc_target) 3069 return _drm_intel_gem_bo_references(bo, target_bo); 3070 return 0; 3071} 3072 3073static void 3074add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 3075{ 3076 unsigned int i = bufmgr_gem->num_buckets; 3077 3078 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 3079 3080 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 3081 bufmgr_gem->cache_bucket[i].size = size; 3082 bufmgr_gem->num_buckets++; 3083} 3084 3085static void 3086init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3087{ 3088 unsigned long size, cache_max_size = 64 * 1024 * 1024; 3089 3090 /* OK, so power of two buckets was too wasteful of memory. 3091 * Give 3 other sizes between each power of two, to hopefully 3092 * cover things accurately enough. 
(The alternative is 3093 * probably to just go for exact matching of sizes, and assume 3094 * that for things like composited window resize the tiled 3095 * width/height alignment and rounding of sizes to pages will 3096 * get us useful cache hit rates anyway) 3097 */ 3098 add_bucket(bufmgr_gem, 4096); 3099 add_bucket(bufmgr_gem, 4096 * 2); 3100 add_bucket(bufmgr_gem, 4096 * 3); 3101 3102 /* Initialize the linked lists for BO reuse cache. */ 3103 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3104 add_bucket(bufmgr_gem, size); 3105 3106 add_bucket(bufmgr_gem, size + size * 1 / 4); 3107 add_bucket(bufmgr_gem, size + size * 2 / 4); 3108 add_bucket(bufmgr_gem, size + size * 3 / 4); 3109 } 3110} 3111 3112drm_public void 3113drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 3114{ 3115 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3116 3117 bufmgr_gem->vma_max = limit; 3118 3119 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 3120} 3121 3122static int 3123parse_devid_override(const char *devid_override) 3124{ 3125 static const struct { 3126 const char *name; 3127 int pci_id; 3128 } name_map[] = { 3129 { "brw", PCI_CHIP_I965_GM }, 3130 { "g4x", PCI_CHIP_GM45_GM }, 3131 { "ilk", PCI_CHIP_ILD_G }, 3132 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, 3133 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, 3134 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, 3135 { "byt", PCI_CHIP_VALLEYVIEW_3 }, 3136 { "bdw", 0x1620 | BDW_ULX }, 3137 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, 3138 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, 3139 }; 3140 unsigned int i; 3141 3142 for (i = 0; i < ARRAY_SIZE(name_map); i++) { 3143 if (!strcmp(name_map[i].name, devid_override)) 3144 return name_map[i].pci_id; 3145 } 3146 3147 return strtod(devid_override, NULL); 3148} 3149 3150/** 3151 * Get the PCI ID for the device. This can be overridden by setting the 3152 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 3153 */ 3154static int 3155get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 3156{ 3157 char *devid_override; 3158 int devid = 0; 3159 int ret; 3160 drm_i915_getparam_t gp; 3161 3162 if (geteuid() == getuid()) { 3163 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 3164 if (devid_override) { 3165 bufmgr_gem->no_exec = true; 3166 return parse_devid_override(devid_override); 3167 } 3168 } 3169 3170 memclear(gp); 3171 gp.param = I915_PARAM_CHIPSET_ID; 3172 gp.value = &devid; 3173 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3174 if (ret) { 3175 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 3176 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 3177 } 3178 return devid; 3179} 3180 3181drm_public int 3182drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 3183{ 3184 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3185 3186 return bufmgr_gem->pci_device; 3187} 3188 3189/** 3190 * Sets the AUB filename. 3191 * 3192 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 3193 * for it to have any effect. 3194 */ 3195drm_public void 3196drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 3197 const char *filename) 3198{ 3199} 3200 3201/** 3202 * Sets up AUB dumping. 3203 * 3204 * This is a trace file format that can be used with the simulator. 3205 * Packets are emitted in a format somewhat like GPU command packets. 3206 * You can set up a GTT and upload your objects into the referenced 3207 * space, then send off batchbuffers and get BMPs out the other end. 
3208 */ 3209drm_public void 3210drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 3211{ 3212 fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" 3213 "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" 3214 "then run (for example)\n\n" 3215 "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" 3216 "See the intel_aubdump man page for more details.\n"); 3217} 3218 3219drm_public drm_intel_context * 3220drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3221{ 3222 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3223 struct drm_i915_gem_context_create create; 3224 drm_intel_context *context = NULL; 3225 int ret; 3226 3227 context = calloc(1, sizeof(*context)); 3228 if (!context) 3229 return NULL; 3230 3231 memclear(create); 3232 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3233 if (ret != 0) { 3234 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3235 strerror(errno)); 3236 free(context); 3237 return NULL; 3238 } 3239 3240 context->ctx_id = create.ctx_id; 3241 context->bufmgr = bufmgr; 3242 3243 return context; 3244} 3245 3246drm_public int 3247drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) 3248{ 3249 if (ctx == NULL) 3250 return -EINVAL; 3251 3252 *ctx_id = ctx->ctx_id; 3253 3254 return 0; 3255} 3256 3257drm_public void 3258drm_intel_gem_context_destroy(drm_intel_context *ctx) 3259{ 3260 drm_intel_bufmgr_gem *bufmgr_gem; 3261 struct drm_i915_gem_context_destroy destroy; 3262 int ret; 3263 3264 if (ctx == NULL) 3265 return; 3266 3267 memclear(destroy); 3268 3269 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3270 destroy.ctx_id = ctx->ctx_id; 3271 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3272 &destroy); 3273 if (ret != 0) 3274 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3275 strerror(errno)); 3276 3277 free(ctx); 3278} 3279 3280drm_public int 3281drm_intel_get_reset_stats(drm_intel_context *ctx, 3282 uint32_t *reset_count, 3283 uint32_t *active, 3284 uint32_t *pending) 3285{ 3286 drm_intel_bufmgr_gem *bufmgr_gem; 3287 struct drm_i915_reset_stats stats; 3288 int ret; 3289 3290 if (ctx == NULL) 3291 return -EINVAL; 3292 3293 memclear(stats); 3294 3295 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3296 stats.ctx_id = ctx->ctx_id; 3297 ret = drmIoctl(bufmgr_gem->fd, 3298 DRM_IOCTL_I915_GET_RESET_STATS, 3299 &stats); 3300 if (ret == 0) { 3301 if (reset_count != NULL) 3302 *reset_count = stats.reset_count; 3303 3304 if (active != NULL) 3305 *active = stats.batch_active; 3306 3307 if (pending != NULL) 3308 *pending = stats.batch_pending; 3309 } 3310 3311 return ret; 3312} 3313 3314drm_public int 3315drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3316 uint32_t offset, 3317 uint64_t *result) 3318{ 3319 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3320 struct drm_i915_reg_read reg_read; 3321 int ret; 3322 3323 memclear(reg_read); 3324 reg_read.offset = offset; 3325 3326 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 3327 3328 *result = reg_read.val; 3329 return ret; 3330} 3331 3332drm_public int 3333drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3334{ 3335 drm_i915_getparam_t gp; 3336 int ret; 3337 3338 memclear(gp); 3339 gp.value = (int*)subslice_total; 3340 gp.param = I915_PARAM_SUBSLICE_TOTAL; 3341 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3342 if (ret) 3343 return -errno; 3344 3345 return 0; 3346} 3347 3348drm_public int 
3349drm_intel_get_eu_total(int fd, unsigned int *eu_total) 3350{ 3351 drm_i915_getparam_t gp; 3352 int ret; 3353 3354 memclear(gp); 3355 gp.value = (int*)eu_total; 3356 gp.param = I915_PARAM_EU_TOTAL; 3357 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3358 if (ret) 3359 return -errno; 3360 3361 return 0; 3362} 3363 3364drm_public int 3365drm_intel_get_pooled_eu(int fd) 3366{ 3367 drm_i915_getparam_t gp; 3368 int ret = -1; 3369 3370 memclear(gp); 3371 gp.param = I915_PARAM_HAS_POOLED_EU; 3372 gp.value = &ret; 3373 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3374 return -errno; 3375 3376 return ret; 3377} 3378 3379drm_public int 3380drm_intel_get_min_eu_in_pool(int fd) 3381{ 3382 drm_i915_getparam_t gp; 3383 int ret = -1; 3384 3385 memclear(gp); 3386 gp.param = I915_PARAM_MIN_EU_IN_POOL; 3387 gp.value = &ret; 3388 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3389 return -errno; 3390 3391 return ret; 3392} 3393 3394/** 3395 * Annotate the given bo for use in aub dumping. 3396 * 3397 * \param annotations is an array of drm_intel_aub_annotation objects 3398 * describing the type of data in various sections of the bo. Each 3399 * element of the array specifies the type and subtype of a section of 3400 * the bo, and the past-the-end offset of that section. The elements 3401 * of \c annotations must be sorted so that ending_offset is 3402 * increasing. 3403 * 3404 * \param count is the number of elements in the \c annotations array. 3405 * If \c count is zero, then \c annotations will not be dereferenced. 3406 * 3407 * Annotations are copied into a private data structure, so caller may 3408 * re-use the memory pointed to by \c annotations after the call 3409 * returns. 3410 * 3411 * Annotations are stored for the lifetime of the bo; to reset to the 3412 * default state (no annotations), call this function with a \c count 3413 * of zero. 
3414 */ 3415drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3416 drm_intel_aub_annotation *annotations, 3417 unsigned count) 3418{ 3419} 3420 3421static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; 3422static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; 3423 3424static drm_intel_bufmgr_gem * 3425drm_intel_bufmgr_gem_find(int fd) 3426{ 3427 drm_intel_bufmgr_gem *bufmgr_gem; 3428 3429 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { 3430 if (bufmgr_gem->fd == fd) { 3431 atomic_inc(&bufmgr_gem->refcount); 3432 return bufmgr_gem; 3433 } 3434 } 3435 3436 return NULL; 3437} 3438 3439static void 3440drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) 3441{ 3442 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3443 3444 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { 3445 pthread_mutex_lock(&bufmgr_list_mutex); 3446 3447 if (atomic_dec_and_test(&bufmgr_gem->refcount)) { 3448 DRMLISTDEL(&bufmgr_gem->managers); 3449 drm_intel_bufmgr_gem_destroy(bufmgr); 3450 } 3451 3452 pthread_mutex_unlock(&bufmgr_list_mutex); 3453 } 3454} 3455 3456drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) 3457{ 3458 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3459 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3460 3461 if (bo_gem->gtt_virtual) 3462 return bo_gem->gtt_virtual; 3463 3464 if (bo_gem->is_userptr) 3465 return NULL; 3466 3467 pthread_mutex_lock(&bufmgr_gem->lock); 3468 if (bo_gem->gtt_virtual == NULL) { 3469 struct drm_i915_gem_mmap_gtt mmap_arg; 3470 void *ptr; 3471 3472 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 3473 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3474 3475 if (bo_gem->map_count++ == 0) 3476 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3477 3478 memclear(mmap_arg); 3479 mmap_arg.handle = bo_gem->gem_handle; 3480 3481 /* Get the fake offset back... 
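 * (DRM_IOCTL_I915_GEM_MMAP_GTT only returns a fake offset; the actual
 * mapping through the GTT aperture is created by passing that offset to
 * mmap() on the DRM fd below)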
*/ 3482 ptr = MAP_FAILED; 3483 if (drmIoctl(bufmgr_gem->fd, 3484 DRM_IOCTL_I915_GEM_MMAP_GTT, 3485 &mmap_arg) == 0) { 3486 /* and mmap it */ 3487 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, 3488 MAP_SHARED, bufmgr_gem->fd, 3489 mmap_arg.offset); 3490 } 3491 if (ptr == MAP_FAILED) { 3492 if (--bo_gem->map_count == 0) 3493 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3494 ptr = NULL; 3495 } 3496 3497 bo_gem->gtt_virtual = ptr; 3498 } 3499 pthread_mutex_unlock(&bufmgr_gem->lock); 3500 3501 return bo_gem->gtt_virtual; 3502} 3503 3504drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) 3505{ 3506 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3507 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3508 3509 if (bo_gem->mem_virtual) 3510 return bo_gem->mem_virtual; 3511 3512 if (bo_gem->is_userptr) { 3513 /* Return the same user ptr */ 3514 return bo_gem->user_virtual; 3515 } 3516 3517 pthread_mutex_lock(&bufmgr_gem->lock); 3518 if (!bo_gem->mem_virtual) { 3519 struct drm_i915_gem_mmap mmap_arg; 3520 3521 if (bo_gem->map_count++ == 0) 3522 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3523 3524 DBG("bo_map: %d (%s), map_count=%d\n", 3525 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3526 3527 memclear(mmap_arg); 3528 mmap_arg.handle = bo_gem->gem_handle; 3529 mmap_arg.size = bo->size; 3530 if (drmIoctl(bufmgr_gem->fd, 3531 DRM_IOCTL_I915_GEM_MMAP, 3532 &mmap_arg)) { 3533 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3534 __FILE__, __LINE__, bo_gem->gem_handle, 3535 bo_gem->name, strerror(errno)); 3536 if (--bo_gem->map_count == 0) 3537 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3538 } else { 3539 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3540 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3541 } 3542 } 3543 pthread_mutex_unlock(&bufmgr_gem->lock); 3544 3545 return bo_gem->mem_virtual; 3546} 3547 3548drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) 3549{ 3550 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3551 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3552 3553 if (bo_gem->wc_virtual) 3554 return bo_gem->wc_virtual; 3555 3556 if (bo_gem->is_userptr) 3557 return NULL; 3558 3559 pthread_mutex_lock(&bufmgr_gem->lock); 3560 if (!bo_gem->wc_virtual) { 3561 struct drm_i915_gem_mmap mmap_arg; 3562 3563 if (bo_gem->map_count++ == 0) 3564 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3565 3566 DBG("bo_map: %d (%s), map_count=%d\n", 3567 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3568 3569 memclear(mmap_arg); 3570 mmap_arg.handle = bo_gem->gem_handle; 3571 mmap_arg.size = bo->size; 3572 mmap_arg.flags = I915_MMAP_WC; 3573 if (drmIoctl(bufmgr_gem->fd, 3574 DRM_IOCTL_I915_GEM_MMAP, 3575 &mmap_arg)) { 3576 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3577 __FILE__, __LINE__, bo_gem->gem_handle, 3578 bo_gem->name, strerror(errno)); 3579 if (--bo_gem->map_count == 0) 3580 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3581 } else { 3582 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3583 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3584 } 3585 } 3586 pthread_mutex_unlock(&bufmgr_gem->lock); 3587 3588 return bo_gem->wc_virtual; 3589} 3590 3591/** 3592 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3593 * and manage map buffer objections. 3594 * 3595 * \param fd File descriptor of the opened DRM device. 
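 * \param batch_size Size in bytes of the batchbuffers the caller intends to
 * submit; in this implementation it is only used to derive the per-buffer
 * relocation limit (max_relocs).
 *
 * A minimal usage sketch (illustrative only, not part of the library; error
 * handling is omitted and "fd" is assumed to be an already-open i915 DRM
 * device node):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	... allocate, map and execute buffer objects ...
 *	drm_intel_bufmgr_destroy(bufmgr);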
3596 */ 3597drm_public drm_intel_bufmgr * 3598drm_intel_bufmgr_gem_init(int fd, int batch_size) 3599{ 3600 drm_intel_bufmgr_gem *bufmgr_gem; 3601 struct drm_i915_gem_get_aperture aperture; 3602 drm_i915_getparam_t gp; 3603 int ret, tmp; 3604 bool exec2 = false; 3605 3606 pthread_mutex_lock(&bufmgr_list_mutex); 3607 3608 bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3609 if (bufmgr_gem) 3610 goto exit; 3611 3612 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3613 if (bufmgr_gem == NULL) 3614 goto exit; 3615 3616 bufmgr_gem->fd = fd; 3617 atomic_set(&bufmgr_gem->refcount, 1); 3618 3619 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3620 free(bufmgr_gem); 3621 bufmgr_gem = NULL; 3622 goto exit; 3623 } 3624 3625 memclear(aperture); 3626 ret = drmIoctl(bufmgr_gem->fd, 3627 DRM_IOCTL_I915_GEM_GET_APERTURE, 3628 &aperture); 3629 3630 if (ret == 0) 3631 bufmgr_gem->gtt_size = aperture.aper_available_size; 3632 else { 3633 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3634 strerror(errno)); 3635 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3636 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3637 "May lead to reduced performance or incorrect " 3638 "rendering.\n", 3639 (int)bufmgr_gem->gtt_size / 1024); 3640 } 3641 3642 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3643 3644 if (IS_GEN2(bufmgr_gem->pci_device)) 3645 bufmgr_gem->gen = 2; 3646 else if (IS_GEN3(bufmgr_gem->pci_device)) 3647 bufmgr_gem->gen = 3; 3648 else if (IS_GEN4(bufmgr_gem->pci_device)) 3649 bufmgr_gem->gen = 4; 3650 else if (IS_GEN5(bufmgr_gem->pci_device)) 3651 bufmgr_gem->gen = 5; 3652 else if (IS_GEN6(bufmgr_gem->pci_device)) 3653 bufmgr_gem->gen = 6; 3654 else if (IS_GEN7(bufmgr_gem->pci_device)) 3655 bufmgr_gem->gen = 7; 3656 else if (IS_GEN8(bufmgr_gem->pci_device)) 3657 bufmgr_gem->gen = 8; 3658 else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) { 3659 free(bufmgr_gem); 3660 bufmgr_gem = NULL; 3661 goto exit; 3662 } 3663 3664 if (IS_GEN3(bufmgr_gem->pci_device) && 3665 bufmgr_gem->gtt_size > 256*1024*1024) { 3666 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3667 * be used for tiled blits. To simplify the accounting, just 3668 * subtract the unmappable part (fixed to 256MB on all known 3669 * gen3 devices) if the kernel advertises it. 
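 * (e.g. a reported 512MB of available aperture is accounted as 256MB of
 * tile-blittable space here)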
*/ 3670 bufmgr_gem->gtt_size -= 256*1024*1024; 3671 } 3672 3673 memclear(gp); 3674 gp.value = &tmp; 3675 3676 gp.param = I915_PARAM_HAS_EXECBUF2; 3677 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3678 if (!ret) 3679 exec2 = true; 3680 3681 gp.param = I915_PARAM_HAS_BSD; 3682 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3683 bufmgr_gem->has_bsd = ret == 0; 3684 3685 gp.param = I915_PARAM_HAS_BLT; 3686 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3687 bufmgr_gem->has_blt = ret == 0; 3688 3689 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3690 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3691 bufmgr_gem->has_relaxed_fencing = ret == 0; 3692 3693 gp.param = I915_PARAM_HAS_EXEC_ASYNC; 3694 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3695 bufmgr_gem->has_exec_async = ret == 0; 3696 3697 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; 3698 3699 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3700 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3701 bufmgr_gem->has_wait_timeout = ret == 0; 3702 3703 gp.param = I915_PARAM_HAS_LLC; 3704 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3705 if (ret != 0) { 3706 /* Kernel does not supports HAS_LLC query, fallback to GPU 3707 * generation detection and assume that we have LLC on GEN6/7 3708 */ 3709 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3710 IS_GEN7(bufmgr_gem->pci_device)); 3711 } else 3712 bufmgr_gem->has_llc = *gp.value; 3713 3714 gp.param = I915_PARAM_HAS_VEBOX; 3715 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3716 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3717 3718 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; 3719 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3720 if (ret == 0 && *gp.value > 0) 3721 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; 3722 3723 if (bufmgr_gem->gen < 4) { 3724 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3725 gp.value = &bufmgr_gem->available_fences; 3726 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3727 if (ret) { 3728 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3729 errno); 3730 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3731 *gp.value); 3732 bufmgr_gem->available_fences = 0; 3733 } else { 3734 /* XXX The kernel reports the total number of fences, 3735 * including any that may be pinned. 3736 * 3737 * We presume that there will be at least one pinned 3738 * fence for the scanout buffer, but there may be more 3739 * than one scanout and the user may be manually 3740 * pinning buffers. Let's move to execbuffer2 and 3741 * thereby forget the insanity of using fences... 3742 */ 3743 bufmgr_gem->available_fences -= 2; 3744 if (bufmgr_gem->available_fences < 0) 3745 bufmgr_gem->available_fences = 0; 3746 } 3747 } 3748 3749 if (bufmgr_gem->gen >= 8) { 3750 gp.param = I915_PARAM_HAS_ALIASING_PPGTT; 3751 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3752 if (ret == 0 && *gp.value == 3) 3753 bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; 3754 } 3755 3756 /* Let's go with one relocation per every 2 dwords (but round down a bit 3757 * since a power of two will mean an extra page allocation for the reloc 3758 * buffer). 3759 * 3760 * Every 4 was too few for the blender benchmark. 
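 *
 * For a 4096-byte batch, for example, this works out to
 * 4096 / 4 / 2 - 2 = 510 relocations per buffer.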
3761 */ 3762 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3763 3764 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3765 bufmgr_gem->bufmgr.bo_alloc_for_render = 3766 drm_intel_gem_bo_alloc_for_render; 3767 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3768 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3769 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3770 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3771 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3772 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3773 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3774 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3775 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3776 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3777 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3778 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3779 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3780 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3781 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3782 /* Use the new one if available */ 3783 if (exec2) { 3784 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3785 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3786 } else 3787 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3788 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3789 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3790 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 3791 bufmgr_gem->bufmgr.debug = 0; 3792 bufmgr_gem->bufmgr.check_aperture_space = 3793 drm_intel_gem_check_aperture_space; 3794 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3795 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3796 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3797 drm_intel_gem_get_pipe_from_crtc_id; 3798 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3799 3800 init_cache_buckets(bufmgr_gem); 3801 3802 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3803 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3804 3805 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3806 3807exit: 3808 pthread_mutex_unlock(&bufmgr_list_mutex); 3809 3810 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 3811} 3812
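/*
 * Illustrative appendix (not part of the library): a hedged sketch of how the
 * execbuffer path implemented above is typically driven through the public
 * API.  "fd" is assumed to be an already-open i915 DRM device node; "cmds",
 * "used_bytes" and "reloc_offset" are hypothetical caller-provided batch
 * contents (terminated by MI_BATCH_BUFFER_END), their length in bytes, and
 * the byte offset inside the batch where the target's presumed address was
 * written, as required by do_bo_emit_reloc().  Error handling is omitted.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *	drm_intel_bo *batch = drm_intel_bo_alloc(bufmgr, "batch", 4096, 4096);
 *	drm_intel_bo *target = drm_intel_bo_alloc(bufmgr, "target", 8192, 4096);
 *
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bo_subdata(batch, 0, used_bytes, cmds);
 *	drm_intel_bo_emit_reloc(batch, reloc_offset, target, 0,
 *				I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 *	drm_intel_gem_bo_context_exec(batch, ctx, used_bytes, I915_EXEC_RENDER);
 *	drm_intel_gem_bo_wait(batch, -1);
 *
 *	drm_intel_bo_unreference(target);
 *	drm_intel_bo_unreference(batch);
 *	drm_intel_gem_context_destroy(ctx);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */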