intel_bufmgr_gem.c revision 87bf8e7c
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"
#include "uthash.h"

#if HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MAX2(A, B) ((A) > (B) ? (A) : (B))

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
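 *
 * For example (an illustrative sketch, not part of the original code),
 * the debug paths below split a 64-bit presumed offset like so:
 *
 *	uint64_t offset = bo->offset64;
 *	fprintf(stderr, "0x%08x %08x\n",
 *		upper_32_bits(offset), lower_32_bits(offset));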
90 */ 91#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) 92 93/** 94 * lower_32_bits - return bits 0-31 of a number 95 * @n: the number we're accessing 96 */ 97#define lower_32_bits(n) ((__u32)(n)) 98 99typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 100 101struct drm_intel_gem_bo_bucket { 102 drmMMListHead head; 103 unsigned long size; 104}; 105 106typedef struct _drm_intel_bufmgr_gem { 107 drm_intel_bufmgr bufmgr; 108 109 atomic_t refcount; 110 111 int fd; 112 113 int max_relocs; 114 115 pthread_mutex_t lock; 116 117 struct drm_i915_gem_exec_object *exec_objects; 118 struct drm_i915_gem_exec_object2 *exec2_objects; 119 drm_intel_bo **exec_bos; 120 int exec_size; 121 int exec_count; 122 123 /** Array of lists of cached gem objects of power-of-two sizes */ 124 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 125 int num_buckets; 126 time_t time; 127 128 drmMMListHead managers; 129 130 drm_intel_bo_gem *name_table; 131 drm_intel_bo_gem *handle_table; 132 133 drmMMListHead vma_cache; 134 int vma_count, vma_open, vma_max; 135 136 uint64_t gtt_size; 137 int available_fences; 138 int pci_device; 139 int gen; 140 unsigned int has_bsd : 1; 141 unsigned int has_blt : 1; 142 unsigned int has_relaxed_fencing : 1; 143 unsigned int has_llc : 1; 144 unsigned int has_wait_timeout : 1; 145 unsigned int bo_reuse : 1; 146 unsigned int no_exec : 1; 147 unsigned int has_vebox : 1; 148 unsigned int has_exec_async : 1; 149 bool fenced_relocs; 150 151 struct { 152 void *ptr; 153 uint32_t handle; 154 } userptr_active; 155 156} drm_intel_bufmgr_gem; 157 158#define DRM_INTEL_RELOC_FENCE (1<<0) 159 160typedef struct _drm_intel_reloc_target_info { 161 drm_intel_bo *bo; 162 int flags; 163} drm_intel_reloc_target; 164 165struct _drm_intel_bo_gem { 166 drm_intel_bo bo; 167 168 atomic_t refcount; 169 uint32_t gem_handle; 170 const char *name; 171 172 /** 173 * Kenel-assigned global name for this object 174 * 175 * List contains both flink named and prime fd'd objects 176 */ 177 unsigned int global_name; 178 179 UT_hash_handle handle_hh; 180 UT_hash_handle name_hh; 181 182 /** 183 * Index of the buffer within the validation list while preparing a 184 * batchbuffer execution. 185 */ 186 int validate_index; 187 188 /** 189 * Current tiling mode 190 */ 191 uint32_t tiling_mode; 192 uint32_t swizzle_mode; 193 unsigned long stride; 194 195 unsigned long kflags; 196 197 time_t free_time; 198 199 /** Array passed to the DRM containing relocation information. */ 200 struct drm_i915_gem_relocation_entry *relocs; 201 /** 202 * Array of info structs corresponding to relocs[i].target_handle etc 203 */ 204 drm_intel_reloc_target *reloc_target_info; 205 /** Number of entries in relocs */ 206 int reloc_count; 207 /** Array of BOs that are referenced by this buffer and will be softpinned */ 208 drm_intel_bo **softpin_target; 209 /** Number softpinned BOs that are referenced by this buffer */ 210 int softpin_target_count; 211 /** Maximum amount of softpinned BOs that are referenced by this buffer */ 212 int softpin_target_size; 213 214 /** Mapped address for the buffer, saved across map/unmap cycles */ 215 void *mem_virtual; 216 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 217 void *gtt_virtual; 218 /** WC CPU address for the buffer, saved across map/unmap cycles */ 219 void *wc_virtual; 220 /** 221 * Virtual address of the buffer allocated by user, used for userptr 222 * objects only. 
223 */ 224 void *user_virtual; 225 int map_count; 226 drmMMListHead vma_list; 227 228 /** BO cache list */ 229 drmMMListHead head; 230 231 /** 232 * Boolean of whether this BO and its children have been included in 233 * the current drm_intel_bufmgr_check_aperture_space() total. 234 */ 235 bool included_in_check_aperture; 236 237 /** 238 * Boolean of whether this buffer has been used as a relocation 239 * target and had its size accounted for, and thus can't have any 240 * further relocations added to it. 241 */ 242 bool used_as_reloc_target; 243 244 /** 245 * Boolean of whether we have encountered an error whilst building the relocation tree. 246 */ 247 bool has_error; 248 249 /** 250 * Boolean of whether this buffer can be re-used 251 */ 252 bool reusable; 253 254 /** 255 * Boolean of whether the GPU is definitely not accessing the buffer. 256 * 257 * This is only valid when reusable, since non-reusable 258 * buffers are those that have been shared with other 259 * processes, so we don't know their state. 260 */ 261 bool idle; 262 263 /** 264 * Boolean of whether this buffer was allocated with userptr 265 */ 266 bool is_userptr; 267 268 /** 269 * Size in bytes of this buffer and its relocation descendents. 270 * 271 * Used to avoid costly tree walking in 272 * drm_intel_bufmgr_check_aperture in the common case. 273 */ 274 int reloc_tree_size; 275 276 /** 277 * Number of potential fence registers required by this buffer and its 278 * relocations. 279 */ 280 int reloc_tree_fences; 281 282 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ 283 bool mapped_cpu_write; 284}; 285 286static unsigned int 287drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 288 289static unsigned int 290drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 291 292static int 293drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 294 uint32_t * swizzle_mode); 295 296static int 297drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 298 uint32_t tiling_mode, 299 uint32_t stride); 300 301static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 302 time_t time); 303 304static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 305 306static void drm_intel_gem_bo_free(drm_intel_bo *bo); 307 308static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) 309{ 310 return (drm_intel_bo_gem *)bo; 311} 312 313static unsigned long 314drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 315 uint32_t *tiling_mode) 316{ 317 unsigned long min_size, max_size; 318 unsigned long i; 319 320 if (*tiling_mode == I915_TILING_NONE) 321 return size; 322 323 /* 965+ just need multiples of page size for tiling */ 324 if (bufmgr_gem->gen >= 4) 325 return ROUND_UP_TO(size, 4096); 326 327 /* Older chips need powers of two, of at least 512k or 1M */ 328 if (bufmgr_gem->gen == 3) { 329 min_size = 1024*1024; 330 max_size = 128*1024*1024; 331 } else { 332 min_size = 512*1024; 333 max_size = 64*1024*1024; 334 } 335 336 if (size > max_size) { 337 *tiling_mode = I915_TILING_NONE; 338 return size; 339 } 340 341 /* Do we need to allocate every page for the fence? */ 342 if (bufmgr_gem->has_relaxed_fencing) 343 return ROUND_UP_TO(size, 4096); 344 345 for (i = min_size; i < size; i <<= 1) 346 ; 347 348 return i; 349} 350 351/* 352 * Round a given pitch up to the minimum required for X tiling on a 353 * given chip. We use 512 as the minimum to allow for a later tiling 354 * change. 
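 *
 * A worked example with illustrative numbers: a 1366-pixel-wide,
 * 4-byte-per-pixel X-tiled surface has a natural pitch of 5464 bytes.
 * On gen4+ this rounds up to the next multiple of the 512-byte tile
 * width, i.e. 5632 bytes, while pre-965 parts round up to the next
 * power of two, i.e. 8192 bytes.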
355 */ 356static unsigned long 357drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 358 unsigned long pitch, uint32_t *tiling_mode) 359{ 360 unsigned long tile_width; 361 unsigned long i; 362 363 /* If untiled, then just align it so that we can do rendering 364 * to it with the 3D engine. 365 */ 366 if (*tiling_mode == I915_TILING_NONE) 367 return ALIGN(pitch, 64); 368 369 if (*tiling_mode == I915_TILING_X 370 || (IS_915(bufmgr_gem->pci_device) 371 && *tiling_mode == I915_TILING_Y)) 372 tile_width = 512; 373 else 374 tile_width = 128; 375 376 /* 965 is flexible */ 377 if (bufmgr_gem->gen >= 4) 378 return ROUND_UP_TO(pitch, tile_width); 379 380 /* The older hardware has a maximum pitch of 8192 with tiled 381 * surfaces, so fallback to untiled if it's too large. 382 */ 383 if (pitch > 8192) { 384 *tiling_mode = I915_TILING_NONE; 385 return ALIGN(pitch, 64); 386 } 387 388 /* Pre-965 needs power of two tile width */ 389 for (i = tile_width; i < pitch; i <<= 1) 390 ; 391 392 return i; 393} 394 395static struct drm_intel_gem_bo_bucket * 396drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 397 unsigned long size) 398{ 399 int i; 400 401 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 402 struct drm_intel_gem_bo_bucket *bucket = 403 &bufmgr_gem->cache_bucket[i]; 404 if (bucket->size >= size) { 405 return bucket; 406 } 407 } 408 409 return NULL; 410} 411 412static void 413drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 414{ 415 int i, j; 416 417 for (i = 0; i < bufmgr_gem->exec_count; i++) { 418 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 419 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 420 421 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { 422 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, 423 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 424 bo_gem->name); 425 continue; 426 } 427 428 for (j = 0; j < bo_gem->reloc_count; j++) { 429 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 430 drm_intel_bo_gem *target_gem = 431 (drm_intel_bo_gem *) target_bo; 432 433 DBG("%2d: %d %s(%s)@0x%08x %08x -> " 434 "%d (%s)@0x%08x %08x + 0x%08x\n", 435 i, 436 bo_gem->gem_handle, 437 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 438 bo_gem->name, 439 upper_32_bits(bo_gem->relocs[j].offset), 440 lower_32_bits(bo_gem->relocs[j].offset), 441 target_gem->gem_handle, 442 target_gem->name, 443 upper_32_bits(target_bo->offset64), 444 lower_32_bits(target_bo->offset64), 445 bo_gem->relocs[j].delta); 446 } 447 448 for (j = 0; j < bo_gem->softpin_target_count; j++) { 449 drm_intel_bo *target_bo = bo_gem->softpin_target[j]; 450 drm_intel_bo_gem *target_gem = 451 (drm_intel_bo_gem *) target_bo; 452 DBG("%2d: %d %s(%s) -> " 453 "%d *(%s)@0x%08x %08x\n", 454 i, 455 bo_gem->gem_handle, 456 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 457 bo_gem->name, 458 target_gem->gem_handle, 459 target_gem->name, 460 upper_32_bits(target_bo->offset64), 461 lower_32_bits(target_bo->offset64)); 462 } 463 } 464} 465 466static inline void 467drm_intel_gem_bo_reference(drm_intel_bo *bo) 468{ 469 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 470 471 atomic_inc(&bo_gem->refcount); 472} 473 474/** 475 * Adds the given buffer to the list of buffers to be validated (moved into the 476 * appropriate memory type) with the next batch submission. 477 * 478 * If a buffer is validated multiple times in a batch submission, it ends up 479 * with the intersection of the memory type flags and the union of the 480 * access flags. 
481 */ 482static void 483drm_intel_add_validate_buffer(drm_intel_bo *bo) 484{ 485 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 486 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 487 int index; 488 489 if (bo_gem->validate_index != -1) 490 return; 491 492 /* Extend the array of validation entries as necessary. */ 493 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 494 int new_size = bufmgr_gem->exec_size * 2; 495 496 if (new_size == 0) 497 new_size = 5; 498 499 bufmgr_gem->exec_objects = 500 realloc(bufmgr_gem->exec_objects, 501 sizeof(*bufmgr_gem->exec_objects) * new_size); 502 bufmgr_gem->exec_bos = 503 realloc(bufmgr_gem->exec_bos, 504 sizeof(*bufmgr_gem->exec_bos) * new_size); 505 bufmgr_gem->exec_size = new_size; 506 } 507 508 index = bufmgr_gem->exec_count; 509 bo_gem->validate_index = index; 510 /* Fill in array entry */ 511 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 512 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 513 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 514 bufmgr_gem->exec_objects[index].alignment = bo->align; 515 bufmgr_gem->exec_objects[index].offset = 0; 516 bufmgr_gem->exec_bos[index] = bo; 517 bufmgr_gem->exec_count++; 518} 519 520static void 521drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 522{ 523 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 524 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 525 int index; 526 unsigned long flags; 527 528 flags = 0; 529 if (need_fence) 530 flags |= EXEC_OBJECT_NEEDS_FENCE; 531 532 if (bo_gem->validate_index != -1) { 533 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; 534 return; 535 } 536 537 /* Extend the array of validation entries as necessary. */ 538 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 539 int new_size = bufmgr_gem->exec_size * 2; 540 541 if (new_size == 0) 542 new_size = 5; 543 544 bufmgr_gem->exec2_objects = 545 realloc(bufmgr_gem->exec2_objects, 546 sizeof(*bufmgr_gem->exec2_objects) * new_size); 547 bufmgr_gem->exec_bos = 548 realloc(bufmgr_gem->exec_bos, 549 sizeof(*bufmgr_gem->exec_bos) * new_size); 550 bufmgr_gem->exec_size = new_size; 551 } 552 553 index = bufmgr_gem->exec_count; 554 bo_gem->validate_index = index; 555 /* Fill in array entry */ 556 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 557 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 558 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 559 bufmgr_gem->exec2_objects[index].alignment = bo->align; 560 bufmgr_gem->exec2_objects[index].offset = bo->offset64; 561 bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; 562 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 563 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 564 bufmgr_gem->exec_bos[index] = bo; 565 bufmgr_gem->exec_count++; 566} 567 568#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 569 sizeof(uint32_t)) 570 571static void 572drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 573 drm_intel_bo_gem *bo_gem, 574 unsigned int alignment) 575{ 576 unsigned int size; 577 578 assert(!bo_gem->used_as_reloc_target); 579 580 /* The older chipsets are far-less flexible in terms of tiling, 581 * and require tiled buffer to be size aligned in the aperture. 582 * This means that in the worst possible case we will need a hole 583 * twice as large as the object in order for it to fit into the 584 * aperture. 
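 *
 * For instance (illustrative numbers only), a 600KB X-tiled buffer on
 * gen3 with relaxed fencing has its minimum fence size rounded up to
 * 1MB, so reloc_tree_size is charged 600KB plus 1MB of worst-case
 * alignment slack; the estimate deliberately errs on the generous side.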
Optimal packing is for wimps. 585 */ 586 size = bo_gem->bo.size; 587 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 588 unsigned int min_size; 589 590 if (bufmgr_gem->has_relaxed_fencing) { 591 if (bufmgr_gem->gen == 3) 592 min_size = 1024*1024; 593 else 594 min_size = 512*1024; 595 596 while (min_size < size) 597 min_size *= 2; 598 } else 599 min_size = size; 600 601 /* Account for worst-case alignment. */ 602 alignment = MAX2(alignment, min_size); 603 } 604 605 bo_gem->reloc_tree_size = size + alignment; 606} 607 608static int 609drm_intel_setup_reloc_list(drm_intel_bo *bo) 610{ 611 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 612 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 613 unsigned int max_relocs = bufmgr_gem->max_relocs; 614 615 if (bo->size / 4 < max_relocs) 616 max_relocs = bo->size / 4; 617 618 bo_gem->relocs = malloc(max_relocs * 619 sizeof(struct drm_i915_gem_relocation_entry)); 620 bo_gem->reloc_target_info = malloc(max_relocs * 621 sizeof(drm_intel_reloc_target)); 622 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 623 bo_gem->has_error = true; 624 625 free (bo_gem->relocs); 626 bo_gem->relocs = NULL; 627 628 free (bo_gem->reloc_target_info); 629 bo_gem->reloc_target_info = NULL; 630 631 return 1; 632 } 633 634 return 0; 635} 636 637static int 638drm_intel_gem_bo_busy(drm_intel_bo *bo) 639{ 640 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 641 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 642 struct drm_i915_gem_busy busy; 643 int ret; 644 645 if (bo_gem->reusable && bo_gem->idle) 646 return false; 647 648 memclear(busy); 649 busy.handle = bo_gem->gem_handle; 650 651 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 652 if (ret == 0) { 653 bo_gem->idle = !busy.busy; 654 return busy.busy; 655 } else { 656 return false; 657 } 658} 659 660static int 661drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 662 drm_intel_bo_gem *bo_gem, int state) 663{ 664 struct drm_i915_gem_madvise madv; 665 666 memclear(madv); 667 madv.handle = bo_gem->gem_handle; 668 madv.madv = state; 669 madv.retained = 1; 670 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 671 672 return madv.retained; 673} 674 675static int 676drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 677{ 678 return drm_intel_gem_bo_madvise_internal 679 ((drm_intel_bufmgr_gem *) bo->bufmgr, 680 (drm_intel_bo_gem *) bo, 681 madv); 682} 683 684/* drop the oldest entries that have been purged by the kernel */ 685static void 686drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 687 struct drm_intel_gem_bo_bucket *bucket) 688{ 689 while (!DRMLISTEMPTY(&bucket->head)) { 690 drm_intel_bo_gem *bo_gem; 691 692 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 693 bucket->head.next, head); 694 if (drm_intel_gem_bo_madvise_internal 695 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 696 break; 697 698 DRMLISTDEL(&bo_gem->head); 699 drm_intel_gem_bo_free(&bo_gem->bo); 700 } 701} 702 703static drm_intel_bo * 704drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 705 const char *name, 706 unsigned long size, 707 unsigned long flags, 708 uint32_t tiling_mode, 709 unsigned long stride, 710 unsigned int alignment) 711{ 712 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 713 drm_intel_bo_gem *bo_gem; 714 unsigned int page_size = getpagesize(); 715 int ret; 716 struct drm_intel_gem_bo_bucket *bucket; 717 bool alloc_from_cache; 718 unsigned long bo_size; 719 bool 
for_render = false; 720 721 if (flags & BO_ALLOC_FOR_RENDER) 722 for_render = true; 723 724 /* Round the allocated size up to a power of two number of pages. */ 725 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 726 727 /* If we don't have caching at this size, don't actually round the 728 * allocation up. 729 */ 730 if (bucket == NULL) { 731 bo_size = size; 732 if (bo_size < page_size) 733 bo_size = page_size; 734 } else { 735 bo_size = bucket->size; 736 } 737 738 pthread_mutex_lock(&bufmgr_gem->lock); 739 /* Get a buffer out of the cache if available */ 740retry: 741 alloc_from_cache = false; 742 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 743 if (for_render) { 744 /* Allocate new render-target BOs from the tail (MRU) 745 * of the list, as it will likely be hot in the GPU 746 * cache and in the aperture for us. 747 */ 748 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 749 bucket->head.prev, head); 750 DRMLISTDEL(&bo_gem->head); 751 alloc_from_cache = true; 752 bo_gem->bo.align = alignment; 753 } else { 754 assert(alignment == 0); 755 /* For non-render-target BOs (where we're probably 756 * going to map it first thing in order to fill it 757 * with data), check if the last BO in the cache is 758 * unbusy, and only reuse in that case. Otherwise, 759 * allocating a new buffer is probably faster than 760 * waiting for the GPU to finish. 761 */ 762 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 763 bucket->head.next, head); 764 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 765 alloc_from_cache = true; 766 DRMLISTDEL(&bo_gem->head); 767 } 768 } 769 770 if (alloc_from_cache) { 771 if (!drm_intel_gem_bo_madvise_internal 772 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 773 drm_intel_gem_bo_free(&bo_gem->bo); 774 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 775 bucket); 776 goto retry; 777 } 778 779 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 780 tiling_mode, 781 stride)) { 782 drm_intel_gem_bo_free(&bo_gem->bo); 783 goto retry; 784 } 785 } 786 } 787 788 if (!alloc_from_cache) { 789 struct drm_i915_gem_create create; 790 791 bo_gem = calloc(1, sizeof(*bo_gem)); 792 if (!bo_gem) 793 goto err; 794 795 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 796 list (vma_list), so better set the list head here */ 797 DRMINITLISTHEAD(&bo_gem->vma_list); 798 799 bo_gem->bo.size = bo_size; 800 801 memclear(create); 802 create.size = bo_size; 803 804 ret = drmIoctl(bufmgr_gem->fd, 805 DRM_IOCTL_I915_GEM_CREATE, 806 &create); 807 if (ret != 0) { 808 free(bo_gem); 809 goto err; 810 } 811 812 bo_gem->gem_handle = create.handle; 813 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 814 gem_handle, sizeof(bo_gem->gem_handle), 815 bo_gem); 816 817 bo_gem->bo.handle = bo_gem->gem_handle; 818 bo_gem->bo.bufmgr = bufmgr; 819 bo_gem->bo.align = alignment; 820 821 bo_gem->tiling_mode = I915_TILING_NONE; 822 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 823 bo_gem->stride = 0; 824 825 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 826 tiling_mode, 827 stride)) 828 goto err_free; 829 } 830 831 bo_gem->name = name; 832 atomic_set(&bo_gem->refcount, 1); 833 bo_gem->validate_index = -1; 834 bo_gem->reloc_tree_fences = 0; 835 bo_gem->used_as_reloc_target = false; 836 bo_gem->has_error = false; 837 bo_gem->reusable = true; 838 839 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); 840 pthread_mutex_unlock(&bufmgr_gem->lock); 841 842 DBG("bo_create: buf %d (%s) %ldb\n", 843 bo_gem->gem_handle, bo_gem->name, size); 844 845 return &bo_gem->bo; 846 847err_free: 848 
drm_intel_gem_bo_free(&bo_gem->bo); 849err: 850 pthread_mutex_unlock(&bufmgr_gem->lock); 851 return NULL; 852} 853 854static drm_intel_bo * 855drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 856 const char *name, 857 unsigned long size, 858 unsigned int alignment) 859{ 860 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 861 BO_ALLOC_FOR_RENDER, 862 I915_TILING_NONE, 0, 863 alignment); 864} 865 866static drm_intel_bo * 867drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 868 const char *name, 869 unsigned long size, 870 unsigned int alignment) 871{ 872 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 873 I915_TILING_NONE, 0, 0); 874} 875 876static drm_intel_bo * 877drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 878 int x, int y, int cpp, uint32_t *tiling_mode, 879 unsigned long *pitch, unsigned long flags) 880{ 881 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 882 unsigned long size, stride; 883 uint32_t tiling; 884 885 do { 886 unsigned long aligned_y, height_alignment; 887 888 tiling = *tiling_mode; 889 890 /* If we're tiled, our allocations are in 8 or 32-row blocks, 891 * so failure to align our height means that we won't allocate 892 * enough pages. 893 * 894 * If we're untiled, we still have to align to 2 rows high 895 * because the data port accesses 2x2 blocks even if the 896 * bottom row isn't to be rendered, so failure to align means 897 * we could walk off the end of the GTT and fault. This is 898 * documented on 965, and may be the case on older chipsets 899 * too so we try to be careful. 900 */ 901 aligned_y = y; 902 height_alignment = 2; 903 904 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 905 height_alignment = 16; 906 else if (tiling == I915_TILING_X 907 || (IS_915(bufmgr_gem->pci_device) 908 && tiling == I915_TILING_Y)) 909 height_alignment = 8; 910 else if (tiling == I915_TILING_Y) 911 height_alignment = 32; 912 aligned_y = ALIGN(y, height_alignment); 913 914 stride = x * cpp; 915 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 916 size = stride * aligned_y; 917 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 918 } while (*tiling_mode != tiling); 919 *pitch = stride; 920 921 if (tiling == I915_TILING_NONE) 922 stride = 0; 923 924 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 925 tiling, stride, 0); 926} 927 928static drm_intel_bo * 929drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 930 const char *name, 931 void *addr, 932 uint32_t tiling_mode, 933 uint32_t stride, 934 unsigned long size, 935 unsigned long flags) 936{ 937 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 938 drm_intel_bo_gem *bo_gem; 939 int ret; 940 struct drm_i915_gem_userptr userptr; 941 942 /* Tiling with userptr surfaces is not supported 943 * on all hardware so refuse it for time being. 
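 *
 * A minimal caller-side sketch (illustrative, not taken from this
 * file), assuming a page-aligned, page-sized allocation:
 *
 *	void *ptr;
 *	posix_memalign(&ptr, 4096, 4096);
 *	bo = drm_intel_bo_alloc_userptr(bufmgr, "user data", ptr,
 *					I915_TILING_NONE, 0, 4096, 0);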
944 */ 945 if (tiling_mode != I915_TILING_NONE) 946 return NULL; 947 948 bo_gem = calloc(1, sizeof(*bo_gem)); 949 if (!bo_gem) 950 return NULL; 951 952 atomic_set(&bo_gem->refcount, 1); 953 DRMINITLISTHEAD(&bo_gem->vma_list); 954 955 bo_gem->bo.size = size; 956 957 memclear(userptr); 958 userptr.user_ptr = (__u64)((unsigned long)addr); 959 userptr.user_size = size; 960 userptr.flags = flags; 961 962 ret = drmIoctl(bufmgr_gem->fd, 963 DRM_IOCTL_I915_GEM_USERPTR, 964 &userptr); 965 if (ret != 0) { 966 DBG("bo_create_userptr: " 967 "ioctl failed with user ptr %p size 0x%lx, " 968 "user flags 0x%lx\n", addr, size, flags); 969 free(bo_gem); 970 return NULL; 971 } 972 973 pthread_mutex_lock(&bufmgr_gem->lock); 974 975 bo_gem->gem_handle = userptr.handle; 976 bo_gem->bo.handle = bo_gem->gem_handle; 977 bo_gem->bo.bufmgr = bufmgr; 978 bo_gem->is_userptr = true; 979 bo_gem->bo.virtual = addr; 980 /* Save the address provided by user */ 981 bo_gem->user_virtual = addr; 982 bo_gem->tiling_mode = I915_TILING_NONE; 983 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 984 bo_gem->stride = 0; 985 986 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 987 gem_handle, sizeof(bo_gem->gem_handle), 988 bo_gem); 989 990 bo_gem->name = name; 991 bo_gem->validate_index = -1; 992 bo_gem->reloc_tree_fences = 0; 993 bo_gem->used_as_reloc_target = false; 994 bo_gem->has_error = false; 995 bo_gem->reusable = false; 996 997 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 998 pthread_mutex_unlock(&bufmgr_gem->lock); 999 1000 DBG("bo_create_userptr: " 1001 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 1002 addr, bo_gem->gem_handle, bo_gem->name, 1003 size, stride, tiling_mode); 1004 1005 return &bo_gem->bo; 1006} 1007 1008static bool 1009has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 1010{ 1011 int ret; 1012 void *ptr; 1013 long pgsz; 1014 struct drm_i915_gem_userptr userptr; 1015 1016 pgsz = sysconf(_SC_PAGESIZE); 1017 assert(pgsz > 0); 1018 1019 ret = posix_memalign(&ptr, pgsz, pgsz); 1020 if (ret) { 1021 DBG("Failed to get a page (%ld) for userptr detection!\n", 1022 pgsz); 1023 return false; 1024 } 1025 1026 memclear(userptr); 1027 userptr.user_ptr = (__u64)(unsigned long)ptr; 1028 userptr.user_size = pgsz; 1029 1030retry: 1031 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 1032 if (ret) { 1033 if (errno == ENODEV && userptr.flags == 0) { 1034 userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 1035 goto retry; 1036 } 1037 free(ptr); 1038 return false; 1039 } 1040 1041 /* We don't release the userptr bo here as we want to keep the 1042 * kernel mm tracking alive for our lifetime. The first time we 1043 * create a userptr object the kernel has to install a mmu_notifer 1044 * which is a heavyweight operation (e.g. it requires taking all 1045 * mm_locks and stop_machine()). 
1046 */ 1047 1048 bufmgr_gem->userptr_active.ptr = ptr; 1049 bufmgr_gem->userptr_active.handle = userptr.handle; 1050 1051 return true; 1052} 1053 1054static drm_intel_bo * 1055check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 1056 const char *name, 1057 void *addr, 1058 uint32_t tiling_mode, 1059 uint32_t stride, 1060 unsigned long size, 1061 unsigned long flags) 1062{ 1063 if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1064 bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1065 else 1066 bufmgr->bo_alloc_userptr = NULL; 1067 1068 return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1069 tiling_mode, stride, size, flags); 1070} 1071 1072static int get_tiling_mode(drm_intel_bufmgr_gem *bufmgr_gem, 1073 uint32_t gem_handle, 1074 uint32_t *tiling_mode, 1075 uint32_t *swizzle_mode) 1076{ 1077 struct drm_i915_gem_get_tiling get_tiling = { 1078 .handle = gem_handle, 1079 }; 1080 int ret; 1081 1082 ret = drmIoctl(bufmgr_gem->fd, 1083 DRM_IOCTL_I915_GEM_GET_TILING, 1084 &get_tiling); 1085 if (ret != 0 && errno != EOPNOTSUPP) 1086 return ret; 1087 1088 *tiling_mode = get_tiling.tiling_mode; 1089 *swizzle_mode = get_tiling.swizzle_mode; 1090 1091 return 0; 1092} 1093 1094/** 1095 * Returns a drm_intel_bo wrapping the given buffer object handle. 1096 * 1097 * This can be used when one application needs to pass a buffer object 1098 * to another. 1099 */ 1100drm_public drm_intel_bo * 1101drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 1102 const char *name, 1103 unsigned int handle) 1104{ 1105 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1106 drm_intel_bo_gem *bo_gem; 1107 int ret; 1108 struct drm_gem_open open_arg; 1109 1110 /* At the moment most applications only have a few named bo. 1111 * For instance, in a DRI client only the render buffers passed 1112 * between X and the client are named. And since X returns the 1113 * alternating names for the front/back buffer a linear search 1114 * provides a sufficiently fast match. 
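 *
 * A typical sharing flow (illustrative, not from this file): the
 * exporting process calls drm_intel_bo_flink(bo, &name) to obtain a
 * global name, hands that name to the importer out of band, and the
 * importer wraps it with
 * drm_intel_bo_gem_create_from_name(bufmgr, "shared", name).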
1115 */ 1116 pthread_mutex_lock(&bufmgr_gem->lock); 1117 HASH_FIND(name_hh, bufmgr_gem->name_table, 1118 &handle, sizeof(handle), bo_gem); 1119 if (bo_gem) { 1120 drm_intel_gem_bo_reference(&bo_gem->bo); 1121 goto out; 1122 } 1123 1124 memclear(open_arg); 1125 open_arg.name = handle; 1126 ret = drmIoctl(bufmgr_gem->fd, 1127 DRM_IOCTL_GEM_OPEN, 1128 &open_arg); 1129 if (ret != 0) { 1130 DBG("Couldn't reference %s handle 0x%08x: %s\n", 1131 name, handle, strerror(errno)); 1132 bo_gem = NULL; 1133 goto out; 1134 } 1135 /* Now see if someone has used a prime handle to get this 1136 * object from the kernel before by looking through the list 1137 * again for a matching gem_handle 1138 */ 1139 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 1140 &open_arg.handle, sizeof(open_arg.handle), bo_gem); 1141 if (bo_gem) { 1142 drm_intel_gem_bo_reference(&bo_gem->bo); 1143 goto out; 1144 } 1145 1146 bo_gem = calloc(1, sizeof(*bo_gem)); 1147 if (!bo_gem) 1148 goto out; 1149 1150 atomic_set(&bo_gem->refcount, 1); 1151 DRMINITLISTHEAD(&bo_gem->vma_list); 1152 1153 bo_gem->bo.size = open_arg.size; 1154 bo_gem->bo.offset = 0; 1155 bo_gem->bo.offset64 = 0; 1156 bo_gem->bo.virtual = NULL; 1157 bo_gem->bo.bufmgr = bufmgr; 1158 bo_gem->name = name; 1159 bo_gem->validate_index = -1; 1160 bo_gem->gem_handle = open_arg.handle; 1161 bo_gem->bo.handle = open_arg.handle; 1162 bo_gem->global_name = handle; 1163 bo_gem->reusable = false; 1164 1165 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 1166 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 1167 HASH_ADD(name_hh, bufmgr_gem->name_table, 1168 global_name, sizeof(bo_gem->global_name), bo_gem); 1169 1170 ret = get_tiling_mode(bufmgr_gem, bo_gem->gem_handle, 1171 &bo_gem->tiling_mode, &bo_gem->swizzle_mode); 1172 if (ret != 0) 1173 goto err_unref; 1174 1175 /* XXX stride is unknown */ 1176 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 1177 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 1178 1179out: 1180 pthread_mutex_unlock(&bufmgr_gem->lock); 1181 return &bo_gem->bo; 1182 1183err_unref: 1184 drm_intel_gem_bo_free(&bo_gem->bo); 1185 pthread_mutex_unlock(&bufmgr_gem->lock); 1186 return NULL; 1187} 1188 1189static void 1190drm_intel_gem_bo_free(drm_intel_bo *bo) 1191{ 1192 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1193 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1194 struct drm_gem_close close; 1195 int ret; 1196 1197 DRMLISTDEL(&bo_gem->vma_list); 1198 if (bo_gem->mem_virtual) { 1199 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1200 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1201 bufmgr_gem->vma_count--; 1202 } 1203 if (bo_gem->wc_virtual) { 1204 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); 1205 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1206 bufmgr_gem->vma_count--; 1207 } 1208 if (bo_gem->gtt_virtual) { 1209 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1210 bufmgr_gem->vma_count--; 1211 } 1212 1213 if (bo_gem->global_name) 1214 HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); 1215 HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); 1216 1217 /* Close this object */ 1218 memclear(close); 1219 close.handle = bo_gem->gem_handle; 1220 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 1221 if (ret != 0) { 1222 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 1223 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 1224 } 1225 free(bo); 1226} 1227 1228static void 1229drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 1230{ 1231#if HAVE_VALGRIND 
1232 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1233 1234 if (bo_gem->mem_virtual) 1235 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 1236 1237 if (bo_gem->wc_virtual) 1238 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); 1239 1240 if (bo_gem->gtt_virtual) 1241 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 1242#endif 1243} 1244 1245/** Frees all cached buffers significantly older than @time. */ 1246static void 1247drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 1248{ 1249 int i; 1250 1251 if (bufmgr_gem->time == time) 1252 return; 1253 1254 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1255 struct drm_intel_gem_bo_bucket *bucket = 1256 &bufmgr_gem->cache_bucket[i]; 1257 1258 while (!DRMLISTEMPTY(&bucket->head)) { 1259 drm_intel_bo_gem *bo_gem; 1260 1261 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1262 bucket->head.next, head); 1263 if (time - bo_gem->free_time <= 1) 1264 break; 1265 1266 DRMLISTDEL(&bo_gem->head); 1267 1268 drm_intel_gem_bo_free(&bo_gem->bo); 1269 } 1270 } 1271 1272 bufmgr_gem->time = time; 1273} 1274 1275static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1276{ 1277 int limit; 1278 1279 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1280 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1281 1282 if (bufmgr_gem->vma_max < 0) 1283 return; 1284 1285 /* We may need to evict a few entries in order to create new mmaps */ 1286 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1287 if (limit < 0) 1288 limit = 0; 1289 1290 while (bufmgr_gem->vma_count > limit) { 1291 drm_intel_bo_gem *bo_gem; 1292 1293 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1294 bufmgr_gem->vma_cache.next, 1295 vma_list); 1296 assert(bo_gem->map_count == 0); 1297 DRMLISTDELINIT(&bo_gem->vma_list); 1298 1299 if (bo_gem->mem_virtual) { 1300 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1301 bo_gem->mem_virtual = NULL; 1302 bufmgr_gem->vma_count--; 1303 } 1304 if (bo_gem->wc_virtual) { 1305 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1306 bo_gem->wc_virtual = NULL; 1307 bufmgr_gem->vma_count--; 1308 } 1309 if (bo_gem->gtt_virtual) { 1310 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1311 bo_gem->gtt_virtual = NULL; 1312 bufmgr_gem->vma_count--; 1313 } 1314 } 1315} 1316 1317static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1318 drm_intel_bo_gem *bo_gem) 1319{ 1320 bufmgr_gem->vma_open--; 1321 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1322 if (bo_gem->mem_virtual) 1323 bufmgr_gem->vma_count++; 1324 if (bo_gem->wc_virtual) 1325 bufmgr_gem->vma_count++; 1326 if (bo_gem->gtt_virtual) 1327 bufmgr_gem->vma_count++; 1328 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1329} 1330 1331static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1332 drm_intel_bo_gem *bo_gem) 1333{ 1334 bufmgr_gem->vma_open++; 1335 DRMLISTDEL(&bo_gem->vma_list); 1336 if (bo_gem->mem_virtual) 1337 bufmgr_gem->vma_count--; 1338 if (bo_gem->wc_virtual) 1339 bufmgr_gem->vma_count--; 1340 if (bo_gem->gtt_virtual) 1341 bufmgr_gem->vma_count--; 1342 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1343} 1344 1345static void 1346drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1347{ 1348 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1349 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1350 struct drm_intel_gem_bo_bucket *bucket; 1351 int i; 1352 1353 /* Unreference all the target buffers */ 1354 for (i = 0; i < 
bo_gem->reloc_count; i++) { 1355 if (bo_gem->reloc_target_info[i].bo != bo) { 1356 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1357 reloc_target_info[i].bo, 1358 time); 1359 } 1360 } 1361 for (i = 0; i < bo_gem->softpin_target_count; i++) 1362 drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], 1363 time); 1364 bo_gem->kflags = 0; 1365 bo_gem->reloc_count = 0; 1366 bo_gem->used_as_reloc_target = false; 1367 bo_gem->softpin_target_count = 0; 1368 1369 DBG("bo_unreference final: %d (%s)\n", 1370 bo_gem->gem_handle, bo_gem->name); 1371 1372 /* release memory associated with this object */ 1373 if (bo_gem->reloc_target_info) { 1374 free(bo_gem->reloc_target_info); 1375 bo_gem->reloc_target_info = NULL; 1376 } 1377 if (bo_gem->relocs) { 1378 free(bo_gem->relocs); 1379 bo_gem->relocs = NULL; 1380 } 1381 if (bo_gem->softpin_target) { 1382 free(bo_gem->softpin_target); 1383 bo_gem->softpin_target = NULL; 1384 bo_gem->softpin_target_size = 0; 1385 } 1386 1387 /* Clear any left-over mappings */ 1388 if (bo_gem->map_count) { 1389 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 1390 bo_gem->map_count = 0; 1391 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1392 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1393 } 1394 1395 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1396 /* Put the buffer into our internal cache for reuse if we can. */ 1397 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1398 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1399 I915_MADV_DONTNEED)) { 1400 bo_gem->free_time = time; 1401 1402 bo_gem->name = NULL; 1403 bo_gem->validate_index = -1; 1404 1405 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1406 } else { 1407 drm_intel_gem_bo_free(bo); 1408 } 1409} 1410 1411static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1412 time_t time) 1413{ 1414 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1415 1416 assert(atomic_read(&bo_gem->refcount) > 0); 1417 if (atomic_dec_and_test(&bo_gem->refcount)) 1418 drm_intel_gem_bo_unreference_final(bo, time); 1419} 1420 1421static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1422{ 1423 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1424 1425 assert(atomic_read(&bo_gem->refcount) > 0); 1426 1427 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 1428 drm_intel_bufmgr_gem *bufmgr_gem = 1429 (drm_intel_bufmgr_gem *) bo->bufmgr; 1430 struct timespec time; 1431 1432 clock_gettime(CLOCK_MONOTONIC, &time); 1433 1434 pthread_mutex_lock(&bufmgr_gem->lock); 1435 1436 if (atomic_dec_and_test(&bo_gem->refcount)) { 1437 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1438 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1439 } 1440 1441 pthread_mutex_unlock(&bufmgr_gem->lock); 1442 } 1443} 1444 1445static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1446{ 1447 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1448 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1449 struct drm_i915_gem_set_domain set_domain; 1450 int ret; 1451 1452 if (bo_gem->is_userptr) { 1453 /* Return the same user ptr */ 1454 bo->virtual = bo_gem->user_virtual; 1455 return 0; 1456 } 1457 1458 pthread_mutex_lock(&bufmgr_gem->lock); 1459 1460 if (bo_gem->map_count++ == 0) 1461 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1462 1463 if (!bo_gem->mem_virtual) { 1464 struct drm_i915_gem_mmap mmap_arg; 1465 1466 DBG("bo_map: %d (%s), map_count=%d\n", 1467 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1468 1469 
memclear(mmap_arg); 1470 mmap_arg.handle = bo_gem->gem_handle; 1471 mmap_arg.size = bo->size; 1472 ret = drmIoctl(bufmgr_gem->fd, 1473 DRM_IOCTL_I915_GEM_MMAP, 1474 &mmap_arg); 1475 if (ret != 0) { 1476 ret = -errno; 1477 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1478 __FILE__, __LINE__, bo_gem->gem_handle, 1479 bo_gem->name, strerror(errno)); 1480 if (--bo_gem->map_count == 0) 1481 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1482 pthread_mutex_unlock(&bufmgr_gem->lock); 1483 return ret; 1484 } 1485 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1486 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1487 } 1488 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1489 bo_gem->mem_virtual); 1490 bo->virtual = bo_gem->mem_virtual; 1491 1492 memclear(set_domain); 1493 set_domain.handle = bo_gem->gem_handle; 1494 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1495 if (write_enable) 1496 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1497 else 1498 set_domain.write_domain = 0; 1499 ret = drmIoctl(bufmgr_gem->fd, 1500 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1501 &set_domain); 1502 if (ret != 0) { 1503 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1504 __FILE__, __LINE__, bo_gem->gem_handle, 1505 strerror(errno)); 1506 } 1507 1508 if (write_enable) 1509 bo_gem->mapped_cpu_write = true; 1510 1511 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1512 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1513 pthread_mutex_unlock(&bufmgr_gem->lock); 1514 1515 return 0; 1516} 1517 1518static int 1519map_gtt(drm_intel_bo *bo) 1520{ 1521 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1522 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1523 int ret; 1524 1525 if (bo_gem->is_userptr) 1526 return -EINVAL; 1527 1528 if (bo_gem->map_count++ == 0) 1529 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1530 1531 /* Get a mapping of the buffer if we haven't before. */ 1532 if (bo_gem->gtt_virtual == NULL) { 1533 struct drm_i915_gem_mmap_gtt mmap_arg; 1534 1535 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1536 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1537 1538 memclear(mmap_arg); 1539 mmap_arg.handle = bo_gem->gem_handle; 1540 1541 /* Get the fake offset back... 
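 * The MMAP_GTT ioctl does not map anything itself; it returns a magic
 * offset in mmap_arg.offset, which is then passed to mmap(2) on the
 * DRM fd below to create the actual GTT mapping.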
*/ 1542 ret = drmIoctl(bufmgr_gem->fd, 1543 DRM_IOCTL_I915_GEM_MMAP_GTT, 1544 &mmap_arg); 1545 if (ret != 0) { 1546 ret = -errno; 1547 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1548 __FILE__, __LINE__, 1549 bo_gem->gem_handle, bo_gem->name, 1550 strerror(errno)); 1551 if (--bo_gem->map_count == 0) 1552 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1553 return ret; 1554 } 1555 1556 /* and mmap it */ 1557 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, 1558 MAP_SHARED, bufmgr_gem->fd, 1559 mmap_arg.offset); 1560 if (bo_gem->gtt_virtual == MAP_FAILED) { 1561 bo_gem->gtt_virtual = NULL; 1562 ret = -errno; 1563 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1564 __FILE__, __LINE__, 1565 bo_gem->gem_handle, bo_gem->name, 1566 strerror(errno)); 1567 if (--bo_gem->map_count == 0) 1568 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1569 return ret; 1570 } 1571 } 1572 1573 bo->virtual = bo_gem->gtt_virtual; 1574 1575 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1576 bo_gem->gtt_virtual); 1577 1578 return 0; 1579} 1580 1581drm_public int 1582drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1583{ 1584 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1585 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1586 struct drm_i915_gem_set_domain set_domain; 1587 int ret; 1588 1589 pthread_mutex_lock(&bufmgr_gem->lock); 1590 1591 ret = map_gtt(bo); 1592 if (ret) { 1593 pthread_mutex_unlock(&bufmgr_gem->lock); 1594 return ret; 1595 } 1596 1597 /* Now move it to the GTT domain so that the GPU and CPU 1598 * caches are flushed and the GPU isn't actively using the 1599 * buffer. 1600 * 1601 * The pagefault handler does this domain change for us when 1602 * it has unbound the BO from the GTT, but it's up to us to 1603 * tell it when we're about to use things if we had done 1604 * rendering and it still happens to be bound to the GTT. 1605 */ 1606 memclear(set_domain); 1607 set_domain.handle = bo_gem->gem_handle; 1608 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1609 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1610 ret = drmIoctl(bufmgr_gem->fd, 1611 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1612 &set_domain); 1613 if (ret != 0) { 1614 DBG("%s:%d: Error setting domain %d: %s\n", 1615 __FILE__, __LINE__, bo_gem->gem_handle, 1616 strerror(errno)); 1617 } 1618 1619 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1620 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1621 pthread_mutex_unlock(&bufmgr_gem->lock); 1622 1623 return 0; 1624} 1625 1626/** 1627 * Performs a mapping of the buffer object like the normal GTT 1628 * mapping, but avoids waiting for the GPU to be done reading from or 1629 * rendering to the buffer. 1630 * 1631 * This is used in the implementation of GL_ARB_map_buffer_range: The 1632 * user asks to create a buffer, then does a mapping, fills some 1633 * space, runs a drawing command, then asks to map it again without 1634 * synchronizing because it guarantees that it won't write over the 1635 * data that the GPU is busy using (or, more specifically, that if it 1636 * does write over the data, it acknowledges that rendering is 1637 * undefined). 
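 *
 * A rough caller pattern (illustrative sketch, not part of this file),
 * where the application only touches a region the GPU is not using:
 *
 *	drm_intel_gem_bo_map_unsynchronized(bo);
 *	memcpy((char *)bo->virtual + fresh_offset, data, size);
 *	drm_intel_gem_bo_unmap_gtt(bo);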
1638 */ 1639 1640drm_public int 1641drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1642{ 1643 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1644#if HAVE_VALGRIND 1645 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1646#endif 1647 int ret; 1648 1649 /* If the CPU cache isn't coherent with the GTT, then use a 1650 * regular synchronized mapping. The problem is that we don't 1651 * track where the buffer was last used on the CPU side in 1652 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 1653 * we would potentially corrupt the buffer even when the user 1654 * does reasonable things. 1655 */ 1656 if (!bufmgr_gem->has_llc) 1657 return drm_intel_gem_bo_map_gtt(bo); 1658 1659 pthread_mutex_lock(&bufmgr_gem->lock); 1660 1661 ret = map_gtt(bo); 1662 if (ret == 0) { 1663 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1664 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1665 } 1666 1667 pthread_mutex_unlock(&bufmgr_gem->lock); 1668 1669 return ret; 1670} 1671 1672static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1673{ 1674 drm_intel_bufmgr_gem *bufmgr_gem; 1675 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1676 int ret = 0; 1677 1678 if (bo == NULL) 1679 return 0; 1680 1681 if (bo_gem->is_userptr) 1682 return 0; 1683 1684 bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1685 1686 pthread_mutex_lock(&bufmgr_gem->lock); 1687 1688 if (bo_gem->map_count <= 0) { 1689 DBG("attempted to unmap an unmapped bo\n"); 1690 pthread_mutex_unlock(&bufmgr_gem->lock); 1691 /* Preserve the old behaviour of just treating this as a 1692 * no-op rather than reporting the error. 1693 */ 1694 return 0; 1695 } 1696 1697 if (bo_gem->mapped_cpu_write) { 1698 struct drm_i915_gem_sw_finish sw_finish; 1699 1700 /* Cause a flush to happen if the buffer's pinned for 1701 * scanout, so the results show up in a timely manner. 1702 * Unlike GTT set domains, this only does work if the 1703 * buffer should be scanout-related. 1704 */ 1705 memclear(sw_finish); 1706 sw_finish.handle = bo_gem->gem_handle; 1707 ret = drmIoctl(bufmgr_gem->fd, 1708 DRM_IOCTL_I915_GEM_SW_FINISH, 1709 &sw_finish); 1710 ret = ret == -1 ? -errno : 0; 1711 1712 bo_gem->mapped_cpu_write = false; 1713 } 1714 1715 /* We need to unmap after every innovation as we cannot track 1716 * an open vma for every bo as that will exhaust the system 1717 * limits and cause later failures. 
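 *
 * Callers are therefore expected to pair every map with an unmap,
 * e.g. (illustrative sketch):
 *
 *	drm_intel_bo_map(bo, 1);
 *	memcpy(bo->virtual, data, size);
 *	drm_intel_bo_unmap(bo);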
1718 */ 1719 if (--bo_gem->map_count == 0) { 1720 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1721 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1722 bo->virtual = NULL; 1723 } 1724 pthread_mutex_unlock(&bufmgr_gem->lock); 1725 1726 return ret; 1727} 1728 1729drm_public int 1730drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1731{ 1732 return drm_intel_gem_bo_unmap(bo); 1733} 1734 1735static int 1736drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1737 unsigned long size, const void *data) 1738{ 1739 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1740 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1741 struct drm_i915_gem_pwrite pwrite; 1742 int ret; 1743 1744 if (bo_gem->is_userptr) 1745 return -EINVAL; 1746 1747 memclear(pwrite); 1748 pwrite.handle = bo_gem->gem_handle; 1749 pwrite.offset = offset; 1750 pwrite.size = size; 1751 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1752 ret = drmIoctl(bufmgr_gem->fd, 1753 DRM_IOCTL_I915_GEM_PWRITE, 1754 &pwrite); 1755 if (ret != 0) { 1756 ret = -errno; 1757 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1758 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1759 (int)size, strerror(errno)); 1760 } 1761 1762 return ret; 1763} 1764 1765static int 1766drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1767{ 1768 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1769 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1770 int ret; 1771 1772 memclear(get_pipe_from_crtc_id); 1773 get_pipe_from_crtc_id.crtc_id = crtc_id; 1774 ret = drmIoctl(bufmgr_gem->fd, 1775 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1776 &get_pipe_from_crtc_id); 1777 if (ret != 0) { 1778 /* We return -1 here to signal that we don't 1779 * know which pipe is associated with this crtc. 1780 * This lets the caller know that this information 1781 * isn't available; using the wrong pipe for 1782 * vblank waiting can cause the chipset to lock up 1783 */ 1784 return -1; 1785 } 1786 1787 return get_pipe_from_crtc_id.pipe; 1788} 1789 1790static int 1791drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1792 unsigned long size, void *data) 1793{ 1794 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1795 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1796 struct drm_i915_gem_pread pread; 1797 int ret; 1798 1799 if (bo_gem->is_userptr) 1800 return -EINVAL; 1801 1802 memclear(pread); 1803 pread.handle = bo_gem->gem_handle; 1804 pread.offset = offset; 1805 pread.size = size; 1806 pread.data_ptr = (uint64_t) (uintptr_t) data; 1807 ret = drmIoctl(bufmgr_gem->fd, 1808 DRM_IOCTL_I915_GEM_PREAD, 1809 &pread); 1810 if (ret != 0) { 1811 ret = -errno; 1812 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1813 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1814 (int)size, strerror(errno)); 1815 } 1816 1817 return ret; 1818} 1819 1820/** Waits for all GPU rendering with the object to have completed. */ 1821static void 1822drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1823{ 1824 drm_intel_gem_bo_start_gtt_access(bo, 1); 1825} 1826 1827/** 1828 * Waits on a BO for the given amount of time. 1829 * 1830 * @bo: buffer object to wait for 1831 * @timeout_ns: amount of time to wait in nanoseconds. 1832 * If value is less than 0, an infinite wait will occur. 1833 * 1834 * Returns 0 if the wait was successful ie. the last batch referencing the 1835 * object has completed within the allotted time. 
Otherwise some negative return
 * value describes the error. Of particular interest is -ETIME when the wait has
 * failed to yield the desired result.
 *
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time. Another subtle
 * difference is in the internal locking semantics: this variant does not hold
 * the lock for the duration of the wait, which makes the wait subject to a
 * larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait does not
 * guarantee that the buffer will not be re-issued via another thread or a
 * flinked handle. Userspace must make sure this race does not occur if such
 * precision is important.
 *
 * Note that some kernels have broken the promise of an infinite wait for
 * negative values; upgrade to the latest stable kernel if this is the case.
 */
drm_public int
drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
drm_public void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1902 ret = drmIoctl(bufmgr_gem->fd, 1903 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1904 &set_domain); 1905 if (ret != 0) { 1906 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1907 __FILE__, __LINE__, bo_gem->gem_handle, 1908 set_domain.read_domains, set_domain.write_domain, 1909 strerror(errno)); 1910 } 1911} 1912 1913static void 1914drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1915{ 1916 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1917 struct drm_gem_close close_bo; 1918 int i, ret; 1919 1920 free(bufmgr_gem->exec2_objects); 1921 free(bufmgr_gem->exec_objects); 1922 free(bufmgr_gem->exec_bos); 1923 1924 pthread_mutex_destroy(&bufmgr_gem->lock); 1925 1926 /* Free any cached buffer objects we were going to reuse */ 1927 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1928 struct drm_intel_gem_bo_bucket *bucket = 1929 &bufmgr_gem->cache_bucket[i]; 1930 drm_intel_bo_gem *bo_gem; 1931 1932 while (!DRMLISTEMPTY(&bucket->head)) { 1933 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1934 bucket->head.next, head); 1935 DRMLISTDEL(&bo_gem->head); 1936 1937 drm_intel_gem_bo_free(&bo_gem->bo); 1938 } 1939 } 1940 1941 /* Release userptr bo kept hanging around for optimisation. */ 1942 if (bufmgr_gem->userptr_active.ptr) { 1943 memclear(close_bo); 1944 close_bo.handle = bufmgr_gem->userptr_active.handle; 1945 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1946 free(bufmgr_gem->userptr_active.ptr); 1947 if (ret) 1948 fprintf(stderr, 1949 "Failed to release test userptr object! (%d) " 1950 "i915 kernel driver may not be sane!\n", errno); 1951 } 1952 1953 free(bufmgr); 1954} 1955 1956/** 1957 * Adds the target buffer to the validation list and adds the relocation 1958 * to the reloc_buffer's relocation list. 1959 * 1960 * The relocation entry at the given offset must already contain the 1961 * precomputed relocation value, because the kernel will optimize out 1962 * the relocation entry write when the buffer hasn't moved from the 1963 * last known offset in target_bo. 1964 */ 1965static int 1966do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1967 drm_intel_bo *target_bo, uint32_t target_offset, 1968 uint32_t read_domains, uint32_t write_domain, 1969 bool need_fence) 1970{ 1971 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1972 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1973 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1974 bool fenced_command; 1975 1976 if (bo_gem->has_error) 1977 return -ENOMEM; 1978 1979 if (target_bo_gem->has_error) { 1980 bo_gem->has_error = true; 1981 return -ENOMEM; 1982 } 1983 1984 /* We never use HW fences for rendering on 965+ */ 1985 if (bufmgr_gem->gen >= 4) 1986 need_fence = false; 1987 1988 fenced_command = need_fence; 1989 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1990 need_fence = false; 1991 1992 /* Create a new relocation list if needed */ 1993 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1994 return -ENOMEM; 1995 1996 /* Check overflow */ 1997 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1998 1999 /* Check args */ 2000 assert(offset <= bo->size - 4); 2001 assert((write_domain & (write_domain - 1)) == 0); 2002 2003 /* An object needing a fence is a tiled buffer, so it won't have 2004 * relocs to other buffers. 
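 *
 * For reference, a caller typically writes the presumed value into the
 * batch itself and then records the relocation (illustrative sketch;
 * OUT_BATCH stands in for whatever the caller uses to emit batch
 * dwords):
 *
 *	OUT_BATCH(target_bo->offset64 + delta);
 *	drm_intel_bo_emit_reloc(batch_bo, batch_offset, target_bo, delta,
 *				I915_GEM_DOMAIN_RENDER, 0);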
2005 */ 2006 if (need_fence) { 2007 assert(target_bo_gem->reloc_count == 0); 2008 target_bo_gem->reloc_tree_fences = 1; 2009 } 2010 2011 /* Make sure that we're not adding a reloc to something whose size has 2012 * already been accounted for. 2013 */ 2014 assert(!bo_gem->used_as_reloc_target); 2015 if (target_bo_gem != bo_gem) { 2016 target_bo_gem->used_as_reloc_target = true; 2017 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 2018 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 2019 } 2020 2021 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 2022 if (target_bo != bo) 2023 drm_intel_gem_bo_reference(target_bo); 2024 if (fenced_command) 2025 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 2026 DRM_INTEL_RELOC_FENCE; 2027 else 2028 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 2029 2030 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 2031 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 2032 bo_gem->relocs[bo_gem->reloc_count].target_handle = 2033 target_bo_gem->gem_handle; 2034 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 2035 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 2036 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 2037 bo_gem->reloc_count++; 2038 2039 return 0; 2040} 2041 2042static void 2043drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) 2044{ 2045 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2046 2047 if (enable) 2048 bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2049 else 2050 bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2051} 2052 2053static int 2054drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) 2055{ 2056 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2057 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2058 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2059 if (bo_gem->has_error) 2060 return -ENOMEM; 2061 2062 if (target_bo_gem->has_error) { 2063 bo_gem->has_error = true; 2064 return -ENOMEM; 2065 } 2066 2067 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) 2068 return -EINVAL; 2069 if (target_bo_gem == bo_gem) 2070 return -EINVAL; 2071 2072 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { 2073 int new_size = bo_gem->softpin_target_size * 2; 2074 if (new_size == 0) 2075 new_size = bufmgr_gem->max_relocs; 2076 2077 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * 2078 sizeof(drm_intel_bo *)); 2079 if (!bo_gem->softpin_target) 2080 return -ENOMEM; 2081 2082 bo_gem->softpin_target_size = new_size; 2083 } 2084 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; 2085 drm_intel_gem_bo_reference(target_bo); 2086 bo_gem->softpin_target_count++; 2087 2088 return 0; 2089} 2090 2091static int 2092drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 2093 drm_intel_bo *target_bo, uint32_t target_offset, 2094 uint32_t read_domains, uint32_t write_domain) 2095{ 2096 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2097 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; 2098 2099 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) 2100 return drm_intel_gem_bo_add_softpin_target(bo, target_bo); 2101 else 2102 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2103 read_domains, write_domain, 2104 !bufmgr_gem->fenced_relocs); 2105} 2106 2107static int 2108drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 
2109 drm_intel_bo *target_bo, 2110 uint32_t target_offset, 2111 uint32_t read_domains, uint32_t write_domain) 2112{ 2113 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2114 read_domains, write_domain, true); 2115} 2116 2117drm_public int 2118drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 2119{ 2120 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2121 2122 return bo_gem->reloc_count; 2123} 2124 2125/** 2126 * Removes existing relocation entries in the BO after "start". 2127 * 2128 * This allows a user to avoid a two-step process for state setup with 2129 * counting up all the buffer objects and doing a 2130 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 2131 * relocations for the state setup. Instead, save the state of the 2132 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 2133 * state, and then check if it still fits in the aperture. 2134 * 2135 * Any further drm_intel_bufmgr_check_aperture_space() queries 2136 * involving this buffer in the tree are undefined after this call. 2137 * 2138 * This also removes all softpinned targets being referenced by the BO. 2139 */ 2140drm_public void 2141drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 2142{ 2143 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2144 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2145 int i; 2146 struct timespec time; 2147 2148 clock_gettime(CLOCK_MONOTONIC, &time); 2149 2150 assert(bo_gem->reloc_count >= start); 2151 2152 /* Unreference the cleared target buffers */ 2153 pthread_mutex_lock(&bufmgr_gem->lock); 2154 2155 for (i = start; i < bo_gem->reloc_count; i++) { 2156 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 2157 if (&target_bo_gem->bo != bo) { 2158 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 2159 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 2160 time.tv_sec); 2161 } 2162 } 2163 bo_gem->reloc_count = start; 2164 2165 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2166 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; 2167 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); 2168 } 2169 bo_gem->softpin_target_count = 0; 2170 2171 pthread_mutex_unlock(&bufmgr_gem->lock); 2172 2173} 2174 2175/** 2176 * Walk the tree of relocations rooted at BO and accumulate the list of 2177 * validations to be performed and update the relocation buffers with 2178 * index values into the validation list. 2179 */ 2180static void 2181drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 2182{ 2183 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2184 int i; 2185 2186 if (bo_gem->relocs == NULL) 2187 return; 2188 2189 for (i = 0; i < bo_gem->reloc_count; i++) { 2190 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2191 2192 if (target_bo == bo) 2193 continue; 2194 2195 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2196 2197 /* Continue walking the tree depth-first. 
*/ 2198 drm_intel_gem_bo_process_reloc(target_bo); 2199 2200 /* Add the target to the validate list */ 2201 drm_intel_add_validate_buffer(target_bo); 2202 } 2203} 2204 2205static void 2206drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 2207{ 2208 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2209 int i; 2210 2211 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) 2212 return; 2213 2214 for (i = 0; i < bo_gem->reloc_count; i++) { 2215 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2216 int need_fence; 2217 2218 if (target_bo == bo) 2219 continue; 2220 2221 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2222 2223 /* Continue walking the tree depth-first. */ 2224 drm_intel_gem_bo_process_reloc2(target_bo); 2225 2226 need_fence = (bo_gem->reloc_target_info[i].flags & 2227 DRM_INTEL_RELOC_FENCE); 2228 2229 /* Add the target to the validate list */ 2230 drm_intel_add_validate_buffer2(target_bo, need_fence); 2231 } 2232 2233 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2234 drm_intel_bo *target_bo = bo_gem->softpin_target[i]; 2235 2236 if (target_bo == bo) 2237 continue; 2238 2239 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2240 drm_intel_gem_bo_process_reloc2(target_bo); 2241 drm_intel_add_validate_buffer2(target_bo, false); 2242 } 2243} 2244 2245 2246static void 2247drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 2248{ 2249 int i; 2250 2251 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2252 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2253 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2254 2255 /* Update the buffer offset */ 2256 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2257 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2258 bo_gem->gem_handle, bo_gem->name, 2259 upper_32_bits(bo->offset64), 2260 lower_32_bits(bo->offset64), 2261 upper_32_bits(bufmgr_gem->exec_objects[i].offset), 2262 lower_32_bits(bufmgr_gem->exec_objects[i].offset)); 2263 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 2264 bo->offset = bufmgr_gem->exec_objects[i].offset; 2265 } 2266 } 2267} 2268 2269static void 2270drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 2271{ 2272 int i; 2273 2274 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2275 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2276 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2277 2278 /* Update the buffer offset */ 2279 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2280 /* If we're seeing softpinned object here it means that the kernel 2281 * has relocated our object... 
Indicating a programming error 2282 */ 2283 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); 2284 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2285 bo_gem->gem_handle, bo_gem->name, 2286 upper_32_bits(bo->offset64), 2287 lower_32_bits(bo->offset64), 2288 upper_32_bits(bufmgr_gem->exec2_objects[i].offset), 2289 lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); 2290 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 2291 bo->offset = bufmgr_gem->exec2_objects[i].offset; 2292 } 2293 } 2294} 2295 2296drm_public void 2297drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2298 int x1, int y1, int width, int height, 2299 enum aub_dump_bmp_format format, 2300 int pitch, int offset) 2301{ 2302} 2303 2304static int 2305drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2306 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2307{ 2308 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2309 struct drm_i915_gem_execbuffer execbuf; 2310 int ret, i; 2311 2312 if (to_bo_gem(bo)->has_error) 2313 return -ENOMEM; 2314 2315 pthread_mutex_lock(&bufmgr_gem->lock); 2316 /* Update indices and set up the validate list. */ 2317 drm_intel_gem_bo_process_reloc(bo); 2318 2319 /* Add the batch buffer to the validation list. There are no 2320 * relocations pointing to it. 2321 */ 2322 drm_intel_add_validate_buffer(bo); 2323 2324 memclear(execbuf); 2325 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2326 execbuf.buffer_count = bufmgr_gem->exec_count; 2327 execbuf.batch_start_offset = 0; 2328 execbuf.batch_len = used; 2329 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2330 execbuf.num_cliprects = num_cliprects; 2331 execbuf.DR1 = 0; 2332 execbuf.DR4 = DR4; 2333 2334 ret = drmIoctl(bufmgr_gem->fd, 2335 DRM_IOCTL_I915_GEM_EXECBUFFER, 2336 &execbuf); 2337 if (ret != 0) { 2338 ret = -errno; 2339 if (errno == ENOSPC) { 2340 DBG("Execbuffer fails to pin. " 2341 "Estimate: %u. Actual: %u. 
Available: %u\n", 2342 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2343 bufmgr_gem-> 2344 exec_count), 2345 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2346 bufmgr_gem-> 2347 exec_count), 2348 (unsigned int)bufmgr_gem->gtt_size); 2349 } 2350 } 2351 drm_intel_update_buffer_offsets(bufmgr_gem); 2352 2353 if (bufmgr_gem->bufmgr.debug) 2354 drm_intel_gem_dump_validation_list(bufmgr_gem); 2355 2356 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2357 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2358 2359 bo_gem->idle = false; 2360 2361 /* Disconnect the buffer from the validate list */ 2362 bo_gem->validate_index = -1; 2363 bufmgr_gem->exec_bos[i] = NULL; 2364 } 2365 bufmgr_gem->exec_count = 0; 2366 pthread_mutex_unlock(&bufmgr_gem->lock); 2367 2368 return ret; 2369} 2370 2371static int 2372do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2373 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2374 int in_fence, int *out_fence, 2375 unsigned int flags) 2376{ 2377 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2378 struct drm_i915_gem_execbuffer2 execbuf; 2379 int ret = 0; 2380 int i; 2381 2382 if (to_bo_gem(bo)->has_error) 2383 return -ENOMEM; 2384 2385 switch (flags & 0x7) { 2386 default: 2387 return -EINVAL; 2388 case I915_EXEC_BLT: 2389 if (!bufmgr_gem->has_blt) 2390 return -EINVAL; 2391 break; 2392 case I915_EXEC_BSD: 2393 if (!bufmgr_gem->has_bsd) 2394 return -EINVAL; 2395 break; 2396 case I915_EXEC_VEBOX: 2397 if (!bufmgr_gem->has_vebox) 2398 return -EINVAL; 2399 break; 2400 case I915_EXEC_RENDER: 2401 case I915_EXEC_DEFAULT: 2402 break; 2403 } 2404 2405 pthread_mutex_lock(&bufmgr_gem->lock); 2406 /* Update indices and set up the validate list. */ 2407 drm_intel_gem_bo_process_reloc2(bo); 2408 2409 /* Add the batch buffer to the validation list. There are no relocations 2410 * pointing to it. 2411 */ 2412 drm_intel_add_validate_buffer2(bo, 0); 2413 2414 memclear(execbuf); 2415 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2416 execbuf.buffer_count = bufmgr_gem->exec_count; 2417 execbuf.batch_start_offset = 0; 2418 execbuf.batch_len = used; 2419 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2420 execbuf.num_cliprects = num_cliprects; 2421 execbuf.DR1 = 0; 2422 execbuf.DR4 = DR4; 2423 execbuf.flags = flags; 2424 if (ctx == NULL) 2425 i915_execbuffer2_set_context_id(execbuf, 0); 2426 else 2427 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2428 execbuf.rsvd2 = 0; 2429 if (in_fence != -1) { 2430 execbuf.rsvd2 = in_fence; 2431 execbuf.flags |= I915_EXEC_FENCE_IN; 2432 } 2433 if (out_fence != NULL) { 2434 *out_fence = -1; 2435 execbuf.flags |= I915_EXEC_FENCE_OUT; 2436 } 2437 2438 if (bufmgr_gem->no_exec) 2439 goto skip_execution; 2440 2441 ret = drmIoctl(bufmgr_gem->fd, 2442 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, 2443 &execbuf); 2444 if (ret != 0) { 2445 ret = -errno; 2446 if (ret == -ENOSPC) { 2447 DBG("Execbuffer fails to pin. " 2448 "Estimate: %u. Actual: %u. 
Available: %u\n", 2449 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2450 bufmgr_gem->exec_count), 2451 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2452 bufmgr_gem->exec_count), 2453 (unsigned int) bufmgr_gem->gtt_size); 2454 } 2455 } 2456 drm_intel_update_buffer_offsets2(bufmgr_gem); 2457 2458 if (ret == 0 && out_fence != NULL) 2459 *out_fence = execbuf.rsvd2 >> 32; 2460 2461skip_execution: 2462 if (bufmgr_gem->bufmgr.debug) 2463 drm_intel_gem_dump_validation_list(bufmgr_gem); 2464 2465 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2466 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2467 2468 bo_gem->idle = false; 2469 2470 /* Disconnect the buffer from the validate list */ 2471 bo_gem->validate_index = -1; 2472 bufmgr_gem->exec_bos[i] = NULL; 2473 } 2474 bufmgr_gem->exec_count = 0; 2475 pthread_mutex_unlock(&bufmgr_gem->lock); 2476 2477 return ret; 2478} 2479 2480static int 2481drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2482 drm_clip_rect_t *cliprects, int num_cliprects, 2483 int DR4) 2484{ 2485 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2486 -1, NULL, I915_EXEC_RENDER); 2487} 2488 2489static int 2490drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2491 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2492 unsigned int flags) 2493{ 2494 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2495 -1, NULL, flags); 2496} 2497 2498drm_public int 2499drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2500 int used, unsigned int flags) 2501{ 2502 return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); 2503} 2504 2505drm_public int 2506drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, 2507 drm_intel_context *ctx, 2508 int used, 2509 int in_fence, 2510 int *out_fence, 2511 unsigned int flags) 2512{ 2513 return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); 2514} 2515 2516static int 2517drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2518{ 2519 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2520 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2521 struct drm_i915_gem_pin pin; 2522 int ret; 2523 2524 memclear(pin); 2525 pin.handle = bo_gem->gem_handle; 2526 pin.alignment = alignment; 2527 2528 ret = drmIoctl(bufmgr_gem->fd, 2529 DRM_IOCTL_I915_GEM_PIN, 2530 &pin); 2531 if (ret != 0) 2532 return -errno; 2533 2534 bo->offset64 = pin.offset; 2535 bo->offset = pin.offset; 2536 return 0; 2537} 2538 2539static int 2540drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2541{ 2542 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2543 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2544 struct drm_i915_gem_unpin unpin; 2545 int ret; 2546 2547 memclear(unpin); 2548 unpin.handle = bo_gem->gem_handle; 2549 2550 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2551 if (ret != 0) 2552 return -errno; 2553 2554 return 0; 2555} 2556 2557static int 2558drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2559 uint32_t tiling_mode, 2560 uint32_t stride) 2561{ 2562 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2563 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2564 struct drm_i915_gem_set_tiling set_tiling; 2565 int ret; 2566 2567 if (bo_gem->global_name == 0 && 2568 tiling_mode == bo_gem->tiling_mode && 2569 stride == bo_gem->stride) 2570 return 0; 2571 2572 memset(&set_tiling, 0, sizeof(set_tiling)); 2573 do { 2574 /* set_tiling is slightly broken and overwrites the 2575 * input on 
the error path, so we have to open code 2576 * rmIoctl. 2577 */ 2578 set_tiling.handle = bo_gem->gem_handle; 2579 set_tiling.tiling_mode = tiling_mode; 2580 set_tiling.stride = stride; 2581 2582 ret = ioctl(bufmgr_gem->fd, 2583 DRM_IOCTL_I915_GEM_SET_TILING, 2584 &set_tiling); 2585 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2586 if (ret == -1) 2587 return -errno; 2588 2589 bo_gem->tiling_mode = set_tiling.tiling_mode; 2590 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2591 bo_gem->stride = set_tiling.stride; 2592 return 0; 2593} 2594 2595static int 2596drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2597 uint32_t stride) 2598{ 2599 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2600 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2601 int ret; 2602 2603 /* Tiling with userptr surfaces is not supported 2604 * on all hardware so refuse it for time being. 2605 */ 2606 if (bo_gem->is_userptr) 2607 return -EINVAL; 2608 2609 /* Linear buffers have no stride. By ensuring that we only ever use 2610 * stride 0 with linear buffers, we simplify our code. 2611 */ 2612 if (*tiling_mode == I915_TILING_NONE) 2613 stride = 0; 2614 2615 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2616 if (ret == 0) 2617 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2618 2619 *tiling_mode = bo_gem->tiling_mode; 2620 return ret; 2621} 2622 2623static int 2624drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2625 uint32_t * swizzle_mode) 2626{ 2627 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2628 2629 *tiling_mode = bo_gem->tiling_mode; 2630 *swizzle_mode = bo_gem->swizzle_mode; 2631 return 0; 2632} 2633 2634static int 2635drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) 2636{ 2637 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2638 2639 bo->offset64 = offset; 2640 bo->offset = offset; 2641 bo_gem->kflags |= EXEC_OBJECT_PINNED; 2642 2643 return 0; 2644} 2645 2646drm_public drm_intel_bo * 2647drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2648{ 2649 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2650 int ret; 2651 uint32_t handle; 2652 drm_intel_bo_gem *bo_gem; 2653 2654 pthread_mutex_lock(&bufmgr_gem->lock); 2655 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2656 if (ret) { 2657 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); 2658 pthread_mutex_unlock(&bufmgr_gem->lock); 2659 return NULL; 2660 } 2661 2662 /* 2663 * See if the kernel has already returned this buffer to us. Just as 2664 * for named buffers, we must not create two bo's pointing at the same 2665 * kernel object 2666 */ 2667 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 2668 &handle, sizeof(handle), bo_gem); 2669 if (bo_gem) { 2670 drm_intel_gem_bo_reference(&bo_gem->bo); 2671 goto out; 2672 } 2673 2674 bo_gem = calloc(1, sizeof(*bo_gem)); 2675 if (!bo_gem) 2676 goto out; 2677 2678 atomic_set(&bo_gem->refcount, 1); 2679 DRMINITLISTHEAD(&bo_gem->vma_list); 2680 2681 /* Determine size of bo. The fd-to-handle ioctl really should 2682 * return the size, but it doesn't. If we have kernel 3.12 or 2683 * later, we can lseek on the prime fd to get the size. Older 2684 * kernels will just fail, in which case we fall back to the 2685 * provided (estimated or guess size). 
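	 *
	 * For example (illustrative numbers only): with a 16 KiB object
	 * exported by another process, the lseek(SEEK_END) probe below
	 * reports 16384 and the caller-supplied size argument is ignored;
	 * on a pre-3.12 kernel the probe returns -1 and we trust the
	 * caller's (possibly estimated) size instead.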
*/ 2686 ret = lseek(prime_fd, 0, SEEK_END); 2687 if (ret != -1) 2688 bo_gem->bo.size = ret; 2689 else 2690 bo_gem->bo.size = size; 2691 2692 bo_gem->bo.handle = handle; 2693 bo_gem->bo.bufmgr = bufmgr; 2694 2695 bo_gem->gem_handle = handle; 2696 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 2697 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 2698 2699 bo_gem->name = "prime"; 2700 bo_gem->validate_index = -1; 2701 bo_gem->reloc_tree_fences = 0; 2702 bo_gem->used_as_reloc_target = false; 2703 bo_gem->has_error = false; 2704 bo_gem->reusable = false; 2705 2706 ret = get_tiling_mode(bufmgr_gem, handle, 2707 &bo_gem->tiling_mode, &bo_gem->swizzle_mode); 2708 if (ret) 2709 goto err; 2710 2711 /* XXX stride is unknown */ 2712 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2713 2714out: 2715 pthread_mutex_unlock(&bufmgr_gem->lock); 2716 return &bo_gem->bo; 2717 2718err: 2719 drm_intel_gem_bo_free(&bo_gem->bo); 2720 pthread_mutex_unlock(&bufmgr_gem->lock); 2721 return NULL; 2722} 2723 2724drm_public int 2725drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2726{ 2727 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2728 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2729 2730 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2731 DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) 2732 return -errno; 2733 2734 bo_gem->reusable = false; 2735 2736 return 0; 2737} 2738 2739static int 2740drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2741{ 2742 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2743 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2744 2745 if (!bo_gem->global_name) { 2746 struct drm_gem_flink flink; 2747 2748 memclear(flink); 2749 flink.handle = bo_gem->gem_handle; 2750 if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) 2751 return -errno; 2752 2753 pthread_mutex_lock(&bufmgr_gem->lock); 2754 if (!bo_gem->global_name) { 2755 bo_gem->global_name = flink.name; 2756 bo_gem->reusable = false; 2757 2758 HASH_ADD(name_hh, bufmgr_gem->name_table, 2759 global_name, sizeof(bo_gem->global_name), 2760 bo_gem); 2761 } 2762 pthread_mutex_unlock(&bufmgr_gem->lock); 2763 } 2764 2765 *name = bo_gem->global_name; 2766 return 0; 2767} 2768 2769/** 2770 * Enables unlimited caching of buffer objects for reuse. 2771 * 2772 * This is potentially very memory expensive, as the cache at each bucket 2773 * size is only bounded by how many buffers of that size we've managed to have 2774 * in flight at once. 2775 */ 2776drm_public void 2777drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2778{ 2779 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2780 2781 bufmgr_gem->bo_reuse = true; 2782} 2783 2784/** 2785 * Disables implicit synchronisation before executing the bo 2786 * 2787 * This will cause rendering corruption unless you correctly manage explicit 2788 * fences for all rendering involving this buffer - including use by others. 2789 * Disabling the implicit serialisation is only required if that serialisation 2790 * is too coarse (for example, you have split the buffer into many 2791 * non-overlapping regions and are sharing the whole buffer between concurrent 2792 * independent command streams). 2793 * 2794 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, 2795 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, 2796 * or subsequent execbufs involving the bo will generate EINVAL. 
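 *
 * A minimal usage sketch pairing the capability query with the flag
 * (error handling omitted):
 *
 *	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_intel_gem_bo_disable_implicit_sync(bo);
 *
 * and otherwise fall back to the kernel's implicit serialisation.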
2797 */ 2798drm_public void 2799drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) 2800{ 2801 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2802 2803 bo_gem->kflags |= EXEC_OBJECT_ASYNC; 2804} 2805 2806/** 2807 * Enables implicit synchronisation before executing the bo 2808 * 2809 * This is the default behaviour of the kernel, to wait upon prior writes 2810 * completing on the object before rendering with it, or to wait for prior 2811 * reads to complete before writing into the object. 2812 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling 2813 * the kernel never to insert a stall before using the object. Then this 2814 * function can be used to restore the implicit sync before subsequent 2815 * rendering. 2816 */ 2817drm_public void 2818drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) 2819{ 2820 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2821 2822 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; 2823} 2824 2825/** 2826 * Query whether the kernel supports disabling of its implicit synchronisation 2827 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() 2828 */ 2829drm_public int 2830drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) 2831{ 2832 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2833 2834 return bufmgr_gem->has_exec_async; 2835} 2836 2837/** 2838 * Enable use of fenced reloc type. 2839 * 2840 * New code should enable this to avoid unnecessary fence register 2841 * allocation. If this option is not enabled, all relocs will have fence 2842 * register allocated. 2843 */ 2844drm_public void 2845drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2846{ 2847 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2848 2849 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2850 bufmgr_gem->fenced_relocs = true; 2851} 2852 2853/** 2854 * Return the additional aperture space required by the tree of buffer objects 2855 * rooted at bo. 2856 */ 2857static int 2858drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2859{ 2860 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2861 int i; 2862 int total = 0; 2863 2864 if (bo == NULL || bo_gem->included_in_check_aperture) 2865 return 0; 2866 2867 total += bo->size; 2868 bo_gem->included_in_check_aperture = true; 2869 2870 for (i = 0; i < bo_gem->reloc_count; i++) 2871 total += 2872 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2873 reloc_target_info[i].bo); 2874 2875 return total; 2876} 2877 2878/** 2879 * Count the number of buffers in this list that need a fence reg 2880 * 2881 * If the count is greater than the number of available regs, we'll have 2882 * to ask the caller to resubmit a batch with fewer tiled buffers. 2883 * 2884 * This function over-counts if the same buffer is used multiple times. 2885 */ 2886static unsigned int 2887drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2888{ 2889 int i; 2890 unsigned int total = 0; 2891 2892 for (i = 0; i < count; i++) { 2893 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2894 2895 if (bo_gem == NULL) 2896 continue; 2897 2898 total += bo_gem->reloc_tree_fences; 2899 } 2900 return total; 2901} 2902 2903/** 2904 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2905 * for the next drm_intel_bufmgr_check_aperture_space() call. 
2906 */ 2907static void 2908drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2909{ 2910 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2911 int i; 2912 2913 if (bo == NULL || !bo_gem->included_in_check_aperture) 2914 return; 2915 2916 bo_gem->included_in_check_aperture = false; 2917 2918 for (i = 0; i < bo_gem->reloc_count; i++) 2919 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2920 reloc_target_info[i].bo); 2921} 2922 2923/** 2924 * Return a conservative estimate for the amount of aperture required 2925 * for a collection of buffers. This may double-count some buffers. 2926 */ 2927static unsigned int 2928drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2929{ 2930 int i; 2931 unsigned int total = 0; 2932 2933 for (i = 0; i < count; i++) { 2934 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2935 if (bo_gem != NULL) 2936 total += bo_gem->reloc_tree_size; 2937 } 2938 return total; 2939} 2940 2941/** 2942 * Return the amount of aperture needed for a collection of buffers. 2943 * This avoids double counting any buffers, at the cost of looking 2944 * at every buffer in the set. 2945 */ 2946static unsigned int 2947drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2948{ 2949 int i; 2950 unsigned int total = 0; 2951 2952 for (i = 0; i < count; i++) { 2953 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2954 /* For the first buffer object in the array, we get an 2955 * accurate count back for its reloc_tree size (since nothing 2956 * had been flagged as being counted yet). We can save that 2957 * value out as a more conservative reloc_tree_size that 2958 * avoids double-counting target buffers. Since the first 2959 * buffer happens to usually be the batch buffer in our 2960 * callers, this can pull us back from doing the tree 2961 * walk on every new batch emit. 2962 */ 2963 if (i == 0) { 2964 drm_intel_bo_gem *bo_gem = 2965 (drm_intel_bo_gem *) bo_array[i]; 2966 bo_gem->reloc_tree_size = total; 2967 } 2968 } 2969 2970 for (i = 0; i < count; i++) 2971 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2972 return total; 2973} 2974 2975/** 2976 * Return -1 if the batchbuffer should be flushed before attempting to 2977 * emit rendering referencing the buffers pointed to by bo_array. 2978 * 2979 * This is required because if we try to emit a batchbuffer with relocations 2980 * to a tree of buffers that won't simultaneously fit in the aperture, 2981 * the rendering will return an error at a point where the software is not 2982 * prepared to recover from it. 2983 * 2984 * However, we also want to emit the batchbuffer significantly before we reach 2985 * the limit, as a series of batchbuffers each of which references buffers 2986 * covering almost all of the aperture means that at each emit we end up 2987 * waiting to evict a buffer from the last rendering, and we get synchronous 2988 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2989 * get better parallelism. 
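 *
 * A typical (sketched) pattern is to collect every bo the pending batch
 * references, batch buffer included, and flush early on failure:
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0)
 *		flush the current batch and retry with a fresh one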
2990 */ 2991static int 2992drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2993{ 2994 drm_intel_bufmgr_gem *bufmgr_gem = 2995 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2996 unsigned int total = 0; 2997 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2998 int total_fences; 2999 3000 /* Check for fence reg constraints if necessary */ 3001 if (bufmgr_gem->available_fences) { 3002 total_fences = drm_intel_gem_total_fences(bo_array, count); 3003 if (total_fences > bufmgr_gem->available_fences) 3004 return -ENOSPC; 3005 } 3006 3007 total = drm_intel_gem_estimate_batch_space(bo_array, count); 3008 3009 if (total > threshold) 3010 total = drm_intel_gem_compute_batch_space(bo_array, count); 3011 3012 if (total > threshold) { 3013 DBG("check_space: overflowed available aperture, " 3014 "%dkb vs %dkb\n", 3015 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 3016 return -ENOSPC; 3017 } else { 3018 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 3019 (int)bufmgr_gem->gtt_size / 1024); 3020 return 0; 3021 } 3022} 3023 3024/* 3025 * Disable buffer reuse for objects which are shared with the kernel 3026 * as scanout buffers 3027 */ 3028static int 3029drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 3030{ 3031 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3032 3033 bo_gem->reusable = false; 3034 return 0; 3035} 3036 3037static int 3038drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 3039{ 3040 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3041 3042 return bo_gem->reusable; 3043} 3044 3045static int 3046_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3047{ 3048 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3049 int i; 3050 3051 for (i = 0; i < bo_gem->reloc_count; i++) { 3052 if (bo_gem->reloc_target_info[i].bo == target_bo) 3053 return 1; 3054 if (bo == bo_gem->reloc_target_info[i].bo) 3055 continue; 3056 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 3057 target_bo)) 3058 return 1; 3059 } 3060 3061 for (i = 0; i< bo_gem->softpin_target_count; i++) { 3062 if (bo_gem->softpin_target[i] == target_bo) 3063 return 1; 3064 if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) 3065 return 1; 3066 } 3067 3068 return 0; 3069} 3070 3071/** Return true if target_bo is referenced by bo's relocation tree. */ 3072static int 3073drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3074{ 3075 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 3076 3077 if (bo == NULL || target_bo == NULL) 3078 return 0; 3079 if (target_bo_gem->used_as_reloc_target) 3080 return _drm_intel_gem_bo_references(bo, target_bo); 3081 return 0; 3082} 3083 3084static void 3085add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 3086{ 3087 unsigned int i = bufmgr_gem->num_buckets; 3088 3089 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 3090 3091 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 3092 bufmgr_gem->cache_bucket[i].size = size; 3093 bufmgr_gem->num_buckets++; 3094} 3095 3096static void 3097init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3098{ 3099 unsigned long size, cache_max_size = 64 * 1024 * 1024; 3100 3101 /* OK, so power of two buckets was too wasteful of memory. 3102 * Give 3 other sizes between each power of two, to hopefully 3103 * cover things accurately enough. 
(The alternative is 3104 * probably to just go for exact matching of sizes, and assume 3105 * that for things like composited window resize the tiled 3106 * width/height alignment and rounding of sizes to pages will 3107 * get us useful cache hit rates anyway) 3108 */ 3109 add_bucket(bufmgr_gem, 4096); 3110 add_bucket(bufmgr_gem, 4096 * 2); 3111 add_bucket(bufmgr_gem, 4096 * 3); 3112 3113 /* Initialize the linked lists for BO reuse cache. */ 3114 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3115 add_bucket(bufmgr_gem, size); 3116 3117 add_bucket(bufmgr_gem, size + size * 1 / 4); 3118 add_bucket(bufmgr_gem, size + size * 2 / 4); 3119 add_bucket(bufmgr_gem, size + size * 3 / 4); 3120 } 3121} 3122 3123drm_public void 3124drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 3125{ 3126 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3127 3128 bufmgr_gem->vma_max = limit; 3129 3130 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 3131} 3132 3133static int 3134parse_devid_override(const char *devid_override) 3135{ 3136 static const struct { 3137 const char *name; 3138 int pci_id; 3139 } name_map[] = { 3140 { "brw", PCI_CHIP_I965_GM }, 3141 { "g4x", PCI_CHIP_GM45_GM }, 3142 { "ilk", PCI_CHIP_ILD_G }, 3143 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, 3144 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, 3145 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, 3146 { "byt", PCI_CHIP_VALLEYVIEW_3 }, 3147 { "bdw", 0x1620 | BDW_ULX }, 3148 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, 3149 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, 3150 }; 3151 unsigned int i; 3152 3153 for (i = 0; i < ARRAY_SIZE(name_map); i++) { 3154 if (!strcmp(name_map[i].name, devid_override)) 3155 return name_map[i].pci_id; 3156 } 3157 3158 return strtod(devid_override, NULL); 3159} 3160 3161/** 3162 * Get the PCI ID for the device. This can be overridden by setting the 3163 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 3164 */ 3165static int 3166get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 3167{ 3168 char *devid_override; 3169 int devid = 0; 3170 int ret; 3171 drm_i915_getparam_t gp; 3172 3173 if (geteuid() == getuid()) { 3174 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 3175 if (devid_override) { 3176 bufmgr_gem->no_exec = true; 3177 return parse_devid_override(devid_override); 3178 } 3179 } 3180 3181 memclear(gp); 3182 gp.param = I915_PARAM_CHIPSET_ID; 3183 gp.value = &devid; 3184 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3185 if (ret) { 3186 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 3187 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 3188 } 3189 return devid; 3190} 3191 3192drm_public int 3193drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 3194{ 3195 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3196 3197 return bufmgr_gem->pci_device; 3198} 3199 3200/** 3201 * Sets the AUB filename. 3202 * 3203 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 3204 * for it to have any effect. 3205 */ 3206drm_public void 3207drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 3208 const char *filename) 3209{ 3210} 3211 3212/** 3213 * Sets up AUB dumping. 3214 * 3215 * This is a trace file format that can be used with the simulator. 3216 * Packets are emitted in a format somewhat like GPU command packets. 3217 * You can set up a GTT and upload your objects into the referenced 3218 * space, then send off batchbuffers and get BMPs out the other end. 
3219 */ 3220drm_public void 3221drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 3222{ 3223 fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" 3224 "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" 3225 "then run (for example)\n\n" 3226 "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" 3227 "See the intel_aubdump man page for more details.\n"); 3228} 3229 3230drm_public drm_intel_context * 3231drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3232{ 3233 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3234 struct drm_i915_gem_context_create create; 3235 drm_intel_context *context = NULL; 3236 int ret; 3237 3238 context = calloc(1, sizeof(*context)); 3239 if (!context) 3240 return NULL; 3241 3242 memclear(create); 3243 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3244 if (ret != 0) { 3245 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3246 strerror(errno)); 3247 free(context); 3248 return NULL; 3249 } 3250 3251 context->ctx_id = create.ctx_id; 3252 context->bufmgr = bufmgr; 3253 3254 return context; 3255} 3256 3257drm_public int 3258drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) 3259{ 3260 if (ctx == NULL) 3261 return -EINVAL; 3262 3263 *ctx_id = ctx->ctx_id; 3264 3265 return 0; 3266} 3267 3268drm_public void 3269drm_intel_gem_context_destroy(drm_intel_context *ctx) 3270{ 3271 drm_intel_bufmgr_gem *bufmgr_gem; 3272 struct drm_i915_gem_context_destroy destroy; 3273 int ret; 3274 3275 if (ctx == NULL) 3276 return; 3277 3278 memclear(destroy); 3279 3280 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3281 destroy.ctx_id = ctx->ctx_id; 3282 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3283 &destroy); 3284 if (ret != 0) 3285 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3286 strerror(errno)); 3287 3288 free(ctx); 3289} 3290 3291drm_public int 3292drm_intel_get_reset_stats(drm_intel_context *ctx, 3293 uint32_t *reset_count, 3294 uint32_t *active, 3295 uint32_t *pending) 3296{ 3297 drm_intel_bufmgr_gem *bufmgr_gem; 3298 struct drm_i915_reset_stats stats; 3299 int ret; 3300 3301 if (ctx == NULL) 3302 return -EINVAL; 3303 3304 memclear(stats); 3305 3306 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3307 stats.ctx_id = ctx->ctx_id; 3308 ret = drmIoctl(bufmgr_gem->fd, 3309 DRM_IOCTL_I915_GET_RESET_STATS, 3310 &stats); 3311 if (ret == 0) { 3312 if (reset_count != NULL) 3313 *reset_count = stats.reset_count; 3314 3315 if (active != NULL) 3316 *active = stats.batch_active; 3317 3318 if (pending != NULL) 3319 *pending = stats.batch_pending; 3320 } 3321 3322 return ret; 3323} 3324 3325drm_public int 3326drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3327 uint32_t offset, 3328 uint64_t *result) 3329{ 3330 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3331 struct drm_i915_reg_read reg_read; 3332 int ret; 3333 3334 memclear(reg_read); 3335 reg_read.offset = offset; 3336 3337 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 3338 3339 *result = reg_read.val; 3340 return ret; 3341} 3342 3343drm_public int 3344drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3345{ 3346 drm_i915_getparam_t gp; 3347 int ret; 3348 3349 memclear(gp); 3350 gp.value = (int*)subslice_total; 3351 gp.param = I915_PARAM_SUBSLICE_TOTAL; 3352 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3353 if (ret) 3354 return -errno; 3355 3356 return 0; 3357} 3358 3359drm_public int 
3360drm_intel_get_eu_total(int fd, unsigned int *eu_total) 3361{ 3362 drm_i915_getparam_t gp; 3363 int ret; 3364 3365 memclear(gp); 3366 gp.value = (int*)eu_total; 3367 gp.param = I915_PARAM_EU_TOTAL; 3368 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3369 if (ret) 3370 return -errno; 3371 3372 return 0; 3373} 3374 3375drm_public int 3376drm_intel_get_pooled_eu(int fd) 3377{ 3378 drm_i915_getparam_t gp; 3379 int ret = -1; 3380 3381 memclear(gp); 3382 gp.param = I915_PARAM_HAS_POOLED_EU; 3383 gp.value = &ret; 3384 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3385 return -errno; 3386 3387 return ret; 3388} 3389 3390drm_public int 3391drm_intel_get_min_eu_in_pool(int fd) 3392{ 3393 drm_i915_getparam_t gp; 3394 int ret = -1; 3395 3396 memclear(gp); 3397 gp.param = I915_PARAM_MIN_EU_IN_POOL; 3398 gp.value = &ret; 3399 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3400 return -errno; 3401 3402 return ret; 3403} 3404 3405/** 3406 * Annotate the given bo for use in aub dumping. 3407 * 3408 * \param annotations is an array of drm_intel_aub_annotation objects 3409 * describing the type of data in various sections of the bo. Each 3410 * element of the array specifies the type and subtype of a section of 3411 * the bo, and the past-the-end offset of that section. The elements 3412 * of \c annotations must be sorted so that ending_offset is 3413 * increasing. 3414 * 3415 * \param count is the number of elements in the \c annotations array. 3416 * If \c count is zero, then \c annotations will not be dereferenced. 3417 * 3418 * Annotations are copied into a private data structure, so caller may 3419 * re-use the memory pointed to by \c annotations after the call 3420 * returns. 3421 * 3422 * Annotations are stored for the lifetime of the bo; to reset to the 3423 * default state (no annotations), call this function with a \c count 3424 * of zero. 
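 *
 * For example, two entries with ending_offset 4096 and 8192 (in that
 * order) would describe the first and second 4KiB of the bo.  Note that
 * with AUB dumping removed from libdrm the body below is empty, so the
 * annotations are simply discarded.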
3425 */ 3426drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3427 drm_intel_aub_annotation *annotations, 3428 unsigned count) 3429{ 3430} 3431 3432static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; 3433static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; 3434 3435static drm_intel_bufmgr_gem * 3436drm_intel_bufmgr_gem_find(int fd) 3437{ 3438 drm_intel_bufmgr_gem *bufmgr_gem; 3439 3440 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { 3441 if (bufmgr_gem->fd == fd) { 3442 atomic_inc(&bufmgr_gem->refcount); 3443 return bufmgr_gem; 3444 } 3445 } 3446 3447 return NULL; 3448} 3449 3450static void 3451drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) 3452{ 3453 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3454 3455 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { 3456 pthread_mutex_lock(&bufmgr_list_mutex); 3457 3458 if (atomic_dec_and_test(&bufmgr_gem->refcount)) { 3459 DRMLISTDEL(&bufmgr_gem->managers); 3460 drm_intel_bufmgr_gem_destroy(bufmgr); 3461 } 3462 3463 pthread_mutex_unlock(&bufmgr_list_mutex); 3464 } 3465} 3466 3467drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) 3468{ 3469 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3470 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3471 3472 if (bo_gem->gtt_virtual) 3473 return bo_gem->gtt_virtual; 3474 3475 if (bo_gem->is_userptr) 3476 return NULL; 3477 3478 pthread_mutex_lock(&bufmgr_gem->lock); 3479 if (bo_gem->gtt_virtual == NULL) { 3480 struct drm_i915_gem_mmap_gtt mmap_arg; 3481 void *ptr; 3482 3483 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 3484 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3485 3486 if (bo_gem->map_count++ == 0) 3487 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3488 3489 memclear(mmap_arg); 3490 mmap_arg.handle = bo_gem->gem_handle; 3491 3492 /* Get the fake offset back... 
*/ 3493 ptr = MAP_FAILED; 3494 if (drmIoctl(bufmgr_gem->fd, 3495 DRM_IOCTL_I915_GEM_MMAP_GTT, 3496 &mmap_arg) == 0) { 3497 /* and mmap it */ 3498 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, 3499 MAP_SHARED, bufmgr_gem->fd, 3500 mmap_arg.offset); 3501 } 3502 if (ptr == MAP_FAILED) { 3503 if (--bo_gem->map_count == 0) 3504 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3505 ptr = NULL; 3506 } 3507 3508 bo_gem->gtt_virtual = ptr; 3509 } 3510 pthread_mutex_unlock(&bufmgr_gem->lock); 3511 3512 return bo_gem->gtt_virtual; 3513} 3514 3515drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) 3516{ 3517 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3518 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3519 3520 if (bo_gem->mem_virtual) 3521 return bo_gem->mem_virtual; 3522 3523 if (bo_gem->is_userptr) { 3524 /* Return the same user ptr */ 3525 return bo_gem->user_virtual; 3526 } 3527 3528 pthread_mutex_lock(&bufmgr_gem->lock); 3529 if (!bo_gem->mem_virtual) { 3530 struct drm_i915_gem_mmap mmap_arg; 3531 3532 if (bo_gem->map_count++ == 0) 3533 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3534 3535 DBG("bo_map: %d (%s), map_count=%d\n", 3536 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3537 3538 memclear(mmap_arg); 3539 mmap_arg.handle = bo_gem->gem_handle; 3540 mmap_arg.size = bo->size; 3541 if (drmIoctl(bufmgr_gem->fd, 3542 DRM_IOCTL_I915_GEM_MMAP, 3543 &mmap_arg)) { 3544 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3545 __FILE__, __LINE__, bo_gem->gem_handle, 3546 bo_gem->name, strerror(errno)); 3547 if (--bo_gem->map_count == 0) 3548 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3549 } else { 3550 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3551 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3552 } 3553 } 3554 pthread_mutex_unlock(&bufmgr_gem->lock); 3555 3556 return bo_gem->mem_virtual; 3557} 3558 3559drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) 3560{ 3561 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3562 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3563 3564 if (bo_gem->wc_virtual) 3565 return bo_gem->wc_virtual; 3566 3567 if (bo_gem->is_userptr) 3568 return NULL; 3569 3570 pthread_mutex_lock(&bufmgr_gem->lock); 3571 if (!bo_gem->wc_virtual) { 3572 struct drm_i915_gem_mmap mmap_arg; 3573 3574 if (bo_gem->map_count++ == 0) 3575 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3576 3577 DBG("bo_map: %d (%s), map_count=%d\n", 3578 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3579 3580 memclear(mmap_arg); 3581 mmap_arg.handle = bo_gem->gem_handle; 3582 mmap_arg.size = bo->size; 3583 mmap_arg.flags = I915_MMAP_WC; 3584 if (drmIoctl(bufmgr_gem->fd, 3585 DRM_IOCTL_I915_GEM_MMAP, 3586 &mmap_arg)) { 3587 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3588 __FILE__, __LINE__, bo_gem->gem_handle, 3589 bo_gem->name, strerror(errno)); 3590 if (--bo_gem->map_count == 0) 3591 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3592 } else { 3593 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3594 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3595 } 3596 } 3597 pthread_mutex_unlock(&bufmgr_gem->lock); 3598 3599 return bo_gem->wc_virtual; 3600} 3601 3602/** 3603 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3604 * and manage map buffer objections. 3605 * 3606 * \param fd File descriptor of the opened DRM device. 
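 *
 * A minimal setup sketch (device path and batch size are examples only):
 *
 *	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);
 *	if (bufmgr != NULL)
 *		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	...
 *	drm_intel_bufmgr_destroy(bufmgr);
 *
 * \param batch_size Batchbuffer size in bytes, used only to derive
 * max_relocs below.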
3607 */ 3608drm_public drm_intel_bufmgr * 3609drm_intel_bufmgr_gem_init(int fd, int batch_size) 3610{ 3611 drm_intel_bufmgr_gem *bufmgr_gem; 3612 struct drm_i915_gem_get_aperture aperture; 3613 drm_i915_getparam_t gp; 3614 int ret, tmp; 3615 bool exec2 = false; 3616 3617 pthread_mutex_lock(&bufmgr_list_mutex); 3618 3619 bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3620 if (bufmgr_gem) 3621 goto exit; 3622 3623 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3624 if (bufmgr_gem == NULL) 3625 goto exit; 3626 3627 bufmgr_gem->fd = fd; 3628 atomic_set(&bufmgr_gem->refcount, 1); 3629 3630 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3631 free(bufmgr_gem); 3632 bufmgr_gem = NULL; 3633 goto exit; 3634 } 3635 3636 memclear(aperture); 3637 ret = drmIoctl(bufmgr_gem->fd, 3638 DRM_IOCTL_I915_GEM_GET_APERTURE, 3639 &aperture); 3640 3641 if (ret == 0) 3642 bufmgr_gem->gtt_size = aperture.aper_available_size; 3643 else { 3644 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3645 strerror(errno)); 3646 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3647 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3648 "May lead to reduced performance or incorrect " 3649 "rendering.\n", 3650 (int)bufmgr_gem->gtt_size / 1024); 3651 } 3652 3653 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3654 3655 if (IS_GEN2(bufmgr_gem->pci_device)) 3656 bufmgr_gem->gen = 2; 3657 else if (IS_GEN3(bufmgr_gem->pci_device)) 3658 bufmgr_gem->gen = 3; 3659 else if (IS_GEN4(bufmgr_gem->pci_device)) 3660 bufmgr_gem->gen = 4; 3661 else if (IS_GEN5(bufmgr_gem->pci_device)) 3662 bufmgr_gem->gen = 5; 3663 else if (IS_GEN6(bufmgr_gem->pci_device)) 3664 bufmgr_gem->gen = 6; 3665 else if (IS_GEN7(bufmgr_gem->pci_device)) 3666 bufmgr_gem->gen = 7; 3667 else if (IS_GEN8(bufmgr_gem->pci_device)) 3668 bufmgr_gem->gen = 8; 3669 else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) { 3670 free(bufmgr_gem); 3671 bufmgr_gem = NULL; 3672 goto exit; 3673 } 3674 3675 if (IS_GEN3(bufmgr_gem->pci_device) && 3676 bufmgr_gem->gtt_size > 256*1024*1024) { 3677 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3678 * be used for tiled blits. To simplify the accounting, just 3679 * subtract the unmappable part (fixed to 256MB on all known 3680 * gen3 devices) if the kernel advertises it. 
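	 *
	 * For example, a gen3 part reporting a 512MB GTT is treated here
	 * as having 256MB of usable (mappable) aperture.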
*/ 3681 bufmgr_gem->gtt_size -= 256*1024*1024; 3682 } 3683 3684 memclear(gp); 3685 gp.value = &tmp; 3686 3687 gp.param = I915_PARAM_HAS_EXECBUF2; 3688 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3689 if (!ret) 3690 exec2 = true; 3691 3692 gp.param = I915_PARAM_HAS_BSD; 3693 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3694 bufmgr_gem->has_bsd = ret == 0; 3695 3696 gp.param = I915_PARAM_HAS_BLT; 3697 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3698 bufmgr_gem->has_blt = ret == 0; 3699 3700 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3701 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3702 bufmgr_gem->has_relaxed_fencing = ret == 0; 3703 3704 gp.param = I915_PARAM_HAS_EXEC_ASYNC; 3705 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3706 bufmgr_gem->has_exec_async = ret == 0; 3707 3708 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; 3709 3710 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3711 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3712 bufmgr_gem->has_wait_timeout = ret == 0; 3713 3714 gp.param = I915_PARAM_HAS_LLC; 3715 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3716 if (ret != 0) { 3717 /* Kernel does not supports HAS_LLC query, fallback to GPU 3718 * generation detection and assume that we have LLC on GEN6/7 3719 */ 3720 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3721 IS_GEN7(bufmgr_gem->pci_device)); 3722 } else 3723 bufmgr_gem->has_llc = *gp.value; 3724 3725 gp.param = I915_PARAM_HAS_VEBOX; 3726 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3727 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3728 3729 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; 3730 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3731 if (ret == 0 && *gp.value > 0) 3732 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; 3733 3734 if (bufmgr_gem->gen < 4) { 3735 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3736 gp.value = &bufmgr_gem->available_fences; 3737 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3738 if (ret) { 3739 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3740 errno); 3741 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3742 *gp.value); 3743 bufmgr_gem->available_fences = 0; 3744 } else { 3745 /* XXX The kernel reports the total number of fences, 3746 * including any that may be pinned. 3747 * 3748 * We presume that there will be at least one pinned 3749 * fence for the scanout buffer, but there may be more 3750 * than one scanout and the user may be manually 3751 * pinning buffers. Let's move to execbuffer2 and 3752 * thereby forget the insanity of using fences... 3753 */ 3754 bufmgr_gem->available_fences -= 2; 3755 if (bufmgr_gem->available_fences < 0) 3756 bufmgr_gem->available_fences = 0; 3757 } 3758 } 3759 3760 if (bufmgr_gem->gen >= 8) { 3761 gp.param = I915_PARAM_HAS_ALIASING_PPGTT; 3762 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3763 if (ret == 0 && *gp.value == 3) 3764 bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; 3765 } 3766 3767 /* Let's go with one relocation per every 2 dwords (but round down a bit 3768 * since a power of two will mean an extra page allocation for the reloc 3769 * buffer). 3770 * 3771 * Every 4 was too few for the blender benchmark. 
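	 *
	 * For example, the 16384-byte batch size used in the sketch above
	 * yields 16384 / 4 / 2 - 2 = 2046 relocations per buffer, just
	 * under the 2048 a full power of two would give.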
3772 */ 3773 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3774 3775 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3776 bufmgr_gem->bufmgr.bo_alloc_for_render = 3777 drm_intel_gem_bo_alloc_for_render; 3778 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3779 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3780 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3781 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3782 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3783 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3784 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3785 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3786 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3787 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3788 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3789 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3790 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3791 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3792 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3793 /* Use the new one if available */ 3794 if (exec2) { 3795 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3796 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3797 } else 3798 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3799 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3800 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3801 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 3802 bufmgr_gem->bufmgr.debug = 0; 3803 bufmgr_gem->bufmgr.check_aperture_space = 3804 drm_intel_gem_check_aperture_space; 3805 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3806 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3807 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3808 drm_intel_gem_get_pipe_from_crtc_id; 3809 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3810 3811 init_cache_buckets(bufmgr_gem); 3812 3813 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3814 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3815 3816 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3817 3818exit: 3819 pthread_mutex_unlock(&bufmgr_list_mutex); 3820 3821 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 3822} 3823