intel_bufmgr_gem.c revision aaba2545
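A minimal, illustrative sketch of how a client typically drives this buffer manager through the public libdrm_intel entry points implemented in this file (drm_intel_bufmgr_gem_init, drm_intel_bo_alloc, drm_intel_bo_map, and friends); the device node path and the 16 KiB batch size are assumptions for the example, not taken from this file:

/* Illustrative usage only -- not part of intel_bufmgr_gem.c.  The
 * device node path and the sizes below are assumed values.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include "intel_bufmgr.h"

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* assumed DRM node */
	if (fd < 0)
		return 1;

	/* Create the GEM bufmgr this file implements; 16 KiB batch size. */
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
	if (bufmgr == NULL) {
		close(fd);
		return 1;
	}
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);	/* opt in to the BO cache */

	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 4096);
	if (bo != NULL) {
		if (drm_intel_bo_map(bo, 1) == 0) {	/* writable CPU map */
			memset(bo->virtual, 0, 4096);
			drm_intel_bo_unmap(bo);
		}
		drm_intel_bo_unreference(bo);	/* may return it to the cache */
	}

	drm_intel_bufmgr_destroy(bufmgr);
	close(fd);
	return 0;
}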
1/************************************************************************** 2 * 3 * Copyright © 2007 Red Hat Inc. 4 * Copyright © 2007 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <stddef.h> 51#include <sys/ioctl.h> 52#include <sys/mman.h> 53#include <sys/stat.h> 54#include <sys/types.h> 55 56#include "errno.h" 57#include "libdrm_lists.h" 58#include "intel_bufmgr.h" 59#include "intel_bufmgr_priv.h" 60#include "intel_chipset.h" 61#include "string.h" 62 63#include "i915_drm.h" 64 65#define DBG(...) 
do { \ 66 if (bufmgr_gem->bufmgr.debug) \ 67 fprintf(stderr, __VA_ARGS__); \ 68} while (0) 69 70#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 71 72typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 73 74struct drm_intel_gem_bo_bucket { 75 drmMMListHead head; 76 unsigned long size; 77}; 78 79typedef struct _drm_intel_bufmgr_gem { 80 drm_intel_bufmgr bufmgr; 81 82 int fd; 83 84 int max_relocs; 85 86 pthread_mutex_t lock; 87 88 struct drm_i915_gem_exec_object *exec_objects; 89 struct drm_i915_gem_exec_object2 *exec2_objects; 90 drm_intel_bo **exec_bos; 91 int exec_size; 92 int exec_count; 93 94 /** Array of lists of cached gem objects of power-of-two sizes */ 95 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 96 int num_buckets; 97 98 uint64_t gtt_size; 99 int available_fences; 100 int pci_device; 101 int gen; 102 char bo_reuse; 103 char fenced_relocs; 104} drm_intel_bufmgr_gem; 105 106#define DRM_INTEL_RELOC_FENCE (1<<0) 107 108typedef struct _drm_intel_reloc_target_info { 109 drm_intel_bo *bo; 110 int flags; 111} drm_intel_reloc_target; 112 113struct _drm_intel_bo_gem { 114 drm_intel_bo bo; 115 116 atomic_t refcount; 117 uint32_t gem_handle; 118 const char *name; 119 120 /** 121 * Kernel-assigned global name for this object 122 */ 123 unsigned int global_name; 124 125 /** 126 * Index of the buffer within the validation list while preparing a 127 * batchbuffer execution. 128 */ 129 int validate_index; 130 131 /** 132 * Current tiling mode 133 */ 134 uint32_t tiling_mode; 135 uint32_t swizzle_mode; 136 137 time_t free_time; 138 139 /** Array passed to the DRM containing relocation information. */ 140 struct drm_i915_gem_relocation_entry *relocs; 141 /** 142 * Array of info structs corresponding to relocs[i].target_handle etc 143 */ 144 drm_intel_reloc_target *reloc_target_info; 145 /** Number of entries in relocs */ 146 int reloc_count; 147 /** Mapped address for the buffer, saved across map/unmap cycles */ 148 void *mem_virtual; 149 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 150 void *gtt_virtual; 151 152 /** BO cache list */ 153 drmMMListHead head; 154 155 /** 156 * Boolean of whether this BO and its children have been included in 157 * the current drm_intel_bufmgr_check_aperture_space() total. 158 */ 159 char included_in_check_aperture; 160 161 /** 162 * Boolean of whether this buffer has been used as a relocation 163 * target and had its size accounted for, and thus can't have any 164 * further relocations added to it. 165 */ 166 char used_as_reloc_target; 167 168 /** 169 * Boolean of whether we have encountered an error whilst building the relocation tree. 170 */ 171 char has_error; 172 173 /** 174 * Boolean of whether this buffer can be re-used 175 */ 176 char reusable; 177 178 /** 179 * Size in bytes of this buffer and its relocation descendants. 180 * 181 * Used to avoid costly tree walking in 182 * drm_intel_bufmgr_check_aperture in the common case. 183 */ 184 int reloc_tree_size; 185 186 /** 187 * Number of potential fence registers required by this buffer and its 188 * relocations. 
189 */ 190 int reloc_tree_fences; 191}; 192 193static unsigned int 194drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 195 196static unsigned int 197drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 198 199static int 200drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 201 uint32_t * swizzle_mode); 202 203static int 204drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 205 uint32_t stride); 206 207static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 208 time_t time); 209 210static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 211 212static void drm_intel_gem_bo_free(drm_intel_bo *bo); 213 214static unsigned long 215drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 216 uint32_t *tiling_mode) 217{ 218 unsigned long min_size, max_size; 219 unsigned long i; 220 221 if (*tiling_mode == I915_TILING_NONE) 222 return size; 223 224 /* 965+ just need multiples of page size for tiling */ 225 if (bufmgr_gem->gen >= 4) 226 return ROUND_UP_TO(size, 4096); 227 228 /* Older chips need powers of two, of at least 512k or 1M */ 229 if (bufmgr_gem->gen == 3) { 230 min_size = 1024*1024; 231 max_size = 128*1024*1024; 232 } else { 233 min_size = 512*1024; 234 max_size = 64*1024*1024; 235 } 236 237 if (size > max_size) { 238 *tiling_mode = I915_TILING_NONE; 239 return size; 240 } 241 242 for (i = min_size; i < size; i <<= 1) 243 ; 244 245 return i; 246} 247 248/* 249 * Round a given pitch up to the minimum required for X tiling on a 250 * given chip. We use 512 as the minimum to allow for a later tiling 251 * change. 252 */ 253static unsigned long 254drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 255 unsigned long pitch, uint32_t tiling_mode) 256{ 257 unsigned long tile_width; 258 unsigned long i; 259 260 /* If untiled, then just align it so that we can do rendering 261 * to it with the 3D engine. 
262 */ 263 if (tiling_mode == I915_TILING_NONE) 264 return ALIGN(pitch, 64); 265 266 if (tiling_mode == I915_TILING_X) 267 tile_width = 512; 268 else 269 tile_width = 128; 270 271 /* 965 is flexible */ 272 if (bufmgr_gem->gen >= 4) 273 return ROUND_UP_TO(pitch, tile_width); 274 275 /* Pre-965 needs power of two tile width */ 276 for (i = tile_width; i < pitch; i <<= 1) 277 ; 278 279 return i; 280} 281 282static struct drm_intel_gem_bo_bucket * 283drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 284 unsigned long size) 285{ 286 int i; 287 288 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 289 struct drm_intel_gem_bo_bucket *bucket = 290 &bufmgr_gem->cache_bucket[i]; 291 if (bucket->size >= size) { 292 return bucket; 293 } 294 } 295 296 return NULL; 297} 298 299static void 300drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 301{ 302 int i, j; 303 304 for (i = 0; i < bufmgr_gem->exec_count; i++) { 305 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 306 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 307 308 if (bo_gem->relocs == NULL) { 309 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 310 bo_gem->name); 311 continue; 312 } 313 314 for (j = 0; j < bo_gem->reloc_count; j++) { 315 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 316 drm_intel_bo_gem *target_gem = 317 (drm_intel_bo_gem *) target_bo; 318 319 DBG("%2d: %d (%s)@0x%08llx -> " 320 "%d (%s)@0x%08lx + 0x%08x\n", 321 i, 322 bo_gem->gem_handle, bo_gem->name, 323 (unsigned long long)bo_gem->relocs[j].offset, 324 target_gem->gem_handle, 325 target_gem->name, 326 target_bo->offset, 327 bo_gem->relocs[j].delta); 328 } 329 } 330} 331 332static inline void 333drm_intel_gem_bo_reference(drm_intel_bo *bo) 334{ 335 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 336 337 assert(atomic_read(&bo_gem->refcount) > 0); 338 atomic_inc(&bo_gem->refcount); 339} 340 341/** 342 * Adds the given buffer to the list of buffers to be validated (moved into the 343 * appropriate memory type) with the next batch submission. 344 * 345 * If a buffer is validated multiple times in a batch submission, it ends up 346 * with the intersection of the memory type flags and the union of the 347 * access flags. 348 */ 349static void 350drm_intel_add_validate_buffer(drm_intel_bo *bo) 351{ 352 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 353 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 354 int index; 355 356 if (bo_gem->validate_index != -1) 357 return; 358 359 /* Extend the array of validation entries as necessary. 
*/ 360 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 361 int new_size = bufmgr_gem->exec_size * 2; 362 363 if (new_size == 0) 364 new_size = 5; 365 366 bufmgr_gem->exec_objects = 367 realloc(bufmgr_gem->exec_objects, 368 sizeof(*bufmgr_gem->exec_objects) * new_size); 369 bufmgr_gem->exec_bos = 370 realloc(bufmgr_gem->exec_bos, 371 sizeof(*bufmgr_gem->exec_bos) * new_size); 372 bufmgr_gem->exec_size = new_size; 373 } 374 375 index = bufmgr_gem->exec_count; 376 bo_gem->validate_index = index; 377 /* Fill in array entry */ 378 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 379 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 380 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 381 bufmgr_gem->exec_objects[index].alignment = 0; 382 bufmgr_gem->exec_objects[index].offset = 0; 383 bufmgr_gem->exec_bos[index] = bo; 384 bufmgr_gem->exec_count++; 385} 386 387static void 388drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 389{ 390 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 391 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 392 int index; 393 394 if (bo_gem->validate_index != -1) { 395 if (need_fence) 396 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 397 EXEC_OBJECT_NEEDS_FENCE; 398 return; 399 } 400 401 /* Extend the array of validation entries as necessary. */ 402 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 403 int new_size = bufmgr_gem->exec_size * 2; 404 405 if (new_size == 0) 406 new_size = 5; 407 408 bufmgr_gem->exec2_objects = 409 realloc(bufmgr_gem->exec2_objects, 410 sizeof(*bufmgr_gem->exec2_objects) * new_size); 411 bufmgr_gem->exec_bos = 412 realloc(bufmgr_gem->exec_bos, 413 sizeof(*bufmgr_gem->exec_bos) * new_size); 414 bufmgr_gem->exec_size = new_size; 415 } 416 417 index = bufmgr_gem->exec_count; 418 bo_gem->validate_index = index; 419 /* Fill in array entry */ 420 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 421 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 422 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 423 bufmgr_gem->exec2_objects[index].alignment = 0; 424 bufmgr_gem->exec2_objects[index].offset = 0; 425 bufmgr_gem->exec_bos[index] = bo; 426 bufmgr_gem->exec2_objects[index].flags = 0; 427 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 428 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 429 if (need_fence) { 430 bufmgr_gem->exec2_objects[index].flags |= 431 EXEC_OBJECT_NEEDS_FENCE; 432 } 433 bufmgr_gem->exec_count++; 434} 435 436#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 437 sizeof(uint32_t)) 438 439static void 440drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 441 drm_intel_bo_gem *bo_gem) 442{ 443 int size; 444 445 assert(!bo_gem->used_as_reloc_target); 446 447 /* The older chipsets are far-less flexible in terms of tiling, 448 * and require tiled buffer to be size aligned in the aperture. 449 * This means that in the worst possible case we will need a hole 450 * twice as large as the object in order for it to fit into the 451 * aperture. Optimal packing is for wimps. 
452 */ 453 size = bo_gem->bo.size; 454 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) 455 size *= 2; 456 457 bo_gem->reloc_tree_size = size; 458} 459 460static int 461drm_intel_setup_reloc_list(drm_intel_bo *bo) 462{ 463 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 464 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 465 unsigned int max_relocs = bufmgr_gem->max_relocs; 466 467 if (bo->size / 4 < max_relocs) 468 max_relocs = bo->size / 4; 469 470 bo_gem->relocs = malloc(max_relocs * 471 sizeof(struct drm_i915_gem_relocation_entry)); 472 bo_gem->reloc_target_info = malloc(max_relocs * 473 sizeof(drm_intel_reloc_target)); 474 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 475 bo_gem->has_error = 1; 476 477 free (bo_gem->relocs); 478 bo_gem->relocs = NULL; 479 480 free (bo_gem->reloc_target_info); 481 bo_gem->reloc_target_info = NULL; 482 483 return 1; 484 } 485 486 return 0; 487} 488 489static int 490drm_intel_gem_bo_busy(drm_intel_bo *bo) 491{ 492 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 493 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 494 struct drm_i915_gem_busy busy; 495 int ret; 496 497 memset(&busy, 0, sizeof(busy)); 498 busy.handle = bo_gem->gem_handle; 499 500 do { 501 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 502 } while (ret == -1 && errno == EINTR); 503 504 return (ret == 0 && busy.busy); 505} 506 507static int 508drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 509 drm_intel_bo_gem *bo_gem, int state) 510{ 511 struct drm_i915_gem_madvise madv; 512 513 madv.handle = bo_gem->gem_handle; 514 madv.madv = state; 515 madv.retained = 1; 516 ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 517 518 return madv.retained; 519} 520 521static int 522drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 523{ 524 return drm_intel_gem_bo_madvise_internal 525 ((drm_intel_bufmgr_gem *) bo->bufmgr, 526 (drm_intel_bo_gem *) bo, 527 madv); 528} 529 530/* drop the oldest entries that have been purged by the kernel */ 531static void 532drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 533 struct drm_intel_gem_bo_bucket *bucket) 534{ 535 while (!DRMLISTEMPTY(&bucket->head)) { 536 drm_intel_bo_gem *bo_gem; 537 538 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 539 bucket->head.next, head); 540 if (drm_intel_gem_bo_madvise_internal 541 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 542 break; 543 544 DRMLISTDEL(&bo_gem->head); 545 drm_intel_gem_bo_free(&bo_gem->bo); 546 } 547} 548 549static drm_intel_bo * 550drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 551 const char *name, 552 unsigned long size, 553 unsigned long flags) 554{ 555 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 556 drm_intel_bo_gem *bo_gem; 557 unsigned int page_size = getpagesize(); 558 int ret; 559 struct drm_intel_gem_bo_bucket *bucket; 560 int alloc_from_cache; 561 unsigned long bo_size; 562 int for_render = 0; 563 564 if (flags & BO_ALLOC_FOR_RENDER) 565 for_render = 1; 566 567 /* Round the allocated size up to a power of two number of pages. */ 568 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 569 570 /* If we don't have caching at this size, don't actually round the 571 * allocation up. 
572 */ 573 if (bucket == NULL) { 574 bo_size = size; 575 if (bo_size < page_size) 576 bo_size = page_size; 577 } else { 578 bo_size = bucket->size; 579 } 580 581 pthread_mutex_lock(&bufmgr_gem->lock); 582 /* Get a buffer out of the cache if available */ 583retry: 584 alloc_from_cache = 0; 585 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 586 if (for_render) { 587 /* Allocate new render-target BOs from the tail (MRU) 588 * of the list, as it will likely be hot in the GPU 589 * cache and in the aperture for us. 590 */ 591 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 592 bucket->head.prev, head); 593 DRMLISTDEL(&bo_gem->head); 594 alloc_from_cache = 1; 595 } else { 596 /* For non-render-target BOs (where we're probably 597 * going to map it first thing in order to fill it 598 * with data), check if the last BO in the cache is 599 * unbusy, and only reuse in that case. Otherwise, 600 * allocating a new buffer is probably faster than 601 * waiting for the GPU to finish. 602 */ 603 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 604 bucket->head.next, head); 605 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 606 alloc_from_cache = 1; 607 DRMLISTDEL(&bo_gem->head); 608 } 609 } 610 611 if (alloc_from_cache) { 612 if (!drm_intel_gem_bo_madvise_internal 613 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 614 drm_intel_gem_bo_free(&bo_gem->bo); 615 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 616 bucket); 617 goto retry; 618 } 619 } 620 } 621 pthread_mutex_unlock(&bufmgr_gem->lock); 622 623 if (!alloc_from_cache) { 624 struct drm_i915_gem_create create; 625 626 bo_gem = calloc(1, sizeof(*bo_gem)); 627 if (!bo_gem) 628 return NULL; 629 630 bo_gem->bo.size = bo_size; 631 memset(&create, 0, sizeof(create)); 632 create.size = bo_size; 633 634 do { 635 ret = ioctl(bufmgr_gem->fd, 636 DRM_IOCTL_I915_GEM_CREATE, 637 &create); 638 } while (ret == -1 && errno == EINTR); 639 bo_gem->gem_handle = create.handle; 640 bo_gem->bo.handle = bo_gem->gem_handle; 641 if (ret != 0) { 642 free(bo_gem); 643 return NULL; 644 } 645 bo_gem->bo.bufmgr = bufmgr; 646 } 647 648 bo_gem->name = name; 649 atomic_set(&bo_gem->refcount, 1); 650 bo_gem->validate_index = -1; 651 bo_gem->reloc_tree_fences = 0; 652 bo_gem->used_as_reloc_target = 0; 653 bo_gem->has_error = 0; 654 bo_gem->tiling_mode = I915_TILING_NONE; 655 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 656 bo_gem->reusable = 1; 657 658 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 659 660 DBG("bo_create: buf %d (%s) %ldb\n", 661 bo_gem->gem_handle, bo_gem->name, size); 662 663 return &bo_gem->bo; 664} 665 666static drm_intel_bo * 667drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 668 const char *name, 669 unsigned long size, 670 unsigned int alignment) 671{ 672 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 673 BO_ALLOC_FOR_RENDER); 674} 675 676static drm_intel_bo * 677drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 678 const char *name, 679 unsigned long size, 680 unsigned int alignment) 681{ 682 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0); 683} 684 685static drm_intel_bo * 686drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 687 int x, int y, int cpp, uint32_t *tiling_mode, 688 unsigned long *pitch, unsigned long flags) 689{ 690 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 691 drm_intel_bo *bo; 692 unsigned long size, stride; 693 uint32_t tiling; 694 int ret; 695 696 do { 697 unsigned long aligned_y; 698 699 tiling = *tiling_mode; 700 701 /* If we're tiled, our allocations are in 
8 or 32-row blocks, 702 * so failure to align our height means that we won't allocate 703 * enough pages. 704 * 705 * If we're untiled, we still have to align to 2 rows high 706 * because the data port accesses 2x2 blocks even if the 707 * bottom row isn't to be rendered, so failure to align means 708 * we could walk off the end of the GTT and fault. This is 709 * documented on 965, and may be the case on older chipsets 710 * too so we try to be careful. 711 */ 712 aligned_y = y; 713 if (tiling == I915_TILING_NONE) 714 aligned_y = ALIGN(y, 2); 715 else if (tiling == I915_TILING_X) 716 aligned_y = ALIGN(y, 8); 717 else if (tiling == I915_TILING_Y) 718 aligned_y = ALIGN(y, 32); 719 720 stride = x * cpp; 721 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling); 722 size = stride * aligned_y; 723 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 724 } while (*tiling_mode != tiling); 725 726 bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags); 727 if (!bo) 728 return NULL; 729 730 ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride); 731 if (ret != 0) { 732 drm_intel_gem_bo_unreference(bo); 733 return NULL; 734 } 735 736 *pitch = stride; 737 738 return bo; 739} 740 741/** 742 * Returns a drm_intel_bo wrapping the given buffer object handle. 743 * 744 * This can be used when one application needs to pass a buffer object 745 * to another. 746 */ 747drm_intel_bo * 748drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 749 const char *name, 750 unsigned int handle) 751{ 752 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 753 drm_intel_bo_gem *bo_gem; 754 int ret; 755 struct drm_gem_open open_arg; 756 struct drm_i915_gem_get_tiling get_tiling; 757 758 bo_gem = calloc(1, sizeof(*bo_gem)); 759 if (!bo_gem) 760 return NULL; 761 762 memset(&open_arg, 0, sizeof(open_arg)); 763 open_arg.name = handle; 764 do { 765 ret = ioctl(bufmgr_gem->fd, 766 DRM_IOCTL_GEM_OPEN, 767 &open_arg); 768 } while (ret == -1 && errno == EINTR); 769 if (ret != 0) { 770 fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", 771 name, handle, strerror(errno)); 772 free(bo_gem); 773 return NULL; 774 } 775 bo_gem->bo.size = open_arg.size; 776 bo_gem->bo.offset = 0; 777 bo_gem->bo.virtual = NULL; 778 bo_gem->bo.bufmgr = bufmgr; 779 bo_gem->name = name; 780 atomic_set(&bo_gem->refcount, 1); 781 bo_gem->validate_index = -1; 782 bo_gem->gem_handle = open_arg.handle; 783 bo_gem->global_name = handle; 784 bo_gem->reusable = 0; 785 786 memset(&get_tiling, 0, sizeof(get_tiling)); 787 get_tiling.handle = bo_gem->gem_handle; 788 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); 789 if (ret != 0) { 790 drm_intel_gem_bo_unreference(&bo_gem->bo); 791 return NULL; 792 } 793 bo_gem->tiling_mode = get_tiling.tiling_mode; 794 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 795 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 796 797 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 798 799 return &bo_gem->bo; 800} 801 802static void 803drm_intel_gem_bo_free(drm_intel_bo *bo) 804{ 805 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 806 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 807 struct drm_gem_close close; 808 int ret; 809 810 if (bo_gem->mem_virtual) 811 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 812 if (bo_gem->gtt_virtual) 813 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 814 815 /* Close this object */ 816 memset(&close, 0, sizeof(close)); 817 close.handle = bo_gem->gem_handle; 818 
ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 819 if (ret != 0) { 820 fprintf(stderr, 821 "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 822 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 823 } 824 free(bo); 825} 826 827/** Frees all cached buffers significantly older than @time. */ 828static void 829drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 830{ 831 int i; 832 833 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 834 struct drm_intel_gem_bo_bucket *bucket = 835 &bufmgr_gem->cache_bucket[i]; 836 837 while (!DRMLISTEMPTY(&bucket->head)) { 838 drm_intel_bo_gem *bo_gem; 839 840 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 841 bucket->head.next, head); 842 if (time - bo_gem->free_time <= 1) 843 break; 844 845 DRMLISTDEL(&bo_gem->head); 846 847 drm_intel_gem_bo_free(&bo_gem->bo); 848 } 849 } 850} 851 852static void 853drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 854{ 855 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 856 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 857 struct drm_intel_gem_bo_bucket *bucket; 858 uint32_t tiling_mode; 859 int i; 860 861 /* Unreference all the target buffers */ 862 for (i = 0; i < bo_gem->reloc_count; i++) { 863 if (bo_gem->reloc_target_info[i].bo != bo) { 864 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 865 reloc_target_info[i].bo, 866 time); 867 } 868 } 869 bo_gem->reloc_count = 0; 870 bo_gem->used_as_reloc_target = 0; 871 872 DBG("bo_unreference final: %d (%s)\n", 873 bo_gem->gem_handle, bo_gem->name); 874 875 /* release memory associated with this object */ 876 if (bo_gem->reloc_target_info) { 877 free(bo_gem->reloc_target_info); 878 bo_gem->reloc_target_info = NULL; 879 } 880 if (bo_gem->relocs) { 881 free(bo_gem->relocs); 882 bo_gem->relocs = NULL; 883 } 884 885 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 886 /* Put the buffer into our internal cache for reuse if we can. 
*/ 887 tiling_mode = I915_TILING_NONE; 888 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 889 drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 && 890 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 891 I915_MADV_DONTNEED)) { 892 bo_gem->free_time = time; 893 894 bo_gem->name = NULL; 895 bo_gem->validate_index = -1; 896 897 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 898 899 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time); 900 } else { 901 drm_intel_gem_bo_free(bo); 902 } 903} 904 905static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 906 time_t time) 907{ 908 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 909 910 assert(atomic_read(&bo_gem->refcount) > 0); 911 if (atomic_dec_and_test(&bo_gem->refcount)) 912 drm_intel_gem_bo_unreference_final(bo, time); 913} 914 915static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 916{ 917 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 918 919 assert(atomic_read(&bo_gem->refcount) > 0); 920 if (atomic_dec_and_test(&bo_gem->refcount)) { 921 drm_intel_bufmgr_gem *bufmgr_gem = 922 (drm_intel_bufmgr_gem *) bo->bufmgr; 923 struct timespec time; 924 925 clock_gettime(CLOCK_MONOTONIC, &time); 926 927 pthread_mutex_lock(&bufmgr_gem->lock); 928 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 929 pthread_mutex_unlock(&bufmgr_gem->lock); 930 } 931} 932 933static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 934{ 935 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 936 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 937 struct drm_i915_gem_set_domain set_domain; 938 int ret; 939 940 pthread_mutex_lock(&bufmgr_gem->lock); 941 942 /* Allow recursive mapping. Mesa may recursively map buffers with 943 * nested display loops. 
944 */ 945 if (!bo_gem->mem_virtual) { 946 struct drm_i915_gem_mmap mmap_arg; 947 948 DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 949 950 memset(&mmap_arg, 0, sizeof(mmap_arg)); 951 mmap_arg.handle = bo_gem->gem_handle; 952 mmap_arg.offset = 0; 953 mmap_arg.size = bo->size; 954 do { 955 ret = ioctl(bufmgr_gem->fd, 956 DRM_IOCTL_I915_GEM_MMAP, 957 &mmap_arg); 958 } while (ret == -1 && errno == EINTR); 959 if (ret != 0) { 960 ret = -errno; 961 fprintf(stderr, 962 "%s:%d: Error mapping buffer %d (%s): %s .\n", 963 __FILE__, __LINE__, bo_gem->gem_handle, 964 bo_gem->name, strerror(errno)); 965 pthread_mutex_unlock(&bufmgr_gem->lock); 966 return ret; 967 } 968 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 969 } 970 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 971 bo_gem->mem_virtual); 972 bo->virtual = bo_gem->mem_virtual; 973 974 set_domain.handle = bo_gem->gem_handle; 975 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 976 if (write_enable) 977 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 978 else 979 set_domain.write_domain = 0; 980 do { 981 ret = ioctl(bufmgr_gem->fd, 982 DRM_IOCTL_I915_GEM_SET_DOMAIN, 983 &set_domain); 984 } while (ret == -1 && errno == EINTR); 985 if (ret != 0) { 986 ret = -errno; 987 fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n", 988 __FILE__, __LINE__, bo_gem->gem_handle, 989 strerror(errno)); 990 pthread_mutex_unlock(&bufmgr_gem->lock); 991 return ret; 992 } 993 994 pthread_mutex_unlock(&bufmgr_gem->lock); 995 996 return 0; 997} 998 999int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1000{ 1001 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1002 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1003 struct drm_i915_gem_set_domain set_domain; 1004 int ret; 1005 1006 pthread_mutex_lock(&bufmgr_gem->lock); 1007 1008 /* Get a mapping of the buffer if we haven't before. */ 1009 if (bo_gem->gtt_virtual == NULL) { 1010 struct drm_i915_gem_mmap_gtt mmap_arg; 1011 1012 DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 1013 bo_gem->name); 1014 1015 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1016 mmap_arg.handle = bo_gem->gem_handle; 1017 1018 /* Get the fake offset back... 
*/ 1019 do { 1020 ret = ioctl(bufmgr_gem->fd, 1021 DRM_IOCTL_I915_GEM_MMAP_GTT, 1022 &mmap_arg); 1023 } while (ret == -1 && errno == EINTR); 1024 if (ret != 0) { 1025 ret = -errno; 1026 fprintf(stderr, 1027 "%s:%d: Error preparing buffer map %d (%s): %s .\n", 1028 __FILE__, __LINE__, 1029 bo_gem->gem_handle, bo_gem->name, 1030 strerror(errno)); 1031 pthread_mutex_unlock(&bufmgr_gem->lock); 1032 return ret; 1033 } 1034 1035 /* and mmap it */ 1036 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1037 MAP_SHARED, bufmgr_gem->fd, 1038 mmap_arg.offset); 1039 if (bo_gem->gtt_virtual == MAP_FAILED) { 1040 bo_gem->gtt_virtual = NULL; 1041 ret = -errno; 1042 fprintf(stderr, 1043 "%s:%d: Error mapping buffer %d (%s): %s .\n", 1044 __FILE__, __LINE__, 1045 bo_gem->gem_handle, bo_gem->name, 1046 strerror(errno)); 1047 pthread_mutex_unlock(&bufmgr_gem->lock); 1048 return ret; 1049 } 1050 } 1051 1052 bo->virtual = bo_gem->gtt_virtual; 1053 1054 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1055 bo_gem->gtt_virtual); 1056 1057 /* Now move it to the GTT domain so that the CPU caches are flushed */ 1058 set_domain.handle = bo_gem->gem_handle; 1059 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1060 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1061 do { 1062 ret = ioctl(bufmgr_gem->fd, 1063 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1064 &set_domain); 1065 } while (ret == -1 && errno == EINTR); 1066 1067 if (ret != 0) { 1068 ret = -errno; 1069 fprintf(stderr, "%s:%d: Error setting domain %d: %s\n", 1070 __FILE__, __LINE__, bo_gem->gem_handle, 1071 strerror(errno)); 1072 } 1073 1074 pthread_mutex_unlock(&bufmgr_gem->lock); 1075 1076 return ret; 1077} 1078 1079int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1080{ 1081 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1082 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1083 int ret = 0; 1084 1085 if (bo == NULL) 1086 return 0; 1087 1088 assert(bo_gem->gtt_virtual != NULL); 1089 1090 pthread_mutex_lock(&bufmgr_gem->lock); 1091 bo->virtual = NULL; 1092 pthread_mutex_unlock(&bufmgr_gem->lock); 1093 1094 return ret; 1095} 1096 1097static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1098{ 1099 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1100 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1101 struct drm_i915_gem_sw_finish sw_finish; 1102 int ret; 1103 1104 if (bo == NULL) 1105 return 0; 1106 1107 assert(bo_gem->mem_virtual != NULL); 1108 1109 pthread_mutex_lock(&bufmgr_gem->lock); 1110 1111 /* Cause a flush to happen if the buffer's pinned for scanout, so the 1112 * results show up in a timely manner. 1113 */ 1114 sw_finish.handle = bo_gem->gem_handle; 1115 do { 1116 ret = ioctl(bufmgr_gem->fd, 1117 DRM_IOCTL_I915_GEM_SW_FINISH, 1118 &sw_finish); 1119 } while (ret == -1 && errno == EINTR); 1120 ret = ret == -1 ? 
-errno : 0; 1121 1122 bo->virtual = NULL; 1123 pthread_mutex_unlock(&bufmgr_gem->lock); 1124 1125 return ret; 1126} 1127 1128static int 1129drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1130 unsigned long size, const void *data) 1131{ 1132 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1133 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1134 struct drm_i915_gem_pwrite pwrite; 1135 int ret; 1136 1137 memset(&pwrite, 0, sizeof(pwrite)); 1138 pwrite.handle = bo_gem->gem_handle; 1139 pwrite.offset = offset; 1140 pwrite.size = size; 1141 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1142 do { 1143 ret = ioctl(bufmgr_gem->fd, 1144 DRM_IOCTL_I915_GEM_PWRITE, 1145 &pwrite); 1146 } while (ret == -1 && errno == EINTR); 1147 if (ret != 0) { 1148 ret = -errno; 1149 fprintf(stderr, 1150 "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1151 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1152 (int)size, strerror(errno)); 1153 } 1154 1155 return ret; 1156} 1157 1158static int 1159drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1160{ 1161 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1162 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1163 int ret; 1164 1165 get_pipe_from_crtc_id.crtc_id = crtc_id; 1166 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1167 &get_pipe_from_crtc_id); 1168 if (ret != 0) { 1169 /* We return -1 here to signal that we don't 1170 * know which pipe is associated with this crtc. 1171 * This lets the caller know that this information 1172 * isn't available; using the wrong pipe for 1173 * vblank waiting can cause the chipset to lock up 1174 */ 1175 return -1; 1176 } 1177 1178 return get_pipe_from_crtc_id.pipe; 1179} 1180 1181static int 1182drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1183 unsigned long size, void *data) 1184{ 1185 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1186 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1187 struct drm_i915_gem_pread pread; 1188 int ret; 1189 1190 memset(&pread, 0, sizeof(pread)); 1191 pread.handle = bo_gem->gem_handle; 1192 pread.offset = offset; 1193 pread.size = size; 1194 pread.data_ptr = (uint64_t) (uintptr_t) data; 1195 do { 1196 ret = ioctl(bufmgr_gem->fd, 1197 DRM_IOCTL_I915_GEM_PREAD, 1198 &pread); 1199 } while (ret == -1 && errno == EINTR); 1200 if (ret != 0) { 1201 ret = -errno; 1202 fprintf(stderr, 1203 "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1204 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1205 (int)size, strerror(errno)); 1206 } 1207 1208 return ret; 1209} 1210 1211/** Waits for all GPU rendering to the object to have completed. */ 1212static void 1213drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1214{ 1215 drm_intel_gem_bo_start_gtt_access(bo, 0); 1216} 1217 1218/** 1219 * Sets the object to the GTT read and possibly write domain, used by the X 1220 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1221 * 1222 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1223 * can do tiled pixmaps this way. 
1224 */ 1225void 1226drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1227{ 1228 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1229 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1230 struct drm_i915_gem_set_domain set_domain; 1231 int ret; 1232 1233 set_domain.handle = bo_gem->gem_handle; 1234 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1235 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; 1236 do { 1237 ret = ioctl(bufmgr_gem->fd, 1238 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1239 &set_domain); 1240 } while (ret == -1 && errno == EINTR); 1241 if (ret != 0) { 1242 fprintf(stderr, 1243 "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1244 __FILE__, __LINE__, bo_gem->gem_handle, 1245 set_domain.read_domains, set_domain.write_domain, 1246 strerror(errno)); 1247 } 1248} 1249 1250static void 1251drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1252{ 1253 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1254 int i; 1255 1256 free(bufmgr_gem->exec2_objects); 1257 free(bufmgr_gem->exec_objects); 1258 free(bufmgr_gem->exec_bos); 1259 1260 pthread_mutex_destroy(&bufmgr_gem->lock); 1261 1262 /* Free any cached buffer objects we were going to reuse */ 1263 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1264 struct drm_intel_gem_bo_bucket *bucket = 1265 &bufmgr_gem->cache_bucket[i]; 1266 drm_intel_bo_gem *bo_gem; 1267 1268 while (!DRMLISTEMPTY(&bucket->head)) { 1269 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1270 bucket->head.next, head); 1271 DRMLISTDEL(&bo_gem->head); 1272 1273 drm_intel_gem_bo_free(&bo_gem->bo); 1274 } 1275 } 1276 1277 free(bufmgr); 1278} 1279 1280/** 1281 * Adds the target buffer to the validation list and adds the relocation 1282 * to the reloc_buffer's relocation list. 1283 * 1284 * The relocation entry at the given offset must already contain the 1285 * precomputed relocation value, because the kernel will optimize out 1286 * the relocation entry write when the buffer hasn't moved from the 1287 * last known offset in target_bo. 1288 */ 1289static int 1290do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1291 drm_intel_bo *target_bo, uint32_t target_offset, 1292 uint32_t read_domains, uint32_t write_domain, 1293 int need_fence) 1294{ 1295 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1296 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1297 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1298 1299 if (bo_gem->has_error) 1300 return -ENOMEM; 1301 1302 if (target_bo_gem->has_error) { 1303 bo_gem->has_error = 1; 1304 return -ENOMEM; 1305 } 1306 1307 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1308 need_fence = 0; 1309 1310 /* We never use HW fences for rendering on 965+ */ 1311 if (bufmgr_gem->gen >= 4) 1312 need_fence = 0; 1313 1314 /* Create a new relocation list if needed */ 1315 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1316 return -ENOMEM; 1317 1318 /* Check overflow */ 1319 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1320 1321 /* Check args */ 1322 assert(offset <= bo->size - 4); 1323 assert((write_domain & (write_domain - 1)) == 0); 1324 1325 /* Make sure that we're not adding a reloc to something whose size has 1326 * already been accounted for. 
1327 */ 1328 assert(!bo_gem->used_as_reloc_target); 1329 if (target_bo_gem != bo_gem) { 1330 target_bo_gem->used_as_reloc_target = 1; 1331 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1332 } 1333 /* An object needing a fence is a tiled buffer, so it won't have 1334 * relocs to other buffers. 1335 */ 1336 if (need_fence) 1337 target_bo_gem->reloc_tree_fences = 1; 1338 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1339 1340 /* Flag the target to disallow further relocations in it. */ 1341 1342 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1343 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1344 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1345 target_bo_gem->gem_handle; 1346 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1347 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1348 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1349 1350 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1351 if (target_bo != bo) 1352 drm_intel_gem_bo_reference(target_bo); 1353 if (need_fence) 1354 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1355 DRM_INTEL_RELOC_FENCE; 1356 else 1357 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1358 1359 bo_gem->reloc_count++; 1360 1361 return 0; 1362} 1363 1364static int 1365drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1366 drm_intel_bo *target_bo, uint32_t target_offset, 1367 uint32_t read_domains, uint32_t write_domain) 1368{ 1369 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1370 1371 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1372 read_domains, write_domain, 1373 !bufmgr_gem->fenced_relocs); 1374} 1375 1376static int 1377drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1378 drm_intel_bo *target_bo, 1379 uint32_t target_offset, 1380 uint32_t read_domains, uint32_t write_domain) 1381{ 1382 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1383 read_domains, write_domain, 1); 1384} 1385 1386/** 1387 * Walk the tree of relocations rooted at BO and accumulate the list of 1388 * validations to be performed and update the relocation buffers with 1389 * index values into the validation list. 1390 */ 1391static void 1392drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1393{ 1394 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1395 int i; 1396 1397 if (bo_gem->relocs == NULL) 1398 return; 1399 1400 for (i = 0; i < bo_gem->reloc_count; i++) { 1401 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1402 1403 if (target_bo == bo) 1404 continue; 1405 1406 /* Continue walking the tree depth-first. */ 1407 drm_intel_gem_bo_process_reloc(target_bo); 1408 1409 /* Add the target to the validate list */ 1410 drm_intel_add_validate_buffer(target_bo); 1411 } 1412} 1413 1414static void 1415drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1416{ 1417 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1418 int i; 1419 1420 if (bo_gem->relocs == NULL) 1421 return; 1422 1423 for (i = 0; i < bo_gem->reloc_count; i++) { 1424 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1425 int need_fence; 1426 1427 if (target_bo == bo) 1428 continue; 1429 1430 /* Continue walking the tree depth-first. 
*/ 1431 drm_intel_gem_bo_process_reloc2(target_bo); 1432 1433 need_fence = (bo_gem->reloc_target_info[i].flags & 1434 DRM_INTEL_RELOC_FENCE); 1435 1436 /* Add the target to the validate list */ 1437 drm_intel_add_validate_buffer2(target_bo, need_fence); 1438 } 1439} 1440 1441 1442static void 1443drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1444{ 1445 int i; 1446 1447 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1448 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1449 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1450 1451 /* Update the buffer offset */ 1452 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1453 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1454 bo_gem->gem_handle, bo_gem->name, bo->offset, 1455 (unsigned long long)bufmgr_gem->exec_objects[i]. 1456 offset); 1457 bo->offset = bufmgr_gem->exec_objects[i].offset; 1458 } 1459 } 1460} 1461 1462static void 1463drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1464{ 1465 int i; 1466 1467 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1468 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1469 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1470 1471 /* Update the buffer offset */ 1472 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1473 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1474 bo_gem->gem_handle, bo_gem->name, bo->offset, 1475 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1476 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1477 } 1478 } 1479} 1480 1481static int 1482drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 1483 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 1484{ 1485 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1486 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1487 struct drm_i915_gem_execbuffer execbuf; 1488 int ret, i; 1489 1490 if (bo_gem->has_error) 1491 return -ENOMEM; 1492 1493 pthread_mutex_lock(&bufmgr_gem->lock); 1494 /* Update indices and set up the validate list. */ 1495 drm_intel_gem_bo_process_reloc(bo); 1496 1497 /* Add the batch buffer to the validation list. There are no 1498 * relocations pointing to it. 1499 */ 1500 drm_intel_add_validate_buffer(bo); 1501 1502 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 1503 execbuf.buffer_count = bufmgr_gem->exec_count; 1504 execbuf.batch_start_offset = 0; 1505 execbuf.batch_len = used; 1506 execbuf.cliprects_ptr = (uintptr_t) cliprects; 1507 execbuf.num_cliprects = num_cliprects; 1508 execbuf.DR1 = 0; 1509 execbuf.DR4 = DR4; 1510 1511 do { 1512 ret = ioctl(bufmgr_gem->fd, 1513 DRM_IOCTL_I915_GEM_EXECBUFFER, 1514 &execbuf); 1515 } while (ret != 0 && errno == EINTR); 1516 1517 if (ret != 0) { 1518 ret = -errno; 1519 if (errno == ENOSPC) { 1520 fprintf(stderr, 1521 "Execbuffer fails to pin. " 1522 "Estimate: %u. Actual: %u. 
Available: %u\n", 1523 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1524 bufmgr_gem-> 1525 exec_count), 1526 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1527 bufmgr_gem-> 1528 exec_count), 1529 (unsigned int)bufmgr_gem->gtt_size); 1530 } 1531 } 1532 drm_intel_update_buffer_offsets(bufmgr_gem); 1533 1534 if (bufmgr_gem->bufmgr.debug) 1535 drm_intel_gem_dump_validation_list(bufmgr_gem); 1536 1537 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1538 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1539 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1540 1541 /* Disconnect the buffer from the validate list */ 1542 bo_gem->validate_index = -1; 1543 bufmgr_gem->exec_bos[i] = NULL; 1544 } 1545 bufmgr_gem->exec_count = 0; 1546 pthread_mutex_unlock(&bufmgr_gem->lock); 1547 1548 return ret; 1549} 1550 1551static int 1552drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 1553 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 1554 int ring_flag) 1555{ 1556 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1557 struct drm_i915_gem_execbuffer2 execbuf; 1558 int ret, i; 1559 1560 if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD)) 1561 return -EINVAL; 1562 1563 pthread_mutex_lock(&bufmgr_gem->lock); 1564 /* Update indices and set up the validate list. */ 1565 drm_intel_gem_bo_process_reloc2(bo); 1566 1567 /* Add the batch buffer to the validation list. There are no relocations 1568 * pointing to it. 1569 */ 1570 drm_intel_add_validate_buffer2(bo, 0); 1571 1572 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 1573 execbuf.buffer_count = bufmgr_gem->exec_count; 1574 execbuf.batch_start_offset = 0; 1575 execbuf.batch_len = used; 1576 execbuf.cliprects_ptr = (uintptr_t)cliprects; 1577 execbuf.num_cliprects = num_cliprects; 1578 execbuf.DR1 = 0; 1579 execbuf.DR4 = DR4; 1580 execbuf.flags = ring_flag; 1581 execbuf.rsvd1 = 0; 1582 execbuf.rsvd2 = 0; 1583 1584 do { 1585 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, 1586 &execbuf); 1587 } while (ret != 0 && errno == EINTR); 1588 1589 if (ret != 0) { 1590 ret = -errno; 1591 if (ret == -ENOMEM) { 1592 fprintf(stderr, 1593 "Execbuffer fails to pin. " 1594 "Estimate: %u. Actual: %u. 
Available: %u\n", 1595 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1596 bufmgr_gem->exec_count), 1597 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1598 bufmgr_gem->exec_count), 1599 (unsigned int) bufmgr_gem->gtt_size); 1600 } 1601 } 1602 drm_intel_update_buffer_offsets2(bufmgr_gem); 1603 1604 if (bufmgr_gem->bufmgr.debug) 1605 drm_intel_gem_dump_validation_list(bufmgr_gem); 1606 1607 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1608 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1609 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1610 1611 /* Disconnect the buffer from the validate list */ 1612 bo_gem->validate_index = -1; 1613 bufmgr_gem->exec_bos[i] = NULL; 1614 } 1615 bufmgr_gem->exec_count = 0; 1616 pthread_mutex_unlock(&bufmgr_gem->lock); 1617 1618 return ret; 1619} 1620 1621static int 1622drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 1623 drm_clip_rect_t *cliprects, int num_cliprects, 1624 int DR4) 1625{ 1626 return drm_intel_gem_bo_mrb_exec2(bo, used, 1627 cliprects, num_cliprects, DR4, 1628 I915_EXEC_RENDER); 1629} 1630 1631static int 1632drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 1633{ 1634 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1635 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1636 struct drm_i915_gem_pin pin; 1637 int ret; 1638 1639 memset(&pin, 0, sizeof(pin)); 1640 pin.handle = bo_gem->gem_handle; 1641 pin.alignment = alignment; 1642 1643 do { 1644 ret = ioctl(bufmgr_gem->fd, 1645 DRM_IOCTL_I915_GEM_PIN, 1646 &pin); 1647 } while (ret == -1 && errno == EINTR); 1648 1649 if (ret != 0) 1650 return -errno; 1651 1652 bo->offset = pin.offset; 1653 return 0; 1654} 1655 1656static int 1657drm_intel_gem_bo_unpin(drm_intel_bo *bo) 1658{ 1659 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1660 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1661 struct drm_i915_gem_unpin unpin; 1662 int ret; 1663 1664 memset(&unpin, 0, sizeof(unpin)); 1665 unpin.handle = bo_gem->gem_handle; 1666 1667 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 1668 if (ret != 0) 1669 return -errno; 1670 1671 return 0; 1672} 1673 1674static int 1675drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1676 uint32_t stride) 1677{ 1678 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1679 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1680 struct drm_i915_gem_set_tiling set_tiling; 1681 int ret; 1682 1683 if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode) 1684 return 0; 1685 1686 memset(&set_tiling, 0, sizeof(set_tiling)); 1687 set_tiling.handle = bo_gem->gem_handle; 1688 1689 do { 1690 set_tiling.tiling_mode = *tiling_mode; 1691 set_tiling.stride = stride; 1692 1693 ret = ioctl(bufmgr_gem->fd, 1694 DRM_IOCTL_I915_GEM_SET_TILING, 1695 &set_tiling); 1696 } while (ret == -1 && errno == EINTR); 1697 if (ret == 0) { 1698 bo_gem->tiling_mode = set_tiling.tiling_mode; 1699 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 1700 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1701 } else 1702 ret = -errno; 1703 1704 *tiling_mode = bo_gem->tiling_mode; 1705 return ret; 1706} 1707 1708static int 1709drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1710 uint32_t * swizzle_mode) 1711{ 1712 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1713 1714 *tiling_mode = bo_gem->tiling_mode; 1715 *swizzle_mode = bo_gem->swizzle_mode; 1716 return 0; 1717} 1718 1719static int 1720drm_intel_gem_bo_flink(drm_intel_bo 
*bo, uint32_t * name) 1721{ 1722 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1723 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1724 struct drm_gem_flink flink; 1725 int ret; 1726 1727 if (!bo_gem->global_name) { 1728 memset(&flink, 0, sizeof(flink)); 1729 flink.handle = bo_gem->gem_handle; 1730 1731 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 1732 if (ret != 0) 1733 return -errno; 1734 bo_gem->global_name = flink.name; 1735 bo_gem->reusable = 0; 1736 } 1737 1738 *name = bo_gem->global_name; 1739 return 0; 1740} 1741 1742/** 1743 * Enables unlimited caching of buffer objects for reuse. 1744 * 1745 * This is potentially very memory expensive, as the cache at each bucket 1746 * size is only bounded by how many buffers of that size we've managed to have 1747 * in flight at once. 1748 */ 1749void 1750drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 1751{ 1752 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1753 1754 bufmgr_gem->bo_reuse = 1; 1755} 1756 1757/** 1758 * Enable use of fenced reloc type. 1759 * 1760 * New code should enable this to avoid unnecessary fence register 1761 * allocation. If this option is not enabled, all relocs will have fence 1762 * register allocated. 1763 */ 1764void 1765drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 1766{ 1767 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 1768 1769 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 1770 bufmgr_gem->fenced_relocs = 1; 1771} 1772 1773/** 1774 * Return the additional aperture space required by the tree of buffer objects 1775 * rooted at bo. 1776 */ 1777static int 1778drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 1779{ 1780 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1781 int i; 1782 int total = 0; 1783 1784 if (bo == NULL || bo_gem->included_in_check_aperture) 1785 return 0; 1786 1787 total += bo->size; 1788 bo_gem->included_in_check_aperture = 1; 1789 1790 for (i = 0; i < bo_gem->reloc_count; i++) 1791 total += 1792 drm_intel_gem_bo_get_aperture_space(bo_gem-> 1793 reloc_target_info[i].bo); 1794 1795 return total; 1796} 1797 1798/** 1799 * Count the number of buffers in this list that need a fence reg 1800 * 1801 * If the count is greater than the number of available regs, we'll have 1802 * to ask the caller to resubmit a batch with fewer tiled buffers. 1803 * 1804 * This function over-counts if the same buffer is used multiple times. 1805 */ 1806static unsigned int 1807drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 1808{ 1809 int i; 1810 unsigned int total = 0; 1811 1812 for (i = 0; i < count; i++) { 1813 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1814 1815 if (bo_gem == NULL) 1816 continue; 1817 1818 total += bo_gem->reloc_tree_fences; 1819 } 1820 return total; 1821} 1822 1823/** 1824 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 1825 * for the next drm_intel_bufmgr_check_aperture_space() call. 
1826 */ 1827static void 1828drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 1829{ 1830 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1831 int i; 1832 1833 if (bo == NULL || !bo_gem->included_in_check_aperture) 1834 return; 1835 1836 bo_gem->included_in_check_aperture = 0; 1837 1838 for (i = 0; i < bo_gem->reloc_count; i++) 1839 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 1840 reloc_target_info[i].bo); 1841} 1842 1843/** 1844 * Return a conservative estimate for the amount of aperture required 1845 * for a collection of buffers. This may double-count some buffers. 1846 */ 1847static unsigned int 1848drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 1849{ 1850 int i; 1851 unsigned int total = 0; 1852 1853 for (i = 0; i < count; i++) { 1854 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1855 if (bo_gem != NULL) 1856 total += bo_gem->reloc_tree_size; 1857 } 1858 return total; 1859} 1860 1861/** 1862 * Return the amount of aperture needed for a collection of buffers. 1863 * This avoids double counting any buffers, at the cost of looking 1864 * at every buffer in the set. 1865 */ 1866static unsigned int 1867drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 1868{ 1869 int i; 1870 unsigned int total = 0; 1871 1872 for (i = 0; i < count; i++) { 1873 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 1874 /* For the first buffer object in the array, we get an 1875 * accurate count back for its reloc_tree size (since nothing 1876 * had been flagged as being counted yet). We can save that 1877 * value out as a more conservative reloc_tree_size that 1878 * avoids double-counting target buffers. Since the first 1879 * buffer happens to usually be the batch buffer in our 1880 * callers, this can pull us back from doing the tree 1881 * walk on every new batch emit. 1882 */ 1883 if (i == 0) { 1884 drm_intel_bo_gem *bo_gem = 1885 (drm_intel_bo_gem *) bo_array[i]; 1886 bo_gem->reloc_tree_size = total; 1887 } 1888 } 1889 1890 for (i = 0; i < count; i++) 1891 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 1892 return total; 1893} 1894 1895/** 1896 * Return -1 if the batchbuffer should be flushed before attempting to 1897 * emit rendering referencing the buffers pointed to by bo_array. 1898 * 1899 * This is required because if we try to emit a batchbuffer with relocations 1900 * to a tree of buffers that won't simultaneously fit in the aperture, 1901 * the rendering will return an error at a point where the software is not 1902 * prepared to recover from it. 1903 * 1904 * However, we also want to emit the batchbuffer significantly before we reach 1905 * the limit, as a series of batchbuffers each of which references buffers 1906 * covering almost all of the aperture means that at each emit we end up 1907 * waiting to evict a buffer from the last rendering, and we get synchronous 1908 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 1909 * get better parallelism. 
/**
 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_gem *bufmgr_gem =
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_intel_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_intel_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_intel_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
		    (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}
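/*
 * Example (illustrative sketch): callers reach the function above through
 * the public drm_intel_bufmgr_check_aperture_space() wrapper, usually just
 * before adding another object to a batch, and flush when it reports
 * -ENOSPC.  "flush_batch()", "bo_list" and "bo_count" are assumed names,
 * not part of libdrm.
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bo_list, bo_count) != 0) {
 *		// Submit what has been built so far, then retry with a
 *		// batch that references fewer (or smaller) buffers.
 *		flush_batch();
 *	}
 */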
/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	bo_gem->reusable = 0;
	return 0;
}

static int
drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	return bo_gem->reusable;
}

static int
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_info[i].bo == target_bo)
			return 1;
		if (bo == bo_gem->reloc_target_info[i].bo)
			continue;
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
						 target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_intel_gem_bo_references(bo, target_bo);
	return 0;
}

static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
	unsigned int i = bufmgr_gem->num_buckets;

	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
	bufmgr_gem->cache_bucket[i].size = size;
	bufmgr_gem->num_buckets++;
}

static void
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	unsigned long size, cache_max_size = 64 * 1024 * 1024;

	/* OK, so power of two buckets was too wasteful of memory.
	 * Give 3 other sizes between each power of two, to hopefully
	 * cover things accurately enough.  (The alternative is
	 * probably to just go for exact matching of sizes, and assume
	 * that for things like composited window resize the tiled
	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway)
	 */
	add_bucket(bufmgr_gem, 4096);
	add_bucket(bufmgr_gem, 4096 * 2);
	add_bucket(bufmgr_gem, 4096 * 3);

	/* Initialize the linked lists for BO reuse cache. */
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
		add_bucket(bufmgr_gem, size);

		add_bucket(bufmgr_gem, size + size * 1 / 4);
		add_bucket(bufmgr_gem, size + size * 2 / 4);
		add_bucket(bufmgr_gem, size + size * 3 / 4);
	}
}
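/*
 * For reference, the code above produces buckets of 4KB, 8KB and 12KB,
 * followed by each power of two from 16KB up to 64MB plus three intermediate
 * sizes at +25%, +50% and +75% of it (16KB, 20KB, 24KB, 28KB, 32KB,
 * 40KB, ...).  The bucket lookup elsewhere in this file rounds a request up
 * to the nearest bucket size, so a 130KB allocation, for example, would be
 * served from the 160KB bucket.
 */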
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 * \param batch_size Size in bytes of the batch buffers the client will
 *	execute; used to size the per-buffer relocation lists.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret;
	int exec2 = 0, has_bsd = 0;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		return NULL;

	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &bufmgr_gem->pci_device;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}

	if (IS_GEN2(bufmgr_gem))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem))
		bufmgr_gem->gen = 4;
	else
		bufmgr_gem->gen = 6;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = 1;

	gp.param = I915_PARAM_HAS_BSD;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		has_bsd = 1;

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers. Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation for every 2 dwords (but round down a
	 * bit since a power of two will mean an extra page allocation for the
	 * reloc buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
		if (has_bsd)
			bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	init_cache_buckets(bufmgr_gem);

	return &bufmgr_gem->bufmgr;
}
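/*
 * Example (illustrative sketch): a minimal client of this buffer manager.
 * The device path, batch size and buffer parameters are assumptions made
 * for the example only, and error handling is omitted.
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 64 * 1024, 4096);
 *	drm_intel_bo_map(bo, 1);		// 1 = map for writing
 *	memset(bo->virtual, 0, 64 * 1024);
 *	drm_intel_bo_unmap(bo);
 *
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 *	close(fd);
 */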