intel_bufmgr_gem.c revision 22944501
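GEM-backed buffer manager for libdrm_intel: it implements buffer-object allocation with a size-bucketed reuse cache, CPU (GEM mmap) and GTT mappings, pread/pwrite access, relocation and fence-register tracking, batch submission through both EXECBUFFER and EXECBUFFER2, and conservative aperture-space accounting that callers use to decide when a batch must be flushed.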
1/************************************************************************** 2 * 3 * Copyright © 2007 Red Hat Inc. 4 * Copyright © 2007 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <sys/ioctl.h> 51#include <sys/mman.h> 52#include <sys/stat.h> 53#include <sys/types.h> 54 55#include "errno.h" 56#include "libdrm_lists.h" 57#include "intel_bufmgr.h" 58#include "intel_bufmgr_priv.h" 59#include "intel_chipset.h" 60#include "string.h" 61 62#include "i915_drm.h" 63 64#define DBG(...) do { \ 65 if (bufmgr_gem->bufmgr.debug) \ 66 fprintf(stderr, __VA_ARGS__); \ 67} while (0) 68 69typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 70 71struct drm_intel_gem_bo_bucket { 72 drmMMListHead head; 73 unsigned long size; 74}; 75 76/* Only cache objects up to 64MB. Bigger than that, and the rounding of the 77 * size makes many operations fail that wouldn't otherwise. 
78 */ 79#define DRM_INTEL_GEM_BO_BUCKETS 14 80typedef struct _drm_intel_bufmgr_gem { 81 drm_intel_bufmgr bufmgr; 82 83 int fd; 84 85 int max_relocs; 86 87 pthread_mutex_t lock; 88 89 struct drm_i915_gem_exec_object *exec_objects; 90 struct drm_i915_gem_exec_object2 *exec2_objects; 91 drm_intel_bo **exec_bos; 92 int exec_size; 93 int exec_count; 94 95 /** Array of lists of cached gem objects of power-of-two sizes */ 96 struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS]; 97 98 uint64_t gtt_size; 99 int available_fences; 100 int pci_device; 101 int gen; 102 char bo_reuse; 103 char fenced_relocs; 104} drm_intel_bufmgr_gem; 105 106#define DRM_INTEL_RELOC_FENCE (1<<0) 107 108typedef struct _drm_intel_reloc_target_info { 109 drm_intel_bo *bo; 110 int flags; 111} drm_intel_reloc_target; 112 113struct _drm_intel_bo_gem { 114 drm_intel_bo bo; 115 116 atomic_t refcount; 117 uint32_t gem_handle; 118 const char *name; 119 120 /** 121 * Kernel-assigned global name for this object 122 */ 123 unsigned int global_name; 124 125 /** 126 * Index of the buffer within the validation list while preparing a 127 * batchbuffer execution. 128 */ 129 int validate_index; 130 131 /** 132 * Current tiling mode 133 */ 134 uint32_t tiling_mode; 135 uint32_t swizzle_mode; 136 137 time_t free_time; 138 139 /** Array passed to the DRM containing relocation information. */ 140 struct drm_i915_gem_relocation_entry *relocs; 141 /** 142 * Array of info structs corresponding to relocs[i].target_handle etc 143 */ 144 drm_intel_reloc_target *reloc_target_info; 145 /** Number of entries in relocs */ 146 int reloc_count; 147 /** Mapped address for the buffer, saved across map/unmap cycles */ 148 void *mem_virtual; 149 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 150 void *gtt_virtual; 151 152 /** BO cache list */ 153 drmMMListHead head; 154 155 /** 156 * Boolean of whether this BO and its children have been included in 157 * the current drm_intel_bufmgr_check_aperture_space() total. 158 */ 159 char included_in_check_aperture; 160 161 /** 162 * Boolean of whether this buffer has been used as a relocation 163 * target and had its size accounted for, and thus can't have any 164 * further relocations added to it. 165 */ 166 char used_as_reloc_target; 167 168 /** 169 * Boolean of whether we have encountered an error whilst building the relocation tree. 170 */ 171 char has_error; 172 173 /** 174 * Boolean of whether this buffer can be re-used 175 */ 176 char reusable; 177 178 /** 179 * Size in bytes of this buffer and its relocation descendants. 180 * 181 * Used to avoid costly tree walking in 182 * drm_intel_bufmgr_check_aperture in the common case. 183 */ 184 int reloc_tree_size; 185 186 /** 187 * Number of potential fence registers required by this buffer and its 188 * relocations. 
189 */ 190 int reloc_tree_fences; 191}; 192 193static unsigned int 194drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 195 196static unsigned int 197drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 198 199static int 200drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 201 uint32_t * swizzle_mode); 202 203static int 204drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 205 uint32_t stride); 206 207static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 208 time_t time); 209 210static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 211 212static void drm_intel_gem_bo_free(drm_intel_bo *bo); 213 214static unsigned long 215drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 216 uint32_t *tiling_mode) 217{ 218 unsigned long min_size, max_size; 219 unsigned long i; 220 221 if (*tiling_mode == I915_TILING_NONE) 222 return size; 223 224 /* 965+ just need multiples of page size for tiling */ 225 if (bufmgr_gem->gen >= 4) 226 return ROUND_UP_TO(size, 4096); 227 228 /* Older chips need powers of two, of at least 512k or 1M */ 229 if (bufmgr_gem->gen == 3) { 230 min_size = 1024*1024; 231 max_size = 128*1024*1024; 232 } else { 233 min_size = 512*1024; 234 max_size = 64*1024*1024; 235 } 236 237 if (size > max_size) { 238 *tiling_mode = I915_TILING_NONE; 239 return size; 240 } 241 242 for (i = min_size; i < size; i <<= 1) 243 ; 244 245 return i; 246} 247 248/* 249 * Round a given pitch up to the minimum required for X tiling on a 250 * given chip. We use 512 as the minimum to allow for a later tiling 251 * change. 252 */ 253static unsigned long 254drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 255 unsigned long pitch, uint32_t tiling_mode) 256{ 257 unsigned long tile_width; 258 unsigned long i; 259 260 /* If untiled, then just align it so that we can do rendering 261 * to it with the 3D engine. 
262 */ 263 if (tiling_mode == I915_TILING_NONE) 264 return ALIGN(pitch, 64); 265 266 if (tiling_mode == I915_TILING_X) 267 tile_width = 512; 268 else 269 tile_width = 128; 270 271 /* 965 is flexible */ 272 if (bufmgr_gem->gen >= 4) 273 return ROUND_UP_TO(pitch, tile_width); 274 275 /* Pre-965 needs power of two tile width */ 276 for (i = tile_width; i < pitch; i <<= 1) 277 ; 278 279 return i; 280} 281 282static struct drm_intel_gem_bo_bucket * 283drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 284 unsigned long size) 285{ 286 int i; 287 288 for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { 289 struct drm_intel_gem_bo_bucket *bucket = 290 &bufmgr_gem->cache_bucket[i]; 291 if (bucket->size >= size) { 292 return bucket; 293 } 294 } 295 296 return NULL; 297} 298 299static void 300drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 301{ 302 int i, j; 303 304 for (i = 0; i < bufmgr_gem->exec_count; i++) { 305 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 306 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 307 308 if (bo_gem->relocs == NULL) { 309 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 310 bo_gem->name); 311 continue; 312 } 313 314 for (j = 0; j < bo_gem->reloc_count; j++) { 315 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 316 drm_intel_bo_gem *target_gem = 317 (drm_intel_bo_gem *) target_bo; 318 319 DBG("%2d: %d (%s)@0x%08llx -> " 320 "%d (%s)@0x%08lx + 0x%08x\n", 321 i, 322 bo_gem->gem_handle, bo_gem->name, 323 (unsigned long long)bo_gem->relocs[j].offset, 324 target_gem->gem_handle, 325 target_gem->name, 326 target_bo->offset, 327 bo_gem->relocs[j].delta); 328 } 329 } 330} 331 332static inline void 333drm_intel_gem_bo_reference(drm_intel_bo *bo) 334{ 335 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 336 337 assert(atomic_read(&bo_gem->refcount) > 0); 338 atomic_inc(&bo_gem->refcount); 339} 340 341/** 342 * Adds the given buffer to the list of buffers to be validated (moved into the 343 * appropriate memory type) with the next batch submission. 344 * 345 * If a buffer is validated multiple times in a batch submission, it ends up 346 * with the intersection of the memory type flags and the union of the 347 * access flags. 348 */ 349static void 350drm_intel_add_validate_buffer(drm_intel_bo *bo) 351{ 352 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 353 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 354 int index; 355 356 if (bo_gem->validate_index != -1) 357 return; 358 359 /* Extend the array of validation entries as necessary. 
*/ 360 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 361 int new_size = bufmgr_gem->exec_size * 2; 362 363 if (new_size == 0) 364 new_size = 5; 365 366 bufmgr_gem->exec_objects = 367 realloc(bufmgr_gem->exec_objects, 368 sizeof(*bufmgr_gem->exec_objects) * new_size); 369 bufmgr_gem->exec_bos = 370 realloc(bufmgr_gem->exec_bos, 371 sizeof(*bufmgr_gem->exec_bos) * new_size); 372 bufmgr_gem->exec_size = new_size; 373 } 374 375 index = bufmgr_gem->exec_count; 376 bo_gem->validate_index = index; 377 /* Fill in array entry */ 378 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 379 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 380 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 381 bufmgr_gem->exec_objects[index].alignment = 0; 382 bufmgr_gem->exec_objects[index].offset = 0; 383 bufmgr_gem->exec_bos[index] = bo; 384 bufmgr_gem->exec_count++; 385} 386 387static void 388drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 389{ 390 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 391 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 392 int index; 393 394 if (bo_gem->validate_index != -1) { 395 if (need_fence) 396 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 397 EXEC_OBJECT_NEEDS_FENCE; 398 return; 399 } 400 401 /* Extend the array of validation entries as necessary. */ 402 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 403 int new_size = bufmgr_gem->exec_size * 2; 404 405 if (new_size == 0) 406 new_size = 5; 407 408 bufmgr_gem->exec2_objects = 409 realloc(bufmgr_gem->exec2_objects, 410 sizeof(*bufmgr_gem->exec2_objects) * new_size); 411 bufmgr_gem->exec_bos = 412 realloc(bufmgr_gem->exec_bos, 413 sizeof(*bufmgr_gem->exec_bos) * new_size); 414 bufmgr_gem->exec_size = new_size; 415 } 416 417 index = bufmgr_gem->exec_count; 418 bo_gem->validate_index = index; 419 /* Fill in array entry */ 420 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 421 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 422 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 423 bufmgr_gem->exec2_objects[index].alignment = 0; 424 bufmgr_gem->exec2_objects[index].offset = 0; 425 bufmgr_gem->exec_bos[index] = bo; 426 bufmgr_gem->exec2_objects[index].flags = 0; 427 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 428 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 429 if (need_fence) { 430 bufmgr_gem->exec2_objects[index].flags |= 431 EXEC_OBJECT_NEEDS_FENCE; 432 } 433 bufmgr_gem->exec_count++; 434} 435 436#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 437 sizeof(uint32_t)) 438 439static void 440drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 441 drm_intel_bo_gem *bo_gem) 442{ 443 int size; 444 445 assert(!bo_gem->used_as_reloc_target); 446 447 /* The older chipsets are far-less flexible in terms of tiling, 448 * and require tiled buffer to be size aligned in the aperture. 449 * This means that in the worst possible case we will need a hole 450 * twice as large as the object in order for it to fit into the 451 * aperture. Optimal packing is for wimps. 
452 */ 453 size = bo_gem->bo.size; 454 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) 455 size *= 2; 456 457 bo_gem->reloc_tree_size = size; 458} 459 460static int 461drm_intel_setup_reloc_list(drm_intel_bo *bo) 462{ 463 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 464 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 465 unsigned int max_relocs = bufmgr_gem->max_relocs; 466 467 if (bo->size / 4 < max_relocs) 468 max_relocs = bo->size / 4; 469 470 bo_gem->relocs = malloc(max_relocs * 471 sizeof(struct drm_i915_gem_relocation_entry)); 472 bo_gem->reloc_target_info = malloc(max_relocs * 473 sizeof(drm_intel_reloc_target)); 474 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 475 bo_gem->has_error = 1; 476 477 free (bo_gem->relocs); 478 bo_gem->relocs = NULL; 479 480 free (bo_gem->reloc_target_info); 481 bo_gem->reloc_target_info = NULL; 482 483 return 1; 484 } 485 486 return 0; 487} 488 489static int 490drm_intel_gem_bo_busy(drm_intel_bo *bo) 491{ 492 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 493 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 494 struct drm_i915_gem_busy busy; 495 int ret; 496 497 memset(&busy, 0, sizeof(busy)); 498 busy.handle = bo_gem->gem_handle; 499 500 do { 501 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 502 } while (ret == -1 && errno == EINTR); 503 504 return (ret == 0 && busy.busy); 505} 506 507static int 508drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 509 drm_intel_bo_gem *bo_gem, int state) 510{ 511 struct drm_i915_gem_madvise madv; 512 513 madv.handle = bo_gem->gem_handle; 514 madv.madv = state; 515 madv.retained = 1; 516 ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 517 518 return madv.retained; 519} 520 521static int 522drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 523{ 524 return drm_intel_gem_bo_madvise_internal 525 ((drm_intel_bufmgr_gem *) bo->bufmgr, 526 (drm_intel_bo_gem *) bo, 527 madv); 528} 529 530/* drop the oldest entries that have been purged by the kernel */ 531static void 532drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 533 struct drm_intel_gem_bo_bucket *bucket) 534{ 535 while (!DRMLISTEMPTY(&bucket->head)) { 536 drm_intel_bo_gem *bo_gem; 537 538 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 539 bucket->head.next, head); 540 if (drm_intel_gem_bo_madvise_internal 541 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 542 break; 543 544 DRMLISTDEL(&bo_gem->head); 545 drm_intel_gem_bo_free(&bo_gem->bo); 546 } 547} 548 549static drm_intel_bo * 550drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 551 const char *name, 552 unsigned long size, 553 unsigned long flags) 554{ 555 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 556 drm_intel_bo_gem *bo_gem; 557 unsigned int page_size = getpagesize(); 558 int ret; 559 struct drm_intel_gem_bo_bucket *bucket; 560 int alloc_from_cache; 561 unsigned long bo_size; 562 int for_render = 0; 563 564 if (flags & BO_ALLOC_FOR_RENDER) 565 for_render = 1; 566 567 /* Round the allocated size up to a power of two number of pages. */ 568 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 569 570 /* If we don't have caching at this size, don't actually round the 571 * allocation up. 
572 */ 573 if (bucket == NULL) { 574 bo_size = size; 575 if (bo_size < page_size) 576 bo_size = page_size; 577 } else { 578 bo_size = bucket->size; 579 } 580 581 pthread_mutex_lock(&bufmgr_gem->lock); 582 /* Get a buffer out of the cache if available */ 583retry: 584 alloc_from_cache = 0; 585 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 586 if (for_render) { 587 /* Allocate new render-target BOs from the tail (MRU) 588 * of the list, as it will likely be hot in the GPU 589 * cache and in the aperture for us. 590 */ 591 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 592 bucket->head.prev, head); 593 DRMLISTDEL(&bo_gem->head); 594 alloc_from_cache = 1; 595 } else { 596 /* For non-render-target BOs (where we're probably 597 * going to map it first thing in order to fill it 598 * with data), check if the last BO in the cache is 599 * unbusy, and only reuse in that case. Otherwise, 600 * allocating a new buffer is probably faster than 601 * waiting for the GPU to finish. 602 */ 603 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 604 bucket->head.next, head); 605 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 606 alloc_from_cache = 1; 607 DRMLISTDEL(&bo_gem->head); 608 } 609 } 610 611 if (alloc_from_cache) { 612 if (!drm_intel_gem_bo_madvise_internal 613 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 614 drm_intel_gem_bo_free(&bo_gem->bo); 615 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 616 bucket); 617 goto retry; 618 } 619 } 620 } 621 pthread_mutex_unlock(&bufmgr_gem->lock); 622 623 if (!alloc_from_cache) { 624 struct drm_i915_gem_create create; 625 626 bo_gem = calloc(1, sizeof(*bo_gem)); 627 if (!bo_gem) 628 return NULL; 629 630 bo_gem->bo.size = bo_size; 631 memset(&create, 0, sizeof(create)); 632 create.size = bo_size; 633 634 do { 635 ret = ioctl(bufmgr_gem->fd, 636 DRM_IOCTL_I915_GEM_CREATE, 637 &create); 638 } while (ret == -1 && errno == EINTR); 639 bo_gem->gem_handle = create.handle; 640 bo_gem->bo.handle = bo_gem->gem_handle; 641 if (ret != 0) { 642 free(bo_gem); 643 return NULL; 644 } 645 bo_gem->bo.bufmgr = bufmgr; 646 } 647 648 bo_gem->name = name; 649 atomic_set(&bo_gem->refcount, 1); 650 bo_gem->validate_index = -1; 651 bo_gem->reloc_tree_fences = 0; 652 bo_gem->used_as_reloc_target = 0; 653 bo_gem->has_error = 0; 654 bo_gem->tiling_mode = I915_TILING_NONE; 655 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 656 bo_gem->reusable = 1; 657 658 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 659 660 DBG("bo_create: buf %d (%s) %ldb\n", 661 bo_gem->gem_handle, bo_gem->name, size); 662 663 return &bo_gem->bo; 664} 665 666static drm_intel_bo * 667drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 668 const char *name, 669 unsigned long size, 670 unsigned int alignment) 671{ 672 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 673 BO_ALLOC_FOR_RENDER); 674} 675 676static drm_intel_bo * 677drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 678 const char *name, 679 unsigned long size, 680 unsigned int alignment) 681{ 682 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0); 683} 684 685static drm_intel_bo * 686drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 687 int x, int y, int cpp, uint32_t *tiling_mode, 688 unsigned long *pitch, unsigned long flags) 689{ 690 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 691 drm_intel_bo *bo; 692 unsigned long size, stride, aligned_y = y; 693 int ret; 694 695 /* If we're tiled, our allocations are in 8 or 32-row blocks, 696 * so failure to align our height means that we won't 
allocate 697 * enough pages. 698 * 699 * If we're untiled, we still have to align to 2 rows high 700 * because the data port accesses 2x2 blocks even if the 701 * bottom row isn't to be rendered, so failure to align means 702 * we could walk off the end of the GTT and fault. This is 703 * documented on 965, and may be the case on older chipsets 704 * too so we try to be careful. 705 */ 706 if (*tiling_mode == I915_TILING_NONE) 707 aligned_y = ALIGN(y, 2); 708 else if (*tiling_mode == I915_TILING_X) 709 aligned_y = ALIGN(y, 8); 710 else if (*tiling_mode == I915_TILING_Y) 711 aligned_y = ALIGN(y, 32); 712 713 stride = x * cpp; 714 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode); 715 size = stride * aligned_y; 716 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 717 718 bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags); 719 if (!bo) 720 return NULL; 721 722 ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride); 723 if (ret != 0) { 724 drm_intel_gem_bo_unreference(bo); 725 return NULL; 726 } 727 728 *pitch = stride; 729 730 return bo; 731} 732 733/** 734 * Returns a drm_intel_bo wrapping the given buffer object handle. 735 * 736 * This can be used when one application needs to pass a buffer object 737 * to another. 738 */ 739drm_intel_bo * 740drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 741 const char *name, 742 unsigned int handle) 743{ 744 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 745 drm_intel_bo_gem *bo_gem; 746 int ret; 747 struct drm_gem_open open_arg; 748 struct drm_i915_gem_get_tiling get_tiling; 749 750 bo_gem = calloc(1, sizeof(*bo_gem)); 751 if (!bo_gem) 752 return NULL; 753 754 memset(&open_arg, 0, sizeof(open_arg)); 755 open_arg.name = handle; 756 do { 757 ret = ioctl(bufmgr_gem->fd, 758 DRM_IOCTL_GEM_OPEN, 759 &open_arg); 760 } while (ret == -1 && errno == EINTR); 761 if (ret != 0) { 762 fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", 763 name, handle, strerror(errno)); 764 free(bo_gem); 765 return NULL; 766 } 767 bo_gem->bo.size = open_arg.size; 768 bo_gem->bo.offset = 0; 769 bo_gem->bo.virtual = NULL; 770 bo_gem->bo.bufmgr = bufmgr; 771 bo_gem->name = name; 772 atomic_set(&bo_gem->refcount, 1); 773 bo_gem->validate_index = -1; 774 bo_gem->gem_handle = open_arg.handle; 775 bo_gem->global_name = handle; 776 bo_gem->reusable = 0; 777 778 memset(&get_tiling, 0, sizeof(get_tiling)); 779 get_tiling.handle = bo_gem->gem_handle; 780 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); 781 if (ret != 0) { 782 drm_intel_gem_bo_unreference(&bo_gem->bo); 783 return NULL; 784 } 785 bo_gem->tiling_mode = get_tiling.tiling_mode; 786 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 787 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 788 789 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 790 791 return &bo_gem->bo; 792} 793 794static void 795drm_intel_gem_bo_free(drm_intel_bo *bo) 796{ 797 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 798 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 799 struct drm_gem_close close; 800 int ret; 801 802 if (bo_gem->mem_virtual) 803 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 804 if (bo_gem->gtt_virtual) 805 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 806 807 /* Close this object */ 808 memset(&close, 0, sizeof(close)); 809 close.handle = bo_gem->gem_handle; 810 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 811 if (ret != 0) { 812 fprintf(stderr, 813 
"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 814 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 815 } 816 free(bo); 817} 818 819/** Frees all cached buffers significantly older than @time. */ 820static void 821drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 822{ 823 int i; 824 825 for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { 826 struct drm_intel_gem_bo_bucket *bucket = 827 &bufmgr_gem->cache_bucket[i]; 828 829 while (!DRMLISTEMPTY(&bucket->head)) { 830 drm_intel_bo_gem *bo_gem; 831 832 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 833 bucket->head.next, head); 834 if (time - bo_gem->free_time <= 1) 835 break; 836 837 DRMLISTDEL(&bo_gem->head); 838 839 drm_intel_gem_bo_free(&bo_gem->bo); 840 } 841 } 842} 843 844static void 845drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 846{ 847 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 848 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 849 struct drm_intel_gem_bo_bucket *bucket; 850 uint32_t tiling_mode; 851 int i; 852 853 /* Unreference all the target buffers */ 854 for (i = 0; i < bo_gem->reloc_count; i++) { 855 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 856 reloc_target_info[i].bo, 857 time); 858 } 859 bo_gem->reloc_count = 0; 860 bo_gem->used_as_reloc_target = 0; 861 862 DBG("bo_unreference final: %d (%s)\n", 863 bo_gem->gem_handle, bo_gem->name); 864 865 /* release memory associated with this object */ 866 if (bo_gem->reloc_target_info) { 867 free(bo_gem->reloc_target_info); 868 bo_gem->reloc_target_info = NULL; 869 } 870 if (bo_gem->relocs) { 871 free(bo_gem->relocs); 872 bo_gem->relocs = NULL; 873 } 874 875 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 876 /* Put the buffer into our internal cache for reuse if we can. */ 877 tiling_mode = I915_TILING_NONE; 878 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 879 drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 && 880 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 881 I915_MADV_DONTNEED)) { 882 bo_gem->free_time = time; 883 884 bo_gem->name = NULL; 885 bo_gem->validate_index = -1; 886 887 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 888 889 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time); 890 } else { 891 drm_intel_gem_bo_free(bo); 892 } 893} 894 895static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 896 time_t time) 897{ 898 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 899 900 assert(atomic_read(&bo_gem->refcount) > 0); 901 if (atomic_dec_and_test(&bo_gem->refcount)) 902 drm_intel_gem_bo_unreference_final(bo, time); 903} 904 905static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 906{ 907 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 908 909 assert(atomic_read(&bo_gem->refcount) > 0); 910 if (atomic_dec_and_test(&bo_gem->refcount)) { 911 drm_intel_bufmgr_gem *bufmgr_gem = 912 (drm_intel_bufmgr_gem *) bo->bufmgr; 913 struct timespec time; 914 915 clock_gettime(CLOCK_MONOTONIC, &time); 916 917 pthread_mutex_lock(&bufmgr_gem->lock); 918 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 919 pthread_mutex_unlock(&bufmgr_gem->lock); 920 } 921} 922 923static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 924{ 925 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 926 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 927 struct drm_i915_gem_set_domain set_domain; 928 int ret; 929 930 pthread_mutex_lock(&bufmgr_gem->lock); 931 932 /* Allow recursive mapping. 
Mesa may recursively map buffers with 933 * nested display loops. 934 */ 935 if (!bo_gem->mem_virtual) { 936 struct drm_i915_gem_mmap mmap_arg; 937 938 DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 939 940 memset(&mmap_arg, 0, sizeof(mmap_arg)); 941 mmap_arg.handle = bo_gem->gem_handle; 942 mmap_arg.offset = 0; 943 mmap_arg.size = bo->size; 944 do { 945 ret = ioctl(bufmgr_gem->fd, 946 DRM_IOCTL_I915_GEM_MMAP, 947 &mmap_arg); 948 } while (ret == -1 && errno == EINTR); 949 if (ret != 0) { 950 ret = -errno; 951 fprintf(stderr, 952 "%s:%d: Error mapping buffer %d (%s): %s .\n", 953 __FILE__, __LINE__, bo_gem->gem_handle, 954 bo_gem->name, strerror(errno)); 955 pthread_mutex_unlock(&bufmgr_gem->lock); 956 return ret; 957 } 958 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 959 } 960 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 961 bo_gem->mem_virtual); 962 bo->virtual = bo_gem->mem_virtual; 963 964 set_domain.handle = bo_gem->gem_handle; 965 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 966 if (write_enable) 967 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 968 else 969 set_domain.write_domain = 0; 970 do { 971 ret = ioctl(bufmgr_gem->fd, 972 DRM_IOCTL_I915_GEM_SET_DOMAIN, 973 &set_domain); 974 } while (ret == -1 && errno == EINTR); 975 if (ret != 0) { 976 ret = -errno; 977 fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n", 978 __FILE__, __LINE__, bo_gem->gem_handle, 979 strerror(errno)); 980 pthread_mutex_unlock(&bufmgr_gem->lock); 981 return ret; 982 } 983 984 pthread_mutex_unlock(&bufmgr_gem->lock); 985 986 return 0; 987} 988 989int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 990{ 991 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 992 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 993 struct drm_i915_gem_set_domain set_domain; 994 int ret; 995 996 pthread_mutex_lock(&bufmgr_gem->lock); 997 998 /* Get a mapping of the buffer if we haven't before. */ 999 if (bo_gem->gtt_virtual == NULL) { 1000 struct drm_i915_gem_mmap_gtt mmap_arg; 1001 1002 DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 1003 bo_gem->name); 1004 1005 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1006 mmap_arg.handle = bo_gem->gem_handle; 1007 1008 /* Get the fake offset back... 
*/ 1009 do { 1010 ret = ioctl(bufmgr_gem->fd, 1011 DRM_IOCTL_I915_GEM_MMAP_GTT, 1012 &mmap_arg); 1013 } while (ret == -1 && errno == EINTR); 1014 if (ret != 0) { 1015 ret = -errno; 1016 fprintf(stderr, 1017 "%s:%d: Error preparing buffer map %d (%s): %s .\n", 1018 __FILE__, __LINE__, 1019 bo_gem->gem_handle, bo_gem->name, 1020 strerror(errno)); 1021 pthread_mutex_unlock(&bufmgr_gem->lock); 1022 return ret; 1023 } 1024 1025 /* and mmap it */ 1026 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1027 MAP_SHARED, bufmgr_gem->fd, 1028 mmap_arg.offset); 1029 if (bo_gem->gtt_virtual == MAP_FAILED) { 1030 bo_gem->gtt_virtual = NULL; 1031 ret = -errno; 1032 fprintf(stderr, 1033 "%s:%d: Error mapping buffer %d (%s): %s .\n", 1034 __FILE__, __LINE__, 1035 bo_gem->gem_handle, bo_gem->name, 1036 strerror(errno)); 1037 pthread_mutex_unlock(&bufmgr_gem->lock); 1038 return ret; 1039 } 1040 } 1041 1042 bo->virtual = bo_gem->gtt_virtual; 1043 1044 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1045 bo_gem->gtt_virtual); 1046 1047 /* Now move it to the GTT domain so that the CPU caches are flushed */ 1048 set_domain.handle = bo_gem->gem_handle; 1049 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1050 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1051 do { 1052 ret = ioctl(bufmgr_gem->fd, 1053 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1054 &set_domain); 1055 } while (ret == -1 && errno == EINTR); 1056 1057 if (ret != 0) { 1058 ret = -errno; 1059 fprintf(stderr, "%s:%d: Error setting domain %d: %s\n", 1060 __FILE__, __LINE__, bo_gem->gem_handle, 1061 strerror(errno)); 1062 } 1063 1064 pthread_mutex_unlock(&bufmgr_gem->lock); 1065 1066 return ret; 1067} 1068 1069int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1070{ 1071 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1072 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1073 int ret = 0; 1074 1075 if (bo == NULL) 1076 return 0; 1077 1078 assert(bo_gem->gtt_virtual != NULL); 1079 1080 pthread_mutex_lock(&bufmgr_gem->lock); 1081 bo->virtual = NULL; 1082 pthread_mutex_unlock(&bufmgr_gem->lock); 1083 1084 return ret; 1085} 1086 1087static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1088{ 1089 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1090 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1091 struct drm_i915_gem_sw_finish sw_finish; 1092 int ret; 1093 1094 if (bo == NULL) 1095 return 0; 1096 1097 assert(bo_gem->mem_virtual != NULL); 1098 1099 pthread_mutex_lock(&bufmgr_gem->lock); 1100 1101 /* Cause a flush to happen if the buffer's pinned for scanout, so the 1102 * results show up in a timely manner. 1103 */ 1104 sw_finish.handle = bo_gem->gem_handle; 1105 do { 1106 ret = ioctl(bufmgr_gem->fd, 1107 DRM_IOCTL_I915_GEM_SW_FINISH, 1108 &sw_finish); 1109 } while (ret == -1 && errno == EINTR); 1110 ret = ret == -1 ? 
-errno : 0; 1111 1112 bo->virtual = NULL; 1113 pthread_mutex_unlock(&bufmgr_gem->lock); 1114 1115 return ret; 1116} 1117 1118static int 1119drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1120 unsigned long size, const void *data) 1121{ 1122 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1123 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1124 struct drm_i915_gem_pwrite pwrite; 1125 int ret; 1126 1127 memset(&pwrite, 0, sizeof(pwrite)); 1128 pwrite.handle = bo_gem->gem_handle; 1129 pwrite.offset = offset; 1130 pwrite.size = size; 1131 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1132 do { 1133 ret = ioctl(bufmgr_gem->fd, 1134 DRM_IOCTL_I915_GEM_PWRITE, 1135 &pwrite); 1136 } while (ret == -1 && errno == EINTR); 1137 if (ret != 0) { 1138 ret = -errno; 1139 fprintf(stderr, 1140 "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1141 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1142 (int)size, strerror(errno)); 1143 } 1144 1145 return ret; 1146} 1147 1148static int 1149drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1150{ 1151 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1152 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1153 int ret; 1154 1155 get_pipe_from_crtc_id.crtc_id = crtc_id; 1156 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1157 &get_pipe_from_crtc_id); 1158 if (ret != 0) { 1159 /* We return -1 here to signal that we don't 1160 * know which pipe is associated with this crtc. 1161 * This lets the caller know that this information 1162 * isn't available; using the wrong pipe for 1163 * vblank waiting can cause the chipset to lock up 1164 */ 1165 return -1; 1166 } 1167 1168 return get_pipe_from_crtc_id.pipe; 1169} 1170 1171static int 1172drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1173 unsigned long size, void *data) 1174{ 1175 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1176 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1177 struct drm_i915_gem_pread pread; 1178 int ret; 1179 1180 memset(&pread, 0, sizeof(pread)); 1181 pread.handle = bo_gem->gem_handle; 1182 pread.offset = offset; 1183 pread.size = size; 1184 pread.data_ptr = (uint64_t) (uintptr_t) data; 1185 do { 1186 ret = ioctl(bufmgr_gem->fd, 1187 DRM_IOCTL_I915_GEM_PREAD, 1188 &pread); 1189 } while (ret == -1 && errno == EINTR); 1190 if (ret != 0) { 1191 ret = -errno; 1192 fprintf(stderr, 1193 "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1194 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1195 (int)size, strerror(errno)); 1196 } 1197 1198 return ret; 1199} 1200 1201/** Waits for all GPU rendering to the object to have completed. */ 1202static void 1203drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1204{ 1205 drm_intel_gem_bo_start_gtt_access(bo, 0); 1206} 1207 1208/** 1209 * Sets the object to the GTT read and possibly write domain, used by the X 1210 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1211 * 1212 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1213 * can do tiled pixmaps this way. 
1214 */ 1215void 1216drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1217{ 1218 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1219 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1220 struct drm_i915_gem_set_domain set_domain; 1221 int ret; 1222 1223 set_domain.handle = bo_gem->gem_handle; 1224 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1225 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; 1226 do { 1227 ret = ioctl(bufmgr_gem->fd, 1228 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1229 &set_domain); 1230 } while (ret == -1 && errno == EINTR); 1231 if (ret != 0) { 1232 fprintf(stderr, 1233 "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1234 __FILE__, __LINE__, bo_gem->gem_handle, 1235 set_domain.read_domains, set_domain.write_domain, 1236 strerror(errno)); 1237 } 1238} 1239 1240static void 1241drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1242{ 1243 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1244 int i; 1245 1246 free(bufmgr_gem->exec2_objects); 1247 free(bufmgr_gem->exec_objects); 1248 free(bufmgr_gem->exec_bos); 1249 1250 pthread_mutex_destroy(&bufmgr_gem->lock); 1251 1252 /* Free any cached buffer objects we were going to reuse */ 1253 for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) { 1254 struct drm_intel_gem_bo_bucket *bucket = 1255 &bufmgr_gem->cache_bucket[i]; 1256 drm_intel_bo_gem *bo_gem; 1257 1258 while (!DRMLISTEMPTY(&bucket->head)) { 1259 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1260 bucket->head.next, head); 1261 DRMLISTDEL(&bo_gem->head); 1262 1263 drm_intel_gem_bo_free(&bo_gem->bo); 1264 } 1265 } 1266 1267 free(bufmgr); 1268} 1269 1270/** 1271 * Adds the target buffer to the validation list and adds the relocation 1272 * to the reloc_buffer's relocation list. 1273 * 1274 * The relocation entry at the given offset must already contain the 1275 * precomputed relocation value, because the kernel will optimize out 1276 * the relocation entry write when the buffer hasn't moved from the 1277 * last known offset in target_bo. 1278 */ 1279static int 1280do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1281 drm_intel_bo *target_bo, uint32_t target_offset, 1282 uint32_t read_domains, uint32_t write_domain, 1283 int need_fence) 1284{ 1285 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1286 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1287 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1288 1289 if (bo_gem->has_error) 1290 return -ENOMEM; 1291 1292 if (target_bo_gem->has_error) { 1293 bo_gem->has_error = 1; 1294 return -ENOMEM; 1295 } 1296 1297 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1298 need_fence = 0; 1299 1300 /* We never use HW fences for rendering on 965+ */ 1301 if (bufmgr_gem->gen >= 4) 1302 need_fence = 0; 1303 1304 /* Create a new relocation list if needed */ 1305 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1306 return -ENOMEM; 1307 1308 /* Check overflow */ 1309 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1310 1311 /* Check args */ 1312 assert(offset <= bo->size - 4); 1313 assert((write_domain & (write_domain - 1)) == 0); 1314 1315 /* Make sure that we're not adding a reloc to something whose size has 1316 * already been accounted for. 1317 */ 1318 assert(!bo_gem->used_as_reloc_target); 1319 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1320 /* An object needing a fence is a tiled buffer, so it won't have 1321 * relocs to other buffers. 
1322 */ 1323 if (need_fence) 1324 target_bo_gem->reloc_tree_fences = 1; 1325 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1326 1327 /* Flag the target to disallow further relocations in it. */ 1328 target_bo_gem->used_as_reloc_target = 1; 1329 1330 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1331 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1332 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1333 target_bo_gem->gem_handle; 1334 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1335 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1336 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1337 1338 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1339 drm_intel_gem_bo_reference(target_bo); 1340 if (need_fence) 1341 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1342 DRM_INTEL_RELOC_FENCE; 1343 else 1344 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1345 1346 bo_gem->reloc_count++; 1347 1348 return 0; 1349} 1350 1351static int 1352drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1353 drm_intel_bo *target_bo, uint32_t target_offset, 1354 uint32_t read_domains, uint32_t write_domain) 1355{ 1356 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1357 1358 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1359 read_domains, write_domain, 1360 !bufmgr_gem->fenced_relocs); 1361} 1362 1363static int 1364drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1365 drm_intel_bo *target_bo, 1366 uint32_t target_offset, 1367 uint32_t read_domains, uint32_t write_domain) 1368{ 1369 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1370 read_domains, write_domain, 1); 1371} 1372 1373/** 1374 * Walk the tree of relocations rooted at BO and accumulate the list of 1375 * validations to be performed and update the relocation buffers with 1376 * index values into the validation list. 1377 */ 1378static void 1379drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1380{ 1381 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1382 int i; 1383 1384 if (bo_gem->relocs == NULL) 1385 return; 1386 1387 for (i = 0; i < bo_gem->reloc_count; i++) { 1388 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1389 1390 /* Continue walking the tree depth-first. */ 1391 drm_intel_gem_bo_process_reloc(target_bo); 1392 1393 /* Add the target to the validate list */ 1394 drm_intel_add_validate_buffer(target_bo); 1395 } 1396} 1397 1398static void 1399drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1400{ 1401 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1402 int i; 1403 1404 if (bo_gem->relocs == NULL) 1405 return; 1406 1407 for (i = 0; i < bo_gem->reloc_count; i++) { 1408 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1409 int need_fence; 1410 1411 /* Continue walking the tree depth-first. 
*/ 1412 drm_intel_gem_bo_process_reloc2(target_bo); 1413 1414 need_fence = (bo_gem->reloc_target_info[i].flags & 1415 DRM_INTEL_RELOC_FENCE); 1416 1417 /* Add the target to the validate list */ 1418 drm_intel_add_validate_buffer2(target_bo, need_fence); 1419 } 1420} 1421 1422 1423static void 1424drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1425{ 1426 int i; 1427 1428 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1429 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1430 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1431 1432 /* Update the buffer offset */ 1433 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1434 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1435 bo_gem->gem_handle, bo_gem->name, bo->offset, 1436 (unsigned long long)bufmgr_gem->exec_objects[i]. 1437 offset); 1438 bo->offset = bufmgr_gem->exec_objects[i].offset; 1439 } 1440 } 1441} 1442 1443static void 1444drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1445{ 1446 int i; 1447 1448 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1449 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1450 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1451 1452 /* Update the buffer offset */ 1453 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1454 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1455 bo_gem->gem_handle, bo_gem->name, bo->offset, 1456 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1457 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1458 } 1459 } 1460} 1461 1462static int 1463drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 1464 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 1465{ 1466 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1467 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1468 struct drm_i915_gem_execbuffer execbuf; 1469 int ret, i; 1470 1471 if (bo_gem->has_error) 1472 return -ENOMEM; 1473 1474 pthread_mutex_lock(&bufmgr_gem->lock); 1475 /* Update indices and set up the validate list. */ 1476 drm_intel_gem_bo_process_reloc(bo); 1477 1478 /* Add the batch buffer to the validation list. There are no 1479 * relocations pointing to it. 1480 */ 1481 drm_intel_add_validate_buffer(bo); 1482 1483 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 1484 execbuf.buffer_count = bufmgr_gem->exec_count; 1485 execbuf.batch_start_offset = 0; 1486 execbuf.batch_len = used; 1487 execbuf.cliprects_ptr = (uintptr_t) cliprects; 1488 execbuf.num_cliprects = num_cliprects; 1489 execbuf.DR1 = 0; 1490 execbuf.DR4 = DR4; 1491 1492 do { 1493 ret = ioctl(bufmgr_gem->fd, 1494 DRM_IOCTL_I915_GEM_EXECBUFFER, 1495 &execbuf); 1496 } while (ret != 0 && errno == EINTR); 1497 1498 if (ret != 0) { 1499 ret = -errno; 1500 if (errno == ENOSPC) { 1501 fprintf(stderr, 1502 "Execbuffer fails to pin. " 1503 "Estimate: %u. Actual: %u. 
Available: %u\n", 1504 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1505 bufmgr_gem-> 1506 exec_count), 1507 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1508 bufmgr_gem-> 1509 exec_count), 1510 (unsigned int)bufmgr_gem->gtt_size); 1511 } 1512 } 1513 drm_intel_update_buffer_offsets(bufmgr_gem); 1514 1515 if (bufmgr_gem->bufmgr.debug) 1516 drm_intel_gem_dump_validation_list(bufmgr_gem); 1517 1518 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1519 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1520 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1521 1522 /* Disconnect the buffer from the validate list */ 1523 bo_gem->validate_index = -1; 1524 bufmgr_gem->exec_bos[i] = NULL; 1525 } 1526 bufmgr_gem->exec_count = 0; 1527 pthread_mutex_unlock(&bufmgr_gem->lock); 1528 1529 return ret; 1530} 1531 1532static int 1533drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 1534 drm_clip_rect_t *cliprects, int num_cliprects, 1535 int DR4) 1536{ 1537 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1538 struct drm_i915_gem_execbuffer2 execbuf; 1539 int ret, i; 1540 1541 pthread_mutex_lock(&bufmgr_gem->lock); 1542 /* Update indices and set up the validate list. */ 1543 drm_intel_gem_bo_process_reloc2(bo); 1544 1545 /* Add the batch buffer to the validation list. There are no relocations 1546 * pointing to it. 1547 */ 1548 drm_intel_add_validate_buffer2(bo, 0); 1549 1550 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 1551 execbuf.buffer_count = bufmgr_gem->exec_count; 1552 execbuf.batch_start_offset = 0; 1553 execbuf.batch_len = used; 1554 execbuf.cliprects_ptr = (uintptr_t)cliprects; 1555 execbuf.num_cliprects = num_cliprects; 1556 execbuf.DR1 = 0; 1557 execbuf.DR4 = DR4; 1558 execbuf.flags = 0; 1559 execbuf.rsvd1 = 0; 1560 execbuf.rsvd2 = 0; 1561 1562 do { 1563 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, 1564 &execbuf); 1565 } while (ret != 0 && errno == EINTR); 1566 1567 if (ret != 0) { 1568 ret = -errno; 1569 if (ret == -ENOMEM) { 1570 fprintf(stderr, 1571 "Execbuffer fails to pin. " 1572 "Estimate: %u. Actual: %u. 
Available: %u\n", 1573 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1574 bufmgr_gem->exec_count), 1575 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1576 bufmgr_gem->exec_count), 1577 (unsigned int) bufmgr_gem->gtt_size); 1578 } 1579 } 1580 drm_intel_update_buffer_offsets2(bufmgr_gem); 1581 1582 if (bufmgr_gem->bufmgr.debug) 1583 drm_intel_gem_dump_validation_list(bufmgr_gem); 1584 1585 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1586 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1587 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1588 1589 /* Disconnect the buffer from the validate list */ 1590 bo_gem->validate_index = -1; 1591 bufmgr_gem->exec_bos[i] = NULL; 1592 } 1593 bufmgr_gem->exec_count = 0; 1594 pthread_mutex_unlock(&bufmgr_gem->lock); 1595 1596 return ret; 1597} 1598 1599static int 1600drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 1601{ 1602 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1603 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1604 struct drm_i915_gem_pin pin; 1605 int ret; 1606 1607 memset(&pin, 0, sizeof(pin)); 1608 pin.handle = bo_gem->gem_handle; 1609 pin.alignment = alignment; 1610 1611 do { 1612 ret = ioctl(bufmgr_gem->fd, 1613 DRM_IOCTL_I915_GEM_PIN, 1614 &pin); 1615 } while (ret == -1 && errno == EINTR); 1616 1617 if (ret != 0) 1618 return -errno; 1619 1620 bo->offset = pin.offset; 1621 return 0; 1622} 1623 1624static int 1625drm_intel_gem_bo_unpin(drm_intel_bo *bo) 1626{ 1627 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1628 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1629 struct drm_i915_gem_unpin unpin; 1630 int ret; 1631 1632 memset(&unpin, 0, sizeof(unpin)); 1633 unpin.handle = bo_gem->gem_handle; 1634 1635 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 1636 if (ret != 0) 1637 return -errno; 1638 1639 return 0; 1640} 1641 1642static int 1643drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1644 uint32_t stride) 1645{ 1646 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1647 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1648 struct drm_i915_gem_set_tiling set_tiling; 1649 int ret; 1650 1651 if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode) 1652 return 0; 1653 1654 memset(&set_tiling, 0, sizeof(set_tiling)); 1655 set_tiling.handle = bo_gem->gem_handle; 1656 1657 do { 1658 set_tiling.tiling_mode = *tiling_mode; 1659 set_tiling.stride = stride; 1660 1661 ret = ioctl(bufmgr_gem->fd, 1662 DRM_IOCTL_I915_GEM_SET_TILING, 1663 &set_tiling); 1664 } while (ret == -1 && errno == EINTR); 1665 bo_gem->tiling_mode = set_tiling.tiling_mode; 1666 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 1667 1668 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1669 1670 *tiling_mode = bo_gem->tiling_mode; 1671 return ret == 0 ? 
0 : -errno; 1672} 1673 1674static int 1675drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1676 uint32_t * swizzle_mode) 1677{ 1678 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1679 1680 *tiling_mode = bo_gem->tiling_mode; 1681 *swizzle_mode = bo_gem->swizzle_mode; 1682 return 0; 1683} 1684 1685static int 1686drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 1687{ 1688 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1689 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1690 struct drm_gem_flink flink; 1691 int ret; 1692 1693 if (!bo_gem->global_name) { 1694 memset(&flink, 0, sizeof(flink)); 1695 flink.handle = bo_gem->gem_handle; 1696 1697 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 1698 if (ret != 0) 1699 return -errno; 1700 bo_gem->global_name = flink.name; 1701 bo_gem->reusable = 0; 1702 } 1703 1704 *name = bo_gem->global_name; 1705 return 0; 1706} 1707 1708/** 1709 * Enables unlimited caching of buffer objects for reuse. 1710 * 1711 * This is potentially very memory expensive, as the cache at each bucket 1712 * size is only bounded by how many buffers of that size we've managed to have 1713 * in flight at once. 1714 */ 1715void 1716drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 1717{ 1718 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1719 1720 bufmgr_gem->bo_reuse = 1; 1721} 1722 1723/** 1724 * Enable use of fenced reloc type. 1725 * 1726 * New code should enable this to avoid unnecessary fence register 1727 * allocation. If this option is not enabled, all relocs will have fence 1728 * register allocated. 1729 */ 1730void 1731drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 1732{ 1733 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 1734 1735 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 1736 bufmgr_gem->fenced_relocs = 1; 1737} 1738 1739/** 1740 * Return the additional aperture space required by the tree of buffer objects 1741 * rooted at bo. 1742 */ 1743static int 1744drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 1745{ 1746 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1747 int i; 1748 int total = 0; 1749 1750 if (bo == NULL || bo_gem->included_in_check_aperture) 1751 return 0; 1752 1753 total += bo->size; 1754 bo_gem->included_in_check_aperture = 1; 1755 1756 for (i = 0; i < bo_gem->reloc_count; i++) 1757 total += 1758 drm_intel_gem_bo_get_aperture_space(bo_gem-> 1759 reloc_target_info[i].bo); 1760 1761 return total; 1762} 1763 1764/** 1765 * Count the number of buffers in this list that need a fence reg 1766 * 1767 * If the count is greater than the number of available regs, we'll have 1768 * to ask the caller to resubmit a batch with fewer tiled buffers. 1769 * 1770 * This function over-counts if the same buffer is used multiple times. 1771 */ 1772static unsigned int 1773drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 1774{ 1775 int i; 1776 unsigned int total = 0; 1777 1778 for (i = 0; i < count; i++) { 1779 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1780 1781 if (bo_gem == NULL) 1782 continue; 1783 1784 total += bo_gem->reloc_tree_fences; 1785 } 1786 return total; 1787} 1788 1789/** 1790 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 1791 * for the next drm_intel_bufmgr_check_aperture_space() call. 
1792 */ 1793static void 1794drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 1795{ 1796 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1797 int i; 1798 1799 if (bo == NULL || !bo_gem->included_in_check_aperture) 1800 return; 1801 1802 bo_gem->included_in_check_aperture = 0; 1803 1804 for (i = 0; i < bo_gem->reloc_count; i++) 1805 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 1806 reloc_target_info[i].bo); 1807} 1808 1809/** 1810 * Return a conservative estimate for the amount of aperture required 1811 * for a collection of buffers. This may double-count some buffers. 1812 */ 1813static unsigned int 1814drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 1815{ 1816 int i; 1817 unsigned int total = 0; 1818 1819 for (i = 0; i < count; i++) { 1820 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1821 if (bo_gem != NULL) 1822 total += bo_gem->reloc_tree_size; 1823 } 1824 return total; 1825} 1826 1827/** 1828 * Return the amount of aperture needed for a collection of buffers. 1829 * This avoids double counting any buffers, at the cost of looking 1830 * at every buffer in the set. 1831 */ 1832static unsigned int 1833drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 1834{ 1835 int i; 1836 unsigned int total = 0; 1837 1838 for (i = 0; i < count; i++) { 1839 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 1840 /* For the first buffer object in the array, we get an 1841 * accurate count back for its reloc_tree size (since nothing 1842 * had been flagged as being counted yet). We can save that 1843 * value out as a more conservative reloc_tree_size that 1844 * avoids double-counting target buffers. Since the first 1845 * buffer happens to usually be the batch buffer in our 1846 * callers, this can pull us back from doing the tree 1847 * walk on every new batch emit. 1848 */ 1849 if (i == 0) { 1850 drm_intel_bo_gem *bo_gem = 1851 (drm_intel_bo_gem *) bo_array[i]; 1852 bo_gem->reloc_tree_size = total; 1853 } 1854 } 1855 1856 for (i = 0; i < count; i++) 1857 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 1858 return total; 1859} 1860 1861/** 1862 * Return -1 if the batchbuffer should be flushed before attempting to 1863 * emit rendering referencing the buffers pointed to by bo_array. 1864 * 1865 * This is required because if we try to emit a batchbuffer with relocations 1866 * to a tree of buffers that won't simultaneously fit in the aperture, 1867 * the rendering will return an error at a point where the software is not 1868 * prepared to recover from it. 1869 * 1870 * However, we also want to emit the batchbuffer significantly before we reach 1871 * the limit, as a series of batchbuffers each of which references buffers 1872 * covering almost all of the aperture means that at each emit we end up 1873 * waiting to evict a buffer from the last rendering, and we get synchronous 1874 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 1875 * get better parallelism. 
1876 */ 1877static int 1878drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 1879{ 1880 drm_intel_bufmgr_gem *bufmgr_gem = 1881 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 1882 unsigned int total = 0; 1883 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 1884 int total_fences; 1885 1886 /* Check for fence reg constraints if necessary */ 1887 if (bufmgr_gem->available_fences) { 1888 total_fences = drm_intel_gem_total_fences(bo_array, count); 1889 if (total_fences > bufmgr_gem->available_fences) 1890 return -ENOSPC; 1891 } 1892 1893 total = drm_intel_gem_estimate_batch_space(bo_array, count); 1894 1895 if (total > threshold) 1896 total = drm_intel_gem_compute_batch_space(bo_array, count); 1897 1898 if (total > threshold) { 1899 DBG("check_space: overflowed available aperture, " 1900 "%dkb vs %dkb\n", 1901 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 1902 return -ENOSPC; 1903 } else { 1904 DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024, 1905 (int)bufmgr_gem->gtt_size / 1024); 1906 return 0; 1907 } 1908} 1909 1910/* 1911 * Disable buffer reuse for objects which are shared with the kernel 1912 * as scanout buffers 1913 */ 1914static int 1915drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 1916{ 1917 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1918 1919 bo_gem->reusable = 0; 1920 return 0; 1921} 1922 1923static int 1924_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 1925{ 1926 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1927 int i; 1928 1929 for (i = 0; i < bo_gem->reloc_count; i++) { 1930 if (bo_gem->reloc_target_info[i].bo == target_bo) 1931 return 1; 1932 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 1933 target_bo)) 1934 return 1; 1935 } 1936 1937 return 0; 1938} 1939 1940/** Return true if target_bo is referenced by bo's relocation tree. */ 1941static int 1942drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 1943{ 1944 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1945 1946 if (bo == NULL || target_bo == NULL) 1947 return 0; 1948 if (target_bo_gem->used_as_reloc_target) 1949 return _drm_intel_gem_bo_references(bo, target_bo); 1950 return 0; 1951} 1952 1953/** 1954 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 1955 * and manage buffer objects. 1956 * 1957 * \param fd File descriptor of the opened DRM device. 
1958 */ 1959drm_intel_bufmgr * 1960drm_intel_bufmgr_gem_init(int fd, int batch_size) 1961{ 1962 drm_intel_bufmgr_gem *bufmgr_gem; 1963 struct drm_i915_gem_get_aperture aperture; 1964 drm_i915_getparam_t gp; 1965 int ret, i; 1966 unsigned long size; 1967 int exec2 = 0; 1968 1969 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 1970 if (bufmgr_gem == NULL) 1971 return NULL; 1972 1973 bufmgr_gem->fd = fd; 1974 1975 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 1976 free(bufmgr_gem); 1977 return NULL; 1978 } 1979 1980 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); 1981 1982 if (ret == 0) 1983 bufmgr_gem->gtt_size = aperture.aper_available_size; 1984 else { 1985 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 1986 strerror(errno)); 1987 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 1988 fprintf(stderr, "Assuming %dkB available aperture size.\n" 1989 "May lead to reduced performance or incorrect " 1990 "rendering.\n", 1991 (int)bufmgr_gem->gtt_size / 1024); 1992 } 1993 1994 gp.param = I915_PARAM_CHIPSET_ID; 1995 gp.value = &bufmgr_gem->pci_device; 1996 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 1997 if (ret) { 1998 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 1999 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2000 } 2001 2002 if (IS_GEN2(bufmgr_gem)) 2003 bufmgr_gem->gen = 2; 2004 else if (IS_GEN3(bufmgr_gem)) 2005 bufmgr_gem->gen = 3; 2006 else if (IS_GEN4(bufmgr_gem)) 2007 bufmgr_gem->gen = 4; 2008 else 2009 bufmgr_gem->gen = 6; 2010 2011 gp.param = I915_PARAM_HAS_EXECBUF2; 2012 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2013 if (!ret) 2014 exec2 = 1; 2015 2016 if (bufmgr_gem->gen < 4) { 2017 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 2018 gp.value = &bufmgr_gem->available_fences; 2019 ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2020 if (ret) { 2021 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 2022 errno); 2023 fprintf(stderr, "param: %d, val: %d\n", gp.param, 2024 *gp.value); 2025 bufmgr_gem->available_fences = 0; 2026 } else { 2027 /* XXX The kernel reports the total number of fences, 2028 * including any that may be pinned. 2029 * 2030 * We presume that there will be at least one pinned 2031 * fence for the scanout buffer, but there may be more 2032 * than one scanout and the user may be manually 2033 * pinning buffers. Let's move to execbuffer2 and 2034 * thereby forget the insanity of using fences... 2035 */ 2036 bufmgr_gem->available_fences -= 2; 2037 if (bufmgr_gem->available_fences < 0) 2038 bufmgr_gem->available_fences = 0; 2039 } 2040 } 2041 2042 /* Let's go with one relocation per every 2 dwords (but round down a bit 2043 * since a power of two will mean an extra page allocation for the reloc 2044 * buffer). 2045 * 2046 * Every 4 was too few for the blender benchmark. 
2047 */ 2048 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 2049 2050 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 2051 bufmgr_gem->bufmgr.bo_alloc_for_render = 2052 drm_intel_gem_bo_alloc_for_render; 2053 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 2054 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 2055 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 2056 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 2057 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 2058 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 2059 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 2060 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 2061 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 2062 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 2063 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 2064 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 2065 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 2066 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 2067 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 2068 /* Use the new one if available */ 2069 if (exec2) 2070 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 2071 else 2072 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 2073 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 2074 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 2075 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; 2076 bufmgr_gem->bufmgr.debug = 0; 2077 bufmgr_gem->bufmgr.check_aperture_space = 2078 drm_intel_gem_check_aperture_space; 2079 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 2080 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 2081 drm_intel_gem_get_pipe_from_crtc_id; 2082 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 2083 2084 /* Initialize the linked lists for BO reuse cache. */ 2085 for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) { 2086 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2087 bufmgr_gem->cache_bucket[i].size = size; 2088 } 2089 2090 return &bufmgr_gem->bufmgr; 2091} 2092
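For reference, a minimal usage sketch of the entry points defined above (drm_intel_bufmgr_gem_init, drm_intel_bufmgr_gem_enable_reuse) together with the generic drm_intel_bo_* wrappers declared in intel_bufmgr.h. It is illustrative only and not part of this file; the device path, batch size, buffer name and sizes are assumptions made for the example.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include "intel_bufmgr.h"

int
example_bufmgr_usage(void)
{
	drm_intel_bufmgr *bufmgr;
	drm_intel_bo *bo;
	int fd;

	fd = open("/dev/dri/card0", O_RDWR);	/* example DRM node */
	if (fd < 0)
		return -1;

	/* batch_size sizes the relocation arrays (see max_relocs above). */
	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	if (bufmgr == NULL) {
		close(fd);
		return -1;
	}

	/* Opt in to the size-bucketed BO reuse cache. */
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	/* Allocation is rounded up to the matching cache bucket size. */
	bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 4096);
	if (bo != NULL) {
		/* CPU map (GEM mmap + set_domain to CPU), write, unmap. */
		if (drm_intel_bo_map(bo, 1) == 0) {
			memset(bo->virtual, 0, 4096);
			drm_intel_bo_unmap(bo);
		}
		/* Drop our reference; the BO may return to the cache. */
		drm_intel_bo_unreference(bo);
	}

	drm_intel_bufmgr_destroy(bufmgr);
	close(fd);
	return 0;
}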