intel_bufmgr_gem.c revision 6d98c517
1/************************************************************************** 2 * 3 * Copyright © 2007 Red Hat Inc. 4 * Copyright © 2007 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <stddef.h> 51#include <sys/ioctl.h> 52#include <sys/mman.h> 53#include <sys/stat.h> 54#include <sys/types.h> 55 56#include "errno.h" 57#include "libdrm_lists.h" 58#include "intel_bufmgr.h" 59#include "intel_bufmgr_priv.h" 60#include "intel_chipset.h" 61#include "string.h" 62 63#include "i915_drm.h" 64 65#define DBG(...) 
do { \ 66 if (bufmgr_gem->bufmgr.debug) \ 67 fprintf(stderr, __VA_ARGS__); \ 68} while (0) 69 70#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 71 72typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 73 74struct drm_intel_gem_bo_bucket { 75 drmMMListHead head; 76 unsigned long size; 77}; 78 79typedef struct _drm_intel_bufmgr_gem { 80 drm_intel_bufmgr bufmgr; 81 82 int fd; 83 84 int max_relocs; 85 86 pthread_mutex_t lock; 87 88 struct drm_i915_gem_exec_object *exec_objects; 89 struct drm_i915_gem_exec_object2 *exec2_objects; 90 drm_intel_bo **exec_bos; 91 int exec_size; 92 int exec_count; 93 94 /** Array of lists of cached gem objects of power-of-two sizes */ 95 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 96 int num_buckets; 97 time_t time; 98 99 uint64_t gtt_size; 100 int available_fences; 101 int pci_device; 102 int gen; 103 char bo_reuse; 104 char fenced_relocs; 105} drm_intel_bufmgr_gem; 106 107#define DRM_INTEL_RELOC_FENCE (1<<0) 108 109typedef struct _drm_intel_reloc_target_info { 110 drm_intel_bo *bo; 111 int flags; 112} drm_intel_reloc_target; 113 114struct _drm_intel_bo_gem { 115 drm_intel_bo bo; 116 117 atomic_t refcount; 118 uint32_t gem_handle; 119 const char *name; 120 121 /** 122 * Kernel-assigned global name for this object 123 */ 124 unsigned int global_name; 125 126 /** 127 * Index of the buffer within the validation list while preparing a 128 * batchbuffer execution. 129 */ 130 int validate_index; 131 132 /** 133 * Current tiling mode 134 */ 135 uint32_t tiling_mode; 136 uint32_t swizzle_mode; 137 unsigned long stride; 138 139 time_t free_time; 140 141 /** Array passed to the DRM containing relocation information. */ 142 struct drm_i915_gem_relocation_entry *relocs; 143 /** 144 * Array of info structs corresponding to relocs[i].target_handle etc 145 */ 146 drm_intel_reloc_target *reloc_target_info; 147 /** Number of entries in relocs */ 148 int reloc_count; 149 /** Mapped address for the buffer, saved across map/unmap cycles */ 150 void *mem_virtual; 151 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 152 void *gtt_virtual; 153 154 /** BO cache list */ 155 drmMMListHead head; 156 157 /** 158 * Boolean of whether this BO and its children have been included in 159 * the current drm_intel_bufmgr_check_aperture_space() total. 160 */ 161 char included_in_check_aperture; 162 163 /** 164 * Boolean of whether this buffer has been used as a relocation 165 * target and had its size accounted for, and thus can't have any 166 * further relocations added to it. 167 */ 168 char used_as_reloc_target; 169 170 /** 171 * Boolean of whether we have encountered an error whilst building the relocation tree. 172 */ 173 char has_error; 174 175 /** 176 * Boolean of whether this buffer can be re-used 177 */ 178 char reusable; 179 180 /** 181 * Size in bytes of this buffer and its relocation descendants. 182 * 183 * Used to avoid costly tree walking in 184 * drm_intel_bufmgr_check_aperture in the common case. 185 */ 186 int reloc_tree_size; 187 188 /** 189 * Number of potential fence registers required by this buffer and its 190 * relocations. 
191 */ 192 int reloc_tree_fences; 193}; 194 195static unsigned int 196drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 197 198static unsigned int 199drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 200 201static int 202drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 203 uint32_t * swizzle_mode); 204 205static int 206drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 207 uint32_t tiling_mode, 208 uint32_t stride); 209 210static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 211 time_t time); 212 213static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 214 215static void drm_intel_gem_bo_free(drm_intel_bo *bo); 216 217static unsigned long 218drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 219 uint32_t *tiling_mode) 220{ 221 unsigned long min_size, max_size; 222 unsigned long i; 223 224 if (*tiling_mode == I915_TILING_NONE) 225 return size; 226 227 /* 965+ just need multiples of page size for tiling */ 228 if (bufmgr_gem->gen >= 4) 229 return ROUND_UP_TO(size, 4096); 230 231 /* Older chips need powers of two, of at least 512k or 1M */ 232 if (bufmgr_gem->gen == 3) { 233 min_size = 1024*1024; 234 max_size = 128*1024*1024; 235 } else { 236 min_size = 512*1024; 237 max_size = 64*1024*1024; 238 } 239 240 if (size > max_size) { 241 *tiling_mode = I915_TILING_NONE; 242 return size; 243 } 244 245 for (i = min_size; i < size; i <<= 1) 246 ; 247 248 return i; 249} 250 251/* 252 * Round a given pitch up to the minimum required for X tiling on a 253 * given chip. We use 512 as the minimum to allow for a later tiling 254 * change. 255 */ 256static unsigned long 257drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 258 unsigned long pitch, uint32_t *tiling_mode) 259{ 260 unsigned long tile_width; 261 unsigned long i; 262 263 /* If untiled, then just align it so that we can do rendering 264 * to it with the 3D engine. 265 */ 266 if (*tiling_mode == I915_TILING_NONE) 267 return ALIGN(pitch, 64); 268 269 if (*tiling_mode == I915_TILING_X) 270 tile_width = 512; 271 else 272 tile_width = 128; 273 274 /* 965 is flexible */ 275 if (bufmgr_gem->gen >= 4) 276 return ROUND_UP_TO(pitch, tile_width); 277 278 /* The older hardware has a maximum pitch of 8192 with tiled 279 * surfaces, so fallback to untiled if it's too large. 
280 */ 281 if (pitch > 8192) { 282 *tiling_mode = I915_TILING_NONE; 283 return ALIGN(pitch, 64); 284 } 285 286 /* Pre-965 needs power of two tile width */ 287 for (i = tile_width; i < pitch; i <<= 1) 288 ; 289 290 return i; 291} 292 293static struct drm_intel_gem_bo_bucket * 294drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 295 unsigned long size) 296{ 297 int i; 298 299 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 300 struct drm_intel_gem_bo_bucket *bucket = 301 &bufmgr_gem->cache_bucket[i]; 302 if (bucket->size >= size) { 303 return bucket; 304 } 305 } 306 307 return NULL; 308} 309 310static void 311drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 312{ 313 int i, j; 314 315 for (i = 0; i < bufmgr_gem->exec_count; i++) { 316 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 317 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 318 319 if (bo_gem->relocs == NULL) { 320 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 321 bo_gem->name); 322 continue; 323 } 324 325 for (j = 0; j < bo_gem->reloc_count; j++) { 326 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 327 drm_intel_bo_gem *target_gem = 328 (drm_intel_bo_gem *) target_bo; 329 330 DBG("%2d: %d (%s)@0x%08llx -> " 331 "%d (%s)@0x%08lx + 0x%08x\n", 332 i, 333 bo_gem->gem_handle, bo_gem->name, 334 (unsigned long long)bo_gem->relocs[j].offset, 335 target_gem->gem_handle, 336 target_gem->name, 337 target_bo->offset, 338 bo_gem->relocs[j].delta); 339 } 340 } 341} 342 343static inline void 344drm_intel_gem_bo_reference(drm_intel_bo *bo) 345{ 346 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 347 348 assert(atomic_read(&bo_gem->refcount) > 0); 349 atomic_inc(&bo_gem->refcount); 350} 351 352/** 353 * Adds the given buffer to the list of buffers to be validated (moved into the 354 * appropriate memory type) with the next batch submission. 355 * 356 * If a buffer is validated multiple times in a batch submission, it ends up 357 * with the intersection of the memory type flags and the union of the 358 * access flags. 359 */ 360static void 361drm_intel_add_validate_buffer(drm_intel_bo *bo) 362{ 363 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 364 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 365 int index; 366 367 if (bo_gem->validate_index != -1) 368 return; 369 370 /* Extend the array of validation entries as necessary. 
*/ 371 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 372 int new_size = bufmgr_gem->exec_size * 2; 373 374 if (new_size == 0) 375 new_size = 5; 376 377 bufmgr_gem->exec_objects = 378 realloc(bufmgr_gem->exec_objects, 379 sizeof(*bufmgr_gem->exec_objects) * new_size); 380 bufmgr_gem->exec_bos = 381 realloc(bufmgr_gem->exec_bos, 382 sizeof(*bufmgr_gem->exec_bos) * new_size); 383 bufmgr_gem->exec_size = new_size; 384 } 385 386 index = bufmgr_gem->exec_count; 387 bo_gem->validate_index = index; 388 /* Fill in array entry */ 389 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 390 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 391 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 392 bufmgr_gem->exec_objects[index].alignment = 0; 393 bufmgr_gem->exec_objects[index].offset = 0; 394 bufmgr_gem->exec_bos[index] = bo; 395 bufmgr_gem->exec_count++; 396} 397 398static void 399drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 400{ 401 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 402 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 403 int index; 404 405 if (bo_gem->validate_index != -1) { 406 if (need_fence) 407 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 408 EXEC_OBJECT_NEEDS_FENCE; 409 return; 410 } 411 412 /* Extend the array of validation entries as necessary. */ 413 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 414 int new_size = bufmgr_gem->exec_size * 2; 415 416 if (new_size == 0) 417 new_size = 5; 418 419 bufmgr_gem->exec2_objects = 420 realloc(bufmgr_gem->exec2_objects, 421 sizeof(*bufmgr_gem->exec2_objects) * new_size); 422 bufmgr_gem->exec_bos = 423 realloc(bufmgr_gem->exec_bos, 424 sizeof(*bufmgr_gem->exec_bos) * new_size); 425 bufmgr_gem->exec_size = new_size; 426 } 427 428 index = bufmgr_gem->exec_count; 429 bo_gem->validate_index = index; 430 /* Fill in array entry */ 431 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 432 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 433 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 434 bufmgr_gem->exec2_objects[index].alignment = 0; 435 bufmgr_gem->exec2_objects[index].offset = 0; 436 bufmgr_gem->exec_bos[index] = bo; 437 bufmgr_gem->exec2_objects[index].flags = 0; 438 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 439 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 440 if (need_fence) { 441 bufmgr_gem->exec2_objects[index].flags |= 442 EXEC_OBJECT_NEEDS_FENCE; 443 } 444 bufmgr_gem->exec_count++; 445} 446 447#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 448 sizeof(uint32_t)) 449 450static void 451drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 452 drm_intel_bo_gem *bo_gem) 453{ 454 int size; 455 456 assert(!bo_gem->used_as_reloc_target); 457 458 /* The older chipsets are far-less flexible in terms of tiling, 459 * and require tiled buffer to be size aligned in the aperture. 460 * This means that in the worst possible case we will need a hole 461 * twice as large as the object in order for it to fit into the 462 * aperture. Optimal packing is for wimps. 
463 */ 464 size = bo_gem->bo.size; 465 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) 466 size *= 2; 467 468 bo_gem->reloc_tree_size = size; 469} 470 471static int 472drm_intel_setup_reloc_list(drm_intel_bo *bo) 473{ 474 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 475 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 476 unsigned int max_relocs = bufmgr_gem->max_relocs; 477 478 if (bo->size / 4 < max_relocs) 479 max_relocs = bo->size / 4; 480 481 bo_gem->relocs = malloc(max_relocs * 482 sizeof(struct drm_i915_gem_relocation_entry)); 483 bo_gem->reloc_target_info = malloc(max_relocs * 484 sizeof(drm_intel_reloc_target)); 485 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 486 bo_gem->has_error = 1; 487 488 free (bo_gem->relocs); 489 bo_gem->relocs = NULL; 490 491 free (bo_gem->reloc_target_info); 492 bo_gem->reloc_target_info = NULL; 493 494 return 1; 495 } 496 497 return 0; 498} 499 500static int 501drm_intel_gem_bo_busy(drm_intel_bo *bo) 502{ 503 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 504 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 505 struct drm_i915_gem_busy busy; 506 int ret; 507 508 memset(&busy, 0, sizeof(busy)); 509 busy.handle = bo_gem->gem_handle; 510 511 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 512 513 return (ret == 0 && busy.busy); 514} 515 516static int 517drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 518 drm_intel_bo_gem *bo_gem, int state) 519{ 520 struct drm_i915_gem_madvise madv; 521 522 madv.handle = bo_gem->gem_handle; 523 madv.madv = state; 524 madv.retained = 1; 525 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 526 527 return madv.retained; 528} 529 530static int 531drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 532{ 533 return drm_intel_gem_bo_madvise_internal 534 ((drm_intel_bufmgr_gem *) bo->bufmgr, 535 (drm_intel_bo_gem *) bo, 536 madv); 537} 538 539/* drop the oldest entries that have been purged by the kernel */ 540static void 541drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 542 struct drm_intel_gem_bo_bucket *bucket) 543{ 544 while (!DRMLISTEMPTY(&bucket->head)) { 545 drm_intel_bo_gem *bo_gem; 546 547 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 548 bucket->head.next, head); 549 if (drm_intel_gem_bo_madvise_internal 550 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 551 break; 552 553 DRMLISTDEL(&bo_gem->head); 554 drm_intel_gem_bo_free(&bo_gem->bo); 555 } 556} 557 558static drm_intel_bo * 559drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 560 const char *name, 561 unsigned long size, 562 unsigned long flags, 563 uint32_t tiling_mode, 564 unsigned long stride) 565{ 566 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 567 drm_intel_bo_gem *bo_gem; 568 unsigned int page_size = getpagesize(); 569 int ret; 570 struct drm_intel_gem_bo_bucket *bucket; 571 int alloc_from_cache; 572 unsigned long bo_size; 573 int for_render = 0; 574 575 if (flags & BO_ALLOC_FOR_RENDER) 576 for_render = 1; 577 578 /* Round the allocated size up to a power of two number of pages. */ 579 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 580 581 /* If we don't have caching at this size, don't actually round the 582 * allocation up. 
583 */ 584 if (bucket == NULL) { 585 bo_size = size; 586 if (bo_size < page_size) 587 bo_size = page_size; 588 } else { 589 bo_size = bucket->size; 590 } 591 592 pthread_mutex_lock(&bufmgr_gem->lock); 593 /* Get a buffer out of the cache if available */ 594retry: 595 alloc_from_cache = 0; 596 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 597 if (for_render) { 598 /* Allocate new render-target BOs from the tail (MRU) 599 * of the list, as it will likely be hot in the GPU 600 * cache and in the aperture for us. 601 */ 602 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 603 bucket->head.prev, head); 604 DRMLISTDEL(&bo_gem->head); 605 alloc_from_cache = 1; 606 } else { 607 /* For non-render-target BOs (where we're probably 608 * going to map it first thing in order to fill it 609 * with data), check if the last BO in the cache is 610 * unbusy, and only reuse in that case. Otherwise, 611 * allocating a new buffer is probably faster than 612 * waiting for the GPU to finish. 613 */ 614 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 615 bucket->head.next, head); 616 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 617 alloc_from_cache = 1; 618 DRMLISTDEL(&bo_gem->head); 619 } 620 } 621 622 if (alloc_from_cache) { 623 if (!drm_intel_gem_bo_madvise_internal 624 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 625 drm_intel_gem_bo_free(&bo_gem->bo); 626 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 627 bucket); 628 goto retry; 629 } 630 631 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 632 tiling_mode, 633 stride)) { 634 drm_intel_gem_bo_free(&bo_gem->bo); 635 goto retry; 636 } 637 } 638 } 639 pthread_mutex_unlock(&bufmgr_gem->lock); 640 641 if (!alloc_from_cache) { 642 struct drm_i915_gem_create create; 643 644 bo_gem = calloc(1, sizeof(*bo_gem)); 645 if (!bo_gem) 646 return NULL; 647 648 bo_gem->bo.size = bo_size; 649 memset(&create, 0, sizeof(create)); 650 create.size = bo_size; 651 652 ret = drmIoctl(bufmgr_gem->fd, 653 DRM_IOCTL_I915_GEM_CREATE, 654 &create); 655 bo_gem->gem_handle = create.handle; 656 bo_gem->bo.handle = bo_gem->gem_handle; 657 if (ret != 0) { 658 free(bo_gem); 659 return NULL; 660 } 661 bo_gem->bo.bufmgr = bufmgr; 662 663 bo_gem->tiling_mode = I915_TILING_NONE; 664 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 665 bo_gem->stride = 0; 666 667 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 668 tiling_mode, 669 stride)) { 670 drm_intel_gem_bo_free(&bo_gem->bo); 671 return NULL; 672 } 673 } 674 675 bo_gem->name = name; 676 atomic_set(&bo_gem->refcount, 1); 677 bo_gem->validate_index = -1; 678 bo_gem->reloc_tree_fences = 0; 679 bo_gem->used_as_reloc_target = 0; 680 bo_gem->has_error = 0; 681 bo_gem->reusable = 1; 682 683 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 684 685 DBG("bo_create: buf %d (%s) %ldb\n", 686 bo_gem->gem_handle, bo_gem->name, size); 687 688 return &bo_gem->bo; 689} 690 691static drm_intel_bo * 692drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 693 const char *name, 694 unsigned long size, 695 unsigned int alignment) 696{ 697 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 698 BO_ALLOC_FOR_RENDER, 699 I915_TILING_NONE, 0); 700} 701 702static drm_intel_bo * 703drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 704 const char *name, 705 unsigned long size, 706 unsigned int alignment) 707{ 708 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 709 I915_TILING_NONE, 0); 710} 711 712static drm_intel_bo * 713drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 714 int x, int y, int cpp, uint32_t 
*tiling_mode, 715 unsigned long *pitch, unsigned long flags) 716{ 717 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 718 unsigned long size, stride; 719 uint32_t tiling; 720 721 do { 722 unsigned long aligned_y; 723 724 tiling = *tiling_mode; 725 726 /* If we're tiled, our allocations are in 8 or 32-row blocks, 727 * so failure to align our height means that we won't allocate 728 * enough pages. 729 * 730 * If we're untiled, we still have to align to 2 rows high 731 * because the data port accesses 2x2 blocks even if the 732 * bottom row isn't to be rendered, so failure to align means 733 * we could walk off the end of the GTT and fault. This is 734 * documented on 965, and may be the case on older chipsets 735 * too so we try to be careful. 736 */ 737 aligned_y = y; 738 if (tiling == I915_TILING_NONE) 739 aligned_y = ALIGN(y, 2); 740 else if (tiling == I915_TILING_X) 741 aligned_y = ALIGN(y, 8); 742 else if (tiling == I915_TILING_Y) 743 aligned_y = ALIGN(y, 32); 744 745 stride = x * cpp; 746 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 747 size = stride * aligned_y; 748 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 749 } while (*tiling_mode != tiling); 750 *pitch = stride; 751 752 if (tiling == I915_TILING_NONE) 753 stride = 0; 754 755 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 756 tiling, stride); 757} 758 759/** 760 * Returns a drm_intel_bo wrapping the given buffer object handle. 761 * 762 * This can be used when one application needs to pass a buffer object 763 * to another. 764 */ 765drm_intel_bo * 766drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 767 const char *name, 768 unsigned int handle) 769{ 770 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 771 drm_intel_bo_gem *bo_gem; 772 int ret; 773 struct drm_gem_open open_arg; 774 struct drm_i915_gem_get_tiling get_tiling; 775 776 bo_gem = calloc(1, sizeof(*bo_gem)); 777 if (!bo_gem) 778 return NULL; 779 780 memset(&open_arg, 0, sizeof(open_arg)); 781 open_arg.name = handle; 782 ret = drmIoctl(bufmgr_gem->fd, 783 DRM_IOCTL_GEM_OPEN, 784 &open_arg); 785 if (ret != 0) { 786 fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", 787 name, handle, strerror(errno)); 788 free(bo_gem); 789 return NULL; 790 } 791 bo_gem->bo.size = open_arg.size; 792 bo_gem->bo.offset = 0; 793 bo_gem->bo.virtual = NULL; 794 bo_gem->bo.bufmgr = bufmgr; 795 bo_gem->name = name; 796 atomic_set(&bo_gem->refcount, 1); 797 bo_gem->validate_index = -1; 798 bo_gem->gem_handle = open_arg.handle; 799 bo_gem->global_name = handle; 800 bo_gem->reusable = 0; 801 802 memset(&get_tiling, 0, sizeof(get_tiling)); 803 get_tiling.handle = bo_gem->gem_handle; 804 ret = drmIoctl(bufmgr_gem->fd, 805 DRM_IOCTL_I915_GEM_GET_TILING, 806 &get_tiling); 807 if (ret != 0) { 808 drm_intel_gem_bo_unreference(&bo_gem->bo); 809 return NULL; 810 } 811 bo_gem->tiling_mode = get_tiling.tiling_mode; 812 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 813 /* XXX stride is unknown */ 814 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 815 816 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 817 818 return &bo_gem->bo; 819} 820 821static void 822drm_intel_gem_bo_free(drm_intel_bo *bo) 823{ 824 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 825 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 826 struct drm_gem_close close; 827 int ret; 828 829 if (bo_gem->mem_virtual) 830 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 831 
if (bo_gem->gtt_virtual) 832 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 833 834 /* Close this object */ 835 memset(&close, 0, sizeof(close)); 836 close.handle = bo_gem->gem_handle; 837 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 838 if (ret != 0) { 839 fprintf(stderr, 840 "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 841 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 842 } 843 free(bo); 844} 845 846/** Frees all cached buffers significantly older than @time. */ 847static void 848drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 849{ 850 int i; 851 852 if (bufmgr_gem->time == time) 853 return; 854 855 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 856 struct drm_intel_gem_bo_bucket *bucket = 857 &bufmgr_gem->cache_bucket[i]; 858 859 while (!DRMLISTEMPTY(&bucket->head)) { 860 drm_intel_bo_gem *bo_gem; 861 862 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 863 bucket->head.next, head); 864 if (time - bo_gem->free_time <= 1) 865 break; 866 867 DRMLISTDEL(&bo_gem->head); 868 869 drm_intel_gem_bo_free(&bo_gem->bo); 870 } 871 } 872 873 bufmgr_gem->time = time; 874} 875 876static void 877drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 878{ 879 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 880 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 881 struct drm_intel_gem_bo_bucket *bucket; 882 int i; 883 884 /* Unreference all the target buffers */ 885 for (i = 0; i < bo_gem->reloc_count; i++) { 886 if (bo_gem->reloc_target_info[i].bo != bo) { 887 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 888 reloc_target_info[i].bo, 889 time); 890 } 891 } 892 bo_gem->reloc_count = 0; 893 bo_gem->used_as_reloc_target = 0; 894 895 DBG("bo_unreference final: %d (%s)\n", 896 bo_gem->gem_handle, bo_gem->name); 897 898 /* release memory associated with this object */ 899 if (bo_gem->reloc_target_info) { 900 free(bo_gem->reloc_target_info); 901 bo_gem->reloc_target_info = NULL; 902 } 903 if (bo_gem->relocs) { 904 free(bo_gem->relocs); 905 bo_gem->relocs = NULL; 906 } 907 908 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 909 /* Put the buffer into our internal cache for reuse if we can. 
*/ 910 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 911 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 912 I915_MADV_DONTNEED)) { 913 bo_gem->free_time = time; 914 915 bo_gem->name = NULL; 916 bo_gem->validate_index = -1; 917 918 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 919 } else { 920 drm_intel_gem_bo_free(bo); 921 } 922} 923 924static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 925 time_t time) 926{ 927 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 928 929 assert(atomic_read(&bo_gem->refcount) > 0); 930 if (atomic_dec_and_test(&bo_gem->refcount)) 931 drm_intel_gem_bo_unreference_final(bo, time); 932} 933 934static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 935{ 936 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 937 938 assert(atomic_read(&bo_gem->refcount) > 0); 939 if (atomic_dec_and_test(&bo_gem->refcount)) { 940 drm_intel_bufmgr_gem *bufmgr_gem = 941 (drm_intel_bufmgr_gem *) bo->bufmgr; 942 struct timespec time; 943 944 clock_gettime(CLOCK_MONOTONIC, &time); 945 946 pthread_mutex_lock(&bufmgr_gem->lock); 947 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 948 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 949 pthread_mutex_unlock(&bufmgr_gem->lock); 950 } 951} 952 953static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 954{ 955 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 956 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 957 struct drm_i915_gem_set_domain set_domain; 958 int ret; 959 960 pthread_mutex_lock(&bufmgr_gem->lock); 961 962 /* Allow recursive mapping. Mesa may recursively map buffers with 963 * nested display loops. 964 */ 965 if (!bo_gem->mem_virtual) { 966 struct drm_i915_gem_mmap mmap_arg; 967 968 DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 969 970 memset(&mmap_arg, 0, sizeof(mmap_arg)); 971 mmap_arg.handle = bo_gem->gem_handle; 972 mmap_arg.offset = 0; 973 mmap_arg.size = bo->size; 974 ret = drmIoctl(bufmgr_gem->fd, 975 DRM_IOCTL_I915_GEM_MMAP, 976 &mmap_arg); 977 if (ret != 0) { 978 ret = -errno; 979 fprintf(stderr, 980 "%s:%d: Error mapping buffer %d (%s): %s .\n", 981 __FILE__, __LINE__, bo_gem->gem_handle, 982 bo_gem->name, strerror(errno)); 983 pthread_mutex_unlock(&bufmgr_gem->lock); 984 return ret; 985 } 986 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 987 } 988 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 989 bo_gem->mem_virtual); 990 bo->virtual = bo_gem->mem_virtual; 991 992 set_domain.handle = bo_gem->gem_handle; 993 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 994 if (write_enable) 995 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 996 else 997 set_domain.write_domain = 0; 998 ret = drmIoctl(bufmgr_gem->fd, 999 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1000 &set_domain); 1001 if (ret != 0) { 1002 fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n", 1003 __FILE__, __LINE__, bo_gem->gem_handle, 1004 strerror(errno)); 1005 } 1006 1007 pthread_mutex_unlock(&bufmgr_gem->lock); 1008 1009 return 0; 1010} 1011 1012int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1013{ 1014 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1015 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1016 struct drm_i915_gem_set_domain set_domain; 1017 int ret; 1018 1019 pthread_mutex_lock(&bufmgr_gem->lock); 1020 1021 /* Get a mapping of the buffer if we haven't before. 
*/ 1022 if (bo_gem->gtt_virtual == NULL) { 1023 struct drm_i915_gem_mmap_gtt mmap_arg; 1024 1025 DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 1026 bo_gem->name); 1027 1028 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1029 mmap_arg.handle = bo_gem->gem_handle; 1030 1031 /* Get the fake offset back... */ 1032 ret = drmIoctl(bufmgr_gem->fd, 1033 DRM_IOCTL_I915_GEM_MMAP_GTT, 1034 &mmap_arg); 1035 if (ret != 0) { 1036 ret = -errno; 1037 fprintf(stderr, 1038 "%s:%d: Error preparing buffer map %d (%s): %s .\n", 1039 __FILE__, __LINE__, 1040 bo_gem->gem_handle, bo_gem->name, 1041 strerror(errno)); 1042 pthread_mutex_unlock(&bufmgr_gem->lock); 1043 return ret; 1044 } 1045 1046 /* and mmap it */ 1047 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1048 MAP_SHARED, bufmgr_gem->fd, 1049 mmap_arg.offset); 1050 if (bo_gem->gtt_virtual == MAP_FAILED) { 1051 bo_gem->gtt_virtual = NULL; 1052 ret = -errno; 1053 fprintf(stderr, 1054 "%s:%d: Error mapping buffer %d (%s): %s .\n", 1055 __FILE__, __LINE__, 1056 bo_gem->gem_handle, bo_gem->name, 1057 strerror(errno)); 1058 pthread_mutex_unlock(&bufmgr_gem->lock); 1059 return ret; 1060 } 1061 } 1062 1063 bo->virtual = bo_gem->gtt_virtual; 1064 1065 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1066 bo_gem->gtt_virtual); 1067 1068 /* Now move it to the GTT domain so that the CPU caches are flushed */ 1069 set_domain.handle = bo_gem->gem_handle; 1070 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1071 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1072 ret = drmIoctl(bufmgr_gem->fd, 1073 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1074 &set_domain); 1075 if (ret != 0) { 1076 fprintf(stderr, "%s:%d: Error setting domain %d: %s\n", 1077 __FILE__, __LINE__, bo_gem->gem_handle, 1078 strerror(errno)); 1079 } 1080 1081 pthread_mutex_unlock(&bufmgr_gem->lock); 1082 1083 return 0; 1084} 1085 1086int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1087{ 1088 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1089 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1090 int ret = 0; 1091 1092 if (bo == NULL) 1093 return 0; 1094 1095 assert(bo_gem->gtt_virtual != NULL); 1096 1097 pthread_mutex_lock(&bufmgr_gem->lock); 1098 bo->virtual = NULL; 1099 pthread_mutex_unlock(&bufmgr_gem->lock); 1100 1101 return ret; 1102} 1103 1104static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1105{ 1106 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1107 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1108 struct drm_i915_gem_sw_finish sw_finish; 1109 int ret; 1110 1111 if (bo == NULL) 1112 return 0; 1113 1114 assert(bo_gem->mem_virtual != NULL); 1115 1116 pthread_mutex_lock(&bufmgr_gem->lock); 1117 1118 /* Cause a flush to happen if the buffer's pinned for scanout, so the 1119 * results show up in a timely manner. 1120 */ 1121 sw_finish.handle = bo_gem->gem_handle; 1122 ret = drmIoctl(bufmgr_gem->fd, 1123 DRM_IOCTL_I915_GEM_SW_FINISH, 1124 &sw_finish); 1125 ret = ret == -1 ? 
-errno : 0; 1126 1127 bo->virtual = NULL; 1128 pthread_mutex_unlock(&bufmgr_gem->lock); 1129 1130 return ret; 1131} 1132 1133static int 1134drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1135 unsigned long size, const void *data) 1136{ 1137 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1138 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1139 struct drm_i915_gem_pwrite pwrite; 1140 int ret; 1141 1142 memset(&pwrite, 0, sizeof(pwrite)); 1143 pwrite.handle = bo_gem->gem_handle; 1144 pwrite.offset = offset; 1145 pwrite.size = size; 1146 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1147 ret = drmIoctl(bufmgr_gem->fd, 1148 DRM_IOCTL_I915_GEM_PWRITE, 1149 &pwrite); 1150 if (ret != 0) { 1151 ret = -errno; 1152 fprintf(stderr, 1153 "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1154 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1155 (int)size, strerror(errno)); 1156 } 1157 1158 return ret; 1159} 1160 1161static int 1162drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1163{ 1164 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1165 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1166 int ret; 1167 1168 get_pipe_from_crtc_id.crtc_id = crtc_id; 1169 ret = drmIoctl(bufmgr_gem->fd, 1170 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1171 &get_pipe_from_crtc_id); 1172 if (ret != 0) { 1173 /* We return -1 here to signal that we don't 1174 * know which pipe is associated with this crtc. 1175 * This lets the caller know that this information 1176 * isn't available; using the wrong pipe for 1177 * vblank waiting can cause the chipset to lock up 1178 */ 1179 return -1; 1180 } 1181 1182 return get_pipe_from_crtc_id.pipe; 1183} 1184 1185static int 1186drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1187 unsigned long size, void *data) 1188{ 1189 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1190 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1191 struct drm_i915_gem_pread pread; 1192 int ret; 1193 1194 memset(&pread, 0, sizeof(pread)); 1195 pread.handle = bo_gem->gem_handle; 1196 pread.offset = offset; 1197 pread.size = size; 1198 pread.data_ptr = (uint64_t) (uintptr_t) data; 1199 ret = drmIoctl(bufmgr_gem->fd, 1200 DRM_IOCTL_I915_GEM_PREAD, 1201 &pread); 1202 if (ret != 0) { 1203 ret = -errno; 1204 fprintf(stderr, 1205 "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1206 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1207 (int)size, strerror(errno)); 1208 } 1209 1210 return ret; 1211} 1212 1213/** Waits for all GPU rendering to the object to have completed. */ 1214static void 1215drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1216{ 1217 drm_intel_gem_bo_start_gtt_access(bo, 0); 1218} 1219 1220/** 1221 * Sets the object to the GTT read and possibly write domain, used by the X 1222 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1223 * 1224 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1225 * can do tiled pixmaps this way. 1226 */ 1227void 1228drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1229{ 1230 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1231 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1232 struct drm_i915_gem_set_domain set_domain; 1233 int ret; 1234 1235 set_domain.handle = bo_gem->gem_handle; 1236 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1237 set_domain.write_domain = write_enable ? 
I915_GEM_DOMAIN_GTT : 0; 1238 ret = drmIoctl(bufmgr_gem->fd, 1239 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1240 &set_domain); 1241 if (ret != 0) { 1242 fprintf(stderr, 1243 "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1244 __FILE__, __LINE__, bo_gem->gem_handle, 1245 set_domain.read_domains, set_domain.write_domain, 1246 strerror(errno)); 1247 } 1248} 1249 1250static void 1251drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1252{ 1253 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1254 int i; 1255 1256 free(bufmgr_gem->exec2_objects); 1257 free(bufmgr_gem->exec_objects); 1258 free(bufmgr_gem->exec_bos); 1259 1260 pthread_mutex_destroy(&bufmgr_gem->lock); 1261 1262 /* Free any cached buffer objects we were going to reuse */ 1263 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1264 struct drm_intel_gem_bo_bucket *bucket = 1265 &bufmgr_gem->cache_bucket[i]; 1266 drm_intel_bo_gem *bo_gem; 1267 1268 while (!DRMLISTEMPTY(&bucket->head)) { 1269 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1270 bucket->head.next, head); 1271 DRMLISTDEL(&bo_gem->head); 1272 1273 drm_intel_gem_bo_free(&bo_gem->bo); 1274 } 1275 } 1276 1277 free(bufmgr); 1278} 1279 1280/** 1281 * Adds the target buffer to the validation list and adds the relocation 1282 * to the reloc_buffer's relocation list. 1283 * 1284 * The relocation entry at the given offset must already contain the 1285 * precomputed relocation value, because the kernel will optimize out 1286 * the relocation entry write when the buffer hasn't moved from the 1287 * last known offset in target_bo. 1288 */ 1289static int 1290do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1291 drm_intel_bo *target_bo, uint32_t target_offset, 1292 uint32_t read_domains, uint32_t write_domain, 1293 int need_fence) 1294{ 1295 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1296 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1297 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1298 1299 if (bo_gem->has_error) 1300 return -ENOMEM; 1301 1302 if (target_bo_gem->has_error) { 1303 bo_gem->has_error = 1; 1304 return -ENOMEM; 1305 } 1306 1307 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1308 need_fence = 0; 1309 1310 /* We never use HW fences for rendering on 965+ */ 1311 if (bufmgr_gem->gen >= 4) 1312 need_fence = 0; 1313 1314 /* Create a new relocation list if needed */ 1315 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1316 return -ENOMEM; 1317 1318 /* Check overflow */ 1319 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1320 1321 /* Check args */ 1322 assert(offset <= bo->size - 4); 1323 assert((write_domain & (write_domain - 1)) == 0); 1324 1325 /* Make sure that we're not adding a reloc to something whose size has 1326 * already been accounted for. 1327 */ 1328 assert(!bo_gem->used_as_reloc_target); 1329 if (target_bo_gem != bo_gem) { 1330 target_bo_gem->used_as_reloc_target = 1; 1331 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1332 } 1333 /* An object needing a fence is a tiled buffer, so it won't have 1334 * relocs to other buffers. 1335 */ 1336 if (need_fence) 1337 target_bo_gem->reloc_tree_fences = 1; 1338 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1339 1340 /* Flag the target to disallow further relocations in it. 
*/ 1341 1342 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1343 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1344 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1345 target_bo_gem->gem_handle; 1346 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1347 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1348 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1349 1350 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1351 if (target_bo != bo) 1352 drm_intel_gem_bo_reference(target_bo); 1353 if (need_fence) 1354 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1355 DRM_INTEL_RELOC_FENCE; 1356 else 1357 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1358 1359 bo_gem->reloc_count++; 1360 1361 return 0; 1362} 1363 1364static int 1365drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1366 drm_intel_bo *target_bo, uint32_t target_offset, 1367 uint32_t read_domains, uint32_t write_domain) 1368{ 1369 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1370 1371 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1372 read_domains, write_domain, 1373 !bufmgr_gem->fenced_relocs); 1374} 1375 1376static int 1377drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1378 drm_intel_bo *target_bo, 1379 uint32_t target_offset, 1380 uint32_t read_domains, uint32_t write_domain) 1381{ 1382 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1383 read_domains, write_domain, 1); 1384} 1385 1386/** 1387 * Walk the tree of relocations rooted at BO and accumulate the list of 1388 * validations to be performed and update the relocation buffers with 1389 * index values into the validation list. 1390 */ 1391static void 1392drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1393{ 1394 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1395 int i; 1396 1397 if (bo_gem->relocs == NULL) 1398 return; 1399 1400 for (i = 0; i < bo_gem->reloc_count; i++) { 1401 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1402 1403 if (target_bo == bo) 1404 continue; 1405 1406 /* Continue walking the tree depth-first. */ 1407 drm_intel_gem_bo_process_reloc(target_bo); 1408 1409 /* Add the target to the validate list */ 1410 drm_intel_add_validate_buffer(target_bo); 1411 } 1412} 1413 1414static void 1415drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1416{ 1417 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1418 int i; 1419 1420 if (bo_gem->relocs == NULL) 1421 return; 1422 1423 for (i = 0; i < bo_gem->reloc_count; i++) { 1424 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1425 int need_fence; 1426 1427 if (target_bo == bo) 1428 continue; 1429 1430 /* Continue walking the tree depth-first. 
*/ 1431 drm_intel_gem_bo_process_reloc2(target_bo); 1432 1433 need_fence = (bo_gem->reloc_target_info[i].flags & 1434 DRM_INTEL_RELOC_FENCE); 1435 1436 /* Add the target to the validate list */ 1437 drm_intel_add_validate_buffer2(target_bo, need_fence); 1438 } 1439} 1440 1441 1442static void 1443drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1444{ 1445 int i; 1446 1447 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1448 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1449 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1450 1451 /* Update the buffer offset */ 1452 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1453 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1454 bo_gem->gem_handle, bo_gem->name, bo->offset, 1455 (unsigned long long)bufmgr_gem->exec_objects[i]. 1456 offset); 1457 bo->offset = bufmgr_gem->exec_objects[i].offset; 1458 } 1459 } 1460} 1461 1462static void 1463drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1464{ 1465 int i; 1466 1467 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1468 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1469 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1470 1471 /* Update the buffer offset */ 1472 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1473 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1474 bo_gem->gem_handle, bo_gem->name, bo->offset, 1475 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1476 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1477 } 1478 } 1479} 1480 1481static int 1482drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 1483 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 1484{ 1485 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1486 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1487 struct drm_i915_gem_execbuffer execbuf; 1488 int ret, i; 1489 1490 if (bo_gem->has_error) 1491 return -ENOMEM; 1492 1493 pthread_mutex_lock(&bufmgr_gem->lock); 1494 /* Update indices and set up the validate list. */ 1495 drm_intel_gem_bo_process_reloc(bo); 1496 1497 /* Add the batch buffer to the validation list. There are no 1498 * relocations pointing to it. 1499 */ 1500 drm_intel_add_validate_buffer(bo); 1501 1502 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 1503 execbuf.buffer_count = bufmgr_gem->exec_count; 1504 execbuf.batch_start_offset = 0; 1505 execbuf.batch_len = used; 1506 execbuf.cliprects_ptr = (uintptr_t) cliprects; 1507 execbuf.num_cliprects = num_cliprects; 1508 execbuf.DR1 = 0; 1509 execbuf.DR4 = DR4; 1510 1511 ret = drmIoctl(bufmgr_gem->fd, 1512 DRM_IOCTL_I915_GEM_EXECBUFFER, 1513 &execbuf); 1514 if (ret != 0) { 1515 ret = -errno; 1516 if (errno == ENOSPC) { 1517 fprintf(stderr, 1518 "Execbuffer fails to pin. " 1519 "Estimate: %u. Actual: %u. 
Available: %u\n", 1520 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1521 bufmgr_gem-> 1522 exec_count), 1523 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1524 bufmgr_gem-> 1525 exec_count), 1526 (unsigned int)bufmgr_gem->gtt_size); 1527 } 1528 } 1529 drm_intel_update_buffer_offsets(bufmgr_gem); 1530 1531 if (bufmgr_gem->bufmgr.debug) 1532 drm_intel_gem_dump_validation_list(bufmgr_gem); 1533 1534 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1535 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1536 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1537 1538 /* Disconnect the buffer from the validate list */ 1539 bo_gem->validate_index = -1; 1540 bufmgr_gem->exec_bos[i] = NULL; 1541 } 1542 bufmgr_gem->exec_count = 0; 1543 pthread_mutex_unlock(&bufmgr_gem->lock); 1544 1545 return ret; 1546} 1547 1548static int 1549drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 1550 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 1551 int ring_flag) 1552{ 1553 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1554 struct drm_i915_gem_execbuffer2 execbuf; 1555 int ret, i; 1556 1557 if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD)) 1558 return -EINVAL; 1559 1560 pthread_mutex_lock(&bufmgr_gem->lock); 1561 /* Update indices and set up the validate list. */ 1562 drm_intel_gem_bo_process_reloc2(bo); 1563 1564 /* Add the batch buffer to the validation list. There are no relocations 1565 * pointing to it. 1566 */ 1567 drm_intel_add_validate_buffer2(bo, 0); 1568 1569 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 1570 execbuf.buffer_count = bufmgr_gem->exec_count; 1571 execbuf.batch_start_offset = 0; 1572 execbuf.batch_len = used; 1573 execbuf.cliprects_ptr = (uintptr_t)cliprects; 1574 execbuf.num_cliprects = num_cliprects; 1575 execbuf.DR1 = 0; 1576 execbuf.DR4 = DR4; 1577 execbuf.flags = ring_flag; 1578 execbuf.rsvd1 = 0; 1579 execbuf.rsvd2 = 0; 1580 1581 ret = drmIoctl(bufmgr_gem->fd, 1582 DRM_IOCTL_I915_GEM_EXECBUFFER2, 1583 &execbuf); 1584 if (ret != 0) { 1585 ret = -errno; 1586 if (ret == -ENOSPC) { 1587 fprintf(stderr, 1588 "Execbuffer fails to pin. " 1589 "Estimate: %u. Actual: %u. 
Available: %u\n", 1590 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1591 bufmgr_gem->exec_count), 1592 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1593 bufmgr_gem->exec_count), 1594 (unsigned int) bufmgr_gem->gtt_size); 1595 } 1596 } 1597 drm_intel_update_buffer_offsets2(bufmgr_gem); 1598 1599 if (bufmgr_gem->bufmgr.debug) 1600 drm_intel_gem_dump_validation_list(bufmgr_gem); 1601 1602 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1603 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1604 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1605 1606 /* Disconnect the buffer from the validate list */ 1607 bo_gem->validate_index = -1; 1608 bufmgr_gem->exec_bos[i] = NULL; 1609 } 1610 bufmgr_gem->exec_count = 0; 1611 pthread_mutex_unlock(&bufmgr_gem->lock); 1612 1613 return ret; 1614} 1615 1616static int 1617drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 1618 drm_clip_rect_t *cliprects, int num_cliprects, 1619 int DR4) 1620{ 1621 return drm_intel_gem_bo_mrb_exec2(bo, used, 1622 cliprects, num_cliprects, DR4, 1623 I915_EXEC_RENDER); 1624} 1625 1626static int 1627drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 1628{ 1629 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1630 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1631 struct drm_i915_gem_pin pin; 1632 int ret; 1633 1634 memset(&pin, 0, sizeof(pin)); 1635 pin.handle = bo_gem->gem_handle; 1636 pin.alignment = alignment; 1637 1638 ret = drmIoctl(bufmgr_gem->fd, 1639 DRM_IOCTL_I915_GEM_PIN, 1640 &pin); 1641 if (ret != 0) 1642 return -errno; 1643 1644 bo->offset = pin.offset; 1645 return 0; 1646} 1647 1648static int 1649drm_intel_gem_bo_unpin(drm_intel_bo *bo) 1650{ 1651 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1652 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1653 struct drm_i915_gem_unpin unpin; 1654 int ret; 1655 1656 memset(&unpin, 0, sizeof(unpin)); 1657 unpin.handle = bo_gem->gem_handle; 1658 1659 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 1660 if (ret != 0) 1661 return -errno; 1662 1663 return 0; 1664} 1665 1666static int 1667drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 1668 uint32_t tiling_mode, 1669 uint32_t stride) 1670{ 1671 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1672 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1673 struct drm_i915_gem_set_tiling set_tiling; 1674 int ret; 1675 1676 if (bo_gem->global_name == 0 && 1677 tiling_mode == bo_gem->tiling_mode && 1678 stride == bo_gem->stride) 1679 return 0; 1680 1681 memset(&set_tiling, 0, sizeof(set_tiling)); 1682 do { 1683 /* set_tiling is slightly broken and overwrites the 1684 * input on the error path, so we have to open code 1685 * rmIoctl. 
1686 */ 1687 set_tiling.handle = bo_gem->gem_handle; 1688 set_tiling.tiling_mode = tiling_mode; 1689 set_tiling.stride = stride; 1690 1691 ret = ioctl(bufmgr_gem->fd, 1692 DRM_IOCTL_I915_GEM_SET_TILING, 1693 &set_tiling); 1694 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 1695 if (ret == -1) 1696 return -errno; 1697 1698 bo_gem->tiling_mode = set_tiling.tiling_mode; 1699 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 1700 bo_gem->stride = set_tiling.stride; 1701 return 0; 1702} 1703 1704static int 1705drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1706 uint32_t stride) 1707{ 1708 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1709 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1710 int ret; 1711 1712 /* Linear buffers have no stride. By ensuring that we only ever use 1713 * stride 0 with linear buffers, we simplify our code. 1714 */ 1715 if (*tiling_mode == I915_TILING_NONE) 1716 stride = 0; 1717 1718 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 1719 if (ret == 0) 1720 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1721 1722 *tiling_mode = bo_gem->tiling_mode; 1723 return ret; 1724} 1725 1726static int 1727drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1728 uint32_t * swizzle_mode) 1729{ 1730 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1731 1732 *tiling_mode = bo_gem->tiling_mode; 1733 *swizzle_mode = bo_gem->swizzle_mode; 1734 return 0; 1735} 1736 1737static int 1738drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 1739{ 1740 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1741 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1742 struct drm_gem_flink flink; 1743 int ret; 1744 1745 if (!bo_gem->global_name) { 1746 memset(&flink, 0, sizeof(flink)); 1747 flink.handle = bo_gem->gem_handle; 1748 1749 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 1750 if (ret != 0) 1751 return -errno; 1752 bo_gem->global_name = flink.name; 1753 bo_gem->reusable = 0; 1754 } 1755 1756 *name = bo_gem->global_name; 1757 return 0; 1758} 1759 1760/** 1761 * Enables unlimited caching of buffer objects for reuse. 1762 * 1763 * This is potentially very memory expensive, as the cache at each bucket 1764 * size is only bounded by how many buffers of that size we've managed to have 1765 * in flight at once. 1766 */ 1767void 1768drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 1769{ 1770 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1771 1772 bufmgr_gem->bo_reuse = 1; 1773} 1774 1775/** 1776 * Enable use of fenced reloc type. 1777 * 1778 * New code should enable this to avoid unnecessary fence register 1779 * allocation. If this option is not enabled, all relocs will have fence 1780 * register allocated. 1781 */ 1782void 1783drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 1784{ 1785 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 1786 1787 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 1788 bufmgr_gem->fenced_relocs = 1; 1789} 1790 1791/** 1792 * Return the additional aperture space required by the tree of buffer objects 1793 * rooted at bo. 
1794 */ 1795static int 1796drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 1797{ 1798 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1799 int i; 1800 int total = 0; 1801 1802 if (bo == NULL || bo_gem->included_in_check_aperture) 1803 return 0; 1804 1805 total += bo->size; 1806 bo_gem->included_in_check_aperture = 1; 1807 1808 for (i = 0; i < bo_gem->reloc_count; i++) 1809 total += 1810 drm_intel_gem_bo_get_aperture_space(bo_gem-> 1811 reloc_target_info[i].bo); 1812 1813 return total; 1814} 1815 1816/** 1817 * Count the number of buffers in this list that need a fence reg 1818 * 1819 * If the count is greater than the number of available regs, we'll have 1820 * to ask the caller to resubmit a batch with fewer tiled buffers. 1821 * 1822 * This function over-counts if the same buffer is used multiple times. 1823 */ 1824static unsigned int 1825drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 1826{ 1827 int i; 1828 unsigned int total = 0; 1829 1830 for (i = 0; i < count; i++) { 1831 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1832 1833 if (bo_gem == NULL) 1834 continue; 1835 1836 total += bo_gem->reloc_tree_fences; 1837 } 1838 return total; 1839} 1840 1841/** 1842 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 1843 * for the next drm_intel_bufmgr_check_aperture_space() call. 1844 */ 1845static void 1846drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 1847{ 1848 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1849 int i; 1850 1851 if (bo == NULL || !bo_gem->included_in_check_aperture) 1852 return; 1853 1854 bo_gem->included_in_check_aperture = 0; 1855 1856 for (i = 0; i < bo_gem->reloc_count; i++) 1857 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 1858 reloc_target_info[i].bo); 1859} 1860 1861/** 1862 * Return a conservative estimate for the amount of aperture required 1863 * for a collection of buffers. This may double-count some buffers. 1864 */ 1865static unsigned int 1866drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 1867{ 1868 int i; 1869 unsigned int total = 0; 1870 1871 for (i = 0; i < count; i++) { 1872 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1873 if (bo_gem != NULL) 1874 total += bo_gem->reloc_tree_size; 1875 } 1876 return total; 1877} 1878 1879/** 1880 * Return the amount of aperture needed for a collection of buffers. 1881 * This avoids double counting any buffers, at the cost of looking 1882 * at every buffer in the set. 1883 */ 1884static unsigned int 1885drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 1886{ 1887 int i; 1888 unsigned int total = 0; 1889 1890 for (i = 0; i < count; i++) { 1891 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 1892 /* For the first buffer object in the array, we get an 1893 * accurate count back for its reloc_tree size (since nothing 1894 * had been flagged as being counted yet). We can save that 1895 * value out as a more conservative reloc_tree_size that 1896 * avoids double-counting target buffers. Since the first 1897 * buffer happens to usually be the batch buffer in our 1898 * callers, this can pull us back from doing the tree 1899 * walk on every new batch emit. 
1900 */ 1901 if (i == 0) { 1902 drm_intel_bo_gem *bo_gem = 1903 (drm_intel_bo_gem *) bo_array[i]; 1904 bo_gem->reloc_tree_size = total; 1905 } 1906 } 1907 1908 for (i = 0; i < count; i++) 1909 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 1910 return total; 1911} 1912 1913/** 1914 * Return -1 if the batchbuffer should be flushed before attempting to 1915 * emit rendering referencing the buffers pointed to by bo_array. 1916 * 1917 * This is required because if we try to emit a batchbuffer with relocations 1918 * to a tree of buffers that won't simultaneously fit in the aperture, 1919 * the rendering will return an error at a point where the software is not 1920 * prepared to recover from it. 1921 * 1922 * However, we also want to emit the batchbuffer significantly before we reach 1923 * the limit, as a series of batchbuffers each of which references buffers 1924 * covering almost all of the aperture means that at each emit we end up 1925 * waiting to evict a buffer from the last rendering, and we get synchronous 1926 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 1927 * get better parallelism. 1928 */ 1929static int 1930drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 1931{ 1932 drm_intel_bufmgr_gem *bufmgr_gem = 1933 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 1934 unsigned int total = 0; 1935 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 1936 int total_fences; 1937 1938 /* Check for fence reg constraints if necessary */ 1939 if (bufmgr_gem->available_fences) { 1940 total_fences = drm_intel_gem_total_fences(bo_array, count); 1941 if (total_fences > bufmgr_gem->available_fences) 1942 return -ENOSPC; 1943 } 1944 1945 total = drm_intel_gem_estimate_batch_space(bo_array, count); 1946 1947 if (total > threshold) 1948 total = drm_intel_gem_compute_batch_space(bo_array, count); 1949 1950 if (total > threshold) { 1951 DBG("check_space: overflowed available aperture, " 1952 "%dkb vs %dkb\n", 1953 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 1954 return -ENOSPC; 1955 } else { 1956 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 1957 (int)bufmgr_gem->gtt_size / 1024); 1958 return 0; 1959 } 1960} 1961 1962/* 1963 * Disable buffer reuse for objects which are shared with the kernel 1964 * as scanout buffers 1965 */ 1966static int 1967drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 1968{ 1969 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1970 1971 bo_gem->reusable = 0; 1972 return 0; 1973} 1974 1975static int 1976drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 1977{ 1978 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1979 1980 return bo_gem->reusable; 1981} 1982 1983static int 1984_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 1985{ 1986 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1987 int i; 1988 1989 for (i = 0; i < bo_gem->reloc_count; i++) { 1990 if (bo_gem->reloc_target_info[i].bo == target_bo) 1991 return 1; 1992 if (bo == bo_gem->reloc_target_info[i].bo) 1993 continue; 1994 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 1995 target_bo)) 1996 return 1; 1997 } 1998 1999 return 0; 2000} 2001 2002/** Return true if target_bo is referenced by bo's relocation tree. 
/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
        drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

        if (bo == NULL || target_bo == NULL)
                return 0;
        /* A buffer that has never been used as a relocation target cannot
         * appear anywhere in bo's relocation tree, so skip the recursive
         * walk in that case.
         */
        if (target_bo_gem->used_as_reloc_target)
                return _drm_intel_gem_bo_references(bo, target_bo);
        return 0;
}

static void
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
{
        unsigned int i = bufmgr_gem->num_buckets;

        assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));

        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
        bufmgr_gem->cache_bucket[i].size = size;
        bufmgr_gem->num_buckets++;
}

static void
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
{
        unsigned long size, cache_max_size = 64 * 1024 * 1024;

        /* Power-of-two buckets alone proved too wasteful of memory, so give
         * three additional sizes between each power of two, which should
         * cover allocation sizes accurately enough.  (The alternative is
         * probably exact size matching, assuming that for things like
         * composited window resize the tiled width/height alignment and
         * rounding of sizes to pages will give useful cache hit rates
         * anyway.)
         */
        add_bucket(bufmgr_gem, 4096);
        add_bucket(bufmgr_gem, 4096 * 2);
        add_bucket(bufmgr_gem, 4096 * 3);

        /* Initialize the linked lists for BO reuse cache. */
        for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
                add_bucket(bufmgr_gem, size);

                add_bucket(bufmgr_gem, size + size * 1 / 4);
                add_bucket(bufmgr_gem, size + size * 2 / 4);
                add_bucket(bufmgr_gem, size + size * 3 / 4);
        }
}
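/*
 * For reference, the bucket sizes created above work out to 4, 8, 12, 16,
 * 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, ... KiB, continuing in
 * the same +25%/+50%/+75% pattern up to (and one step beyond) the 64 MiB
 * cache_max_size: 3 + 13 * 4 = 55 buckets in total, which fits in the
 * 14 * 4 entries of cache_bucket[].
 */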
/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 * \param batch_size Size in bytes of the batchbuffers the client will submit;
 *        used to size the per-buffer relocation lists.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
        drm_intel_bufmgr_gem *bufmgr_gem;
        struct drm_i915_gem_get_aperture aperture;
        drm_i915_getparam_t gp;
        int ret;
        int exec2 = 0, has_bsd = 0;
        int tmp;

        bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
        if (bufmgr_gem == NULL)
                return NULL;

        bufmgr_gem->fd = fd;

        if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
                free(bufmgr_gem);
                return NULL;
        }

        ret = drmIoctl(bufmgr_gem->fd,
                       DRM_IOCTL_I915_GEM_GET_APERTURE,
                       &aperture);

        if (ret == 0)
                bufmgr_gem->gtt_size = aperture.aper_available_size;
        else {
                fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
                        strerror(errno));
                bufmgr_gem->gtt_size = 128 * 1024 * 1024;
                fprintf(stderr, "Assuming %dkB available aperture size.\n"
                        "May lead to reduced performance or incorrect "
                        "rendering.\n",
                        (int)bufmgr_gem->gtt_size / 1024);
        }

        gp.param = I915_PARAM_CHIPSET_ID;
        gp.value = &bufmgr_gem->pci_device;
        ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (ret) {
                fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
                fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
        }

        if (IS_GEN2(bufmgr_gem))
                bufmgr_gem->gen = 2;
        else if (IS_GEN3(bufmgr_gem))
                bufmgr_gem->gen = 3;
        else if (IS_GEN4(bufmgr_gem))
                bufmgr_gem->gen = 4;
        else
                bufmgr_gem->gen = 6;

        /* Point gp.value at a scratch variable for the capability queries so
         * a successful GETPARAM doesn't overwrite the chipset id read above.
         */
        gp.value = &tmp;

        gp.param = I915_PARAM_HAS_EXECBUF2;
        ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (!ret)
                exec2 = 1;

        gp.param = I915_PARAM_HAS_BSD;
        ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (!ret)
                has_bsd = 1;

        if (bufmgr_gem->gen < 4) {
                gp.param = I915_PARAM_NUM_FENCES_AVAIL;
                gp.value = &bufmgr_gem->available_fences;
                ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
                if (ret) {
                        fprintf(stderr, "get fences failed: %d [%d]\n", ret,
                                errno);
                        fprintf(stderr, "param: %d, val: %d\n", gp.param,
                                *gp.value);
                        bufmgr_gem->available_fences = 0;
                } else {
                        /* XXX The kernel reports the total number of fences,
                         * including any that may be pinned.
                         *
                         * We presume that there will be at least one pinned
                         * fence for the scanout buffer, but there may be more
                         * than one scanout and the user may be manually
                         * pinning buffers.  Let's move to execbuffer2 and
                         * thereby forget the insanity of using fences...
                         */
                        bufmgr_gem->available_fences -= 2;
                        if (bufmgr_gem->available_fences < 0)
                                bufmgr_gem->available_fences = 0;
                }
        }

        /* Let's go with one relocation per every 2 dwords (but round down a
         * bit, since a power of two will mean an extra page allocation for
         * the reloc buffer).
         *
         * Every 4 was too few for the blender benchmark.
         */
        bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
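        /* Illustrative arithmetic: a 16 KiB batch gives
         * 16384 / 4 / 2 - 2 = 2046 relocation entries.
         */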
        bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
        bufmgr_gem->bufmgr.bo_alloc_for_render =
            drm_intel_gem_bo_alloc_for_render;
        bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
        bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
        bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
        bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
        bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
        bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
        bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
        bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
        bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
        bufmgr_gem->bufmgr.bo_emit_reloc_fence =
            drm_intel_gem_bo_emit_reloc_fence;
        bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
        bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
        bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
        bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
        bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
        /* Use the newer execbuffer2 interface if the kernel supports it. */
        if (exec2) {
                bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
                if (has_bsd)
                        bufmgr_gem->bufmgr.bo_mrb_exec =
                            drm_intel_gem_bo_mrb_exec2;
        } else
                bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
        bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
        bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
        bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
        bufmgr_gem->bufmgr.debug = 0;
        bufmgr_gem->bufmgr.check_aperture_space =
            drm_intel_gem_check_aperture_space;
        bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
        bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
        bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
            drm_intel_gem_get_pipe_from_crtc_id;
        bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

        init_cache_buckets(bufmgr_gem);

        return &bufmgr_gem->bufmgr;
}
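/*
 * Illustrative usage sketch (kept under #if 0, not compiled): a minimal
 * client of this buffer manager.  The device node path, buffer name, and
 * sizes are arbitrary examples; error handling is abbreviated.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include "intel_bufmgr.h"

static int example(void)
{
        drm_intel_bufmgr *bufmgr;
        drm_intel_bo *bo;
        int fd;

        fd = open("/dev/dri/card0", O_RDWR);    /* example device node */
        if (fd < 0)
                return -1;

        /* 4 KiB batchbuffers; batch_size feeds the max_relocs calculation
         * in drm_intel_bufmgr_gem_init() above.
         */
        bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
        if (bufmgr == NULL) {
                close(fd);
                return -1;
        }
        drm_intel_bufmgr_gem_enable_reuse(bufmgr);

        bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 4096);
        if (bo != NULL) {
                drm_intel_bo_map(bo, 1 /* write enable */);
                memset(bo->virtual, 0, 4096);
                drm_intel_bo_unmap(bo);
                drm_intel_bo_unreference(bo);
        }

        drm_intel_bufmgr_destroy(bufmgr);
        close(fd);
        return 0;
}
#endif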