intel_bufmgr_gem.c revision aec75c42
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "intel_aub.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))

#define DBG(...) \
do { \ 81 if (bufmgr_gem->bufmgr.debug) \ 82 fprintf(stderr, __VA_ARGS__); \ 83} while (0) 84 85#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 86 87typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 88 89struct drm_intel_gem_bo_bucket { 90 drmMMListHead head; 91 unsigned long size; 92}; 93 94typedef struct _drm_intel_bufmgr_gem { 95 drm_intel_bufmgr bufmgr; 96 97 int fd; 98 99 int max_relocs; 100 101 pthread_mutex_t lock; 102 103 struct drm_i915_gem_exec_object *exec_objects; 104 struct drm_i915_gem_exec_object2 *exec2_objects; 105 drm_intel_bo **exec_bos; 106 int exec_size; 107 int exec_count; 108 109 /** Array of lists of cached gem objects of power-of-two sizes */ 110 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 111 int num_buckets; 112 time_t time; 113 114 drmMMListHead named; 115 drmMMListHead vma_cache; 116 int vma_count, vma_open, vma_max; 117 118 uint64_t gtt_size; 119 int available_fences; 120 int pci_device; 121 int gen; 122 unsigned int has_bsd : 1; 123 unsigned int has_blt : 1; 124 unsigned int has_relaxed_fencing : 1; 125 unsigned int has_llc : 1; 126 unsigned int has_wait_timeout : 1; 127 unsigned int bo_reuse : 1; 128 unsigned int no_exec : 1; 129 unsigned int has_vebox : 1; 130 bool fenced_relocs; 131 132 char *aub_filename; 133 FILE *aub_file; 134 uint32_t aub_offset; 135} drm_intel_bufmgr_gem; 136 137#define DRM_INTEL_RELOC_FENCE (1<<0) 138 139typedef struct _drm_intel_reloc_target_info { 140 drm_intel_bo *bo; 141 int flags; 142} drm_intel_reloc_target; 143 144struct _drm_intel_bo_gem { 145 drm_intel_bo bo; 146 147 atomic_t refcount; 148 uint32_t gem_handle; 149 const char *name; 150 151 /** 152 * Kenel-assigned global name for this object 153 * 154 * List contains both flink named and prime fd'd objects 155 */ 156 unsigned int global_name; 157 drmMMListHead name_list; 158 159 /** 160 * Index of the buffer within the validation list while preparing a 161 * batchbuffer execution. 162 */ 163 int validate_index; 164 165 /** 166 * Current tiling mode 167 */ 168 uint32_t tiling_mode; 169 uint32_t swizzle_mode; 170 unsigned long stride; 171 172 time_t free_time; 173 174 /** Array passed to the DRM containing relocation information. */ 175 struct drm_i915_gem_relocation_entry *relocs; 176 /** 177 * Array of info structs corresponding to relocs[i].target_handle etc 178 */ 179 drm_intel_reloc_target *reloc_target_info; 180 /** Number of entries in relocs */ 181 int reloc_count; 182 /** Mapped address for the buffer, saved across map/unmap cycles */ 183 void *mem_virtual; 184 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 185 void *gtt_virtual; 186 int map_count; 187 drmMMListHead vma_list; 188 189 /** BO cache list */ 190 drmMMListHead head; 191 192 /** 193 * Boolean of whether this BO and its children have been included in 194 * the current drm_intel_bufmgr_check_aperture_space() total. 195 */ 196 bool included_in_check_aperture; 197 198 /** 199 * Boolean of whether this buffer has been used as a relocation 200 * target and had its size accounted for, and thus can't have any 201 * further relocations added to it. 202 */ 203 bool used_as_reloc_target; 204 205 /** 206 * Boolean of whether we have encountered an error whilst building the relocation tree. 207 */ 208 bool has_error; 209 210 /** 211 * Boolean of whether this buffer can be re-used 212 */ 213 bool reusable; 214 215 /** 216 * Boolean of whether the GPU is definitely not accessing the buffer. 
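	 *
	 * (Illustrative aside, not part of the original file: callers probe
	 * this state through the public wrapper rather than reading the flag
	 * directly, e.g.
	 *
	 *	if (!drm_intel_bo_busy(bo))
	 *		... map or reuse bo without stalling ...
	 *
	 * where "bo" is a drm_intel_bo owned by this bufmgr.)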
217 * 218 * This is only valid when reusable, since non-reusable 219 * buffers are those that have been shared wth other 220 * processes, so we don't know their state. 221 */ 222 bool idle; 223 224 /** 225 * Size in bytes of this buffer and its relocation descendents. 226 * 227 * Used to avoid costly tree walking in 228 * drm_intel_bufmgr_check_aperture in the common case. 229 */ 230 int reloc_tree_size; 231 232 /** 233 * Number of potential fence registers required by this buffer and its 234 * relocations. 235 */ 236 int reloc_tree_fences; 237 238 /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ 239 bool mapped_cpu_write; 240 241 uint32_t aub_offset; 242 243 drm_intel_aub_annotation *aub_annotations; 244 unsigned aub_annotation_count; 245}; 246 247static unsigned int 248drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 249 250static unsigned int 251drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 252 253static int 254drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 255 uint32_t * swizzle_mode); 256 257static int 258drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 259 uint32_t tiling_mode, 260 uint32_t stride); 261 262static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 263 time_t time); 264 265static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 266 267static void drm_intel_gem_bo_free(drm_intel_bo *bo); 268 269static unsigned long 270drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 271 uint32_t *tiling_mode) 272{ 273 unsigned long min_size, max_size; 274 unsigned long i; 275 276 if (*tiling_mode == I915_TILING_NONE) 277 return size; 278 279 /* 965+ just need multiples of page size for tiling */ 280 if (bufmgr_gem->gen >= 4) 281 return ROUND_UP_TO(size, 4096); 282 283 /* Older chips need powers of two, of at least 512k or 1M */ 284 if (bufmgr_gem->gen == 3) { 285 min_size = 1024*1024; 286 max_size = 128*1024*1024; 287 } else { 288 min_size = 512*1024; 289 max_size = 64*1024*1024; 290 } 291 292 if (size > max_size) { 293 *tiling_mode = I915_TILING_NONE; 294 return size; 295 } 296 297 /* Do we need to allocate every page for the fence? */ 298 if (bufmgr_gem->has_relaxed_fencing) 299 return ROUND_UP_TO(size, 4096); 300 301 for (i = min_size; i < size; i <<= 1) 302 ; 303 304 return i; 305} 306 307/* 308 * Round a given pitch up to the minimum required for X tiling on a 309 * given chip. We use 512 as the minimum to allow for a later tiling 310 * change. 311 */ 312static unsigned long 313drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 314 unsigned long pitch, uint32_t *tiling_mode) 315{ 316 unsigned long tile_width; 317 unsigned long i; 318 319 /* If untiled, then just align it so that we can do rendering 320 * to it with the 3D engine. 321 */ 322 if (*tiling_mode == I915_TILING_NONE) 323 return ALIGN(pitch, 64); 324 325 if (*tiling_mode == I915_TILING_X 326 || (IS_915(bufmgr_gem->pci_device) 327 && *tiling_mode == I915_TILING_Y)) 328 tile_width = 512; 329 else 330 tile_width = 128; 331 332 /* 965 is flexible */ 333 if (bufmgr_gem->gen >= 4) 334 return ROUND_UP_TO(pitch, tile_width); 335 336 /* The older hardware has a maximum pitch of 8192 with tiled 337 * surfaces, so fallback to untiled if it's too large. 
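	 *
	 * (A hedged usage sketch, not part of this file: callers reach this
	 * path through drm_intel_bo_alloc_tiled() and must re-check which
	 * tiling they actually got, e.g.
	 *
	 *	uint32_t tiling = I915_TILING_X;
	 *	unsigned long pitch;
	 *	drm_intel_bo *bo = drm_intel_bo_alloc_tiled(bufmgr, "tex",
	 *						    width, height, 4,
	 *						    &tiling, &pitch, 0);
	 *	if (bo && tiling == I915_TILING_NONE)
	 *		... the hardware could not tile this size ...
	 *
	 * where width/height are hypothetical caller values.)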
338 */ 339 if (pitch > 8192) { 340 *tiling_mode = I915_TILING_NONE; 341 return ALIGN(pitch, 64); 342 } 343 344 /* Pre-965 needs power of two tile width */ 345 for (i = tile_width; i < pitch; i <<= 1) 346 ; 347 348 return i; 349} 350 351static struct drm_intel_gem_bo_bucket * 352drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 353 unsigned long size) 354{ 355 int i; 356 357 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 358 struct drm_intel_gem_bo_bucket *bucket = 359 &bufmgr_gem->cache_bucket[i]; 360 if (bucket->size >= size) { 361 return bucket; 362 } 363 } 364 365 return NULL; 366} 367 368static void 369drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 370{ 371 int i, j; 372 373 for (i = 0; i < bufmgr_gem->exec_count; i++) { 374 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 375 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 376 377 if (bo_gem->relocs == NULL) { 378 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 379 bo_gem->name); 380 continue; 381 } 382 383 for (j = 0; j < bo_gem->reloc_count; j++) { 384 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 385 drm_intel_bo_gem *target_gem = 386 (drm_intel_bo_gem *) target_bo; 387 388 DBG("%2d: %d (%s)@0x%08llx -> " 389 "%d (%s)@0x%08lx + 0x%08x\n", 390 i, 391 bo_gem->gem_handle, bo_gem->name, 392 (unsigned long long)bo_gem->relocs[j].offset, 393 target_gem->gem_handle, 394 target_gem->name, 395 target_bo->offset64, 396 bo_gem->relocs[j].delta); 397 } 398 } 399} 400 401static inline void 402drm_intel_gem_bo_reference(drm_intel_bo *bo) 403{ 404 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 405 406 atomic_inc(&bo_gem->refcount); 407} 408 409/** 410 * Adds the given buffer to the list of buffers to be validated (moved into the 411 * appropriate memory type) with the next batch submission. 412 * 413 * If a buffer is validated multiple times in a batch submission, it ends up 414 * with the intersection of the memory type flags and the union of the 415 * access flags. 416 */ 417static void 418drm_intel_add_validate_buffer(drm_intel_bo *bo) 419{ 420 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 421 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 422 int index; 423 424 if (bo_gem->validate_index != -1) 425 return; 426 427 /* Extend the array of validation entries as necessary. 
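	 * The array grows geometrically (5, 10, 20, ... entries), so adding
	 * buffers to a batch stays amortized O(1) even for large batches.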
*/ 428 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 429 int new_size = bufmgr_gem->exec_size * 2; 430 431 if (new_size == 0) 432 new_size = 5; 433 434 bufmgr_gem->exec_objects = 435 realloc(bufmgr_gem->exec_objects, 436 sizeof(*bufmgr_gem->exec_objects) * new_size); 437 bufmgr_gem->exec_bos = 438 realloc(bufmgr_gem->exec_bos, 439 sizeof(*bufmgr_gem->exec_bos) * new_size); 440 bufmgr_gem->exec_size = new_size; 441 } 442 443 index = bufmgr_gem->exec_count; 444 bo_gem->validate_index = index; 445 /* Fill in array entry */ 446 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 447 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 448 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 449 bufmgr_gem->exec_objects[index].alignment = 0; 450 bufmgr_gem->exec_objects[index].offset = 0; 451 bufmgr_gem->exec_bos[index] = bo; 452 bufmgr_gem->exec_count++; 453} 454 455static void 456drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 457{ 458 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 459 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 460 int index; 461 462 if (bo_gem->validate_index != -1) { 463 if (need_fence) 464 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 465 EXEC_OBJECT_NEEDS_FENCE; 466 return; 467 } 468 469 /* Extend the array of validation entries as necessary. */ 470 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 471 int new_size = bufmgr_gem->exec_size * 2; 472 473 if (new_size == 0) 474 new_size = 5; 475 476 bufmgr_gem->exec2_objects = 477 realloc(bufmgr_gem->exec2_objects, 478 sizeof(*bufmgr_gem->exec2_objects) * new_size); 479 bufmgr_gem->exec_bos = 480 realloc(bufmgr_gem->exec_bos, 481 sizeof(*bufmgr_gem->exec_bos) * new_size); 482 bufmgr_gem->exec_size = new_size; 483 } 484 485 index = bufmgr_gem->exec_count; 486 bo_gem->validate_index = index; 487 /* Fill in array entry */ 488 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 489 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 490 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 491 bufmgr_gem->exec2_objects[index].alignment = 0; 492 bufmgr_gem->exec2_objects[index].offset = 0; 493 bufmgr_gem->exec_bos[index] = bo; 494 bufmgr_gem->exec2_objects[index].flags = 0; 495 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 496 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 497 if (need_fence) { 498 bufmgr_gem->exec2_objects[index].flags |= 499 EXEC_OBJECT_NEEDS_FENCE; 500 } 501 bufmgr_gem->exec_count++; 502} 503 504#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 505 sizeof(uint32_t)) 506 507static void 508drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 509 drm_intel_bo_gem *bo_gem) 510{ 511 int size; 512 513 assert(!bo_gem->used_as_reloc_target); 514 515 /* The older chipsets are far-less flexible in terms of tiling, 516 * and require tiled buffer to be size aligned in the aperture. 517 * This means that in the worst possible case we will need a hole 518 * twice as large as the object in order for it to fit into the 519 * aperture. Optimal packing is for wimps. 
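	 *
	 * (Aside, an illustrative sketch: the value computed here feeds the
	 * aperture accounting behind drm_intel_bufmgr_check_aperture_space(),
	 * which callers use before committing to a batch, e.g.
	 *
	 *	drm_intel_bo *bos[] = { batch_bo, target_bo };
	 *	if (drm_intel_bufmgr_check_aperture_space(bos, 2) != 0)
	 *		... flush the current batch and retry ...
	 *
	 * where batch_bo/target_bo are hypothetical caller-owned buffers.)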
520 */ 521 size = bo_gem->bo.size; 522 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 523 int min_size; 524 525 if (bufmgr_gem->has_relaxed_fencing) { 526 if (bufmgr_gem->gen == 3) 527 min_size = 1024*1024; 528 else 529 min_size = 512*1024; 530 531 while (min_size < size) 532 min_size *= 2; 533 } else 534 min_size = size; 535 536 /* Account for worst-case alignment. */ 537 size = 2 * min_size; 538 } 539 540 bo_gem->reloc_tree_size = size; 541} 542 543static int 544drm_intel_setup_reloc_list(drm_intel_bo *bo) 545{ 546 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 547 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 548 unsigned int max_relocs = bufmgr_gem->max_relocs; 549 550 if (bo->size / 4 < max_relocs) 551 max_relocs = bo->size / 4; 552 553 bo_gem->relocs = malloc(max_relocs * 554 sizeof(struct drm_i915_gem_relocation_entry)); 555 bo_gem->reloc_target_info = malloc(max_relocs * 556 sizeof(drm_intel_reloc_target)); 557 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 558 bo_gem->has_error = true; 559 560 free (bo_gem->relocs); 561 bo_gem->relocs = NULL; 562 563 free (bo_gem->reloc_target_info); 564 bo_gem->reloc_target_info = NULL; 565 566 return 1; 567 } 568 569 return 0; 570} 571 572static int 573drm_intel_gem_bo_busy(drm_intel_bo *bo) 574{ 575 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 576 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 577 struct drm_i915_gem_busy busy; 578 int ret; 579 580 if (bo_gem->reusable && bo_gem->idle) 581 return false; 582 583 VG_CLEAR(busy); 584 busy.handle = bo_gem->gem_handle; 585 586 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 587 if (ret == 0) { 588 bo_gem->idle = !busy.busy; 589 return busy.busy; 590 } else { 591 return false; 592 } 593 return (ret == 0 && busy.busy); 594} 595 596static int 597drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 598 drm_intel_bo_gem *bo_gem, int state) 599{ 600 struct drm_i915_gem_madvise madv; 601 602 VG_CLEAR(madv); 603 madv.handle = bo_gem->gem_handle; 604 madv.madv = state; 605 madv.retained = 1; 606 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 607 608 return madv.retained; 609} 610 611static int 612drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 613{ 614 return drm_intel_gem_bo_madvise_internal 615 ((drm_intel_bufmgr_gem *) bo->bufmgr, 616 (drm_intel_bo_gem *) bo, 617 madv); 618} 619 620/* drop the oldest entries that have been purged by the kernel */ 621static void 622drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 623 struct drm_intel_gem_bo_bucket *bucket) 624{ 625 while (!DRMLISTEMPTY(&bucket->head)) { 626 drm_intel_bo_gem *bo_gem; 627 628 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 629 bucket->head.next, head); 630 if (drm_intel_gem_bo_madvise_internal 631 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 632 break; 633 634 DRMLISTDEL(&bo_gem->head); 635 drm_intel_gem_bo_free(&bo_gem->bo); 636 } 637} 638 639static drm_intel_bo * 640drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 641 const char *name, 642 unsigned long size, 643 unsigned long flags, 644 uint32_t tiling_mode, 645 unsigned long stride) 646{ 647 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 648 drm_intel_bo_gem *bo_gem; 649 unsigned int page_size = getpagesize(); 650 int ret; 651 struct drm_intel_gem_bo_bucket *bucket; 652 bool alloc_from_cache; 653 unsigned long bo_size; 654 bool for_render = false; 655 656 if (flags & BO_ALLOC_FOR_RENDER) 657 
for_render = true; 658 659 /* Round the allocated size up to a power of two number of pages. */ 660 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 661 662 /* If we don't have caching at this size, don't actually round the 663 * allocation up. 664 */ 665 if (bucket == NULL) { 666 bo_size = size; 667 if (bo_size < page_size) 668 bo_size = page_size; 669 } else { 670 bo_size = bucket->size; 671 } 672 673 pthread_mutex_lock(&bufmgr_gem->lock); 674 /* Get a buffer out of the cache if available */ 675retry: 676 alloc_from_cache = false; 677 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 678 if (for_render) { 679 /* Allocate new render-target BOs from the tail (MRU) 680 * of the list, as it will likely be hot in the GPU 681 * cache and in the aperture for us. 682 */ 683 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 684 bucket->head.prev, head); 685 DRMLISTDEL(&bo_gem->head); 686 alloc_from_cache = true; 687 } else { 688 /* For non-render-target BOs (where we're probably 689 * going to map it first thing in order to fill it 690 * with data), check if the last BO in the cache is 691 * unbusy, and only reuse in that case. Otherwise, 692 * allocating a new buffer is probably faster than 693 * waiting for the GPU to finish. 694 */ 695 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 696 bucket->head.next, head); 697 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 698 alloc_from_cache = true; 699 DRMLISTDEL(&bo_gem->head); 700 } 701 } 702 703 if (alloc_from_cache) { 704 if (!drm_intel_gem_bo_madvise_internal 705 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 706 drm_intel_gem_bo_free(&bo_gem->bo); 707 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 708 bucket); 709 goto retry; 710 } 711 712 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 713 tiling_mode, 714 stride)) { 715 drm_intel_gem_bo_free(&bo_gem->bo); 716 goto retry; 717 } 718 } 719 } 720 pthread_mutex_unlock(&bufmgr_gem->lock); 721 722 if (!alloc_from_cache) { 723 struct drm_i915_gem_create create; 724 725 bo_gem = calloc(1, sizeof(*bo_gem)); 726 if (!bo_gem) 727 return NULL; 728 729 bo_gem->bo.size = bo_size; 730 731 VG_CLEAR(create); 732 create.size = bo_size; 733 734 ret = drmIoctl(bufmgr_gem->fd, 735 DRM_IOCTL_I915_GEM_CREATE, 736 &create); 737 bo_gem->gem_handle = create.handle; 738 bo_gem->bo.handle = bo_gem->gem_handle; 739 if (ret != 0) { 740 free(bo_gem); 741 return NULL; 742 } 743 bo_gem->bo.bufmgr = bufmgr; 744 745 bo_gem->tiling_mode = I915_TILING_NONE; 746 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 747 bo_gem->stride = 0; 748 749 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 750 tiling_mode, 751 stride)) { 752 drm_intel_gem_bo_free(&bo_gem->bo); 753 return NULL; 754 } 755 756 DRMINITLISTHEAD(&bo_gem->name_list); 757 DRMINITLISTHEAD(&bo_gem->vma_list); 758 } 759 760 bo_gem->name = name; 761 atomic_set(&bo_gem->refcount, 1); 762 bo_gem->validate_index = -1; 763 bo_gem->reloc_tree_fences = 0; 764 bo_gem->used_as_reloc_target = false; 765 bo_gem->has_error = false; 766 bo_gem->reusable = true; 767 bo_gem->aub_annotations = NULL; 768 bo_gem->aub_annotation_count = 0; 769 770 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 771 772 DBG("bo_create: buf %d (%s) %ldb\n", 773 bo_gem->gem_handle, bo_gem->name, size); 774 775 return &bo_gem->bo; 776} 777 778static drm_intel_bo * 779drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 780 const char *name, 781 unsigned long size, 782 unsigned int alignment) 783{ 784 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 785 BO_ALLOC_FOR_RENDER, 786 
I915_TILING_NONE, 0); 787} 788 789static drm_intel_bo * 790drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 791 const char *name, 792 unsigned long size, 793 unsigned int alignment) 794{ 795 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 796 I915_TILING_NONE, 0); 797} 798 799static drm_intel_bo * 800drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 801 int x, int y, int cpp, uint32_t *tiling_mode, 802 unsigned long *pitch, unsigned long flags) 803{ 804 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 805 unsigned long size, stride; 806 uint32_t tiling; 807 808 do { 809 unsigned long aligned_y, height_alignment; 810 811 tiling = *tiling_mode; 812 813 /* If we're tiled, our allocations are in 8 or 32-row blocks, 814 * so failure to align our height means that we won't allocate 815 * enough pages. 816 * 817 * If we're untiled, we still have to align to 2 rows high 818 * because the data port accesses 2x2 blocks even if the 819 * bottom row isn't to be rendered, so failure to align means 820 * we could walk off the end of the GTT and fault. This is 821 * documented on 965, and may be the case on older chipsets 822 * too so we try to be careful. 823 */ 824 aligned_y = y; 825 height_alignment = 2; 826 827 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 828 height_alignment = 16; 829 else if (tiling == I915_TILING_X 830 || (IS_915(bufmgr_gem->pci_device) 831 && tiling == I915_TILING_Y)) 832 height_alignment = 8; 833 else if (tiling == I915_TILING_Y) 834 height_alignment = 32; 835 aligned_y = ALIGN(y, height_alignment); 836 837 stride = x * cpp; 838 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 839 size = stride * aligned_y; 840 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 841 } while (*tiling_mode != tiling); 842 *pitch = stride; 843 844 if (tiling == I915_TILING_NONE) 845 stride = 0; 846 847 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 848 tiling, stride); 849} 850 851/** 852 * Returns a drm_intel_bo wrapping the given buffer object handle. 853 * 854 * This can be used when one application needs to pass a buffer object 855 * to another. 856 */ 857drm_intel_bo * 858drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 859 const char *name, 860 unsigned int handle) 861{ 862 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 863 drm_intel_bo_gem *bo_gem; 864 int ret; 865 struct drm_gem_open open_arg; 866 struct drm_i915_gem_get_tiling get_tiling; 867 drmMMListHead *list; 868 869 /* At the moment most applications only have a few named bo. 870 * For instance, in a DRI client only the render buffers passed 871 * between X and the client are named. And since X returns the 872 * alternating names for the front/back buffer a linear search 873 * provides a sufficiently fast match. 
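	 *
	 * (Usage sketch, illustrative only: a compositor importing a
	 * client's flink name might do
	 *
	 *	drm_intel_bo *bo =
	 *		drm_intel_bo_gem_create_from_name(bufmgr, "client", name);
	 *	if (bo == NULL)
	 *		... handle a stale or invalid name ...
	 *	...
	 *	drm_intel_bo_unreference(bo);
	 *
	 * where "name" is the global flink handle received from the client.)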
874 */ 875 for (list = bufmgr_gem->named.next; 876 list != &bufmgr_gem->named; 877 list = list->next) { 878 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 879 if (bo_gem->global_name == handle) { 880 drm_intel_gem_bo_reference(&bo_gem->bo); 881 return &bo_gem->bo; 882 } 883 } 884 885 VG_CLEAR(open_arg); 886 open_arg.name = handle; 887 ret = drmIoctl(bufmgr_gem->fd, 888 DRM_IOCTL_GEM_OPEN, 889 &open_arg); 890 if (ret != 0) { 891 DBG("Couldn't reference %s handle 0x%08x: %s\n", 892 name, handle, strerror(errno)); 893 return NULL; 894 } 895 /* Now see if someone has used a prime handle to get this 896 * object from the kernel before by looking through the list 897 * again for a matching gem_handle 898 */ 899 for (list = bufmgr_gem->named.next; 900 list != &bufmgr_gem->named; 901 list = list->next) { 902 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 903 if (bo_gem->gem_handle == open_arg.handle) { 904 drm_intel_gem_bo_reference(&bo_gem->bo); 905 return &bo_gem->bo; 906 } 907 } 908 909 bo_gem = calloc(1, sizeof(*bo_gem)); 910 if (!bo_gem) 911 return NULL; 912 913 bo_gem->bo.size = open_arg.size; 914 bo_gem->bo.offset = 0; 915 bo_gem->bo.offset64 = 0; 916 bo_gem->bo.virtual = NULL; 917 bo_gem->bo.bufmgr = bufmgr; 918 bo_gem->name = name; 919 atomic_set(&bo_gem->refcount, 1); 920 bo_gem->validate_index = -1; 921 bo_gem->gem_handle = open_arg.handle; 922 bo_gem->bo.handle = open_arg.handle; 923 bo_gem->global_name = handle; 924 bo_gem->reusable = false; 925 926 VG_CLEAR(get_tiling); 927 get_tiling.handle = bo_gem->gem_handle; 928 ret = drmIoctl(bufmgr_gem->fd, 929 DRM_IOCTL_I915_GEM_GET_TILING, 930 &get_tiling); 931 if (ret != 0) { 932 drm_intel_gem_bo_unreference(&bo_gem->bo); 933 return NULL; 934 } 935 bo_gem->tiling_mode = get_tiling.tiling_mode; 936 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 937 /* XXX stride is unknown */ 938 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 939 940 DRMINITLISTHEAD(&bo_gem->vma_list); 941 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 942 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 943 944 return &bo_gem->bo; 945} 946 947static void 948drm_intel_gem_bo_free(drm_intel_bo *bo) 949{ 950 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 951 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 952 struct drm_gem_close close; 953 int ret; 954 955 DRMLISTDEL(&bo_gem->vma_list); 956 if (bo_gem->mem_virtual) { 957 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 958 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 959 bufmgr_gem->vma_count--; 960 } 961 if (bo_gem->gtt_virtual) { 962 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 963 bufmgr_gem->vma_count--; 964 } 965 966 /* Close this object */ 967 VG_CLEAR(close); 968 close.handle = bo_gem->gem_handle; 969 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 970 if (ret != 0) { 971 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 972 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 973 } 974 free(bo_gem->aub_annotations); 975 free(bo); 976} 977 978static void 979drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 980{ 981#if HAVE_VALGRIND 982 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 983 984 if (bo_gem->mem_virtual) 985 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 986 987 if (bo_gem->gtt_virtual) 988 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 989#endif 990} 991 992/** Frees all cached buffers significantly older than @time. 
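 *
 * Note that the cache only ever fills up when the application opted in
 * with drm_intel_bufmgr_gem_enable_reuse(); a minimal setup sketch
 * (illustrative, not part of this file):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);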
*/ 993static void 994drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 995{ 996 int i; 997 998 if (bufmgr_gem->time == time) 999 return; 1000 1001 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1002 struct drm_intel_gem_bo_bucket *bucket = 1003 &bufmgr_gem->cache_bucket[i]; 1004 1005 while (!DRMLISTEMPTY(&bucket->head)) { 1006 drm_intel_bo_gem *bo_gem; 1007 1008 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1009 bucket->head.next, head); 1010 if (time - bo_gem->free_time <= 1) 1011 break; 1012 1013 DRMLISTDEL(&bo_gem->head); 1014 1015 drm_intel_gem_bo_free(&bo_gem->bo); 1016 } 1017 } 1018 1019 bufmgr_gem->time = time; 1020} 1021 1022static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1023{ 1024 int limit; 1025 1026 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1027 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1028 1029 if (bufmgr_gem->vma_max < 0) 1030 return; 1031 1032 /* We may need to evict a few entries in order to create new mmaps */ 1033 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1034 if (limit < 0) 1035 limit = 0; 1036 1037 while (bufmgr_gem->vma_count > limit) { 1038 drm_intel_bo_gem *bo_gem; 1039 1040 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1041 bufmgr_gem->vma_cache.next, 1042 vma_list); 1043 assert(bo_gem->map_count == 0); 1044 DRMLISTDELINIT(&bo_gem->vma_list); 1045 1046 if (bo_gem->mem_virtual) { 1047 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1048 bo_gem->mem_virtual = NULL; 1049 bufmgr_gem->vma_count--; 1050 } 1051 if (bo_gem->gtt_virtual) { 1052 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1053 bo_gem->gtt_virtual = NULL; 1054 bufmgr_gem->vma_count--; 1055 } 1056 } 1057} 1058 1059static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1060 drm_intel_bo_gem *bo_gem) 1061{ 1062 bufmgr_gem->vma_open--; 1063 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1064 if (bo_gem->mem_virtual) 1065 bufmgr_gem->vma_count++; 1066 if (bo_gem->gtt_virtual) 1067 bufmgr_gem->vma_count++; 1068 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1069} 1070 1071static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1072 drm_intel_bo_gem *bo_gem) 1073{ 1074 bufmgr_gem->vma_open++; 1075 DRMLISTDEL(&bo_gem->vma_list); 1076 if (bo_gem->mem_virtual) 1077 bufmgr_gem->vma_count--; 1078 if (bo_gem->gtt_virtual) 1079 bufmgr_gem->vma_count--; 1080 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1081} 1082 1083static void 1084drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1085{ 1086 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1087 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1088 struct drm_intel_gem_bo_bucket *bucket; 1089 int i; 1090 1091 /* Unreference all the target buffers */ 1092 for (i = 0; i < bo_gem->reloc_count; i++) { 1093 if (bo_gem->reloc_target_info[i].bo != bo) { 1094 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1095 reloc_target_info[i].bo, 1096 time); 1097 } 1098 } 1099 bo_gem->reloc_count = 0; 1100 bo_gem->used_as_reloc_target = false; 1101 1102 DBG("bo_unreference final: %d (%s)\n", 1103 bo_gem->gem_handle, bo_gem->name); 1104 1105 /* release memory associated with this object */ 1106 if (bo_gem->reloc_target_info) { 1107 free(bo_gem->reloc_target_info); 1108 bo_gem->reloc_target_info = NULL; 1109 } 1110 if (bo_gem->relocs) { 1111 free(bo_gem->relocs); 1112 bo_gem->relocs = NULL; 1113 } 1114 1115 /* Clear any left-over mappings */ 1116 if (bo_gem->map_count) { 1117 DBG("bo freed with non-zero 
map-count %d\n", bo_gem->map_count); 1118 bo_gem->map_count = 0; 1119 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1120 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1121 } 1122 1123 DRMLISTDEL(&bo_gem->name_list); 1124 1125 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1126 /* Put the buffer into our internal cache for reuse if we can. */ 1127 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1128 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1129 I915_MADV_DONTNEED)) { 1130 bo_gem->free_time = time; 1131 1132 bo_gem->name = NULL; 1133 bo_gem->validate_index = -1; 1134 1135 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1136 } else { 1137 drm_intel_gem_bo_free(bo); 1138 } 1139} 1140 1141static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1142 time_t time) 1143{ 1144 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1145 1146 assert(atomic_read(&bo_gem->refcount) > 0); 1147 if (atomic_dec_and_test(&bo_gem->refcount)) 1148 drm_intel_gem_bo_unreference_final(bo, time); 1149} 1150 1151static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1152{ 1153 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1154 1155 assert(atomic_read(&bo_gem->refcount) > 0); 1156 if (atomic_dec_and_test(&bo_gem->refcount)) { 1157 drm_intel_bufmgr_gem *bufmgr_gem = 1158 (drm_intel_bufmgr_gem *) bo->bufmgr; 1159 struct timespec time; 1160 1161 clock_gettime(CLOCK_MONOTONIC, &time); 1162 1163 pthread_mutex_lock(&bufmgr_gem->lock); 1164 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1165 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1166 pthread_mutex_unlock(&bufmgr_gem->lock); 1167 } 1168} 1169 1170static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1171{ 1172 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1173 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1174 struct drm_i915_gem_set_domain set_domain; 1175 int ret; 1176 1177 pthread_mutex_lock(&bufmgr_gem->lock); 1178 1179 if (bo_gem->map_count++ == 0) 1180 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1181 1182 if (!bo_gem->mem_virtual) { 1183 struct drm_i915_gem_mmap mmap_arg; 1184 1185 DBG("bo_map: %d (%s), map_count=%d\n", 1186 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1187 1188 VG_CLEAR(mmap_arg); 1189 mmap_arg.handle = bo_gem->gem_handle; 1190 mmap_arg.offset = 0; 1191 mmap_arg.size = bo->size; 1192 ret = drmIoctl(bufmgr_gem->fd, 1193 DRM_IOCTL_I915_GEM_MMAP, 1194 &mmap_arg); 1195 if (ret != 0) { 1196 ret = -errno; 1197 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1198 __FILE__, __LINE__, bo_gem->gem_handle, 1199 bo_gem->name, strerror(errno)); 1200 if (--bo_gem->map_count == 0) 1201 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1202 pthread_mutex_unlock(&bufmgr_gem->lock); 1203 return ret; 1204 } 1205 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1206 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1207 } 1208 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1209 bo_gem->mem_virtual); 1210 bo->virtual = bo_gem->mem_virtual; 1211 1212 VG_CLEAR(set_domain); 1213 set_domain.handle = bo_gem->gem_handle; 1214 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1215 if (write_enable) 1216 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1217 else 1218 set_domain.write_domain = 0; 1219 ret = drmIoctl(bufmgr_gem->fd, 1220 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1221 &set_domain); 1222 if (ret != 0) { 1223 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1224 __FILE__, 
__LINE__, bo_gem->gem_handle, 1225 strerror(errno)); 1226 } 1227 1228 if (write_enable) 1229 bo_gem->mapped_cpu_write = true; 1230 1231 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1232 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1233 pthread_mutex_unlock(&bufmgr_gem->lock); 1234 1235 return 0; 1236} 1237 1238static int 1239map_gtt(drm_intel_bo *bo) 1240{ 1241 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1242 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1243 int ret; 1244 1245 if (bo_gem->map_count++ == 0) 1246 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1247 1248 /* Get a mapping of the buffer if we haven't before. */ 1249 if (bo_gem->gtt_virtual == NULL) { 1250 struct drm_i915_gem_mmap_gtt mmap_arg; 1251 1252 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1253 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1254 1255 VG_CLEAR(mmap_arg); 1256 mmap_arg.handle = bo_gem->gem_handle; 1257 1258 /* Get the fake offset back... */ 1259 ret = drmIoctl(bufmgr_gem->fd, 1260 DRM_IOCTL_I915_GEM_MMAP_GTT, 1261 &mmap_arg); 1262 if (ret != 0) { 1263 ret = -errno; 1264 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1265 __FILE__, __LINE__, 1266 bo_gem->gem_handle, bo_gem->name, 1267 strerror(errno)); 1268 if (--bo_gem->map_count == 0) 1269 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1270 return ret; 1271 } 1272 1273 /* and mmap it */ 1274 ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1275 &bo_gem->gtt_virtual); 1276 if (ret) { 1277 bo_gem->gtt_virtual = NULL; 1278 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1279 __FILE__, __LINE__, 1280 bo_gem->gem_handle, bo_gem->name, 1281 strerror(errno)); 1282 if (--bo_gem->map_count == 0) 1283 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1284 return ret; 1285 } 1286 } 1287 1288 bo->virtual = bo_gem->gtt_virtual; 1289 1290 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1291 bo_gem->gtt_virtual); 1292 1293 return 0; 1294} 1295 1296int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1297{ 1298 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1299 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1300 struct drm_i915_gem_set_domain set_domain; 1301 int ret; 1302 1303 pthread_mutex_lock(&bufmgr_gem->lock); 1304 1305 ret = map_gtt(bo); 1306 if (ret) { 1307 pthread_mutex_unlock(&bufmgr_gem->lock); 1308 return ret; 1309 } 1310 1311 /* Now move it to the GTT domain so that the GPU and CPU 1312 * caches are flushed and the GPU isn't actively using the 1313 * buffer. 1314 * 1315 * The pagefault handler does this domain change for us when 1316 * it has unbound the BO from the GTT, but it's up to us to 1317 * tell it when we're about to use things if we had done 1318 * rendering and it still happens to be bound to the GTT. 
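	 *
	 * (Typical caller pattern, as an illustrative sketch: map, write
	 * through the GTT, then unmap promptly so the vma can be recycled:
	 *
	 *	if (drm_intel_gem_bo_map_gtt(bo) == 0) {
	 *		memcpy(bo->virtual, data, size);
	 *		drm_intel_gem_bo_unmap_gtt(bo);
	 *	}
	 *
	 * with "data"/"size" standing in for caller-provided contents.)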
1319 */ 1320 VG_CLEAR(set_domain); 1321 set_domain.handle = bo_gem->gem_handle; 1322 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1323 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1324 ret = drmIoctl(bufmgr_gem->fd, 1325 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1326 &set_domain); 1327 if (ret != 0) { 1328 DBG("%s:%d: Error setting domain %d: %s\n", 1329 __FILE__, __LINE__, bo_gem->gem_handle, 1330 strerror(errno)); 1331 } 1332 1333 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1334 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1335 pthread_mutex_unlock(&bufmgr_gem->lock); 1336 1337 return 0; 1338} 1339 1340/** 1341 * Performs a mapping of the buffer object like the normal GTT 1342 * mapping, but avoids waiting for the GPU to be done reading from or 1343 * rendering to the buffer. 1344 * 1345 * This is used in the implementation of GL_ARB_map_buffer_range: The 1346 * user asks to create a buffer, then does a mapping, fills some 1347 * space, runs a drawing command, then asks to map it again without 1348 * synchronizing because it guarantees that it won't write over the 1349 * data that the GPU is busy using (or, more specifically, that if it 1350 * does write over the data, it acknowledges that rendering is 1351 * undefined). 1352 */ 1353 1354int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1355{ 1356 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1357#ifdef HAVE_VALGRIND 1358 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1359#endif 1360 int ret; 1361 1362 /* If the CPU cache isn't coherent with the GTT, then use a 1363 * regular synchronized mapping. The problem is that we don't 1364 * track where the buffer was last used on the CPU side in 1365 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 1366 * we would potentially corrupt the buffer even when the user 1367 * does reasonable things. 1368 */ 1369 if (!bufmgr_gem->has_llc) 1370 return drm_intel_gem_bo_map_gtt(bo); 1371 1372 pthread_mutex_lock(&bufmgr_gem->lock); 1373 1374 ret = map_gtt(bo); 1375 if (ret == 0) { 1376 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1377 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1378 } 1379 1380 pthread_mutex_unlock(&bufmgr_gem->lock); 1381 1382 return ret; 1383} 1384 1385static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1386{ 1387 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1388 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1389 int ret = 0; 1390 1391 if (bo == NULL) 1392 return 0; 1393 1394 pthread_mutex_lock(&bufmgr_gem->lock); 1395 1396 if (bo_gem->map_count <= 0) { 1397 DBG("attempted to unmap an unmapped bo\n"); 1398 pthread_mutex_unlock(&bufmgr_gem->lock); 1399 /* Preserve the old behaviour of just treating this as a 1400 * no-op rather than reporting the error. 1401 */ 1402 return 0; 1403 } 1404 1405 if (bo_gem->mapped_cpu_write) { 1406 struct drm_i915_gem_sw_finish sw_finish; 1407 1408 /* Cause a flush to happen if the buffer's pinned for 1409 * scanout, so the results show up in a timely manner. 1410 * Unlike GTT set domains, this only does work if the 1411 * buffer should be scanout-related. 1412 */ 1413 VG_CLEAR(sw_finish); 1414 sw_finish.handle = bo_gem->gem_handle; 1415 ret = drmIoctl(bufmgr_gem->fd, 1416 DRM_IOCTL_I915_GEM_SW_FINISH, 1417 &sw_finish); 1418 ret = ret == -1 ? 
-errno : 0; 1419 1420 bo_gem->mapped_cpu_write = false; 1421 } 1422 1423 /* We need to unmap after every innovation as we cannot track 1424 * an open vma for every bo as that will exhaasut the system 1425 * limits and cause later failures. 1426 */ 1427 if (--bo_gem->map_count == 0) { 1428 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1429 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1430 bo->virtual = NULL; 1431 } 1432 pthread_mutex_unlock(&bufmgr_gem->lock); 1433 1434 return ret; 1435} 1436 1437int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1438{ 1439 return drm_intel_gem_bo_unmap(bo); 1440} 1441 1442static int 1443drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1444 unsigned long size, const void *data) 1445{ 1446 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1447 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1448 struct drm_i915_gem_pwrite pwrite; 1449 int ret; 1450 1451 VG_CLEAR(pwrite); 1452 pwrite.handle = bo_gem->gem_handle; 1453 pwrite.offset = offset; 1454 pwrite.size = size; 1455 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1456 ret = drmIoctl(bufmgr_gem->fd, 1457 DRM_IOCTL_I915_GEM_PWRITE, 1458 &pwrite); 1459 if (ret != 0) { 1460 ret = -errno; 1461 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1462 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1463 (int)size, strerror(errno)); 1464 } 1465 1466 return ret; 1467} 1468 1469static int 1470drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1471{ 1472 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1473 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1474 int ret; 1475 1476 VG_CLEAR(get_pipe_from_crtc_id); 1477 get_pipe_from_crtc_id.crtc_id = crtc_id; 1478 ret = drmIoctl(bufmgr_gem->fd, 1479 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1480 &get_pipe_from_crtc_id); 1481 if (ret != 0) { 1482 /* We return -1 here to signal that we don't 1483 * know which pipe is associated with this crtc. 1484 * This lets the caller know that this information 1485 * isn't available; using the wrong pipe for 1486 * vblank waiting can cause the chipset to lock up 1487 */ 1488 return -1; 1489 } 1490 1491 return get_pipe_from_crtc_id.pipe; 1492} 1493 1494static int 1495drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1496 unsigned long size, void *data) 1497{ 1498 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1499 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1500 struct drm_i915_gem_pread pread; 1501 int ret; 1502 1503 VG_CLEAR(pread); 1504 pread.handle = bo_gem->gem_handle; 1505 pread.offset = offset; 1506 pread.size = size; 1507 pread.data_ptr = (uint64_t) (uintptr_t) data; 1508 ret = drmIoctl(bufmgr_gem->fd, 1509 DRM_IOCTL_I915_GEM_PREAD, 1510 &pread); 1511 if (ret != 0) { 1512 ret = -errno; 1513 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1514 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1515 (int)size, strerror(errno)); 1516 } 1517 1518 return ret; 1519} 1520 1521/** Waits for all GPU rendering with the object to have completed. */ 1522static void 1523drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1524{ 1525 drm_intel_gem_bo_start_gtt_access(bo, 1); 1526} 1527 1528/** 1529 * Waits on a BO for the given amount of time. 1530 * 1531 * @bo: buffer object to wait for 1532 * @timeout_ns: amount of time to wait in nanoseconds. 1533 * If value is less than 0, an infinite wait will occur. 1534 * 1535 * Returns 0 if the wait was successful ie. 
the last batch referencing the 1536 * object has completed within the allotted time. Otherwise some negative return 1537 * value describes the error. Of particular interest is -ETIME when the wait has 1538 * failed to yield the desired result. 1539 * 1540 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows 1541 * the operation to give up after a certain amount of time. Another subtle 1542 * difference is the internal locking semantics are different (this variant does 1543 * not hold the lock for the duration of the wait). This makes the wait subject 1544 * to a larger userspace race window. 1545 * 1546 * The implementation shall wait until the object is no longer actively 1547 * referenced within a batch buffer at the time of the call. The wait will 1548 * not guarantee that the buffer is re-issued via another thread, or an flinked 1549 * handle. Userspace must make sure this race does not occur if such precision 1550 * is important. 1551 */ 1552int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) 1553{ 1554 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1555 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1556 struct drm_i915_gem_wait wait; 1557 int ret; 1558 1559 if (!bufmgr_gem->has_wait_timeout) { 1560 DBG("%s:%d: Timed wait is not supported. Falling back to " 1561 "infinite wait\n", __FILE__, __LINE__); 1562 if (timeout_ns) { 1563 drm_intel_gem_bo_wait_rendering(bo); 1564 return 0; 1565 } else { 1566 return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; 1567 } 1568 } 1569 1570 wait.bo_handle = bo_gem->gem_handle; 1571 wait.timeout_ns = timeout_ns; 1572 wait.flags = 0; 1573 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 1574 if (ret == -1) 1575 return -errno; 1576 1577 return ret; 1578} 1579 1580/** 1581 * Sets the object to the GTT read and possibly write domain, used by the X 1582 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1583 * 1584 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1585 * can do tiled pixmaps this way. 1586 */ 1587void 1588drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1589{ 1590 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1591 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1592 struct drm_i915_gem_set_domain set_domain; 1593 int ret; 1594 1595 VG_CLEAR(set_domain); 1596 set_domain.handle = bo_gem->gem_handle; 1597 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1598 set_domain.write_domain = write_enable ? 
I915_GEM_DOMAIN_GTT : 0; 1599 ret = drmIoctl(bufmgr_gem->fd, 1600 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1601 &set_domain); 1602 if (ret != 0) { 1603 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1604 __FILE__, __LINE__, bo_gem->gem_handle, 1605 set_domain.read_domains, set_domain.write_domain, 1606 strerror(errno)); 1607 } 1608} 1609 1610static void 1611drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1612{ 1613 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1614 int i; 1615 1616 free(bufmgr_gem->exec2_objects); 1617 free(bufmgr_gem->exec_objects); 1618 free(bufmgr_gem->exec_bos); 1619 free(bufmgr_gem->aub_filename); 1620 1621 pthread_mutex_destroy(&bufmgr_gem->lock); 1622 1623 /* Free any cached buffer objects we were going to reuse */ 1624 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1625 struct drm_intel_gem_bo_bucket *bucket = 1626 &bufmgr_gem->cache_bucket[i]; 1627 drm_intel_bo_gem *bo_gem; 1628 1629 while (!DRMLISTEMPTY(&bucket->head)) { 1630 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1631 bucket->head.next, head); 1632 DRMLISTDEL(&bo_gem->head); 1633 1634 drm_intel_gem_bo_free(&bo_gem->bo); 1635 } 1636 } 1637 1638 free(bufmgr); 1639} 1640 1641/** 1642 * Adds the target buffer to the validation list and adds the relocation 1643 * to the reloc_buffer's relocation list. 1644 * 1645 * The relocation entry at the given offset must already contain the 1646 * precomputed relocation value, because the kernel will optimize out 1647 * the relocation entry write when the buffer hasn't moved from the 1648 * last known offset in target_bo. 1649 */ 1650static int 1651do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1652 drm_intel_bo *target_bo, uint32_t target_offset, 1653 uint32_t read_domains, uint32_t write_domain, 1654 bool need_fence) 1655{ 1656 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1657 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1658 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1659 bool fenced_command; 1660 1661 if (bo_gem->has_error) 1662 return -ENOMEM; 1663 1664 if (target_bo_gem->has_error) { 1665 bo_gem->has_error = true; 1666 return -ENOMEM; 1667 } 1668 1669 /* We never use HW fences for rendering on 965+ */ 1670 if (bufmgr_gem->gen >= 4) 1671 need_fence = false; 1672 1673 fenced_command = need_fence; 1674 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1675 need_fence = false; 1676 1677 /* Create a new relocation list if needed */ 1678 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1679 return -ENOMEM; 1680 1681 /* Check overflow */ 1682 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1683 1684 /* Check args */ 1685 assert(offset <= bo->size - 4); 1686 assert((write_domain & (write_domain - 1)) == 0); 1687 1688 /* Make sure that we're not adding a reloc to something whose size has 1689 * already been accounted for. 1690 */ 1691 assert(!bo_gem->used_as_reloc_target); 1692 if (target_bo_gem != bo_gem) { 1693 target_bo_gem->used_as_reloc_target = true; 1694 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1695 } 1696 /* An object needing a fence is a tiled buffer, so it won't have 1697 * relocs to other buffers. 
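	 *
	 * (For context, an illustrative sketch of the public entry point:
	 * a driver emitting a pointer to a target buffer at byte "offset"
	 * inside its batch would call
	 *
	 *	drm_intel_bo_emit_reloc(batch_bo, offset, target_bo, 0,
	 *				I915_GEM_DOMAIN_RENDER,
	 *				I915_GEM_DOMAIN_RENDER);
	 *
	 * where batch_bo/target_bo/offset are hypothetical caller values.)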
1698 */ 1699 if (need_fence) 1700 target_bo_gem->reloc_tree_fences = 1; 1701 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1702 1703 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1704 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1705 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1706 target_bo_gem->gem_handle; 1707 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1708 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1709 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 1710 1711 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1712 if (target_bo != bo) 1713 drm_intel_gem_bo_reference(target_bo); 1714 if (fenced_command) 1715 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1716 DRM_INTEL_RELOC_FENCE; 1717 else 1718 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1719 1720 bo_gem->reloc_count++; 1721 1722 return 0; 1723} 1724 1725static int 1726drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1727 drm_intel_bo *target_bo, uint32_t target_offset, 1728 uint32_t read_domains, uint32_t write_domain) 1729{ 1730 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1731 1732 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1733 read_domains, write_domain, 1734 !bufmgr_gem->fenced_relocs); 1735} 1736 1737static int 1738drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1739 drm_intel_bo *target_bo, 1740 uint32_t target_offset, 1741 uint32_t read_domains, uint32_t write_domain) 1742{ 1743 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1744 read_domains, write_domain, true); 1745} 1746 1747int 1748drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 1749{ 1750 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1751 1752 return bo_gem->reloc_count; 1753} 1754 1755/** 1756 * Removes existing relocation entries in the BO after "start". 1757 * 1758 * This allows a user to avoid a two-step process for state setup with 1759 * counting up all the buffer objects and doing a 1760 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 1761 * relocations for the state setup. Instead, save the state of the 1762 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 1763 * state, and then check if it still fits in the aperture. 1764 * 1765 * Any further drm_intel_bufmgr_check_aperture_space() queries 1766 * involving this buffer in the tree are undefined after this call. 1767 */ 1768void 1769drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 1770{ 1771 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1772 int i; 1773 struct timespec time; 1774 1775 clock_gettime(CLOCK_MONOTONIC, &time); 1776 1777 assert(bo_gem->reloc_count >= start); 1778 /* Unreference the cleared target buffers */ 1779 for (i = start; i < bo_gem->reloc_count; i++) { 1780 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 1781 if (&target_bo_gem->bo != bo) { 1782 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 1783 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 1784 time.tv_sec); 1785 } 1786 } 1787 bo_gem->reloc_count = start; 1788} 1789 1790/** 1791 * Walk the tree of relocations rooted at BO and accumulate the list of 1792 * validations to be performed and update the relocation buffers with 1793 * index values into the validation list. 
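 *
 * (The save/restore pattern described above for
 * drm_intel_gem_bo_clear_relocs(), as an illustrative sketch:
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *	... emit state, adding relocations ...
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0)
 *		drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *
 * with batch_bo a hypothetical batch buffer object.)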
1794 */ 1795static void 1796drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1797{ 1798 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1799 int i; 1800 1801 if (bo_gem->relocs == NULL) 1802 return; 1803 1804 for (i = 0; i < bo_gem->reloc_count; i++) { 1805 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1806 1807 if (target_bo == bo) 1808 continue; 1809 1810 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1811 1812 /* Continue walking the tree depth-first. */ 1813 drm_intel_gem_bo_process_reloc(target_bo); 1814 1815 /* Add the target to the validate list */ 1816 drm_intel_add_validate_buffer(target_bo); 1817 } 1818} 1819 1820static void 1821drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1822{ 1823 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1824 int i; 1825 1826 if (bo_gem->relocs == NULL) 1827 return; 1828 1829 for (i = 0; i < bo_gem->reloc_count; i++) { 1830 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1831 int need_fence; 1832 1833 if (target_bo == bo) 1834 continue; 1835 1836 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1837 1838 /* Continue walking the tree depth-first. */ 1839 drm_intel_gem_bo_process_reloc2(target_bo); 1840 1841 need_fence = (bo_gem->reloc_target_info[i].flags & 1842 DRM_INTEL_RELOC_FENCE); 1843 1844 /* Add the target to the validate list */ 1845 drm_intel_add_validate_buffer2(target_bo, need_fence); 1846 } 1847} 1848 1849 1850static void 1851drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1852{ 1853 int i; 1854 1855 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1856 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1857 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1858 1859 /* Update the buffer offset */ 1860 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 1861 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1862 bo_gem->gem_handle, bo_gem->name, bo->offset64, 1863 (unsigned long long)bufmgr_gem->exec_objects[i]. 
1864 offset); 1865 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 1866 bo->offset = bufmgr_gem->exec_objects[i].offset; 1867 } 1868 } 1869} 1870 1871static void 1872drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1873{ 1874 int i; 1875 1876 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1877 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1878 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1879 1880 /* Update the buffer offset */ 1881 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 1882 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1883 bo_gem->gem_handle, bo_gem->name, bo->offset64, 1884 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1885 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 1886 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1887 } 1888 } 1889} 1890 1891static void 1892aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 1893{ 1894 fwrite(&data, 1, 4, bufmgr_gem->aub_file); 1895} 1896 1897static void 1898aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 1899{ 1900 fwrite(data, 1, size, bufmgr_gem->aub_file); 1901} 1902 1903static void 1904aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 1905{ 1906 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1907 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1908 uint32_t *data; 1909 unsigned int i; 1910 1911 data = malloc(bo->size); 1912 drm_intel_bo_get_subdata(bo, offset, size, data); 1913 1914 /* Easy mode: write out bo with no relocations */ 1915 if (!bo_gem->reloc_count) { 1916 aub_out_data(bufmgr_gem, data, size); 1917 free(data); 1918 return; 1919 } 1920 1921 /* Otherwise, handle the relocations while writing. */ 1922 for (i = 0; i < size / 4; i++) { 1923 int r; 1924 for (r = 0; r < bo_gem->reloc_count; r++) { 1925 struct drm_i915_gem_relocation_entry *reloc; 1926 drm_intel_reloc_target *info; 1927 1928 reloc = &bo_gem->relocs[r]; 1929 info = &bo_gem->reloc_target_info[r]; 1930 1931 if (reloc->offset == offset + i * 4) { 1932 drm_intel_bo_gem *target_gem; 1933 uint32_t val; 1934 1935 target_gem = (drm_intel_bo_gem *)info->bo; 1936 1937 val = reloc->delta; 1938 val += target_gem->aub_offset; 1939 1940 aub_out(bufmgr_gem, val); 1941 data[i] = val; 1942 break; 1943 } 1944 } 1945 if (r == bo_gem->reloc_count) { 1946 /* no relocation, just the data */ 1947 aub_out(bufmgr_gem, data[i]); 1948 } 1949 } 1950 1951 free(data); 1952} 1953 1954static void 1955aub_bo_get_address(drm_intel_bo *bo) 1956{ 1957 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1958 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1959 1960 /* Give the object a graphics address in the AUB file. We 1961 * don't just use the GEM object address because we do AUB 1962 * dumping before execution -- we want to successfully log 1963 * when the hardware might hang, and we might even want to aub 1964 * capture for a driver trying to execute on a different 1965 * generation of hardware by disabling the actual kernel exec 1966 * call. 1967 */ 1968 bo_gem->aub_offset = bufmgr_gem->aub_offset; 1969 bufmgr_gem->aub_offset += bo->size; 1970 /* XXX: Handle aperture overflow. 
*/ 1971 assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 1972} 1973 1974static void 1975aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 1976 uint32_t offset, uint32_t size) 1977{ 1978 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1979 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1980 1981 aub_out(bufmgr_gem, 1982 CMD_AUB_TRACE_HEADER_BLOCK | 1983 ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 1984 aub_out(bufmgr_gem, 1985 AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 1986 aub_out(bufmgr_gem, subtype); 1987 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 1988 aub_out(bufmgr_gem, size); 1989 if (bufmgr_gem->gen >= 8) 1990 aub_out(bufmgr_gem, 0); 1991 aub_write_bo_data(bo, offset, size); 1992} 1993 1994/** 1995 * Break up large objects into multiple writes. Otherwise a 128kb VBO 1996 * would overflow the 16 bits of size field in the packet header and 1997 * everything goes badly after that. 1998 */ 1999static void 2000aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 2001 uint32_t offset, uint32_t size) 2002{ 2003 uint32_t block_size; 2004 uint32_t sub_offset; 2005 2006 for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 2007 block_size = size - sub_offset; 2008 2009 if (block_size > 8 * 4096) 2010 block_size = 8 * 4096; 2011 2012 aub_write_trace_block(bo, type, subtype, offset + sub_offset, 2013 block_size); 2014 } 2015} 2016 2017static void 2018aub_write_bo(drm_intel_bo *bo) 2019{ 2020 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2021 uint32_t offset = 0; 2022 unsigned i; 2023 2024 aub_bo_get_address(bo); 2025 2026 /* Write out each annotated section separately. */ 2027 for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 2028 drm_intel_aub_annotation *annotation = 2029 &bo_gem->aub_annotations[i]; 2030 uint32_t ending_offset = annotation->ending_offset; 2031 if (ending_offset > bo->size) 2032 ending_offset = bo->size; 2033 if (ending_offset > offset) { 2034 aub_write_large_trace_block(bo, annotation->type, 2035 annotation->subtype, 2036 offset, 2037 ending_offset - offset); 2038 offset = ending_offset; 2039 } 2040 } 2041 2042 /* Write out any remaining unannotated data */ 2043 if (offset < bo->size) { 2044 aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 2045 offset, bo->size - offset); 2046 } 2047} 2048 2049/* 2050 * Make a ringbuffer on fly and dump it 2051 */ 2052static void 2053aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 2054 uint32_t batch_buffer, int ring_flag) 2055{ 2056 uint32_t ringbuffer[4096]; 2057 int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 2058 int ring_count = 0; 2059 2060 if (ring_flag == I915_EXEC_BSD) 2061 ring = AUB_TRACE_TYPE_RING_PRB1; 2062 else if (ring_flag == I915_EXEC_BLT) 2063 ring = AUB_TRACE_TYPE_RING_PRB2; 2064 2065 /* Make a ring buffer to execute our batchbuffer. */ 2066 memset(ringbuffer, 0, sizeof(ringbuffer)); 2067 if (bufmgr_gem->gen >= 8) { 2068 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 2069 ringbuffer[ring_count++] = batch_buffer; 2070 ringbuffer[ring_count++] = 0; 2071 } else { 2072 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 2073 ringbuffer[ring_count++] = batch_buffer; 2074 } 2075 2076 /* Write out the ring. This appears to trigger execution of 2077 * the ring in the simulator. 2078 */ 2079 aub_out(bufmgr_gem, 2080 CMD_AUB_TRACE_HEADER_BLOCK | 2081 ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 2082 aub_out(bufmgr_gem, 2083 AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 2084 aub_out(bufmgr_gem, 0); /* general/surface subtype */ 2085 aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 2086 aub_out(bufmgr_gem, ring_count * 4); 2087 if (bufmgr_gem->gen >= 8) 2088 aub_out(bufmgr_gem, 0); 2089 2090 /* FIXME: Need some flush operations here? */ 2091 aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 2092 2093 /* Update offset pointer */ 2094 bufmgr_gem->aub_offset += 4096; 2095} 2096 2097void 2098drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2099 int x1, int y1, int width, int height, 2100 enum aub_dump_bmp_format format, 2101 int pitch, int offset) 2102{ 2103 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2104 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2105 uint32_t cpp; 2106 2107 switch (format) { 2108 case AUB_DUMP_BMP_FORMAT_8BIT: 2109 cpp = 1; 2110 break; 2111 case AUB_DUMP_BMP_FORMAT_ARGB_4444: 2112 cpp = 2; 2113 break; 2114 case AUB_DUMP_BMP_FORMAT_ARGB_0888: 2115 case AUB_DUMP_BMP_FORMAT_ARGB_8888: 2116 cpp = 4; 2117 break; 2118 default: 2119 printf("Unknown AUB dump format %d\n", format); 2120 return; 2121 } 2122 2123 if (!bufmgr_gem->aub_file) 2124 return; 2125 2126 aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 2127 aub_out(bufmgr_gem, (y1 << 16) | x1); 2128 aub_out(bufmgr_gem, 2129 (format << 24) | 2130 (cpp << 19) | 2131 pitch / 4); 2132 aub_out(bufmgr_gem, (height << 16) | width); 2133 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 2134 aub_out(bufmgr_gem, 2135 ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 2136 ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); 2137} 2138 2139static void 2140aub_exec(drm_intel_bo *bo, int ring_flag, int used) 2141{ 2142 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2143 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2144 int i; 2145 bool batch_buffer_needs_annotations; 2146 2147 if (!bufmgr_gem->aub_file) 2148 return; 2149 2150 /* If batch buffer is not annotated, annotate it the best we 2151 * can. 2152 */ 2153 batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 2154 if (batch_buffer_needs_annotations) { 2155 drm_intel_aub_annotation annotations[2] = { 2156 { AUB_TRACE_TYPE_BATCH, 0, used }, 2157 { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 2158 }; 2159 drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 2160 } 2161 2162 /* Write out all buffers to AUB memory */ 2163 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2164 aub_write_bo(bufmgr_gem->exec_bos[i]); 2165 } 2166 2167 /* Remove any annotations we added */ 2168 if (batch_buffer_needs_annotations) 2169 drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 2170 2171 /* Dump ring buffer */ 2172 aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 2173 2174 fflush(bufmgr_gem->aub_file); 2175 2176 /* 2177 * One frame has been dumped. So reset the aub_offset for the next frame. 2178 * 2179 * FIXME: Can we do this? 
2180 */ 2181 bufmgr_gem->aub_offset = 0x10000; 2182} 2183 2184static int 2185drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2186 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2187{ 2188 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2189 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2190 struct drm_i915_gem_execbuffer execbuf; 2191 int ret, i; 2192 2193 if (bo_gem->has_error) 2194 return -ENOMEM; 2195 2196 pthread_mutex_lock(&bufmgr_gem->lock); 2197 /* Update indices and set up the validate list. */ 2198 drm_intel_gem_bo_process_reloc(bo); 2199 2200 /* Add the batch buffer to the validation list. There are no 2201 * relocations pointing to it. 2202 */ 2203 drm_intel_add_validate_buffer(bo); 2204 2205 VG_CLEAR(execbuf); 2206 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2207 execbuf.buffer_count = bufmgr_gem->exec_count; 2208 execbuf.batch_start_offset = 0; 2209 execbuf.batch_len = used; 2210 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2211 execbuf.num_cliprects = num_cliprects; 2212 execbuf.DR1 = 0; 2213 execbuf.DR4 = DR4; 2214 2215 ret = drmIoctl(bufmgr_gem->fd, 2216 DRM_IOCTL_I915_GEM_EXECBUFFER, 2217 &execbuf); 2218 if (ret != 0) { 2219 ret = -errno; 2220 if (errno == ENOSPC) { 2221 DBG("Execbuffer fails to pin. " 2222 "Estimate: %u. Actual: %u. Available: %u\n", 2223 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2224 bufmgr_gem-> 2225 exec_count), 2226 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2227 bufmgr_gem-> 2228 exec_count), 2229 (unsigned int)bufmgr_gem->gtt_size); 2230 } 2231 } 2232 drm_intel_update_buffer_offsets(bufmgr_gem); 2233 2234 if (bufmgr_gem->bufmgr.debug) 2235 drm_intel_gem_dump_validation_list(bufmgr_gem); 2236 2237 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2238 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2239 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2240 2241 bo_gem->idle = false; 2242 2243 /* Disconnect the buffer from the validate list */ 2244 bo_gem->validate_index = -1; 2245 bufmgr_gem->exec_bos[i] = NULL; 2246 } 2247 bufmgr_gem->exec_count = 0; 2248 pthread_mutex_unlock(&bufmgr_gem->lock); 2249 2250 return ret; 2251} 2252 2253static int 2254do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2255 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2256 unsigned int flags) 2257{ 2258 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2259 struct drm_i915_gem_execbuffer2 execbuf; 2260 int ret = 0; 2261 int i; 2262 2263 switch (flags & 0x7) { 2264 default: 2265 return -EINVAL; 2266 case I915_EXEC_BLT: 2267 if (!bufmgr_gem->has_blt) 2268 return -EINVAL; 2269 break; 2270 case I915_EXEC_BSD: 2271 if (!bufmgr_gem->has_bsd) 2272 return -EINVAL; 2273 break; 2274 case I915_EXEC_VEBOX: 2275 if (!bufmgr_gem->has_vebox) 2276 return -EINVAL; 2277 break; 2278 case I915_EXEC_RENDER: 2279 case I915_EXEC_DEFAULT: 2280 break; 2281 } 2282 2283 pthread_mutex_lock(&bufmgr_gem->lock); 2284 /* Update indices and set up the validate list. */ 2285 drm_intel_gem_bo_process_reloc2(bo); 2286 2287 /* Add the batch buffer to the validation list. There are no relocations 2288 * pointing to it. 
2289 */ 2290 drm_intel_add_validate_buffer2(bo, 0); 2291 2292 VG_CLEAR(execbuf); 2293 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2294 execbuf.buffer_count = bufmgr_gem->exec_count; 2295 execbuf.batch_start_offset = 0; 2296 execbuf.batch_len = used; 2297 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2298 execbuf.num_cliprects = num_cliprects; 2299 execbuf.DR1 = 0; 2300 execbuf.DR4 = DR4; 2301 execbuf.flags = flags; 2302 if (ctx == NULL) 2303 i915_execbuffer2_set_context_id(execbuf, 0); 2304 else 2305 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2306 execbuf.rsvd2 = 0; 2307 2308 aub_exec(bo, flags, used); 2309 2310 if (bufmgr_gem->no_exec) 2311 goto skip_execution; 2312 2313 ret = drmIoctl(bufmgr_gem->fd, 2314 DRM_IOCTL_I915_GEM_EXECBUFFER2, 2315 &execbuf); 2316 if (ret != 0) { 2317 ret = -errno; 2318 if (ret == -ENOSPC) { 2319 DBG("Execbuffer fails to pin. " 2320 "Estimate: %u. Actual: %u. Available: %u\n", 2321 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2322 bufmgr_gem->exec_count), 2323 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2324 bufmgr_gem->exec_count), 2325 (unsigned int) bufmgr_gem->gtt_size); 2326 } 2327 } 2328 drm_intel_update_buffer_offsets2(bufmgr_gem); 2329 2330skip_execution: 2331 if (bufmgr_gem->bufmgr.debug) 2332 drm_intel_gem_dump_validation_list(bufmgr_gem); 2333 2334 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2335 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2336 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2337 2338 bo_gem->idle = false; 2339 2340 /* Disconnect the buffer from the validate list */ 2341 bo_gem->validate_index = -1; 2342 bufmgr_gem->exec_bos[i] = NULL; 2343 } 2344 bufmgr_gem->exec_count = 0; 2345 pthread_mutex_unlock(&bufmgr_gem->lock); 2346 2347 return ret; 2348} 2349 2350static int 2351drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2352 drm_clip_rect_t *cliprects, int num_cliprects, 2353 int DR4) 2354{ 2355 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2356 I915_EXEC_RENDER); 2357} 2358 2359static int 2360drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2361 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2362 unsigned int flags) 2363{ 2364 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2365 flags); 2366} 2367 2368int 2369drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2370 int used, unsigned int flags) 2371{ 2372 return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2373} 2374 2375static int 2376drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2377{ 2378 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2379 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2380 struct drm_i915_gem_pin pin; 2381 int ret; 2382 2383 VG_CLEAR(pin); 2384 pin.handle = bo_gem->gem_handle; 2385 pin.alignment = alignment; 2386 2387 ret = drmIoctl(bufmgr_gem->fd, 2388 DRM_IOCTL_I915_GEM_PIN, 2389 &pin); 2390 if (ret != 0) 2391 return -errno; 2392 2393 bo->offset64 = pin.offset; 2394 bo->offset = pin.offset; 2395 return 0; 2396} 2397 2398static int 2399drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2400{ 2401 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2402 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2403 struct drm_i915_gem_unpin unpin; 2404 int ret; 2405 2406 VG_CLEAR(unpin); 2407 unpin.handle = bo_gem->gem_handle; 2408 2409 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2410 if (ret != 0) 2411 return -errno; 2412 2413 return 0; 2414} 2415 2416static int 
2417drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2418 uint32_t tiling_mode, 2419 uint32_t stride) 2420{ 2421 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2422 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2423 struct drm_i915_gem_set_tiling set_tiling; 2424 int ret; 2425 2426 if (bo_gem->global_name == 0 && 2427 tiling_mode == bo_gem->tiling_mode && 2428 stride == bo_gem->stride) 2429 return 0; 2430 2431 memset(&set_tiling, 0, sizeof(set_tiling)); 2432 do { 2433 /* set_tiling is slightly broken and overwrites the 2434 * input on the error path, so we have to open code 2435 * rmIoctl. 2436 */ 2437 set_tiling.handle = bo_gem->gem_handle; 2438 set_tiling.tiling_mode = tiling_mode; 2439 set_tiling.stride = stride; 2440 2441 ret = ioctl(bufmgr_gem->fd, 2442 DRM_IOCTL_I915_GEM_SET_TILING, 2443 &set_tiling); 2444 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2445 if (ret == -1) 2446 return -errno; 2447 2448 bo_gem->tiling_mode = set_tiling.tiling_mode; 2449 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2450 bo_gem->stride = set_tiling.stride; 2451 return 0; 2452} 2453 2454static int 2455drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2456 uint32_t stride) 2457{ 2458 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2459 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2460 int ret; 2461 2462 /* Linear buffers have no stride. By ensuring that we only ever use 2463 * stride 0 with linear buffers, we simplify our code. 2464 */ 2465 if (*tiling_mode == I915_TILING_NONE) 2466 stride = 0; 2467 2468 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2469 if (ret == 0) 2470 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2471 2472 *tiling_mode = bo_gem->tiling_mode; 2473 return ret; 2474} 2475 2476static int 2477drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2478 uint32_t * swizzle_mode) 2479{ 2480 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2481 2482 *tiling_mode = bo_gem->tiling_mode; 2483 *swizzle_mode = bo_gem->swizzle_mode; 2484 return 0; 2485} 2486 2487drm_intel_bo * 2488drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2489{ 2490 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2491 int ret; 2492 uint32_t handle; 2493 drm_intel_bo_gem *bo_gem; 2494 struct drm_i915_gem_get_tiling get_tiling; 2495 drmMMListHead *list; 2496 2497 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2498 2499 /* 2500 * See if the kernel has already returned this buffer to us. Just as 2501 * for named buffers, we must not create two bo's pointing at the same 2502 * kernel object 2503 */ 2504 for (list = bufmgr_gem->named.next; 2505 list != &bufmgr_gem->named; 2506 list = list->next) { 2507 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 2508 if (bo_gem->gem_handle == handle) { 2509 drm_intel_gem_bo_reference(&bo_gem->bo); 2510 return &bo_gem->bo; 2511 } 2512 } 2513 2514 if (ret) { 2515 fprintf(stderr,"ret is %d %d\n", ret, errno); 2516 return NULL; 2517 } 2518 2519 bo_gem = calloc(1, sizeof(*bo_gem)); 2520 if (!bo_gem) 2521 return NULL; 2522 2523 /* Determine size of bo. The fd-to-handle ioctl really should 2524 * return the size, but it doesn't. If we have kernel 3.12 or 2525 * later, we can lseek on the prime fd to get the size. Older 2526 * kernels will just fail, in which case we fall back to the 2527 * provided (estimated or guess size). 
*/ 2528 ret = lseek(prime_fd, 0, SEEK_END); 2529 if (ret != -1) 2530 bo_gem->bo.size = ret; 2531 else 2532 bo_gem->bo.size = size; 2533 2534 bo_gem->bo.handle = handle; 2535 bo_gem->bo.bufmgr = bufmgr; 2536 2537 bo_gem->gem_handle = handle; 2538 2539 atomic_set(&bo_gem->refcount, 1); 2540 2541 bo_gem->name = "prime"; 2542 bo_gem->validate_index = -1; 2543 bo_gem->reloc_tree_fences = 0; 2544 bo_gem->used_as_reloc_target = false; 2545 bo_gem->has_error = false; 2546 bo_gem->reusable = false; 2547 2548 DRMINITLISTHEAD(&bo_gem->vma_list); 2549 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2550 2551 VG_CLEAR(get_tiling); 2552 get_tiling.handle = bo_gem->gem_handle; 2553 ret = drmIoctl(bufmgr_gem->fd, 2554 DRM_IOCTL_I915_GEM_GET_TILING, 2555 &get_tiling); 2556 if (ret != 0) { 2557 drm_intel_gem_bo_unreference(&bo_gem->bo); 2558 return NULL; 2559 } 2560 bo_gem->tiling_mode = get_tiling.tiling_mode; 2561 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2562 /* XXX stride is unknown */ 2563 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2564 2565 return &bo_gem->bo; 2566} 2567 2568int 2569drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2570{ 2571 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2572 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2573 2574 if (DRMLISTEMPTY(&bo_gem->name_list)) 2575 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2576 2577 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2578 DRM_CLOEXEC, prime_fd) != 0) 2579 return -errno; 2580 2581 bo_gem->reusable = false; 2582 2583 return 0; 2584} 2585 2586static int 2587drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2588{ 2589 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2590 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2591 int ret; 2592 2593 if (!bo_gem->global_name) { 2594 struct drm_gem_flink flink; 2595 2596 VG_CLEAR(flink); 2597 flink.handle = bo_gem->gem_handle; 2598 2599 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 2600 if (ret != 0) 2601 return -errno; 2602 2603 bo_gem->global_name = flink.name; 2604 bo_gem->reusable = false; 2605 2606 if (DRMLISTEMPTY(&bo_gem->name_list)) 2607 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2608 } 2609 2610 *name = bo_gem->global_name; 2611 return 0; 2612} 2613 2614/** 2615 * Enables unlimited caching of buffer objects for reuse. 2616 * 2617 * This is potentially very memory expensive, as the cache at each bucket 2618 * size is only bounded by how many buffers of that size we've managed to have 2619 * in flight at once. 2620 */ 2621void 2622drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2623{ 2624 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2625 2626 bufmgr_gem->bo_reuse = true; 2627} 2628 2629/** 2630 * Enable use of fenced reloc type. 2631 * 2632 * New code should enable this to avoid unnecessary fence register 2633 * allocation. If this option is not enabled, all relocs will have fence 2634 * register allocated. 2635 */ 2636void 2637drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2638{ 2639 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2640 2641 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2642 bufmgr_gem->fenced_relocs = true; 2643} 2644 2645/** 2646 * Return the additional aperture space required by the tree of buffer objects 2647 * rooted at bo. 
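 *
 * This is the depth-first walk used by drm_intel_gem_compute_batch_space();
 * the included_in_check_aperture flag keeps a BO from being counted twice
 * within a single walk and is cleared again afterwards by
 * drm_intel_gem_bo_clear_aperture_space_flag().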
2648 */ 2649static int 2650drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2651{ 2652 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2653 int i; 2654 int total = 0; 2655 2656 if (bo == NULL || bo_gem->included_in_check_aperture) 2657 return 0; 2658 2659 total += bo->size; 2660 bo_gem->included_in_check_aperture = true; 2661 2662 for (i = 0; i < bo_gem->reloc_count; i++) 2663 total += 2664 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2665 reloc_target_info[i].bo); 2666 2667 return total; 2668} 2669 2670/** 2671 * Count the number of buffers in this list that need a fence reg 2672 * 2673 * If the count is greater than the number of available regs, we'll have 2674 * to ask the caller to resubmit a batch with fewer tiled buffers. 2675 * 2676 * This function over-counts if the same buffer is used multiple times. 2677 */ 2678static unsigned int 2679drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2680{ 2681 int i; 2682 unsigned int total = 0; 2683 2684 for (i = 0; i < count; i++) { 2685 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2686 2687 if (bo_gem == NULL) 2688 continue; 2689 2690 total += bo_gem->reloc_tree_fences; 2691 } 2692 return total; 2693} 2694 2695/** 2696 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2697 * for the next drm_intel_bufmgr_check_aperture_space() call. 2698 */ 2699static void 2700drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2701{ 2702 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2703 int i; 2704 2705 if (bo == NULL || !bo_gem->included_in_check_aperture) 2706 return; 2707 2708 bo_gem->included_in_check_aperture = false; 2709 2710 for (i = 0; i < bo_gem->reloc_count; i++) 2711 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2712 reloc_target_info[i].bo); 2713} 2714 2715/** 2716 * Return a conservative estimate for the amount of aperture required 2717 * for a collection of buffers. This may double-count some buffers. 2718 */ 2719static unsigned int 2720drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2721{ 2722 int i; 2723 unsigned int total = 0; 2724 2725 for (i = 0; i < count; i++) { 2726 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2727 if (bo_gem != NULL) 2728 total += bo_gem->reloc_tree_size; 2729 } 2730 return total; 2731} 2732 2733/** 2734 * Return the amount of aperture needed for a collection of buffers. 2735 * This avoids double counting any buffers, at the cost of looking 2736 * at every buffer in the set. 2737 */ 2738static unsigned int 2739drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2740{ 2741 int i; 2742 unsigned int total = 0; 2743 2744 for (i = 0; i < count; i++) { 2745 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2746 /* For the first buffer object in the array, we get an 2747 * accurate count back for its reloc_tree size (since nothing 2748 * had been flagged as being counted yet). We can save that 2749 * value out as a more conservative reloc_tree_size that 2750 * avoids double-counting target buffers. Since the first 2751 * buffer happens to usually be the batch buffer in our 2752 * callers, this can pull us back from doing the tree 2753 * walk on every new batch emit. 
2754 */ 2755 if (i == 0) { 2756 drm_intel_bo_gem *bo_gem = 2757 (drm_intel_bo_gem *) bo_array[i]; 2758 bo_gem->reloc_tree_size = total; 2759 } 2760 } 2761 2762 for (i = 0; i < count; i++) 2763 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2764 return total; 2765} 2766 2767/** 2768 * Return -1 if the batchbuffer should be flushed before attempting to 2769 * emit rendering referencing the buffers pointed to by bo_array. 2770 * 2771 * This is required because if we try to emit a batchbuffer with relocations 2772 * to a tree of buffers that won't simultaneously fit in the aperture, 2773 * the rendering will return an error at a point where the software is not 2774 * prepared to recover from it. 2775 * 2776 * However, we also want to emit the batchbuffer significantly before we reach 2777 * the limit, as a series of batchbuffers each of which references buffers 2778 * covering almost all of the aperture means that at each emit we end up 2779 * waiting to evict a buffer from the last rendering, and we get synchronous 2780 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2781 * get better parallelism. 2782 */ 2783static int 2784drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2785{ 2786 drm_intel_bufmgr_gem *bufmgr_gem = 2787 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2788 unsigned int total = 0; 2789 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2790 int total_fences; 2791 2792 /* Check for fence reg constraints if necessary */ 2793 if (bufmgr_gem->available_fences) { 2794 total_fences = drm_intel_gem_total_fences(bo_array, count); 2795 if (total_fences > bufmgr_gem->available_fences) 2796 return -ENOSPC; 2797 } 2798 2799 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2800 2801 if (total > threshold) 2802 total = drm_intel_gem_compute_batch_space(bo_array, count); 2803 2804 if (total > threshold) { 2805 DBG("check_space: overflowed available aperture, " 2806 "%dkb vs %dkb\n", 2807 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 2808 return -ENOSPC; 2809 } else { 2810 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 2811 (int)bufmgr_gem->gtt_size / 1024); 2812 return 0; 2813 } 2814} 2815 2816/* 2817 * Disable buffer reuse for objects which are shared with the kernel 2818 * as scanout buffers 2819 */ 2820static int 2821drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 2822{ 2823 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2824 2825 bo_gem->reusable = false; 2826 return 0; 2827} 2828 2829static int 2830drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 2831{ 2832 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2833 2834 return bo_gem->reusable; 2835} 2836 2837static int 2838_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2839{ 2840 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2841 int i; 2842 2843 for (i = 0; i < bo_gem->reloc_count; i++) { 2844 if (bo_gem->reloc_target_info[i].bo == target_bo) 2845 return 1; 2846 if (bo == bo_gem->reloc_target_info[i].bo) 2847 continue; 2848 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2849 target_bo)) 2850 return 1; 2851 } 2852 2853 return 0; 2854} 2855 2856/** Return true if target_bo is referenced by bo's relocation tree. 
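 *
 * This backs the drm_intel_bo_references() entry point. A hedged usage
 * sketch (flush_batch() is a caller-provided helper, not part of libdrm):
 *
 *   if (drm_intel_bo_references(batch_bo, target_bo))
 *           flush_batch();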
*/ 2857static int 2858drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2859{ 2860 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2861 2862 if (bo == NULL || target_bo == NULL) 2863 return 0; 2864 if (target_bo_gem->used_as_reloc_target) 2865 return _drm_intel_gem_bo_references(bo, target_bo); 2866 return 0; 2867} 2868 2869static void 2870add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2871{ 2872 unsigned int i = bufmgr_gem->num_buckets; 2873 2874 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2875 2876 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2877 bufmgr_gem->cache_bucket[i].size = size; 2878 bufmgr_gem->num_buckets++; 2879} 2880 2881static void 2882init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 2883{ 2884 unsigned long size, cache_max_size = 64 * 1024 * 1024; 2885 2886 /* OK, so power of two buckets was too wasteful of memory. 2887 * Give 3 other sizes between each power of two, to hopefully 2888 * cover things accurately enough. (The alternative is 2889 * probably to just go for exact matching of sizes, and assume 2890 * that for things like composited window resize the tiled 2891 * width/height alignment and rounding of sizes to pages will 2892 * get us useful cache hit rates anyway) 2893 */ 2894 add_bucket(bufmgr_gem, 4096); 2895 add_bucket(bufmgr_gem, 4096 * 2); 2896 add_bucket(bufmgr_gem, 4096 * 3); 2897 2898 /* Initialize the linked lists for BO reuse cache. */ 2899 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 2900 add_bucket(bufmgr_gem, size); 2901 2902 add_bucket(bufmgr_gem, size + size * 1 / 4); 2903 add_bucket(bufmgr_gem, size + size * 2 / 4); 2904 add_bucket(bufmgr_gem, size + size * 3 / 4); 2905 } 2906} 2907 2908void 2909drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 2910{ 2911 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2912 2913 bufmgr_gem->vma_max = limit; 2914 2915 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 2916} 2917 2918/** 2919 * Get the PCI ID for the device. This can be overridden by setting the 2920 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 2921 */ 2922static int 2923get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 2924{ 2925 char *devid_override; 2926 int devid; 2927 int ret; 2928 drm_i915_getparam_t gp; 2929 2930 if (geteuid() == getuid()) { 2931 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 2932 if (devid_override) { 2933 bufmgr_gem->no_exec = true; 2934 return strtod(devid_override, NULL); 2935 } 2936 } 2937 2938 VG_CLEAR(devid); 2939 VG_CLEAR(gp); 2940 gp.param = I915_PARAM_CHIPSET_ID; 2941 gp.value = &devid; 2942 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2943 if (ret) { 2944 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 2945 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2946 } 2947 return devid; 2948} 2949 2950int 2951drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 2952{ 2953 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2954 2955 return bufmgr_gem->pci_device; 2956} 2957 2958/** 2959 * Sets the AUB filename. 2960 * 2961 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 2962 * for it to have any effect. 
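 *
 * A minimal usage sketch (the filename is only an example):
 *
 *   drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "/tmp/frame.aub");
 *   drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
 *   ... submit the batchbuffers to be captured ...
 *   drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);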
2963 */ 2964void 2965drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 2966 const char *filename) 2967{ 2968 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2969 2970 free(bufmgr_gem->aub_filename); 2971 if (filename) 2972 bufmgr_gem->aub_filename = strdup(filename); 2973} 2974 2975/** 2976 * Sets up AUB dumping. 2977 * 2978 * This is a trace file format that can be used with the simulator. 2979 * Packets are emitted in a format somewhat like GPU command packets. 2980 * You can set up a GTT and upload your objects into the referenced 2981 * space, then send off batchbuffers and get BMPs out the other end. 2982 */ 2983void 2984drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 2985{ 2986 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2987 int entry = 0x200003; 2988 int i; 2989 int gtt_size = 0x10000; 2990 const char *filename; 2991 2992 if (!enable) { 2993 if (bufmgr_gem->aub_file) { 2994 fclose(bufmgr_gem->aub_file); 2995 bufmgr_gem->aub_file = NULL; 2996 } 2997 return; 2998 } 2999 3000 if (geteuid() != getuid()) 3001 return; 3002 3003 if (bufmgr_gem->aub_filename) 3004 filename = bufmgr_gem->aub_filename; 3005 else 3006 filename = "intel.aub"; 3007 bufmgr_gem->aub_file = fopen(filename, "w+"); 3008 if (!bufmgr_gem->aub_file) 3009 return; 3010 3011 /* Start allocating objects from just after the GTT. */ 3012 bufmgr_gem->aub_offset = gtt_size; 3013 3014 /* Start with a (required) version packet. */ 3015 aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); 3016 aub_out(bufmgr_gem, 3017 (4 << AUB_HEADER_MAJOR_SHIFT) | 3018 (0 << AUB_HEADER_MINOR_SHIFT)); 3019 for (i = 0; i < 8; i++) { 3020 aub_out(bufmgr_gem, 0); /* app name */ 3021 } 3022 aub_out(bufmgr_gem, 0); /* timestamp */ 3023 aub_out(bufmgr_gem, 0); /* timestamp */ 3024 aub_out(bufmgr_gem, 0); /* comment len */ 3025 3026 /* Set up the GTT. The max we can handle is 256M */ 3027 aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 3028 aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); 3029 aub_out(bufmgr_gem, 0); /* subtype */ 3030 aub_out(bufmgr_gem, 0); /* offset */ 3031 aub_out(bufmgr_gem, gtt_size); /* size */ 3032 if (bufmgr_gem->gen >= 8) 3033 aub_out(bufmgr_gem, 0); 3034 for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { 3035 aub_out(bufmgr_gem, entry); 3036 } 3037} 3038 3039drm_intel_context * 3040drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3041{ 3042 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3043 struct drm_i915_gem_context_create create; 3044 drm_intel_context *context = NULL; 3045 int ret; 3046 3047 context = calloc(1, sizeof(*context)); 3048 if (!context) 3049 return NULL; 3050 3051 VG_CLEAR(create); 3052 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3053 if (ret != 0) { 3054 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3055 strerror(errno)); 3056 free(context); 3057 return NULL; 3058 } 3059 3060 context->ctx_id = create.ctx_id; 3061 context->bufmgr = bufmgr; 3062 3063 return context; 3064} 3065 3066void 3067drm_intel_gem_context_destroy(drm_intel_context *ctx) 3068{ 3069 drm_intel_bufmgr_gem *bufmgr_gem; 3070 struct drm_i915_gem_context_destroy destroy; 3071 int ret; 3072 3073 if (ctx == NULL) 3074 return; 3075 3076 VG_CLEAR(destroy); 3077 3078 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3079 destroy.ctx_id = ctx->ctx_id; 3080 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3081 &destroy); 3082 if (ret != 0) 3083 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3084 strerror(errno)); 3085 3086 free(ctx); 3087} 3088 3089int 3090drm_intel_get_reset_stats(drm_intel_context *ctx, 3091 uint32_t *reset_count, 3092 uint32_t *active, 3093 uint32_t *pending) 3094{ 3095 drm_intel_bufmgr_gem *bufmgr_gem; 3096 struct drm_i915_reset_stats stats; 3097 int ret; 3098 3099 if (ctx == NULL) 3100 return -EINVAL; 3101 3102 memset(&stats, 0, sizeof(stats)); 3103 3104 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3105 stats.ctx_id = ctx->ctx_id; 3106 ret = drmIoctl(bufmgr_gem->fd, 3107 DRM_IOCTL_I915_GET_RESET_STATS, 3108 &stats); 3109 if (ret == 0) { 3110 if (reset_count != NULL) 3111 *reset_count = stats.reset_count; 3112 3113 if (active != NULL) 3114 *active = stats.batch_active; 3115 3116 if (pending != NULL) 3117 *pending = stats.batch_pending; 3118 } 3119 3120 return ret; 3121} 3122 3123int 3124drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3125 uint32_t offset, 3126 uint64_t *result) 3127{ 3128 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3129 struct drm_i915_reg_read reg_read; 3130 int ret; 3131 3132 VG_CLEAR(reg_read); 3133 reg_read.offset = offset; 3134 3135 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 3136 3137 *result = reg_read.val; 3138 return ret; 3139} 3140 3141 3142/** 3143 * Annotate the given bo for use in aub dumping. 3144 * 3145 * \param annotations is an array of drm_intel_aub_annotation objects 3146 * describing the type of data in various sections of the bo. Each 3147 * element of the array specifies the type and subtype of a section of 3148 * the bo, and the past-the-end offset of that section. The elements 3149 * of \c annotations must be sorted so that ending_offset is 3150 * increasing. 3151 * 3152 * \param count is the number of elements in the \c annotations array. 3153 * If \c count is zero, then \c annotations will not be dereferenced. 
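 *
 * For example (a sketch only; the 4096-byte split is purely illustrative),
 * to describe a bo whose first 4096 bytes are batch commands and whose
 * remainder is untyped data:
 *
 *   drm_intel_aub_annotation notes[2] = {
 *           { AUB_TRACE_TYPE_BATCH, 0, 4096 },
 *           { AUB_TRACE_TYPE_NOTYPE, 0, bo->size },
 *   };
 *   drm_intel_bufmgr_gem_set_aub_annotations(bo, notes, 2);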
3154 * 3155 * Annotations are copied into a private data structure, so caller may 3156 * re-use the memory pointed to by \c annotations after the call 3157 * returns. 3158 * 3159 * Annotations are stored for the lifetime of the bo; to reset to the 3160 * default state (no annotations), call this function with a \c count 3161 * of zero. 3162 */ 3163void 3164drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3165 drm_intel_aub_annotation *annotations, 3166 unsigned count) 3167{ 3168 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3169 unsigned size = sizeof(*annotations) * count; 3170 drm_intel_aub_annotation *new_annotations = 3171 count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; 3172 if (new_annotations == NULL) { 3173 free(bo_gem->aub_annotations); 3174 bo_gem->aub_annotations = NULL; 3175 bo_gem->aub_annotation_count = 0; 3176 return; 3177 } 3178 memcpy(new_annotations, annotations, size); 3179 bo_gem->aub_annotations = new_annotations; 3180 bo_gem->aub_annotation_count = count; 3181} 3182 3183/** 3184 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3185 * and manage map buffer objections. 3186 * 3187 * \param fd File descriptor of the opened DRM device. 3188 */ 3189drm_intel_bufmgr * 3190drm_intel_bufmgr_gem_init(int fd, int batch_size) 3191{ 3192 drm_intel_bufmgr_gem *bufmgr_gem; 3193 struct drm_i915_gem_get_aperture aperture; 3194 drm_i915_getparam_t gp; 3195 int ret, tmp; 3196 bool exec2 = false; 3197 3198 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3199 if (bufmgr_gem == NULL) 3200 return NULL; 3201 3202 bufmgr_gem->fd = fd; 3203 3204 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3205 free(bufmgr_gem); 3206 return NULL; 3207 } 3208 3209 ret = drmIoctl(bufmgr_gem->fd, 3210 DRM_IOCTL_I915_GEM_GET_APERTURE, 3211 &aperture); 3212 3213 if (ret == 0) 3214 bufmgr_gem->gtt_size = aperture.aper_available_size; 3215 else { 3216 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3217 strerror(errno)); 3218 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3219 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3220 "May lead to reduced performance or incorrect " 3221 "rendering.\n", 3222 (int)bufmgr_gem->gtt_size / 1024); 3223 } 3224 3225 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3226 3227 if (IS_GEN2(bufmgr_gem->pci_device)) 3228 bufmgr_gem->gen = 2; 3229 else if (IS_GEN3(bufmgr_gem->pci_device)) 3230 bufmgr_gem->gen = 3; 3231 else if (IS_GEN4(bufmgr_gem->pci_device)) 3232 bufmgr_gem->gen = 4; 3233 else if (IS_GEN5(bufmgr_gem->pci_device)) 3234 bufmgr_gem->gen = 5; 3235 else if (IS_GEN6(bufmgr_gem->pci_device)) 3236 bufmgr_gem->gen = 6; 3237 else if (IS_GEN7(bufmgr_gem->pci_device)) 3238 bufmgr_gem->gen = 7; 3239 else if (IS_GEN8(bufmgr_gem->pci_device)) 3240 bufmgr_gem->gen = 8; 3241 else { 3242 free(bufmgr_gem); 3243 return NULL; 3244 } 3245 3246 if (IS_GEN3(bufmgr_gem->pci_device) && 3247 bufmgr_gem->gtt_size > 256*1024*1024) { 3248 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3249 * be used for tiled blits. To simplify the accounting, just 3250 * substract the unmappable part (fixed to 256MB on all known 3251 * gen3 devices) if the kernel advertises it. 
*/ 3252 bufmgr_gem->gtt_size -= 256*1024*1024; 3253 } 3254 3255 VG_CLEAR(gp); 3256 gp.value = &tmp; 3257 3258 gp.param = I915_PARAM_HAS_EXECBUF2; 3259 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3260 if (!ret) 3261 exec2 = true; 3262 3263 gp.param = I915_PARAM_HAS_BSD; 3264 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3265 bufmgr_gem->has_bsd = ret == 0; 3266 3267 gp.param = I915_PARAM_HAS_BLT; 3268 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3269 bufmgr_gem->has_blt = ret == 0; 3270 3271 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3272 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3273 bufmgr_gem->has_relaxed_fencing = ret == 0; 3274 3275 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3276 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3277 bufmgr_gem->has_wait_timeout = ret == 0; 3278 3279 gp.param = I915_PARAM_HAS_LLC; 3280 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3281 if (ret != 0) { 3282 /* Kernel does not support the HAS_LLC query; fall back to GPU 3283 * generation detection and assume that we have LLC on GEN6/7 3284 */ 3285 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3286 IS_GEN7(bufmgr_gem->pci_device)); 3287 } else 3288 bufmgr_gem->has_llc = *gp.value; 3289 3290 gp.param = I915_PARAM_HAS_VEBOX; 3291 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3292 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3293 3294 if (bufmgr_gem->gen < 4) { 3295 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3296 gp.value = &bufmgr_gem->available_fences; 3297 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3298 if (ret) { 3299 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3300 errno); 3301 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3302 *gp.value); 3303 bufmgr_gem->available_fences = 0; 3304 } else { 3305 /* XXX The kernel reports the total number of fences, 3306 * including any that may be pinned. 3307 * 3308 * We presume that there will be at least one pinned 3309 * fence for the scanout buffer, but there may be more 3310 * than one scanout and the user may be manually 3311 * pinning buffers. Let's move to execbuffer2 and 3312 * thereby forget the insanity of using fences... 3313 */ 3314 bufmgr_gem->available_fences -= 2; 3315 if (bufmgr_gem->available_fences < 0) 3316 bufmgr_gem->available_fences = 0; 3317 } 3318 } 3319 3320 /* Let's go with one relocation for every 2 dwords (but round down a bit 3321 * since a power of two will mean an extra page allocation for the reloc 3322 * buffer). 3323 * 3324 * Every 4 was too few for the blender benchmark.
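 *
 * Worked example: with batch_size = 4096 (purely illustrative), the batch
 * holds 1024 dwords, so max_relocs = 1024 / 2 - 2 = 510.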
3325 */ 3326 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3327 3328 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3329 bufmgr_gem->bufmgr.bo_alloc_for_render = 3330 drm_intel_gem_bo_alloc_for_render; 3331 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3332 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3333 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3334 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3335 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3336 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3337 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3338 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3339 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3340 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3341 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3342 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3343 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3344 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3345 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3346 /* Use the new one if available */ 3347 if (exec2) { 3348 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3349 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3350 } else 3351 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3352 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3353 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3354 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; 3355 bufmgr_gem->bufmgr.debug = 0; 3356 bufmgr_gem->bufmgr.check_aperture_space = 3357 drm_intel_gem_check_aperture_space; 3358 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3359 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3360 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3361 drm_intel_gem_get_pipe_from_crtc_id; 3362 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3363 3364 DRMINITLISTHEAD(&bufmgr_gem->named); 3365 init_cache_buckets(bufmgr_gem); 3366 3367 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3368 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3369 3370 return &bufmgr_gem->bufmgr; 3371} 3372
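/*
 * Typical consumer flow (a hedged sketch; the device path, batch size,
 * buffer size and "used_bytes" below are illustrative, everything else is
 * public libdrm_intel API):
 *
 *   int fd = open("/dev/dri/card0", O_RDWR);
 *   drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *   drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *   drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
 *
 *   drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *   drm_intel_bo *batch = drm_intel_bo_alloc(bufmgr, "batch", 4096, 4096);
 *   ... fill the batch, emit relocations, then: ...
 *   drm_intel_gem_bo_context_exec(batch, ctx, used_bytes, I915_EXEC_RENDER);
 *
 *   drm_intel_bo_unreference(batch);
 *   drm_intel_gem_context_destroy(ctx);
 *   drm_intel_bufmgr_destroy(bufmgr);
 */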