intel_bufmgr_gem.c revision d82d45b3
1/************************************************************************** 2 * 3 * Copyright © 2007 Red Hat Inc. 4 * Copyright © 2007-2012 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <stddef.h> 51#include <sys/ioctl.h> 52#include <sys/mman.h> 53#include <sys/stat.h> 54#include <sys/types.h> 55#include <stdbool.h> 56 57#include "errno.h" 58#ifndef ETIME 59#define ETIME ETIMEDOUT 60#endif 61#include "libdrm_lists.h" 62#include "intel_bufmgr.h" 63#include "intel_bufmgr_priv.h" 64#include "intel_chipset.h" 65#include "intel_aub.h" 66#include "string.h" 67 68#include "i915_drm.h" 69 70#ifdef HAVE_VALGRIND 71#include <valgrind.h> 72#include <memcheck.h> 73#define VG(x) x 74#else 75#define VG(x) 76#endif 77 78#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) 79 80#define DBG(...)
do { \ 81 if (bufmgr_gem->bufmgr.debug) \ 82 fprintf(stderr, __VA_ARGS__); \ 83} while (0) 84 85#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 86 87typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 88 89struct drm_intel_gem_bo_bucket { 90 drmMMListHead head; 91 unsigned long size; 92}; 93 94typedef struct _drm_intel_bufmgr_gem { 95 drm_intel_bufmgr bufmgr; 96 97 int fd; 98 99 int max_relocs; 100 101 pthread_mutex_t lock; 102 103 struct drm_i915_gem_exec_object *exec_objects; 104 struct drm_i915_gem_exec_object2 *exec2_objects; 105 drm_intel_bo **exec_bos; 106 int exec_size; 107 int exec_count; 108 109 /** Array of lists of cached gem objects of power-of-two sizes */ 110 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 111 int num_buckets; 112 time_t time; 113 114 drmMMListHead named; 115 drmMMListHead vma_cache; 116 int vma_count, vma_open, vma_max; 117 118 uint64_t gtt_size; 119 int available_fences; 120 int pci_device; 121 int gen; 122 unsigned int has_bsd : 1; 123 unsigned int has_blt : 1; 124 unsigned int has_relaxed_fencing : 1; 125 unsigned int has_llc : 1; 126 unsigned int has_wait_timeout : 1; 127 unsigned int bo_reuse : 1; 128 unsigned int no_exec : 1; 129 unsigned int has_vebox : 1; 130 bool fenced_relocs; 131 132 char *aub_filename; 133 FILE *aub_file; 134 uint32_t aub_offset; 135} drm_intel_bufmgr_gem; 136 137#define DRM_INTEL_RELOC_FENCE (1<<0) 138 139typedef struct _drm_intel_reloc_target_info { 140 drm_intel_bo *bo; 141 int flags; 142} drm_intel_reloc_target; 143 144struct _drm_intel_bo_gem { 145 drm_intel_bo bo; 146 147 atomic_t refcount; 148 uint32_t gem_handle; 149 const char *name; 150 151 /** 152 * Kenel-assigned global name for this object 153 * 154 * List contains both flink named and prime fd'd objects 155 */ 156 unsigned int global_name; 157 drmMMListHead name_list; 158 159 /** 160 * Index of the buffer within the validation list while preparing a 161 * batchbuffer execution. 162 */ 163 int validate_index; 164 165 /** 166 * Current tiling mode 167 */ 168 uint32_t tiling_mode; 169 uint32_t swizzle_mode; 170 unsigned long stride; 171 172 time_t free_time; 173 174 /** Array passed to the DRM containing relocation information. */ 175 struct drm_i915_gem_relocation_entry *relocs; 176 /** 177 * Array of info structs corresponding to relocs[i].target_handle etc 178 */ 179 drm_intel_reloc_target *reloc_target_info; 180 /** Number of entries in relocs */ 181 int reloc_count; 182 /** Mapped address for the buffer, saved across map/unmap cycles */ 183 void *mem_virtual; 184 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 185 void *gtt_virtual; 186 int map_count; 187 drmMMListHead vma_list; 188 189 /** BO cache list */ 190 drmMMListHead head; 191 192 /** 193 * Boolean of whether this BO and its children have been included in 194 * the current drm_intel_bufmgr_check_aperture_space() total. 195 */ 196 bool included_in_check_aperture; 197 198 /** 199 * Boolean of whether this buffer has been used as a relocation 200 * target and had its size accounted for, and thus can't have any 201 * further relocations added to it. 202 */ 203 bool used_as_reloc_target; 204 205 /** 206 * Boolean of whether we have encountered an error whilst building the relocation tree. 207 */ 208 bool has_error; 209 210 /** 211 * Boolean of whether this buffer can be re-used 212 */ 213 bool reusable; 214 215 /** 216 * Boolean of whether the GPU is definitely not accessing the buffer. 
217 * 218 * This is only valid when reusable, since non-reusable 219 * buffers are those that have been shared with other 220 * processes, so we don't know their state. 221 */ 222 bool idle; 223 224 /** 225 * Size in bytes of this buffer and its relocation descendants. 226 * 227 * Used to avoid costly tree walking in 228 * drm_intel_bufmgr_check_aperture in the common case. 229 */ 230 int reloc_tree_size; 231 232 /** 233 * Number of potential fence registers required by this buffer and its 234 * relocations. 235 */ 236 int reloc_tree_fences; 237 238 /** Whether we may need to do the SW_FINISH ioctl on unmap. */ 239 bool mapped_cpu_write; 240 241 uint32_t aub_offset; 242 243 drm_intel_aub_annotation *aub_annotations; 244 unsigned aub_annotation_count; 245}; 246 247static unsigned int 248drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 249 250static unsigned int 251drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 252 253static int 254drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 255 uint32_t * swizzle_mode); 256 257static int 258drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 259 uint32_t tiling_mode, 260 uint32_t stride); 261 262static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 263 time_t time); 264 265static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 266 267static void drm_intel_gem_bo_free(drm_intel_bo *bo); 268 269static unsigned long 270drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 271 uint32_t *tiling_mode) 272{ 273 unsigned long min_size, max_size; 274 unsigned long i; 275 276 if (*tiling_mode == I915_TILING_NONE) 277 return size; 278 279 /* 965+ just need multiples of page size for tiling */ 280 if (bufmgr_gem->gen >= 4) 281 return ROUND_UP_TO(size, 4096); 282 283 /* Older chips need powers of two, of at least 512k or 1M */ 284 if (bufmgr_gem->gen == 3) { 285 min_size = 1024*1024; 286 max_size = 128*1024*1024; 287 } else { 288 min_size = 512*1024; 289 max_size = 64*1024*1024; 290 } 291 292 if (size > max_size) { 293 *tiling_mode = I915_TILING_NONE; 294 return size; 295 } 296 297 /* Do we need to allocate every page for the fence? */ 298 if (bufmgr_gem->has_relaxed_fencing) 299 return ROUND_UP_TO(size, 4096); 300 301 for (i = min_size; i < size; i <<= 1) 302 ; 303 304 return i; 305} 306 307/* 308 * Round a given pitch up to the minimum required for X tiling on a 309 * given chip. We use 512 as the minimum to allow for a later tiling 310 * change. 311 */ 312static unsigned long 313drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 314 unsigned long pitch, uint32_t *tiling_mode) 315{ 316 unsigned long tile_width; 317 unsigned long i; 318 319 /* If untiled, then just align it so that we can do rendering 320 * to it with the 3D engine. 321 */ 322 if (*tiling_mode == I915_TILING_NONE) 323 return ALIGN(pitch, 64); 324 325 if (*tiling_mode == I915_TILING_X 326 || (IS_915(bufmgr_gem->pci_device) 327 && *tiling_mode == I915_TILING_Y)) 328 tile_width = 512; 329 else 330 tile_width = 128; 331 332 /* 965 is flexible */ 333 if (bufmgr_gem->gen >= 4) 334 return ROUND_UP_TO(pitch, tile_width); 335 336 /* The older hardware has a maximum pitch of 8192 with tiled 337 * surfaces, so fall back to untiled if it's too large.
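 *
 * For example (illustrative numbers, assuming gen3 with X tiling): a
 * pitch of 1000 bytes is rounded up through the power-of-two loop below
 * (512 -> 1024) and 1024 is returned, while a pitch of 9000 bytes
 * exceeds 8192, so tiling is dropped and ALIGN(9000, 64) = 9024 is
 * returned instead.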
338 */ 339 if (pitch > 8192) { 340 *tiling_mode = I915_TILING_NONE; 341 return ALIGN(pitch, 64); 342 } 343 344 /* Pre-965 needs power of two tile width */ 345 for (i = tile_width; i < pitch; i <<= 1) 346 ; 347 348 return i; 349} 350 351static struct drm_intel_gem_bo_bucket * 352drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 353 unsigned long size) 354{ 355 int i; 356 357 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 358 struct drm_intel_gem_bo_bucket *bucket = 359 &bufmgr_gem->cache_bucket[i]; 360 if (bucket->size >= size) { 361 return bucket; 362 } 363 } 364 365 return NULL; 366} 367 368static void 369drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 370{ 371 int i, j; 372 373 for (i = 0; i < bufmgr_gem->exec_count; i++) { 374 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 375 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 376 377 if (bo_gem->relocs == NULL) { 378 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 379 bo_gem->name); 380 continue; 381 } 382 383 for (j = 0; j < bo_gem->reloc_count; j++) { 384 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 385 drm_intel_bo_gem *target_gem = 386 (drm_intel_bo_gem *) target_bo; 387 388 DBG("%2d: %d (%s)@0x%08llx -> " 389 "%d (%s)@0x%08llx + 0x%08x\n", 390 i, 391 bo_gem->gem_handle, bo_gem->name, 392 (unsigned long long)bo_gem->relocs[j].offset, 393 target_gem->gem_handle, 394 target_gem->name, 395 (unsigned long long)target_bo->offset64, 396 bo_gem->relocs[j].delta); 397 } 398 } 399} 400 401static inline void 402drm_intel_gem_bo_reference(drm_intel_bo *bo) 403{ 404 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 405 406 atomic_inc(&bo_gem->refcount); 407} 408 409/** 410 * Adds the given buffer to the list of buffers to be validated (moved into the 411 * appropriate memory type) with the next batch submission. 412 * 413 * If a buffer is validated multiple times in a batch submission, it ends up 414 * with the intersection of the memory type flags and the union of the 415 * access flags. 416 */ 417static void 418drm_intel_add_validate_buffer(drm_intel_bo *bo) 419{ 420 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 421 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 422 int index; 423 424 if (bo_gem->validate_index != -1) 425 return; 426 427 /* Extend the array of validation entries as necessary. 
*/ 428 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 429 int new_size = bufmgr_gem->exec_size * 2; 430 431 if (new_size == 0) 432 new_size = 5; 433 434 bufmgr_gem->exec_objects = 435 realloc(bufmgr_gem->exec_objects, 436 sizeof(*bufmgr_gem->exec_objects) * new_size); 437 bufmgr_gem->exec_bos = 438 realloc(bufmgr_gem->exec_bos, 439 sizeof(*bufmgr_gem->exec_bos) * new_size); 440 bufmgr_gem->exec_size = new_size; 441 } 442 443 index = bufmgr_gem->exec_count; 444 bo_gem->validate_index = index; 445 /* Fill in array entry */ 446 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 447 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 448 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 449 bufmgr_gem->exec_objects[index].alignment = 0; 450 bufmgr_gem->exec_objects[index].offset = 0; 451 bufmgr_gem->exec_bos[index] = bo; 452 bufmgr_gem->exec_count++; 453} 454 455static void 456drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 457{ 458 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 459 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 460 int index; 461 462 if (bo_gem->validate_index != -1) { 463 if (need_fence) 464 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 465 EXEC_OBJECT_NEEDS_FENCE; 466 return; 467 } 468 469 /* Extend the array of validation entries as necessary. */ 470 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 471 int new_size = bufmgr_gem->exec_size * 2; 472 473 if (new_size == 0) 474 new_size = 5; 475 476 bufmgr_gem->exec2_objects = 477 realloc(bufmgr_gem->exec2_objects, 478 sizeof(*bufmgr_gem->exec2_objects) * new_size); 479 bufmgr_gem->exec_bos = 480 realloc(bufmgr_gem->exec_bos, 481 sizeof(*bufmgr_gem->exec_bos) * new_size); 482 bufmgr_gem->exec_size = new_size; 483 } 484 485 index = bufmgr_gem->exec_count; 486 bo_gem->validate_index = index; 487 /* Fill in array entry */ 488 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 489 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 490 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 491 bufmgr_gem->exec2_objects[index].alignment = 0; 492 bufmgr_gem->exec2_objects[index].offset = 0; 493 bufmgr_gem->exec_bos[index] = bo; 494 bufmgr_gem->exec2_objects[index].flags = 0; 495 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 496 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 497 if (need_fence) { 498 bufmgr_gem->exec2_objects[index].flags |= 499 EXEC_OBJECT_NEEDS_FENCE; 500 } 501 bufmgr_gem->exec_count++; 502} 503 504#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 505 sizeof(uint32_t)) 506 507static void 508drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 509 drm_intel_bo_gem *bo_gem) 510{ 511 int size; 512 513 assert(!bo_gem->used_as_reloc_target); 514 515 /* The older chipsets are far-less flexible in terms of tiling, 516 * and require tiled buffer to be size aligned in the aperture. 517 * This means that in the worst possible case we will need a hole 518 * twice as large as the object in order for it to fit into the 519 * aperture. Optimal packing is for wimps. 
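 *
 * Worked example (illustrative numbers): a 700KB X-tiled buffer on gen3
 * with relaxed fencing has min_size rounded up to 1MB, so its
 * reloc_tree_size is accounted as 2MB; without relaxed fencing the same
 * buffer is simply accounted as 2 * 700KB.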
520 */ 521 size = bo_gem->bo.size; 522 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 523 int min_size; 524 525 if (bufmgr_gem->has_relaxed_fencing) { 526 if (bufmgr_gem->gen == 3) 527 min_size = 1024*1024; 528 else 529 min_size = 512*1024; 530 531 while (min_size < size) 532 min_size *= 2; 533 } else 534 min_size = size; 535 536 /* Account for worst-case alignment. */ 537 size = 2 * min_size; 538 } 539 540 bo_gem->reloc_tree_size = size; 541} 542 543static int 544drm_intel_setup_reloc_list(drm_intel_bo *bo) 545{ 546 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 547 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 548 unsigned int max_relocs = bufmgr_gem->max_relocs; 549 550 if (bo->size / 4 < max_relocs) 551 max_relocs = bo->size / 4; 552 553 bo_gem->relocs = malloc(max_relocs * 554 sizeof(struct drm_i915_gem_relocation_entry)); 555 bo_gem->reloc_target_info = malloc(max_relocs * 556 sizeof(drm_intel_reloc_target)); 557 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 558 bo_gem->has_error = true; 559 560 free (bo_gem->relocs); 561 bo_gem->relocs = NULL; 562 563 free (bo_gem->reloc_target_info); 564 bo_gem->reloc_target_info = NULL; 565 566 return 1; 567 } 568 569 return 0; 570} 571 572static int 573drm_intel_gem_bo_busy(drm_intel_bo *bo) 574{ 575 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 576 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 577 struct drm_i915_gem_busy busy; 578 int ret; 579 580 if (bo_gem->reusable && bo_gem->idle) 581 return false; 582 583 VG_CLEAR(busy); 584 busy.handle = bo_gem->gem_handle; 585 586 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 587 if (ret == 0) { 588 bo_gem->idle = !busy.busy; 589 return busy.busy; 590 } else { 591 return false; 592 } 593 return (ret == 0 && busy.busy); 594} 595 596static int 597drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 598 drm_intel_bo_gem *bo_gem, int state) 599{ 600 struct drm_i915_gem_madvise madv; 601 602 VG_CLEAR(madv); 603 madv.handle = bo_gem->gem_handle; 604 madv.madv = state; 605 madv.retained = 1; 606 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 607 608 return madv.retained; 609} 610 611static int 612drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 613{ 614 return drm_intel_gem_bo_madvise_internal 615 ((drm_intel_bufmgr_gem *) bo->bufmgr, 616 (drm_intel_bo_gem *) bo, 617 madv); 618} 619 620/* drop the oldest entries that have been purged by the kernel */ 621static void 622drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 623 struct drm_intel_gem_bo_bucket *bucket) 624{ 625 while (!DRMLISTEMPTY(&bucket->head)) { 626 drm_intel_bo_gem *bo_gem; 627 628 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 629 bucket->head.next, head); 630 if (drm_intel_gem_bo_madvise_internal 631 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 632 break; 633 634 DRMLISTDEL(&bo_gem->head); 635 drm_intel_gem_bo_free(&bo_gem->bo); 636 } 637} 638 639static drm_intel_bo * 640drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 641 const char *name, 642 unsigned long size, 643 unsigned long flags, 644 uint32_t tiling_mode, 645 unsigned long stride) 646{ 647 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 648 drm_intel_bo_gem *bo_gem; 649 unsigned int page_size = getpagesize(); 650 int ret; 651 struct drm_intel_gem_bo_bucket *bucket; 652 bool alloc_from_cache; 653 unsigned long bo_size; 654 bool for_render = false; 655 656 if (flags & BO_ALLOC_FOR_RENDER) 657 
for_render = true; 658 659 /* Round the allocated size up to a power of two number of pages. */ 660 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 661 662 /* If we don't have caching at this size, don't actually round the 663 * allocation up. 664 */ 665 if (bucket == NULL) { 666 bo_size = size; 667 if (bo_size < page_size) 668 bo_size = page_size; 669 } else { 670 bo_size = bucket->size; 671 } 672 673 pthread_mutex_lock(&bufmgr_gem->lock); 674 /* Get a buffer out of the cache if available */ 675retry: 676 alloc_from_cache = false; 677 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 678 if (for_render) { 679 /* Allocate new render-target BOs from the tail (MRU) 680 * of the list, as it will likely be hot in the GPU 681 * cache and in the aperture for us. 682 */ 683 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 684 bucket->head.prev, head); 685 DRMLISTDEL(&bo_gem->head); 686 alloc_from_cache = true; 687 } else { 688 /* For non-render-target BOs (where we're probably 689 * going to map it first thing in order to fill it 690 * with data), check if the last BO in the cache is 691 * unbusy, and only reuse in that case. Otherwise, 692 * allocating a new buffer is probably faster than 693 * waiting for the GPU to finish. 694 */ 695 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 696 bucket->head.next, head); 697 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 698 alloc_from_cache = true; 699 DRMLISTDEL(&bo_gem->head); 700 } 701 } 702 703 if (alloc_from_cache) { 704 if (!drm_intel_gem_bo_madvise_internal 705 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 706 drm_intel_gem_bo_free(&bo_gem->bo); 707 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 708 bucket); 709 goto retry; 710 } 711 712 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 713 tiling_mode, 714 stride)) { 715 drm_intel_gem_bo_free(&bo_gem->bo); 716 goto retry; 717 } 718 } 719 } 720 pthread_mutex_unlock(&bufmgr_gem->lock); 721 722 if (!alloc_from_cache) { 723 struct drm_i915_gem_create create; 724 725 bo_gem = calloc(1, sizeof(*bo_gem)); 726 if (!bo_gem) 727 return NULL; 728 729 bo_gem->bo.size = bo_size; 730 731 VG_CLEAR(create); 732 create.size = bo_size; 733 734 ret = drmIoctl(bufmgr_gem->fd, 735 DRM_IOCTL_I915_GEM_CREATE, 736 &create); 737 bo_gem->gem_handle = create.handle; 738 bo_gem->bo.handle = bo_gem->gem_handle; 739 if (ret != 0) { 740 free(bo_gem); 741 return NULL; 742 } 743 bo_gem->bo.bufmgr = bufmgr; 744 745 bo_gem->tiling_mode = I915_TILING_NONE; 746 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 747 bo_gem->stride = 0; 748 749 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 750 tiling_mode, 751 stride)) { 752 drm_intel_gem_bo_free(&bo_gem->bo); 753 return NULL; 754 } 755 756 DRMINITLISTHEAD(&bo_gem->name_list); 757 DRMINITLISTHEAD(&bo_gem->vma_list); 758 } 759 760 bo_gem->name = name; 761 atomic_set(&bo_gem->refcount, 1); 762 bo_gem->validate_index = -1; 763 bo_gem->reloc_tree_fences = 0; 764 bo_gem->used_as_reloc_target = false; 765 bo_gem->has_error = false; 766 bo_gem->reusable = true; 767 bo_gem->aub_annotations = NULL; 768 bo_gem->aub_annotation_count = 0; 769 770 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 771 772 DBG("bo_create: buf %d (%s) %ldb\n", 773 bo_gem->gem_handle, bo_gem->name, size); 774 775 return &bo_gem->bo; 776} 777 778static drm_intel_bo * 779drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 780 const char *name, 781 unsigned long size, 782 unsigned int alignment) 783{ 784 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 785 BO_ALLOC_FOR_RENDER, 786 
I915_TILING_NONE, 0); 787} 788 789static drm_intel_bo * 790drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 791 const char *name, 792 unsigned long size, 793 unsigned int alignment) 794{ 795 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 796 I915_TILING_NONE, 0); 797} 798 799static drm_intel_bo * 800drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 801 int x, int y, int cpp, uint32_t *tiling_mode, 802 unsigned long *pitch, unsigned long flags) 803{ 804 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 805 unsigned long size, stride; 806 uint32_t tiling; 807 808 do { 809 unsigned long aligned_y, height_alignment; 810 811 tiling = *tiling_mode; 812 813 /* If we're tiled, our allocations are in 8 or 32-row blocks, 814 * so failure to align our height means that we won't allocate 815 * enough pages. 816 * 817 * If we're untiled, we still have to align to 2 rows high 818 * because the data port accesses 2x2 blocks even if the 819 * bottom row isn't to be rendered, so failure to align means 820 * we could walk off the end of the GTT and fault. This is 821 * documented on 965, and may be the case on older chipsets 822 * too so we try to be careful. 823 */ 824 aligned_y = y; 825 height_alignment = 2; 826 827 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 828 height_alignment = 16; 829 else if (tiling == I915_TILING_X 830 || (IS_915(bufmgr_gem->pci_device) 831 && tiling == I915_TILING_Y)) 832 height_alignment = 8; 833 else if (tiling == I915_TILING_Y) 834 height_alignment = 32; 835 aligned_y = ALIGN(y, height_alignment); 836 837 stride = x * cpp; 838 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 839 size = stride * aligned_y; 840 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 841 } while (*tiling_mode != tiling); 842 *pitch = stride; 843 844 if (tiling == I915_TILING_NONE) 845 stride = 0; 846 847 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 848 tiling, stride); 849} 850 851/** 852 * Returns a drm_intel_bo wrapping the given buffer object handle. 853 * 854 * This can be used when one application needs to pass a buffer object 855 * to another. 856 */ 857drm_intel_bo * 858drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 859 const char *name, 860 unsigned int handle) 861{ 862 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 863 drm_intel_bo_gem *bo_gem; 864 int ret; 865 struct drm_gem_open open_arg; 866 struct drm_i915_gem_get_tiling get_tiling; 867 drmMMListHead *list; 868 869 /* At the moment most applications only have a few named bo. 870 * For instance, in a DRI client only the render buffers passed 871 * between X and the client are named. And since X returns the 872 * alternating names for the front/back buffer a linear search 873 * provides a sufficiently fast match. 
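 *
 * A minimal flink sharing sketch (error handling omitted; "scanout" is
 * just an arbitrary debug name):
 *
 *   exporting process:  uint32_t name;
 *                       drm_intel_bo_flink(bo, &name);
 *   importing process:  bo = drm_intel_bo_gem_create_from_name(bufmgr,
 *                                                              "scanout",
 *                                                              name);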
874 */ 875 for (list = bufmgr_gem->named.next; 876 list != &bufmgr_gem->named; 877 list = list->next) { 878 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 879 if (bo_gem->global_name == handle) { 880 drm_intel_gem_bo_reference(&bo_gem->bo); 881 return &bo_gem->bo; 882 } 883 } 884 885 VG_CLEAR(open_arg); 886 open_arg.name = handle; 887 ret = drmIoctl(bufmgr_gem->fd, 888 DRM_IOCTL_GEM_OPEN, 889 &open_arg); 890 if (ret != 0) { 891 DBG("Couldn't reference %s handle 0x%08x: %s\n", 892 name, handle, strerror(errno)); 893 return NULL; 894 } 895 /* Now see if someone has used a prime handle to get this 896 * object from the kernel before by looking through the list 897 * again for a matching gem_handle 898 */ 899 for (list = bufmgr_gem->named.next; 900 list != &bufmgr_gem->named; 901 list = list->next) { 902 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 903 if (bo_gem->gem_handle == open_arg.handle) { 904 drm_intel_gem_bo_reference(&bo_gem->bo); 905 return &bo_gem->bo; 906 } 907 } 908 909 bo_gem = calloc(1, sizeof(*bo_gem)); 910 if (!bo_gem) 911 return NULL; 912 913 bo_gem->bo.size = open_arg.size; 914 bo_gem->bo.offset = 0; 915 bo_gem->bo.offset64 = 0; 916 bo_gem->bo.virtual = NULL; 917 bo_gem->bo.bufmgr = bufmgr; 918 bo_gem->name = name; 919 atomic_set(&bo_gem->refcount, 1); 920 bo_gem->validate_index = -1; 921 bo_gem->gem_handle = open_arg.handle; 922 bo_gem->bo.handle = open_arg.handle; 923 bo_gem->global_name = handle; 924 bo_gem->reusable = false; 925 926 VG_CLEAR(get_tiling); 927 get_tiling.handle = bo_gem->gem_handle; 928 ret = drmIoctl(bufmgr_gem->fd, 929 DRM_IOCTL_I915_GEM_GET_TILING, 930 &get_tiling); 931 if (ret != 0) { 932 drm_intel_gem_bo_unreference(&bo_gem->bo); 933 return NULL; 934 } 935 bo_gem->tiling_mode = get_tiling.tiling_mode; 936 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 937 /* XXX stride is unknown */ 938 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 939 940 DRMINITLISTHEAD(&bo_gem->vma_list); 941 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 942 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 943 944 return &bo_gem->bo; 945} 946 947static void 948drm_intel_gem_bo_free(drm_intel_bo *bo) 949{ 950 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 951 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 952 struct drm_gem_close close; 953 int ret; 954 955 DRMLISTDEL(&bo_gem->vma_list); 956 if (bo_gem->mem_virtual) { 957 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 958 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 959 bufmgr_gem->vma_count--; 960 } 961 if (bo_gem->gtt_virtual) { 962 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 963 bufmgr_gem->vma_count--; 964 } 965 966 /* Close this object */ 967 VG_CLEAR(close); 968 close.handle = bo_gem->gem_handle; 969 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 970 if (ret != 0) { 971 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 972 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 973 } 974 free(bo_gem->aub_annotations); 975 free(bo); 976} 977 978static void 979drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 980{ 981#if HAVE_VALGRIND 982 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 983 984 if (bo_gem->mem_virtual) 985 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 986 987 if (bo_gem->gtt_virtual) 988 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 989#endif 990} 991 992/** Frees all cached buffers significantly older than @time. 
*/ 993static void 994drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 995{ 996 int i; 997 998 if (bufmgr_gem->time == time) 999 return; 1000 1001 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1002 struct drm_intel_gem_bo_bucket *bucket = 1003 &bufmgr_gem->cache_bucket[i]; 1004 1005 while (!DRMLISTEMPTY(&bucket->head)) { 1006 drm_intel_bo_gem *bo_gem; 1007 1008 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1009 bucket->head.next, head); 1010 if (time - bo_gem->free_time <= 1) 1011 break; 1012 1013 DRMLISTDEL(&bo_gem->head); 1014 1015 drm_intel_gem_bo_free(&bo_gem->bo); 1016 } 1017 } 1018 1019 bufmgr_gem->time = time; 1020} 1021 1022static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1023{ 1024 int limit; 1025 1026 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1027 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1028 1029 if (bufmgr_gem->vma_max < 0) 1030 return; 1031 1032 /* We may need to evict a few entries in order to create new mmaps */ 1033 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1034 if (limit < 0) 1035 limit = 0; 1036 1037 while (bufmgr_gem->vma_count > limit) { 1038 drm_intel_bo_gem *bo_gem; 1039 1040 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1041 bufmgr_gem->vma_cache.next, 1042 vma_list); 1043 assert(bo_gem->map_count == 0); 1044 DRMLISTDELINIT(&bo_gem->vma_list); 1045 1046 if (bo_gem->mem_virtual) { 1047 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1048 bo_gem->mem_virtual = NULL; 1049 bufmgr_gem->vma_count--; 1050 } 1051 if (bo_gem->gtt_virtual) { 1052 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1053 bo_gem->gtt_virtual = NULL; 1054 bufmgr_gem->vma_count--; 1055 } 1056 } 1057} 1058 1059static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1060 drm_intel_bo_gem *bo_gem) 1061{ 1062 bufmgr_gem->vma_open--; 1063 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1064 if (bo_gem->mem_virtual) 1065 bufmgr_gem->vma_count++; 1066 if (bo_gem->gtt_virtual) 1067 bufmgr_gem->vma_count++; 1068 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1069} 1070 1071static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1072 drm_intel_bo_gem *bo_gem) 1073{ 1074 bufmgr_gem->vma_open++; 1075 DRMLISTDEL(&bo_gem->vma_list); 1076 if (bo_gem->mem_virtual) 1077 bufmgr_gem->vma_count--; 1078 if (bo_gem->gtt_virtual) 1079 bufmgr_gem->vma_count--; 1080 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1081} 1082 1083static void 1084drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1085{ 1086 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1087 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1088 struct drm_intel_gem_bo_bucket *bucket; 1089 int i; 1090 1091 /* Unreference all the target buffers */ 1092 for (i = 0; i < bo_gem->reloc_count; i++) { 1093 if (bo_gem->reloc_target_info[i].bo != bo) { 1094 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1095 reloc_target_info[i].bo, 1096 time); 1097 } 1098 } 1099 bo_gem->reloc_count = 0; 1100 bo_gem->used_as_reloc_target = false; 1101 1102 DBG("bo_unreference final: %d (%s)\n", 1103 bo_gem->gem_handle, bo_gem->name); 1104 1105 /* release memory associated with this object */ 1106 if (bo_gem->reloc_target_info) { 1107 free(bo_gem->reloc_target_info); 1108 bo_gem->reloc_target_info = NULL; 1109 } 1110 if (bo_gem->relocs) { 1111 free(bo_gem->relocs); 1112 bo_gem->relocs = NULL; 1113 } 1114 1115 /* Clear any left-over mappings */ 1116 if (bo_gem->map_count) { 1117 DBG("bo freed with non-zero 
map-count %d\n", bo_gem->map_count); 1118 bo_gem->map_count = 0; 1119 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1120 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1121 } 1122 1123 DRMLISTDEL(&bo_gem->name_list); 1124 1125 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1126 /* Put the buffer into our internal cache for reuse if we can. */ 1127 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1128 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1129 I915_MADV_DONTNEED)) { 1130 bo_gem->free_time = time; 1131 1132 bo_gem->name = NULL; 1133 bo_gem->validate_index = -1; 1134 1135 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1136 } else { 1137 drm_intel_gem_bo_free(bo); 1138 } 1139} 1140 1141static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1142 time_t time) 1143{ 1144 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1145 1146 assert(atomic_read(&bo_gem->refcount) > 0); 1147 if (atomic_dec_and_test(&bo_gem->refcount)) 1148 drm_intel_gem_bo_unreference_final(bo, time); 1149} 1150 1151static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1152{ 1153 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1154 1155 assert(atomic_read(&bo_gem->refcount) > 0); 1156 if (atomic_dec_and_test(&bo_gem->refcount)) { 1157 drm_intel_bufmgr_gem *bufmgr_gem = 1158 (drm_intel_bufmgr_gem *) bo->bufmgr; 1159 struct timespec time; 1160 1161 clock_gettime(CLOCK_MONOTONIC, &time); 1162 1163 pthread_mutex_lock(&bufmgr_gem->lock); 1164 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1165 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1166 pthread_mutex_unlock(&bufmgr_gem->lock); 1167 } 1168} 1169 1170static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1171{ 1172 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1173 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1174 struct drm_i915_gem_set_domain set_domain; 1175 int ret; 1176 1177 pthread_mutex_lock(&bufmgr_gem->lock); 1178 1179 if (bo_gem->map_count++ == 0) 1180 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1181 1182 if (!bo_gem->mem_virtual) { 1183 struct drm_i915_gem_mmap mmap_arg; 1184 1185 DBG("bo_map: %d (%s), map_count=%d\n", 1186 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1187 1188 VG_CLEAR(mmap_arg); 1189 mmap_arg.handle = bo_gem->gem_handle; 1190 mmap_arg.offset = 0; 1191 mmap_arg.size = bo->size; 1192 ret = drmIoctl(bufmgr_gem->fd, 1193 DRM_IOCTL_I915_GEM_MMAP, 1194 &mmap_arg); 1195 if (ret != 0) { 1196 ret = -errno; 1197 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1198 __FILE__, __LINE__, bo_gem->gem_handle, 1199 bo_gem->name, strerror(errno)); 1200 if (--bo_gem->map_count == 0) 1201 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1202 pthread_mutex_unlock(&bufmgr_gem->lock); 1203 return ret; 1204 } 1205 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1206 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1207 } 1208 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1209 bo_gem->mem_virtual); 1210 bo->virtual = bo_gem->mem_virtual; 1211 1212 VG_CLEAR(set_domain); 1213 set_domain.handle = bo_gem->gem_handle; 1214 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1215 if (write_enable) 1216 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1217 else 1218 set_domain.write_domain = 0; 1219 ret = drmIoctl(bufmgr_gem->fd, 1220 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1221 &set_domain); 1222 if (ret != 0) { 1223 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1224 __FILE__, 
__LINE__, bo_gem->gem_handle, 1225 strerror(errno)); 1226 } 1227 1228 if (write_enable) 1229 bo_gem->mapped_cpu_write = true; 1230 1231 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1232 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1233 pthread_mutex_unlock(&bufmgr_gem->lock); 1234 1235 return 0; 1236} 1237 1238static int 1239map_gtt(drm_intel_bo *bo) 1240{ 1241 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1242 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1243 int ret; 1244 1245 if (bo_gem->map_count++ == 0) 1246 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1247 1248 /* Get a mapping of the buffer if we haven't before. */ 1249 if (bo_gem->gtt_virtual == NULL) { 1250 struct drm_i915_gem_mmap_gtt mmap_arg; 1251 1252 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1253 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1254 1255 VG_CLEAR(mmap_arg); 1256 mmap_arg.handle = bo_gem->gem_handle; 1257 1258 /* Get the fake offset back... */ 1259 ret = drmIoctl(bufmgr_gem->fd, 1260 DRM_IOCTL_I915_GEM_MMAP_GTT, 1261 &mmap_arg); 1262 if (ret != 0) { 1263 ret = -errno; 1264 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1265 __FILE__, __LINE__, 1266 bo_gem->gem_handle, bo_gem->name, 1267 strerror(errno)); 1268 if (--bo_gem->map_count == 0) 1269 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1270 return ret; 1271 } 1272 1273 /* and mmap it */ 1274 ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1275 &bo_gem->gtt_virtual); 1276 if (ret) { 1277 bo_gem->gtt_virtual = NULL; 1278 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1279 __FILE__, __LINE__, 1280 bo_gem->gem_handle, bo_gem->name, 1281 strerror(errno)); 1282 if (--bo_gem->map_count == 0) 1283 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1284 return ret; 1285 } 1286 } 1287 1288 bo->virtual = bo_gem->gtt_virtual; 1289 1290 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1291 bo_gem->gtt_virtual); 1292 1293 return 0; 1294} 1295 1296int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1297{ 1298 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1299 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1300 struct drm_i915_gem_set_domain set_domain; 1301 int ret; 1302 1303 pthread_mutex_lock(&bufmgr_gem->lock); 1304 1305 ret = map_gtt(bo); 1306 if (ret) { 1307 pthread_mutex_unlock(&bufmgr_gem->lock); 1308 return ret; 1309 } 1310 1311 /* Now move it to the GTT domain so that the GPU and CPU 1312 * caches are flushed and the GPU isn't actively using the 1313 * buffer. 1314 * 1315 * The pagefault handler does this domain change for us when 1316 * it has unbound the BO from the GTT, but it's up to us to 1317 * tell it when we're about to use things if we had done 1318 * rendering and it still happens to be bound to the GTT. 
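 *
 * A minimal write-through-the-GTT sketch (assumes bo came from this
 * bufmgr and src/len are caller-provided):
 *
 *   if (drm_intel_gem_bo_map_gtt(bo) == 0) {
 *           memcpy(bo->virtual, src, len);
 *           drm_intel_gem_bo_unmap_gtt(bo);
 *   }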
1319 */ 1320 VG_CLEAR(set_domain); 1321 set_domain.handle = bo_gem->gem_handle; 1322 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1323 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1324 ret = drmIoctl(bufmgr_gem->fd, 1325 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1326 &set_domain); 1327 if (ret != 0) { 1328 DBG("%s:%d: Error setting domain %d: %s\n", 1329 __FILE__, __LINE__, bo_gem->gem_handle, 1330 strerror(errno)); 1331 } 1332 1333 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1334 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1335 pthread_mutex_unlock(&bufmgr_gem->lock); 1336 1337 return 0; 1338} 1339 1340/** 1341 * Performs a mapping of the buffer object like the normal GTT 1342 * mapping, but avoids waiting for the GPU to be done reading from or 1343 * rendering to the buffer. 1344 * 1345 * This is used in the implementation of GL_ARB_map_buffer_range: The 1346 * user asks to create a buffer, then does a mapping, fills some 1347 * space, runs a drawing command, then asks to map it again without 1348 * synchronizing because it guarantees that it won't write over the 1349 * data that the GPU is busy using (or, more specifically, that if it 1350 * does write over the data, it acknowledges that rendering is 1351 * undefined). 1352 */ 1353 1354int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1355{ 1356 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1357#ifdef HAVE_VALGRIND 1358 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1359#endif 1360 int ret; 1361 1362 /* If the CPU cache isn't coherent with the GTT, then use a 1363 * regular synchronized mapping. The problem is that we don't 1364 * track where the buffer was last used on the CPU side in 1365 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 1366 * we would potentially corrupt the buffer even when the user 1367 * does reasonable things. 1368 */ 1369 if (!bufmgr_gem->has_llc) 1370 return drm_intel_gem_bo_map_gtt(bo); 1371 1372 pthread_mutex_lock(&bufmgr_gem->lock); 1373 1374 ret = map_gtt(bo); 1375 if (ret == 0) { 1376 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1377 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1378 } 1379 1380 pthread_mutex_unlock(&bufmgr_gem->lock); 1381 1382 return ret; 1383} 1384 1385static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1386{ 1387 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1388 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1389 int ret = 0; 1390 1391 if (bo == NULL) 1392 return 0; 1393 1394 pthread_mutex_lock(&bufmgr_gem->lock); 1395 1396 if (bo_gem->map_count <= 0) { 1397 DBG("attempted to unmap an unmapped bo\n"); 1398 pthread_mutex_unlock(&bufmgr_gem->lock); 1399 /* Preserve the old behaviour of just treating this as a 1400 * no-op rather than reporting the error. 1401 */ 1402 return 0; 1403 } 1404 1405 if (bo_gem->mapped_cpu_write) { 1406 struct drm_i915_gem_sw_finish sw_finish; 1407 1408 /* Cause a flush to happen if the buffer's pinned for 1409 * scanout, so the results show up in a timely manner. 1410 * Unlike GTT set domains, this only does work if the 1411 * buffer should be scanout-related. 1412 */ 1413 VG_CLEAR(sw_finish); 1414 sw_finish.handle = bo_gem->gem_handle; 1415 ret = drmIoctl(bufmgr_gem->fd, 1416 DRM_IOCTL_I915_GEM_SW_FINISH, 1417 &sw_finish); 1418 ret = ret == -1 ? 
-errno : 0; 1419 1420 bo_gem->mapped_cpu_write = false; 1421 } 1422 1423 /* We need to unmap after every invocation as we cannot track 1424 * an open vma for every bo as that will exhaust the system 1425 * limits and cause later failures. 1426 */ 1427 if (--bo_gem->map_count == 0) { 1428 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1429 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1430 bo->virtual = NULL; 1431 } 1432 pthread_mutex_unlock(&bufmgr_gem->lock); 1433 1434 return ret; 1435} 1436 1437int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1438{ 1439 return drm_intel_gem_bo_unmap(bo); 1440} 1441 1442static int 1443drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1444 unsigned long size, const void *data) 1445{ 1446 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1447 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1448 struct drm_i915_gem_pwrite pwrite; 1449 int ret; 1450 1451 VG_CLEAR(pwrite); 1452 pwrite.handle = bo_gem->gem_handle; 1453 pwrite.offset = offset; 1454 pwrite.size = size; 1455 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1456 ret = drmIoctl(bufmgr_gem->fd, 1457 DRM_IOCTL_I915_GEM_PWRITE, 1458 &pwrite); 1459 if (ret != 0) { 1460 ret = -errno; 1461 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1462 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1463 (int)size, strerror(errno)); 1464 } 1465 1466 return ret; 1467} 1468 1469static int 1470drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1471{ 1472 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1473 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1474 int ret; 1475 1476 VG_CLEAR(get_pipe_from_crtc_id); 1477 get_pipe_from_crtc_id.crtc_id = crtc_id; 1478 ret = drmIoctl(bufmgr_gem->fd, 1479 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1480 &get_pipe_from_crtc_id); 1481 if (ret != 0) { 1482 /* We return -1 here to signal that we don't 1483 * know which pipe is associated with this crtc. 1484 * This lets the caller know that this information 1485 * isn't available; using the wrong pipe for 1486 * vblank waiting can cause the chipset to lock up 1487 */ 1488 return -1; 1489 } 1490 1491 return get_pipe_from_crtc_id.pipe; 1492} 1493 1494static int 1495drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1496 unsigned long size, void *data) 1497{ 1498 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1499 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1500 struct drm_i915_gem_pread pread; 1501 int ret; 1502 1503 VG_CLEAR(pread); 1504 pread.handle = bo_gem->gem_handle; 1505 pread.offset = offset; 1506 pread.size = size; 1507 pread.data_ptr = (uint64_t) (uintptr_t) data; 1508 ret = drmIoctl(bufmgr_gem->fd, 1509 DRM_IOCTL_I915_GEM_PREAD, 1510 &pread); 1511 if (ret != 0) { 1512 ret = -errno; 1513 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1514 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1515 (int)size, strerror(errno)); 1516 } 1517 1518 return ret; 1519} 1520 1521/** Waits for all GPU rendering with the object to have completed. */ 1522static void 1523drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1524{ 1525 drm_intel_gem_bo_start_gtt_access(bo, 1); 1526} 1527 1528/** 1529 * Waits on a BO for the given amount of time. 1530 * 1531 * @bo: buffer object to wait for 1532 * @timeout_ns: amount of time to wait in nanoseconds. 1533 * If value is less than 0, an infinite wait will occur. 1534 * 1535 * Returns 0 if the wait was successful, i.e.
the last batch referencing the 1536 * object has completed within the allotted time. Otherwise some negative return 1537 * value describes the error. Of particular interest is -ETIME when the wait has 1538 * failed to yield the desired result. 1539 * 1540 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows 1541 * the operation to give up after a certain amount of time. Another subtle 1542 * difference is the internal locking semantics are different (this variant does 1543 * not hold the lock for the duration of the wait). This makes the wait subject 1544 * to a larger userspace race window. 1545 * 1546 * The implementation shall wait until the object is no longer actively 1547 * referenced within a batch buffer at the time of the call. The wait does 1548 * not guarantee that the buffer will not be re-issued via another thread or a 1549 * flinked handle. Userspace must make sure this race does not occur if such 1550 * precision is important. 1551 */ 1552int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) 1553{ 1554 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1555 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1556 struct drm_i915_gem_wait wait; 1557 int ret; 1558 1559 if (!bufmgr_gem->has_wait_timeout) { 1560 DBG("%s:%d: Timed wait is not supported. Falling back to " 1561 "infinite wait\n", __FILE__, __LINE__); 1562 if (timeout_ns) { 1563 drm_intel_gem_bo_wait_rendering(bo); 1564 return 0; 1565 } else { 1566 return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; 1567 } 1568 } 1569 1570 wait.bo_handle = bo_gem->gem_handle; 1571 wait.timeout_ns = timeout_ns; 1572 wait.flags = 0; 1573 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 1574 if (ret == -1) 1575 return -errno; 1576 1577 return ret; 1578} 1579 1580/** 1581 * Sets the object to the GTT read and possibly write domain, used by the X 1582 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1583 * 1584 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1585 * can do tiled pixmaps this way. 1586 */ 1587void 1588drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1589{ 1590 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1591 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1592 struct drm_i915_gem_set_domain set_domain; 1593 int ret; 1594 1595 VG_CLEAR(set_domain); 1596 set_domain.handle = bo_gem->gem_handle; 1597 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1598 set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1599 ret = drmIoctl(bufmgr_gem->fd, 1600 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1601 &set_domain); 1602 if (ret != 0) { 1603 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1604 __FILE__, __LINE__, bo_gem->gem_handle, 1605 set_domain.read_domains, set_domain.write_domain, 1606 strerror(errno)); 1607 } 1608} 1609 1610static void 1611drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1612{ 1613 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1614 int i; 1615 1616 free(bufmgr_gem->exec2_objects); 1617 free(bufmgr_gem->exec_objects); 1618 free(bufmgr_gem->exec_bos); 1619 free(bufmgr_gem->aub_filename); 1620 1621 pthread_mutex_destroy(&bufmgr_gem->lock); 1622 1623 /* Free any cached buffer objects we were going to reuse */ 1624 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1625 struct drm_intel_gem_bo_bucket *bucket = 1626 &bufmgr_gem->cache_bucket[i]; 1627 drm_intel_bo_gem *bo_gem; 1628 1629 while (!DRMLISTEMPTY(&bucket->head)) { 1630 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1631 bucket->head.next, head); 1632 DRMLISTDEL(&bo_gem->head); 1633 1634 drm_intel_gem_bo_free(&bo_gem->bo); 1635 } 1636 } 1637 1638 free(bufmgr); 1639} 1640 1641/** 1642 * Adds the target buffer to the validation list and adds the relocation 1643 * to the reloc_buffer's relocation list. 1644 * 1645 * The relocation entry at the given offset must already contain the 1646 * precomputed relocation value, because the kernel will optimize out 1647 * the relocation entry write when the buffer hasn't moved from the 1648 * last known offset in target_bo. 1649 */ 1650static int 1651do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1652 drm_intel_bo *target_bo, uint32_t target_offset, 1653 uint32_t read_domains, uint32_t write_domain, 1654 bool need_fence) 1655{ 1656 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1657 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1658 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1659 bool fenced_command; 1660 1661 if (bo_gem->has_error) 1662 return -ENOMEM; 1663 1664 if (target_bo_gem->has_error) { 1665 bo_gem->has_error = true; 1666 return -ENOMEM; 1667 } 1668 1669 /* We never use HW fences for rendering on 965+ */ 1670 if (bufmgr_gem->gen >= 4) 1671 need_fence = false; 1672 1673 fenced_command = need_fence; 1674 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1675 need_fence = false; 1676 1677 /* Create a new relocation list if needed */ 1678 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1679 return -ENOMEM; 1680 1681 /* Check overflow */ 1682 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1683 1684 /* Check args */ 1685 assert(offset <= bo->size - 4); 1686 assert((write_domain & (write_domain - 1)) == 0); 1687 1688 /* Make sure that we're not adding a reloc to something whose size has 1689 * already been accounted for. 1690 */ 1691 assert(!bo_gem->used_as_reloc_target); 1692 if (target_bo_gem != bo_gem) { 1693 target_bo_gem->used_as_reloc_target = true; 1694 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1695 } 1696 /* An object needing a fence is a tiled buffer, so it won't have 1697 * relocs to other buffers. 
1698 */ 1699 if (need_fence) 1700 target_bo_gem->reloc_tree_fences = 1; 1701 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1702 1703 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1704 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1705 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1706 target_bo_gem->gem_handle; 1707 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1708 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1709 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 1710 1711 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1712 if (target_bo != bo) 1713 drm_intel_gem_bo_reference(target_bo); 1714 if (fenced_command) 1715 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1716 DRM_INTEL_RELOC_FENCE; 1717 else 1718 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1719 1720 bo_gem->reloc_count++; 1721 1722 return 0; 1723} 1724 1725static int 1726drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1727 drm_intel_bo *target_bo, uint32_t target_offset, 1728 uint32_t read_domains, uint32_t write_domain) 1729{ 1730 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1731 1732 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1733 read_domains, write_domain, 1734 !bufmgr_gem->fenced_relocs); 1735} 1736 1737static int 1738drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1739 drm_intel_bo *target_bo, 1740 uint32_t target_offset, 1741 uint32_t read_domains, uint32_t write_domain) 1742{ 1743 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1744 read_domains, write_domain, true); 1745} 1746 1747int 1748drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 1749{ 1750 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1751 1752 return bo_gem->reloc_count; 1753} 1754 1755/** 1756 * Removes existing relocation entries in the BO after "start". 1757 * 1758 * This allows a user to avoid a two-step process for state setup with 1759 * counting up all the buffer objects and doing a 1760 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 1761 * relocations for the state setup. Instead, save the state of the 1762 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 1763 * state, and then check if it still fits in the aperture. 1764 * 1765 * Any further drm_intel_bufmgr_check_aperture_space() queries 1766 * involving this buffer in the tree are undefined after this call. 1767 */ 1768void 1769drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 1770{ 1771 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1772 int i; 1773 struct timespec time; 1774 1775 clock_gettime(CLOCK_MONOTONIC, &time); 1776 1777 assert(bo_gem->reloc_count >= start); 1778 /* Unreference the cleared target buffers */ 1779 for (i = start; i < bo_gem->reloc_count; i++) { 1780 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 1781 if (&target_bo_gem->bo != bo) { 1782 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 1783 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 1784 time.tv_sec); 1785 } 1786 } 1787 bo_gem->reloc_count = start; 1788} 1789 1790/** 1791 * Walk the tree of relocations rooted at BO and accumulate the list of 1792 * validations to be performed and update the relocation buffers with 1793 * index values into the validation list. 
1794 */ 1795static void 1796drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1797{ 1798 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1799 int i; 1800 1801 if (bo_gem->relocs == NULL) 1802 return; 1803 1804 for (i = 0; i < bo_gem->reloc_count; i++) { 1805 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1806 1807 if (target_bo == bo) 1808 continue; 1809 1810 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1811 1812 /* Continue walking the tree depth-first. */ 1813 drm_intel_gem_bo_process_reloc(target_bo); 1814 1815 /* Add the target to the validate list */ 1816 drm_intel_add_validate_buffer(target_bo); 1817 } 1818} 1819 1820static void 1821drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1822{ 1823 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1824 int i; 1825 1826 if (bo_gem->relocs == NULL) 1827 return; 1828 1829 for (i = 0; i < bo_gem->reloc_count; i++) { 1830 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1831 int need_fence; 1832 1833 if (target_bo == bo) 1834 continue; 1835 1836 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1837 1838 /* Continue walking the tree depth-first. */ 1839 drm_intel_gem_bo_process_reloc2(target_bo); 1840 1841 need_fence = (bo_gem->reloc_target_info[i].flags & 1842 DRM_INTEL_RELOC_FENCE); 1843 1844 /* Add the target to the validate list */ 1845 drm_intel_add_validate_buffer2(target_bo, need_fence); 1846 } 1847} 1848 1849 1850static void 1851drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1852{ 1853 int i; 1854 1855 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1856 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1857 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1858 1859 /* Update the buffer offset */ 1860 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 1861 DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 1862 bo_gem->gem_handle, bo_gem->name, 1863 (unsigned long long)bo->offset64, 1864 (unsigned long long)bufmgr_gem->exec_objects[i]. 
1865 offset); 1866 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 1867 bo->offset = bufmgr_gem->exec_objects[i].offset; 1868 } 1869 } 1870} 1871 1872static void 1873drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1874{ 1875 int i; 1876 1877 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1878 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1879 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1880 1881 /* Update the buffer offset */ 1882 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 1883 DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 1884 bo_gem->gem_handle, bo_gem->name, 1885 (unsigned long long)bo->offset64, 1886 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1887 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 1888 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1889 } 1890 } 1891} 1892 1893static void 1894aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 1895{ 1896 fwrite(&data, 1, 4, bufmgr_gem->aub_file); 1897} 1898 1899static void 1900aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 1901{ 1902 fwrite(data, 1, size, bufmgr_gem->aub_file); 1903} 1904 1905static void 1906aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 1907{ 1908 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1909 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1910 uint32_t *data; 1911 unsigned int i; 1912 1913 data = malloc(bo->size); 1914 drm_intel_bo_get_subdata(bo, offset, size, data); 1915 1916 /* Easy mode: write out bo with no relocations */ 1917 if (!bo_gem->reloc_count) { 1918 aub_out_data(bufmgr_gem, data, size); 1919 free(data); 1920 return; 1921 } 1922 1923 /* Otherwise, handle the relocations while writing. */ 1924 for (i = 0; i < size / 4; i++) { 1925 int r; 1926 for (r = 0; r < bo_gem->reloc_count; r++) { 1927 struct drm_i915_gem_relocation_entry *reloc; 1928 drm_intel_reloc_target *info; 1929 1930 reloc = &bo_gem->relocs[r]; 1931 info = &bo_gem->reloc_target_info[r]; 1932 1933 if (reloc->offset == offset + i * 4) { 1934 drm_intel_bo_gem *target_gem; 1935 uint32_t val; 1936 1937 target_gem = (drm_intel_bo_gem *)info->bo; 1938 1939 val = reloc->delta; 1940 val += target_gem->aub_offset; 1941 1942 aub_out(bufmgr_gem, val); 1943 data[i] = val; 1944 break; 1945 } 1946 } 1947 if (r == bo_gem->reloc_count) { 1948 /* no relocation, just the data */ 1949 aub_out(bufmgr_gem, data[i]); 1950 } 1951 } 1952 1953 free(data); 1954} 1955 1956static void 1957aub_bo_get_address(drm_intel_bo *bo) 1958{ 1959 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1960 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1961 1962 /* Give the object a graphics address in the AUB file. We 1963 * don't just use the GEM object address because we do AUB 1964 * dumping before execution -- we want to successfully log 1965 * when the hardware might hang, and we might even want to aub 1966 * capture for a driver trying to execute on a different 1967 * generation of hardware by disabling the actual kernel exec 1968 * call. 1969 */ 1970 bo_gem->aub_offset = bufmgr_gem->aub_offset; 1971 bufmgr_gem->aub_offset += bo->size; 1972 /* XXX: Handle aperture overflow. 
*/ 1973 assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 1974} 1975 1976static void 1977aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 1978 uint32_t offset, uint32_t size) 1979{ 1980 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1981 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1982 1983 aub_out(bufmgr_gem, 1984 CMD_AUB_TRACE_HEADER_BLOCK | 1985 ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 1986 aub_out(bufmgr_gem, 1987 AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 1988 aub_out(bufmgr_gem, subtype); 1989 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 1990 aub_out(bufmgr_gem, size); 1991 if (bufmgr_gem->gen >= 8) 1992 aub_out(bufmgr_gem, 0); 1993 aub_write_bo_data(bo, offset, size); 1994} 1995 1996/** 1997 * Break up large objects into multiple writes. Otherwise a 128kb VBO 1998 * would overflow the 16 bits of size field in the packet header and 1999 * everything goes badly after that. 2000 */ 2001static void 2002aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 2003 uint32_t offset, uint32_t size) 2004{ 2005 uint32_t block_size; 2006 uint32_t sub_offset; 2007 2008 for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 2009 block_size = size - sub_offset; 2010 2011 if (block_size > 8 * 4096) 2012 block_size = 8 * 4096; 2013 2014 aub_write_trace_block(bo, type, subtype, offset + sub_offset, 2015 block_size); 2016 } 2017} 2018 2019static void 2020aub_write_bo(drm_intel_bo *bo) 2021{ 2022 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2023 uint32_t offset = 0; 2024 unsigned i; 2025 2026 aub_bo_get_address(bo); 2027 2028 /* Write out each annotated section separately. */ 2029 for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 2030 drm_intel_aub_annotation *annotation = 2031 &bo_gem->aub_annotations[i]; 2032 uint32_t ending_offset = annotation->ending_offset; 2033 if (ending_offset > bo->size) 2034 ending_offset = bo->size; 2035 if (ending_offset > offset) { 2036 aub_write_large_trace_block(bo, annotation->type, 2037 annotation->subtype, 2038 offset, 2039 ending_offset - offset); 2040 offset = ending_offset; 2041 } 2042 } 2043 2044 /* Write out any remaining unannotated data */ 2045 if (offset < bo->size) { 2046 aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 2047 offset, bo->size - offset); 2048 } 2049} 2050 2051/* 2052 * Make a ringbuffer on fly and dump it 2053 */ 2054static void 2055aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 2056 uint32_t batch_buffer, int ring_flag) 2057{ 2058 uint32_t ringbuffer[4096]; 2059 int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 2060 int ring_count = 0; 2061 2062 if (ring_flag == I915_EXEC_BSD) 2063 ring = AUB_TRACE_TYPE_RING_PRB1; 2064 else if (ring_flag == I915_EXEC_BLT) 2065 ring = AUB_TRACE_TYPE_RING_PRB2; 2066 2067 /* Make a ring buffer to execute our batchbuffer. */ 2068 memset(ringbuffer, 0, sizeof(ringbuffer)); 2069 if (bufmgr_gem->gen >= 8) { 2070 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 2071 ringbuffer[ring_count++] = batch_buffer; 2072 ringbuffer[ring_count++] = 0; 2073 } else { 2074 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 2075 ringbuffer[ring_count++] = batch_buffer; 2076 } 2077 2078 /* Write out the ring. This appears to trigger execution of 2079 * the ring in the simulator. 2080 */ 2081 aub_out(bufmgr_gem, 2082 CMD_AUB_TRACE_HEADER_BLOCK | 2083 ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 2084 aub_out(bufmgr_gem, 2085 AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 2086 aub_out(bufmgr_gem, 0); /* general/surface subtype */ 2087 aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 2088 aub_out(bufmgr_gem, ring_count * 4); 2089 if (bufmgr_gem->gen >= 8) 2090 aub_out(bufmgr_gem, 0); 2091 2092 /* FIXME: Need some flush operations here? */ 2093 aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 2094 2095 /* Update offset pointer */ 2096 bufmgr_gem->aub_offset += 4096; 2097} 2098 2099void 2100drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2101 int x1, int y1, int width, int height, 2102 enum aub_dump_bmp_format format, 2103 int pitch, int offset) 2104{ 2105 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2106 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2107 uint32_t cpp; 2108 2109 switch (format) { 2110 case AUB_DUMP_BMP_FORMAT_8BIT: 2111 cpp = 1; 2112 break; 2113 case AUB_DUMP_BMP_FORMAT_ARGB_4444: 2114 cpp = 2; 2115 break; 2116 case AUB_DUMP_BMP_FORMAT_ARGB_0888: 2117 case AUB_DUMP_BMP_FORMAT_ARGB_8888: 2118 cpp = 4; 2119 break; 2120 default: 2121 printf("Unknown AUB dump format %d\n", format); 2122 return; 2123 } 2124 2125 if (!bufmgr_gem->aub_file) 2126 return; 2127 2128 aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 2129 aub_out(bufmgr_gem, (y1 << 16) | x1); 2130 aub_out(bufmgr_gem, 2131 (format << 24) | 2132 (cpp << 19) | 2133 pitch / 4); 2134 aub_out(bufmgr_gem, (height << 16) | width); 2135 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 2136 aub_out(bufmgr_gem, 2137 ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 2138 ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); 2139} 2140 2141static void 2142aub_exec(drm_intel_bo *bo, int ring_flag, int used) 2143{ 2144 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2145 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2146 int i; 2147 bool batch_buffer_needs_annotations; 2148 2149 if (!bufmgr_gem->aub_file) 2150 return; 2151 2152 /* If batch buffer is not annotated, annotate it the best we 2153 * can. 2154 */ 2155 batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 2156 if (batch_buffer_needs_annotations) { 2157 drm_intel_aub_annotation annotations[2] = { 2158 { AUB_TRACE_TYPE_BATCH, 0, used }, 2159 { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 2160 }; 2161 drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 2162 } 2163 2164 /* Write out all buffers to AUB memory */ 2165 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2166 aub_write_bo(bufmgr_gem->exec_bos[i]); 2167 } 2168 2169 /* Remove any annotations we added */ 2170 if (batch_buffer_needs_annotations) 2171 drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 2172 2173 /* Dump ring buffer */ 2174 aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 2175 2176 fflush(bufmgr_gem->aub_file); 2177 2178 /* 2179 * One frame has been dumped. So reset the aub_offset for the next frame. 2180 * 2181 * FIXME: Can we do this? 
2182 */ 2183 bufmgr_gem->aub_offset = 0x10000; 2184} 2185 2186static int 2187drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2188 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2189{ 2190 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2191 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2192 struct drm_i915_gem_execbuffer execbuf; 2193 int ret, i; 2194 2195 if (bo_gem->has_error) 2196 return -ENOMEM; 2197 2198 pthread_mutex_lock(&bufmgr_gem->lock); 2199 /* Update indices and set up the validate list. */ 2200 drm_intel_gem_bo_process_reloc(bo); 2201 2202 /* Add the batch buffer to the validation list. There are no 2203 * relocations pointing to it. 2204 */ 2205 drm_intel_add_validate_buffer(bo); 2206 2207 VG_CLEAR(execbuf); 2208 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2209 execbuf.buffer_count = bufmgr_gem->exec_count; 2210 execbuf.batch_start_offset = 0; 2211 execbuf.batch_len = used; 2212 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2213 execbuf.num_cliprects = num_cliprects; 2214 execbuf.DR1 = 0; 2215 execbuf.DR4 = DR4; 2216 2217 ret = drmIoctl(bufmgr_gem->fd, 2218 DRM_IOCTL_I915_GEM_EXECBUFFER, 2219 &execbuf); 2220 if (ret != 0) { 2221 ret = -errno; 2222 if (errno == ENOSPC) { 2223 DBG("Execbuffer fails to pin. " 2224 "Estimate: %u. Actual: %u. Available: %u\n", 2225 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2226 bufmgr_gem-> 2227 exec_count), 2228 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2229 bufmgr_gem-> 2230 exec_count), 2231 (unsigned int)bufmgr_gem->gtt_size); 2232 } 2233 } 2234 drm_intel_update_buffer_offsets(bufmgr_gem); 2235 2236 if (bufmgr_gem->bufmgr.debug) 2237 drm_intel_gem_dump_validation_list(bufmgr_gem); 2238 2239 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2240 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2241 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2242 2243 bo_gem->idle = false; 2244 2245 /* Disconnect the buffer from the validate list */ 2246 bo_gem->validate_index = -1; 2247 bufmgr_gem->exec_bos[i] = NULL; 2248 } 2249 bufmgr_gem->exec_count = 0; 2250 pthread_mutex_unlock(&bufmgr_gem->lock); 2251 2252 return ret; 2253} 2254 2255static int 2256do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2257 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2258 unsigned int flags) 2259{ 2260 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2261 struct drm_i915_gem_execbuffer2 execbuf; 2262 int ret = 0; 2263 int i; 2264 2265 switch (flags & 0x7) { 2266 default: 2267 return -EINVAL; 2268 case I915_EXEC_BLT: 2269 if (!bufmgr_gem->has_blt) 2270 return -EINVAL; 2271 break; 2272 case I915_EXEC_BSD: 2273 if (!bufmgr_gem->has_bsd) 2274 return -EINVAL; 2275 break; 2276 case I915_EXEC_VEBOX: 2277 if (!bufmgr_gem->has_vebox) 2278 return -EINVAL; 2279 break; 2280 case I915_EXEC_RENDER: 2281 case I915_EXEC_DEFAULT: 2282 break; 2283 } 2284 2285 pthread_mutex_lock(&bufmgr_gem->lock); 2286 /* Update indices and set up the validate list. */ 2287 drm_intel_gem_bo_process_reloc2(bo); 2288 2289 /* Add the batch buffer to the validation list. There are no relocations 2290 * pointing to it. 
2291 */ 2292 drm_intel_add_validate_buffer2(bo, 0); 2293 2294 VG_CLEAR(execbuf); 2295 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2296 execbuf.buffer_count = bufmgr_gem->exec_count; 2297 execbuf.batch_start_offset = 0; 2298 execbuf.batch_len = used; 2299 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2300 execbuf.num_cliprects = num_cliprects; 2301 execbuf.DR1 = 0; 2302 execbuf.DR4 = DR4; 2303 execbuf.flags = flags; 2304 if (ctx == NULL) 2305 i915_execbuffer2_set_context_id(execbuf, 0); 2306 else 2307 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2308 execbuf.rsvd2 = 0; 2309 2310 aub_exec(bo, flags, used); 2311 2312 if (bufmgr_gem->no_exec) 2313 goto skip_execution; 2314 2315 ret = drmIoctl(bufmgr_gem->fd, 2316 DRM_IOCTL_I915_GEM_EXECBUFFER2, 2317 &execbuf); 2318 if (ret != 0) { 2319 ret = -errno; 2320 if (ret == -ENOSPC) { 2321 DBG("Execbuffer fails to pin. " 2322 "Estimate: %u. Actual: %u. Available: %u\n", 2323 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2324 bufmgr_gem->exec_count), 2325 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2326 bufmgr_gem->exec_count), 2327 (unsigned int) bufmgr_gem->gtt_size); 2328 } 2329 } 2330 drm_intel_update_buffer_offsets2(bufmgr_gem); 2331 2332skip_execution: 2333 if (bufmgr_gem->bufmgr.debug) 2334 drm_intel_gem_dump_validation_list(bufmgr_gem); 2335 2336 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2337 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2338 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2339 2340 bo_gem->idle = false; 2341 2342 /* Disconnect the buffer from the validate list */ 2343 bo_gem->validate_index = -1; 2344 bufmgr_gem->exec_bos[i] = NULL; 2345 } 2346 bufmgr_gem->exec_count = 0; 2347 pthread_mutex_unlock(&bufmgr_gem->lock); 2348 2349 return ret; 2350} 2351 2352static int 2353drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2354 drm_clip_rect_t *cliprects, int num_cliprects, 2355 int DR4) 2356{ 2357 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2358 I915_EXEC_RENDER); 2359} 2360 2361static int 2362drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2363 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2364 unsigned int flags) 2365{ 2366 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2367 flags); 2368} 2369 2370int 2371drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2372 int used, unsigned int flags) 2373{ 2374 return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2375} 2376 2377static int 2378drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2379{ 2380 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2381 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2382 struct drm_i915_gem_pin pin; 2383 int ret; 2384 2385 VG_CLEAR(pin); 2386 pin.handle = bo_gem->gem_handle; 2387 pin.alignment = alignment; 2388 2389 ret = drmIoctl(bufmgr_gem->fd, 2390 DRM_IOCTL_I915_GEM_PIN, 2391 &pin); 2392 if (ret != 0) 2393 return -errno; 2394 2395 bo->offset64 = pin.offset; 2396 bo->offset = pin.offset; 2397 return 0; 2398} 2399 2400static int 2401drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2402{ 2403 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2404 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2405 struct drm_i915_gem_unpin unpin; 2406 int ret; 2407 2408 VG_CLEAR(unpin); 2409 unpin.handle = bo_gem->gem_handle; 2410 2411 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2412 if (ret != 0) 2413 return -errno; 2414 2415 return 0; 2416} 2417 2418static int 
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 &&
	    tiling_mode == bo_gem->tiling_mode &&
	    stride == bo_gem->stride)
		return 0;

	memset(&set_tiling, 0, sizeof(set_tiling));
	do {
		/* set_tiling is slightly broken and overwrites the
		 * input on the error path, so we have to open code
		 * drmIoctl.
		 */
		set_tiling.handle = bo_gem->gem_handle;
		set_tiling.tiling_mode = tiling_mode;
		set_tiling.stride = stride;

		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
	if (ret == -1)
		return -errno;

	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
	bo_gem->stride = set_tiling.stride;
	return 0;
}

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret;

	/* Linear buffers have no stride. By ensuring that we only ever use
	 * stride 0 with linear buffers, we simplify our code.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		stride = 0;

	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
	if (ret == 0)
		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	*tiling_mode = bo_gem->tiling_mode;
	return ret;
}

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	*tiling_mode = bo_gem->tiling_mode;
	*swizzle_mode = bo_gem->swizzle_mode;
	return 0;
}

drm_intel_bo *
drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	int ret;
	uint32_t handle;
	drm_intel_bo_gem *bo_gem;
	struct drm_i915_gem_get_tiling get_tiling;
	drmMMListHead *list;

	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);

	/*
	 * See if the kernel has already returned this buffer to us. Just as
	 * for named buffers, we must not create two bo's pointing at the same
	 * kernel object
	 */
	for (list = bufmgr_gem->named.next;
	     list != &bufmgr_gem->named;
	     list = list->next) {
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
		if (bo_gem->gem_handle == handle) {
			drm_intel_gem_bo_reference(&bo_gem->bo);
			return &bo_gem->bo;
		}
	}

	if (ret) {
		fprintf(stderr, "drmPrimeFDToHandle failed: ret %d, errno %d\n",
			ret, errno);
		return NULL;
	}

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	/* Determine size of bo. The fd-to-handle ioctl really should
	 * return the size, but it doesn't. If we have kernel 3.12 or
	 * later, we can lseek on the prime fd to get the size. Older
	 * kernels will just fail, in which case we fall back to the
	 * provided (estimated or guessed) size.
	 */
	ret = lseek(prime_fd, 0, SEEK_END);
	if (ret != -1)
		bo_gem->bo.size = ret;
	else
		bo_gem->bo.size = size;

	bo_gem->bo.handle = handle;
	bo_gem->bo.bufmgr = bufmgr;

	bo_gem->gem_handle = handle;

	atomic_set(&bo_gem->refcount, 1);

	bo_gem->name = "prime";
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = false;
	bo_gem->has_error = false;
	bo_gem->reusable = false;

	DRMINITLISTHEAD(&bo_gem->vma_list);
	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);

	VG_CLEAR(get_tiling);
	get_tiling.handle = bo_gem->gem_handle;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_TILING,
		       &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	/* XXX stride is unknown */
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	return &bo_gem->bo;
}

int
drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	if (DRMLISTEMPTY(&bo_gem->name_list))
		DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);

	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
			       DRM_CLOEXEC, prime_fd) != 0)
		return -errno;

	bo_gem->reusable = false;

	return 0;
}
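
/*
 * Illustrative sketch only (not part of the library): typical use of the
 * two PRIME entry points above.  "exporter" and "importer" are
 * hypothetical bufmgr pointers owned by the caller; error handling is
 * trimmed for brevity.
 *
 *	int fd;
 *	drm_intel_bo *src, *dst;
 *
 *	src = drm_intel_bo_alloc(exporter, "shared", 4096, 4096);
 *	if (drm_intel_bo_gem_export_to_prime(src, &fd) == 0) {
 *		dst = drm_intel_bo_gem_create_from_prime(importer, fd, 4096);
 *		close(fd);
 *	}
 *
 * The size passed to the import is only a fallback for kernels that
 * cannot report it via lseek(), and both paths leave the bo marked
 * non-reusable so it will not return to the allocation cache.
 */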
static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret;

	if (!bo_gem->global_name) {
		struct drm_gem_flink flink;

		VG_CLEAR(flink);
		flink.handle = bo_gem->gem_handle;

		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
		if (ret != 0)
			return -errno;

		bo_gem->global_name = flink.name;
		bo_gem->reusable = false;

		if (DRMLISTEMPTY(&bo_gem->name_list))
			DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
	}

	*name = bo_gem->global_name;
	return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;

	bufmgr_gem->bo_reuse = true;
}

/**
 * Enable use of fenced reloc type.
 *
 * New code should enable this to avoid unnecessary fence register
 * allocation.  If this option is not enabled, all relocs will have a fence
 * register allocated.
 */
void
drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
		bufmgr_gem->fenced_relocs = true;
}

/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
2650 */ 2651static int 2652drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2653{ 2654 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2655 int i; 2656 int total = 0; 2657 2658 if (bo == NULL || bo_gem->included_in_check_aperture) 2659 return 0; 2660 2661 total += bo->size; 2662 bo_gem->included_in_check_aperture = true; 2663 2664 for (i = 0; i < bo_gem->reloc_count; i++) 2665 total += 2666 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2667 reloc_target_info[i].bo); 2668 2669 return total; 2670} 2671 2672/** 2673 * Count the number of buffers in this list that need a fence reg 2674 * 2675 * If the count is greater than the number of available regs, we'll have 2676 * to ask the caller to resubmit a batch with fewer tiled buffers. 2677 * 2678 * This function over-counts if the same buffer is used multiple times. 2679 */ 2680static unsigned int 2681drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2682{ 2683 int i; 2684 unsigned int total = 0; 2685 2686 for (i = 0; i < count; i++) { 2687 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2688 2689 if (bo_gem == NULL) 2690 continue; 2691 2692 total += bo_gem->reloc_tree_fences; 2693 } 2694 return total; 2695} 2696 2697/** 2698 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2699 * for the next drm_intel_bufmgr_check_aperture_space() call. 2700 */ 2701static void 2702drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2703{ 2704 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2705 int i; 2706 2707 if (bo == NULL || !bo_gem->included_in_check_aperture) 2708 return; 2709 2710 bo_gem->included_in_check_aperture = false; 2711 2712 for (i = 0; i < bo_gem->reloc_count; i++) 2713 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2714 reloc_target_info[i].bo); 2715} 2716 2717/** 2718 * Return a conservative estimate for the amount of aperture required 2719 * for a collection of buffers. This may double-count some buffers. 2720 */ 2721static unsigned int 2722drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2723{ 2724 int i; 2725 unsigned int total = 0; 2726 2727 for (i = 0; i < count; i++) { 2728 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2729 if (bo_gem != NULL) 2730 total += bo_gem->reloc_tree_size; 2731 } 2732 return total; 2733} 2734 2735/** 2736 * Return the amount of aperture needed for a collection of buffers. 2737 * This avoids double counting any buffers, at the cost of looking 2738 * at every buffer in the set. 2739 */ 2740static unsigned int 2741drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2742{ 2743 int i; 2744 unsigned int total = 0; 2745 2746 for (i = 0; i < count; i++) { 2747 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2748 /* For the first buffer object in the array, we get an 2749 * accurate count back for its reloc_tree size (since nothing 2750 * had been flagged as being counted yet). We can save that 2751 * value out as a more conservative reloc_tree_size that 2752 * avoids double-counting target buffers. Since the first 2753 * buffer happens to usually be the batch buffer in our 2754 * callers, this can pull us back from doing the tree 2755 * walk on every new batch emit. 
2756 */ 2757 if (i == 0) { 2758 drm_intel_bo_gem *bo_gem = 2759 (drm_intel_bo_gem *) bo_array[i]; 2760 bo_gem->reloc_tree_size = total; 2761 } 2762 } 2763 2764 for (i = 0; i < count; i++) 2765 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2766 return total; 2767} 2768 2769/** 2770 * Return -1 if the batchbuffer should be flushed before attempting to 2771 * emit rendering referencing the buffers pointed to by bo_array. 2772 * 2773 * This is required because if we try to emit a batchbuffer with relocations 2774 * to a tree of buffers that won't simultaneously fit in the aperture, 2775 * the rendering will return an error at a point where the software is not 2776 * prepared to recover from it. 2777 * 2778 * However, we also want to emit the batchbuffer significantly before we reach 2779 * the limit, as a series of batchbuffers each of which references buffers 2780 * covering almost all of the aperture means that at each emit we end up 2781 * waiting to evict a buffer from the last rendering, and we get synchronous 2782 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2783 * get better parallelism. 2784 */ 2785static int 2786drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2787{ 2788 drm_intel_bufmgr_gem *bufmgr_gem = 2789 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2790 unsigned int total = 0; 2791 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2792 int total_fences; 2793 2794 /* Check for fence reg constraints if necessary */ 2795 if (bufmgr_gem->available_fences) { 2796 total_fences = drm_intel_gem_total_fences(bo_array, count); 2797 if (total_fences > bufmgr_gem->available_fences) 2798 return -ENOSPC; 2799 } 2800 2801 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2802 2803 if (total > threshold) 2804 total = drm_intel_gem_compute_batch_space(bo_array, count); 2805 2806 if (total > threshold) { 2807 DBG("check_space: overflowed available aperture, " 2808 "%dkb vs %dkb\n", 2809 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 2810 return -ENOSPC; 2811 } else { 2812 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 2813 (int)bufmgr_gem->gtt_size / 1024); 2814 return 0; 2815 } 2816} 2817 2818/* 2819 * Disable buffer reuse for objects which are shared with the kernel 2820 * as scanout buffers 2821 */ 2822static int 2823drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 2824{ 2825 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2826 2827 bo_gem->reusable = false; 2828 return 0; 2829} 2830 2831static int 2832drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 2833{ 2834 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2835 2836 return bo_gem->reusable; 2837} 2838 2839static int 2840_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2841{ 2842 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2843 int i; 2844 2845 for (i = 0; i < bo_gem->reloc_count; i++) { 2846 if (bo_gem->reloc_target_info[i].bo == target_bo) 2847 return 1; 2848 if (bo == bo_gem->reloc_target_info[i].bo) 2849 continue; 2850 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2851 target_bo)) 2852 return 1; 2853 } 2854 2855 return 0; 2856} 2857 2858/** Return true if target_bo is referenced by bo's relocation tree. 
*/ 2859static int 2860drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2861{ 2862 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2863 2864 if (bo == NULL || target_bo == NULL) 2865 return 0; 2866 if (target_bo_gem->used_as_reloc_target) 2867 return _drm_intel_gem_bo_references(bo, target_bo); 2868 return 0; 2869} 2870 2871static void 2872add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2873{ 2874 unsigned int i = bufmgr_gem->num_buckets; 2875 2876 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2877 2878 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2879 bufmgr_gem->cache_bucket[i].size = size; 2880 bufmgr_gem->num_buckets++; 2881} 2882 2883static void 2884init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 2885{ 2886 unsigned long size, cache_max_size = 64 * 1024 * 1024; 2887 2888 /* OK, so power of two buckets was too wasteful of memory. 2889 * Give 3 other sizes between each power of two, to hopefully 2890 * cover things accurately enough. (The alternative is 2891 * probably to just go for exact matching of sizes, and assume 2892 * that for things like composited window resize the tiled 2893 * width/height alignment and rounding of sizes to pages will 2894 * get us useful cache hit rates anyway) 2895 */ 2896 add_bucket(bufmgr_gem, 4096); 2897 add_bucket(bufmgr_gem, 4096 * 2); 2898 add_bucket(bufmgr_gem, 4096 * 3); 2899 2900 /* Initialize the linked lists for BO reuse cache. */ 2901 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 2902 add_bucket(bufmgr_gem, size); 2903 2904 add_bucket(bufmgr_gem, size + size * 1 / 4); 2905 add_bucket(bufmgr_gem, size + size * 2 / 4); 2906 add_bucket(bufmgr_gem, size + size * 3 / 4); 2907 } 2908} 2909 2910void 2911drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 2912{ 2913 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2914 2915 bufmgr_gem->vma_max = limit; 2916 2917 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 2918} 2919 2920/** 2921 * Get the PCI ID for the device. This can be overridden by setting the 2922 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 2923 */ 2924static int 2925get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 2926{ 2927 char *devid_override; 2928 int devid; 2929 int ret; 2930 drm_i915_getparam_t gp; 2931 2932 if (geteuid() == getuid()) { 2933 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 2934 if (devid_override) { 2935 bufmgr_gem->no_exec = true; 2936 return strtod(devid_override, NULL); 2937 } 2938 } 2939 2940 VG_CLEAR(devid); 2941 VG_CLEAR(gp); 2942 gp.param = I915_PARAM_CHIPSET_ID; 2943 gp.value = &devid; 2944 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2945 if (ret) { 2946 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 2947 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2948 } 2949 return devid; 2950} 2951 2952int 2953drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 2954{ 2955 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2956 2957 return bufmgr_gem->pci_device; 2958} 2959 2960/** 2961 * Sets the AUB filename. 2962 * 2963 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 2964 * for it to have any effect. 
2965 */ 2966void 2967drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 2968 const char *filename) 2969{ 2970 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2971 2972 free(bufmgr_gem->aub_filename); 2973 if (filename) 2974 bufmgr_gem->aub_filename = strdup(filename); 2975} 2976 2977/** 2978 * Sets up AUB dumping. 2979 * 2980 * This is a trace file format that can be used with the simulator. 2981 * Packets are emitted in a format somewhat like GPU command packets. 2982 * You can set up a GTT and upload your objects into the referenced 2983 * space, then send off batchbuffers and get BMPs out the other end. 2984 */ 2985void 2986drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 2987{ 2988 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2989 int entry = 0x200003; 2990 int i; 2991 int gtt_size = 0x10000; 2992 const char *filename; 2993 2994 if (!enable) { 2995 if (bufmgr_gem->aub_file) { 2996 fclose(bufmgr_gem->aub_file); 2997 bufmgr_gem->aub_file = NULL; 2998 } 2999 return; 3000 } 3001 3002 if (geteuid() != getuid()) 3003 return; 3004 3005 if (bufmgr_gem->aub_filename) 3006 filename = bufmgr_gem->aub_filename; 3007 else 3008 filename = "intel.aub"; 3009 bufmgr_gem->aub_file = fopen(filename, "w+"); 3010 if (!bufmgr_gem->aub_file) 3011 return; 3012 3013 /* Start allocating objects from just after the GTT. */ 3014 bufmgr_gem->aub_offset = gtt_size; 3015 3016 /* Start with a (required) version packet. */ 3017 aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); 3018 aub_out(bufmgr_gem, 3019 (4 << AUB_HEADER_MAJOR_SHIFT) | 3020 (0 << AUB_HEADER_MINOR_SHIFT)); 3021 for (i = 0; i < 8; i++) { 3022 aub_out(bufmgr_gem, 0); /* app name */ 3023 } 3024 aub_out(bufmgr_gem, 0); /* timestamp */ 3025 aub_out(bufmgr_gem, 0); /* timestamp */ 3026 aub_out(bufmgr_gem, 0); /* comment len */ 3027 3028 /* Set up the GTT. The max we can handle is 256M */ 3029 aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 
			6 : 5) - 2));
	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
	aub_out(bufmgr_gem, 0); /* subtype */
	aub_out(bufmgr_gem, 0); /* offset */
	aub_out(bufmgr_gem, gtt_size); /* size */
	if (bufmgr_gem->gen >= 8)
		aub_out(bufmgr_gem, 0);
	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
		aub_out(bufmgr_gem, entry);
	}
}

drm_intel_context *
drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	struct drm_i915_gem_context_create create;
	drm_intel_context *context = NULL;
	int ret;

	context = calloc(1, sizeof(*context));
	if (!context)
		return NULL;

	VG_CLEAR(create);
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
	if (ret != 0) {
		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
		    strerror(errno));
		free(context);
		return NULL;
	}

	context->ctx_id = create.ctx_id;
	context->bufmgr = bufmgr;

	return context;
}

void
drm_intel_gem_context_destroy(drm_intel_context *ctx)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_context_destroy destroy;
	int ret;

	if (ctx == NULL)
		return;

	VG_CLEAR(destroy);

	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
	destroy.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
		       &destroy);
	if (ret != 0)
		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
			strerror(errno));

	free(ctx);
}

int
drm_intel_get_reset_stats(drm_intel_context *ctx,
			  uint32_t *reset_count,
			  uint32_t *active,
			  uint32_t *pending)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_reset_stats stats;
	int ret;

	if (ctx == NULL)
		return -EINVAL;

	memset(&stats, 0, sizeof(stats));

	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
	stats.ctx_id = ctx->ctx_id;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GET_RESET_STATS,
		       &stats);
	if (ret == 0) {
		if (reset_count != NULL)
			*reset_count = stats.reset_count;

		if (active != NULL)
			*active = stats.batch_active;

		if (pending != NULL)
			*pending = stats.batch_pending;
	}

	return ret;
}

int
drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
		   uint32_t offset,
		   uint64_t *result)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
	struct drm_i915_reg_read reg_read;
	int ret;

	VG_CLEAR(reg_read);
	reg_read.offset = offset;

	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

	*result = reg_read.val;
	return ret;
}


/**
 * Annotate the given bo for use in aub dumping.
 *
 * \param annotations is an array of drm_intel_aub_annotation objects
 * describing the type of data in various sections of the bo.  Each
 * element of the array specifies the type and subtype of a section of
 * the bo, and the past-the-end offset of that section.  The elements
 * of \c annotations must be sorted so that ending_offset is
 * increasing.
 *
 * \param count is the number of elements in the \c annotations array.
 * If \c count is zero, then \c annotations will not be dereferenced.
 *
 * Annotations are copied into a private data structure, so caller may
 * re-use the memory pointed to by \c annotations after the call
 * returns.
 *
 * Annotations are stored for the lifetime of the bo; to reset to the
 * default state (no annotations), call this function with a \c count
 * of zero.
 */
void
drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
					 drm_intel_aub_annotation *annotations,
					 unsigned count)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	unsigned size = sizeof(*annotations) * count;
	drm_intel_aub_annotation *new_annotations =
		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
	if (new_annotations == NULL) {
		free(bo_gem->aub_annotations);
		bo_gem->aub_annotations = NULL;
		bo_gem->aub_annotation_count = 0;
		return;
	}
	memcpy(new_annotations, annotations, size);
	bo_gem->aub_annotations = new_annotations;
	bo_gem->aub_annotation_count = count;
}
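
/*
 * Illustrative sketch only (not part of the library): annotating a batch
 * buffer before AUB dumping.  "batch_bo" and "state_offset" are
 * hypothetical caller-provided values; entries must be sorted by
 * increasing ending_offset as described above.
 *
 *	drm_intel_aub_annotation notes[2] = {
 *		{ AUB_TRACE_TYPE_BATCH,  0, state_offset },
 *		{ AUB_TRACE_TYPE_NOTYPE, 0, batch_bo->size },
 *	};
 *	drm_intel_bufmgr_gem_set_aub_annotations(batch_bo, notes, 2);
 *
 * Calling the function again with a count of zero drops any stored
 * annotations.
 */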

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, tmp;
	bool exec2 = false;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	if (bufmgr_gem == NULL)
		return NULL;

	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
		       &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);

	if (IS_GEN2(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 2;
	else if (IS_GEN3(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 3;
	else if (IS_GEN4(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 4;
	else if (IS_GEN5(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 5;
	else if (IS_GEN6(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 6;
	else if (IS_GEN7(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 7;
	else if (IS_GEN8(bufmgr_gem->pci_device))
		bufmgr_gem->gen = 8;
	else {
		free(bufmgr_gem);
		return NULL;
	}

	if (IS_GEN3(bufmgr_gem->pci_device) &&
	    bufmgr_gem->gtt_size > 256*1024*1024) {
		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
		 * be used for tiled blits. To simplify the accounting, just
		 * subtract the unmappable part (fixed to 256MB on all known
		 * gen3 devices) if the kernel advertises it.
		 */
		bufmgr_gem->gtt_size -= 256*1024*1024;
	}

	VG_CLEAR(gp);
	gp.value = &tmp;

	gp.param = I915_PARAM_HAS_EXECBUF2;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (!ret)
		exec2 = true;

	gp.param = I915_PARAM_HAS_BSD;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_bsd = ret == 0;

	gp.param = I915_PARAM_HAS_BLT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_blt = ret == 0;

	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_relaxed_fencing = ret == 0;

	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_wait_timeout = ret == 0;

	gp.param = I915_PARAM_HAS_LLC;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret != 0) {
		/* Kernel does not support the HAS_LLC query; fall back to
		 * GPU generation detection and assume that we have LLC on
		 * GEN6/GEN7.
		 */
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
				       IS_GEN7(bufmgr_gem->pci_device));
	} else
		bufmgr_gem->has_llc = *gp.value;

	gp.param = I915_PARAM_HAS_VEBOX;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);

	if (bufmgr_gem->gen < 4) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		} else {
			/* XXX The kernel reports the total number of fences,
			 * including any that may be pinned.
			 *
			 * We presume that there will be at least one pinned
			 * fence for the scanout buffer, but there may be more
			 * than one scanout and the user may be manually
			 * pinning buffers. Let's move to execbuffer2 and
			 * thereby forget the insanity of using fences...
			 */
			bufmgr_gem->available_fences -= 2;
			if (bufmgr_gem->available_fences < 0)
				bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a bit
	 * since a power of two will mean an extra page allocation for the reloc
	 * buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
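	/* Worked example (assuming a 16 KiB batch, a typical but not required
	 * size): 16384 / sizeof(uint32_t) / 2 - 2 = 2046 relocations, i.e.
	 * just under the 2048 that an exact "one per 2 dwords" rule would
	 * give, so the relocation array stays shy of a power-of-two
	 * allocation.
	 */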
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	/* Use the new one if available */
	if (exec2) {
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
	} else
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	DRMINITLISTHEAD(&bufmgr_gem->named);
	init_cache_buckets(bufmgr_gem);

	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
	bufmgr_gem->vma_max = -1; /* unlimited by default */

	return &bufmgr_gem->bufmgr;
}
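
/*
 * Illustrative sketch only (not part of the library): minimal bring-up of
 * the buffer manager initialized above.  "fd" is assumed to be an open DRM
 * device file descriptor and the 16 KiB batch size is just an example.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	if (bufmgr == NULL)
 *		return -1;
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *	if (drm_intel_bo_map(bo, 1) == 0) {
 *		memset(bo->virtual, 0, 4096);
 *		drm_intel_bo_unmap(bo);
 *	}
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */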