kgem.c revision 9a906b70
/*
 * Copyright (c) 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"

#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <time.h>
#include <sched.h>
#include <errno.h>
#include <fcntl.h>

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#endif

#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
#include <sys/sysinfo.h>
#endif

#include "sna_cpuid.h"

static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);

static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);

#define DBG_NO_HW 0
#define DBG_NO_EXEC 0
#define DBG_NO_TILING 0
#define DBG_NO_CACHE 0
#define DBG_NO_SNOOP_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_CREATE2 0
#define DBG_NO_USERPTR 0
#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 0
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_NO_WT 0
#define DBG_DUMP 0
#define DBG_NO_MALLOC_CACHE 0

#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */

#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif

#define SHOW_BATCH_BEFORE 0
#define SHOW_BATCH_AFTER 0

#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif

/* Worst case seems to be 965gm where we cannot write within a cacheline that
 * is simultaneously being read by the GPU, or within the sampler
 * prefetch. In general, the chipsets seem to have a requirement that sampler
 * offsets be aligned to a cacheline (64 bytes).
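 *
 * As a rough illustration of what this costs: CPU uploads are padded to
 * UPLOAD_ALIGNMENT, defined just below and applied in _kgem_bo_delete_buffer().
 * A minimal sketch of that rounding, with a hypothetical helper name:
 */
#if 0
static inline uint32_t sketch_pad_upload(uint32_t used)
{
	/* Round up to the worst-case BLT pattern alignment of 256 bytes,
	 * e.g. 100 -> 256, 256 -> 256, 257 -> 512. */
	return (used + 256 - 1) & ~(256u - 1);
}
#endif
/* (comment continues)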
109 * 110 * Actually, it turns out the BLT color pattern (BR15) has the most severe 111 * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256 112 * bytes for 32-bpp. 113 */ 114#define UPLOAD_ALIGNMENT 256 115 116#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) 117#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE) 118 119#define MAX_GTT_VMA_CACHE 512 120#define MAX_CPU_VMA_CACHE INT16_MAX 121#define MAP_PRESERVE_TIME 10 122 123#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) 124#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1) 125 126#define LOCAL_I915_PARAM_HAS_BLT 11 127#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 128#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 129#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 130#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 131#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 132#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 133#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 134#define LOCAL_I915_PARAM_HAS_WT 27 135 136#define LOCAL_I915_EXEC_IS_PINNED (1<<10) 137#define LOCAL_I915_EXEC_NO_RELOC (1<<11) 138#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) 139 140#define LOCAL_I915_GEM_CREATE2 0x34 141#define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2) 142struct local_i915_gem_create2 { 143 uint64_t size; 144 uint32_t placement; 145#define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0 146#define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */ 147 uint32_t domain; 148 uint32_t caching; 149 uint32_t tiling_mode; 150 uint32_t stride; 151 uint32_t flags; 152 uint32_t pad; 153 uint32_t handle; 154}; 155 156#define LOCAL_I915_GEM_USERPTR 0x33 157#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) 158struct local_i915_gem_userptr { 159 uint64_t user_ptr; 160 uint64_t user_size; 161 uint32_t flags; 162#define I915_USERPTR_READ_ONLY 0x1 163#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 164 uint32_t handle; 165}; 166 167#define UNCACHED 0 168#define SNOOPED 1 169#define DISPLAY 2 170 171struct local_i915_gem_caching { 172 uint32_t handle; 173 uint32_t caching; 174}; 175 176#define LOCAL_I915_GEM_SET_CACHING 0x2f 177#define LOCAL_I915_GEM_GET_CACHING 0x30 178#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) 179#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) 180 181struct kgem_buffer { 182 struct kgem_bo base; 183 void *mem; 184 uint32_t used; 185 uint32_t need_io : 1; 186 uint32_t write : 2; 187 uint32_t mmapped : 2; 188}; 189enum { 190 MMAPPED_NONE, 191 MMAPPED_GTT, 192 MMAPPED_CPU 193}; 194 195static struct kgem_bo *__kgem_freed_bo; 196static struct kgem_request *__kgem_freed_request; 197static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; 198 199static inline int bytes(struct kgem_bo *bo) 200{ 201 return __kgem_bo_size(bo); 202} 203 204#define bucket(B) (B)->size.pages.bucket 205#define num_pages(B) (B)->size.pages.count 206 207static int do_ioctl(int fd, unsigned long req, void *arg) 208{ 209 int err; 210 211restart: 212 if (ioctl(fd, req, arg) == 0) 213 return 0; 214 215 err = errno; 216 217 if (err == EINTR) 218 goto restart; 219 220 if (err == EAGAIN) { 221 sched_yield(); 222 goto restart; 223 } 224 225 return -err; 226} 227 228#ifdef DEBUG_MEMORY 229static void debug_alloc(struct kgem *kgem, size_t size) 230{ 231 
kgem->debug_memory.bo_allocs++; 232 kgem->debug_memory.bo_bytes += size; 233} 234static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo) 235{ 236 debug_alloc(kgem, bytes(bo)); 237} 238#else 239#define debug_alloc__bo(k, b) 240#endif 241 242#ifndef NDEBUG 243static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) 244{ 245 struct drm_i915_gem_get_tiling tiling; 246 247 assert(bo); 248 249 VG_CLEAR(tiling); 250 tiling.handle = bo->handle; 251 tiling.tiling_mode = bo->tiling; 252 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); 253 assert(tiling.tiling_mode == bo->tiling); 254} 255 256static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo) 257{ 258 struct local_i915_gem_caching arg; 259 int expect = kgem->has_llc ? SNOOPED : UNCACHED; 260 261 VG_CLEAR(arg); 262 arg.handle = bo->handle; 263 arg.caching = expect; 264 265 (void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg); 266 267 assert(arg.caching == expect); 268} 269 270static void assert_bo_retired(struct kgem_bo *bo) 271{ 272 DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, 273 bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); 274 assert(bo->refcnt); 275 assert(bo->rq == NULL); 276 assert(bo->exec == NULL); 277 assert(list_is_empty(&bo->request)); 278} 279#else 280#define assert_tiling(kgem, bo) 281#define assert_cacheing(kgem, bo) 282#define assert_bo_retired(bo) 283#endif 284 285static void kgem_sna_reset(struct kgem *kgem) 286{ 287 struct sna *sna = container_of(kgem, struct sna, kgem); 288 289 sna->render.reset(sna); 290 sna->blt_state.fill_bo = 0; 291} 292 293static void kgem_sna_flush(struct kgem *kgem) 294{ 295 struct sna *sna = container_of(kgem, struct sna, kgem); 296 297 sna->render.flush(sna); 298 299 if (sna->render.solid_cache.dirty) 300 sna_render_flush_solid(sna); 301} 302 303static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) 304{ 305 struct drm_i915_gem_set_tiling set_tiling; 306 int err; 307 308 if (DBG_NO_TILING) 309 return false; 310 311 VG_CLEAR(set_tiling); 312restart: 313 set_tiling.handle = handle; 314 set_tiling.tiling_mode = tiling; 315 set_tiling.stride = stride; 316 317 if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) 318 return true; 319 320 err = errno; 321 if (err == EINTR) 322 goto restart; 323 324 if (err == EAGAIN) { 325 sched_yield(); 326 goto restart; 327 } 328 329 return false; 330} 331 332static bool gem_set_caching(int fd, uint32_t handle, int caching) 333{ 334 struct local_i915_gem_caching arg; 335 336 VG_CLEAR(arg); 337 arg.handle = handle; 338 arg.caching = caching; 339 return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; 340} 341 342static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only) 343{ 344 struct local_i915_gem_userptr arg; 345 346 VG_CLEAR(arg); 347 arg.user_ptr = (uintptr_t)ptr; 348 arg.user_size = size; 349 arg.flags = I915_USERPTR_UNSYNCHRONIZED; 350 if (read_only) 351 arg.flags |= I915_USERPTR_READ_ONLY; 352 353 if (DBG_NO_UNSYNCHRONIZED_USERPTR || 354 do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { 355 arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; 356 if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { 357 DBG(("%s: failed to map %p + %d bytes: %d\n", 358 __FUNCTION__, ptr, size, errno)); 359 return 0; 360 } 361 } 362 363 return arg.handle; 364} 365 366static bool __kgem_throttle(struct kgem *kgem, bool harder) 367{ 368 /* Let this be woken up by sigtimer so that we don't block here 369 * too much and completely starve X. 
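 *
 * Note the easily misread return convention: "true" means the throttle
 * ioctl failed with EIO, i.e. the GPU is wedged, not that throttling
 * succeeded.  A minimal sketch of the intended caller pattern, mirroring
 * what kgem_init() does further down:
 */
#if 0
	if (__kgem_throttle(kgem, false)) {
		/* EIO from DRM_IOCTL_I915_GEM_THROTTLE: assume a hung GPU
		 * and disable acceleration rather than retrying forever. */
		kgem->wedged = 1;
	}
#endif
/* (comment continues)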
We will sleep again shortly, 370 * and so catch up or detect the hang. 371 */ 372 do { 373 if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) { 374 kgem->need_throttle = 0; 375 return false; 376 } 377 378 if (errno == EIO) 379 return true; 380 } while (harder); 381 382 return false; 383} 384 385static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) 386{ 387 if (flags & CREATE_NO_RETIRE || !kgem->need_retire) { 388 DBG(("%s: not retiring\n", __FUNCTION__)); 389 return false; 390 } 391 392 if (kgem_retire(kgem)) 393 return true; 394 395 if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) { 396 DBG(("%s: not throttling\n", __FUNCTION__)); 397 return false; 398 } 399 400 __kgem_throttle(kgem, false); 401 return kgem_retire(kgem); 402} 403 404static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) 405{ 406 struct drm_i915_gem_mmap_gtt mmap_arg; 407 void *ptr; 408 int err; 409 410 DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, 411 bo->handle, bytes(bo))); 412 assert(bo->proxy == NULL); 413 assert(!bo->snoop); 414 assert(num_pages(bo) <= kgem->aperture_mappable / 4); 415 416retry_gtt: 417 VG_CLEAR(mmap_arg); 418 mmap_arg.handle = bo->handle; 419#ifdef __NetBSD__ 420 if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { 421 err = errno; 422#else 423 if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) { 424#endif 425 assert(err != EINVAL); 426 427 (void)__kgem_throttle_retire(kgem, 0); 428 if (kgem_expire_cache(kgem)) 429 goto retry_gtt; 430 431 if (kgem_cleanup_cache(kgem)) 432 goto retry_gtt; 433 434 ERR(("%s: failed to retrieve GTT offset for handle=%d: %d\n", 435 __FUNCTION__, bo->handle, -err)); 436 return NULL; 437 } 438 439retry_mmap: 440#ifdef __NetBSD__ 441 err = -drmMap(kgem->fd, mmap_arg.offset, bytes(bo), &ptr); 442 if (err) { 443#else 444 ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, 445 kgem->fd, mmap_arg.offset); 446 if (ptr == MAP_FAILED) { 447 err = errno; 448#endif 449 assert(err != EINVAL); 450 451 if (__kgem_throttle_retire(kgem, 0)) 452 goto retry_mmap; 453 454 if (kgem_cleanup_cache(kgem)) 455 goto retry_mmap; 456 457 ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n", 458 __FUNCTION__, bo->handle, bytes(bo), err)); 459 ptr = NULL; 460 } 461 462 return ptr; 463} 464 465static int gem_write(int fd, uint32_t handle, 466 int offset, int length, 467 const void *src) 468{ 469 struct drm_i915_gem_pwrite pwrite; 470 471 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, 472 handle, offset, length)); 473 474 VG_CLEAR(pwrite); 475 pwrite.handle = handle; 476 pwrite.offset = offset; 477 pwrite.size = length; 478 pwrite.data_ptr = (uintptr_t)src; 479 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); 480} 481 482static int gem_write__cachealigned(int fd, uint32_t handle, 483 int offset, int length, 484 const void *src) 485{ 486 struct drm_i915_gem_pwrite pwrite; 487 488 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, 489 handle, offset, length)); 490 491 VG_CLEAR(pwrite); 492 pwrite.handle = handle; 493 /* align the transfer to cachelines; fortuitously this is safe! 
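 *
 * As a worked example of the rounding below: offset=100, length=10 becomes
 * pwrite.offset = 100 & ~63 = 64 and pwrite.size = ALIGN(110, 64) - 64 = 64,
 * with data_ptr rewound by the same 36 bytes so the caller's bytes still
 * land at offset 100 of the object; "fortuitously safe" because the extra
 * bytes read around the caller's buffer are presumed to be mapped.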
*/ 494 if ((offset | length) & 63) { 495 pwrite.offset = offset & ~63; 496 pwrite.size = ALIGN(offset+length, 64) - pwrite.offset; 497 pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset; 498 } else { 499 pwrite.offset = offset; 500 pwrite.size = length; 501 pwrite.data_ptr = (uintptr_t)src; 502 } 503 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); 504} 505 506static int gem_read(int fd, uint32_t handle, const void *dst, 507 int offset, int length) 508{ 509 struct drm_i915_gem_pread pread; 510 int ret; 511 512 DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__, 513 handle, length)); 514 515 VG_CLEAR(pread); 516 pread.handle = handle; 517 pread.offset = offset; 518 pread.size = length; 519 pread.data_ptr = (uintptr_t)dst; 520 ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread); 521 if (ret) { 522 DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret)); 523 return ret; 524 } 525 526 VG(VALGRIND_MAKE_MEM_DEFINED(dst, length)); 527 return 0; 528} 529 530bool __kgem_busy(struct kgem *kgem, int handle) 531{ 532 struct drm_i915_gem_busy busy; 533 534 VG_CLEAR(busy); 535 busy.handle = handle; 536 busy.busy = !kgem->wedged; 537 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 538 DBG(("%s: handle=%d, busy=%d, wedged=%d\n", 539 __FUNCTION__, handle, busy.busy, kgem->wedged)); 540 541 return busy.busy; 542} 543 544static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) 545{ 546 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n", 547 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL, 548 __kgem_busy(kgem, bo->handle))); 549 assert(bo->exec == NULL); 550 assert(list_is_empty(&bo->vma)); 551 552 if (bo->rq) { 553 __kgem_bo_clear_busy(bo); 554 kgem_retire(kgem); 555 assert_bo_retired(bo); 556 } else { 557 assert(bo->exec == NULL); 558 assert(list_is_empty(&bo->request)); 559 assert(!bo->needs_flush); 560 ASSERT_IDLE(kgem, bo->handle); 561 } 562} 563 564static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) 565{ 566 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? 
%d [busy?=%d]\n", 567 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL, 568 __kgem_busy(kgem, bo->handle))); 569 assert(bo->exec == NULL); 570 assert(list_is_empty(&bo->vma)); 571 572 if (bo->rq) { 573 if (!__kgem_busy(kgem, bo->handle)) { 574 __kgem_bo_clear_busy(bo); 575 kgem_retire(kgem); 576 } 577 } else { 578 assert(!bo->needs_flush); 579 ASSERT_IDLE(kgem, bo->handle); 580 } 581} 582 583bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, 584 const void *data, int length) 585{ 586 int err; 587 588 assert(bo->refcnt); 589 assert(bo->proxy == NULL); 590 ASSERT_IDLE(kgem, bo->handle); 591 592 assert(length <= bytes(bo)); 593retry: 594 if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) { 595 assert(err != EINVAL); 596 597 (void)__kgem_throttle_retire(kgem, 0); 598 if (kgem_expire_cache(kgem)) 599 goto retry; 600 601 if (kgem_cleanup_cache(kgem)) 602 goto retry; 603 604 ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n", 605 __FUNCTION__, length, bo->handle, -err)); 606 return false; 607 } 608 609 DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain)); 610 if (bo->exec == NULL) 611 kgem_bo_maybe_retire(kgem, bo); 612 bo->domain = DOMAIN_NONE; 613 bo->gtt_dirty = true; 614 return true; 615} 616 617static uint32_t gem_create(int fd, int num_pages) 618{ 619 struct drm_i915_gem_create create; 620 621 VG_CLEAR(create); 622 create.handle = 0; 623 create.size = PAGE_SIZE * num_pages; 624 (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); 625 626 return create.handle; 627} 628 629static bool 630kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) 631{ 632#if DBG_NO_MADV 633 return true; 634#else 635 struct drm_i915_gem_madvise madv; 636 637 assert(bo->exec == NULL); 638 assert(!bo->purged); 639 640 VG_CLEAR(madv); 641 madv.handle = bo->handle; 642 madv.madv = I915_MADV_DONTNEED; 643 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { 644 bo->purged = 1; 645 kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; 646 return madv.retained; 647 } 648 649 return true; 650#endif 651} 652 653static bool 654kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo) 655{ 656#if DBG_NO_MADV 657 return true; 658#else 659 struct drm_i915_gem_madvise madv; 660 661 if (!bo->purged) 662 return true; 663 664 VG_CLEAR(madv); 665 madv.handle = bo->handle; 666 madv.madv = I915_MADV_DONTNEED; 667 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) 668 return madv.retained; 669 670 return false; 671#endif 672} 673 674static bool 675kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo) 676{ 677#if DBG_NO_MADV 678 return true; 679#else 680 struct drm_i915_gem_madvise madv; 681 682 assert(bo->purged); 683 684 VG_CLEAR(madv); 685 madv.handle = bo->handle; 686 madv.madv = I915_MADV_WILLNEED; 687 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { 688 bo->purged = !madv.retained; 689 kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; 690 return madv.retained; 691 } 692 693 return false; 694#endif 695} 696 697static void gem_close(int fd, uint32_t handle) 698{ 699 struct drm_gem_close close; 700 701 VG_CLEAR(close); 702 close.handle = handle; 703 (void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); 704} 705 706constant inline static unsigned long __fls(unsigned long word) 707{ 708#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__)) 709 asm("bsr %1,%0" 710 : "=r" (word) 711 : "rm" (word)); 712 return word; 713#else 714 unsigned int v = 0; 715 716 while (word >>= 
1) 717 v++; 718 719 return v; 720#endif 721} 722 723constant inline static int cache_bucket(int num_pages) 724{ 725 return __fls(num_pages); 726} 727 728static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, 729 int handle, int num_pages) 730{ 731 DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages)); 732 733 assert(num_pages); 734 memset(bo, 0, sizeof(*bo)); 735 736 bo->refcnt = 1; 737 bo->handle = handle; 738 bo->target_handle = -1; 739 num_pages(bo) = num_pages; 740 bucket(bo) = cache_bucket(num_pages); 741 bo->reusable = true; 742 bo->domain = DOMAIN_CPU; 743 list_init(&bo->request); 744 list_init(&bo->list); 745 list_init(&bo->vma); 746 747 return bo; 748} 749 750static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) 751{ 752 struct kgem_bo *bo; 753 754 if (__kgem_freed_bo) { 755 bo = __kgem_freed_bo; 756 __kgem_freed_bo = *(struct kgem_bo **)bo; 757 } else { 758 bo = malloc(sizeof(*bo)); 759 if (bo == NULL) 760 return NULL; 761 } 762 763 return __kgem_bo_init(bo, handle, num_pages); 764} 765 766static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) 767{ 768 struct kgem_request *rq; 769 770 rq = __kgem_freed_request; 771 if (rq) { 772 __kgem_freed_request = *(struct kgem_request **)rq; 773 } else { 774 rq = malloc(sizeof(*rq)); 775 if (rq == NULL) 776 rq = &kgem->static_request; 777 } 778 779 list_init(&rq->buffers); 780 rq->bo = NULL; 781 rq->ring = 0; 782 783 return rq; 784} 785 786static void __kgem_request_free(struct kgem_request *rq) 787{ 788 _list_del(&rq->list); 789 if (DBG_NO_MALLOC_CACHE) { 790 free(rq); 791 } else { 792 *(struct kgem_request **)rq = __kgem_freed_request; 793 __kgem_freed_request = rq; 794 } 795} 796 797static struct list *inactive(struct kgem *kgem, int num_pages) 798{ 799 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); 800 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); 801 return &kgem->inactive[cache_bucket(num_pages)]; 802} 803 804static struct list *active(struct kgem *kgem, int num_pages, int tiling) 805{ 806 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); 807 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); 808 return &kgem->active[cache_bucket(num_pages)][tiling]; 809} 810 811static size_t 812agp_aperture_size(struct pci_device *dev, unsigned gen) 813{ 814 /* XXX assume that only future chipsets are unknown and follow 815 * the post gen2 PCI layout. 816 */ 817 return dev->regions[gen < 030 ? 0 : 2].size; 818} 819 820static size_t 821total_ram_size(void) 822{ 823#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM 824 struct sysinfo info; 825 if (sysinfo(&info) == 0) 826 return info.totalram * info.mem_unit; 827#endif 828 829#ifdef _SC_PHYS_PAGES 830 return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); 831#endif 832 833 return 0; 834} 835 836static unsigned 837cpu_cache_size__cpuid4(void) 838{ 839 /* Deterministic Cache Parameters (Function 04h)": 840 * When EAX is initialized to a value of 4, the CPUID instruction 841 * returns deterministic cache information in the EAX, EBX, ECX 842 * and EDX registers. This function requires ECX be initialized 843 * with an index which indicates which cache to return information 844 * about. The OS is expected to call this function (CPUID.4) with 845 * ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches. 846 * The order in which the caches are returned is not specified 847 * and may change at Intel's discretion. 
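 *
 * As a worked example of the multiplication below: a part reporting
 * 16 ways, 1 partition, 64-byte lines and 8192 sets has a
 * 16 * 1 * 64 * 8192 = 8 MiB last-level cache.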
848 * 849 * Calculating the Cache Size in bytes: 850 * = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1) 851 */ 852 853 unsigned int eax, ebx, ecx, edx; 854 unsigned int llc_size = 0; 855 int cnt; 856 857 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4) 858 return 0; 859 860 cnt = 0; 861 do { 862 unsigned associativity, line_partitions, line_size, sets; 863 864 __cpuid_count(4, cnt++, eax, ebx, ecx, edx); 865 866 if ((eax & 0x1f) == 0) 867 break; 868 869 associativity = ((ebx >> 22) & 0x3ff) + 1; 870 line_partitions = ((ebx >> 12) & 0x3ff) + 1; 871 line_size = (ebx & 0xfff) + 1; 872 sets = ecx + 1; 873 874 llc_size = associativity * line_partitions * line_size * sets; 875 } while (1); 876 877 return llc_size; 878} 879 880static unsigned 881cpu_cache_size(void) 882{ 883 unsigned size; 884 FILE *file; 885 886 size = cpu_cache_size__cpuid4(); 887 if (size) 888 return size; 889 890 file = fopen("/proc/cpuinfo", "r"); 891 if (file) { 892 size_t len = 0; 893 char *line = NULL; 894 while (getline(&line, &len, file) != -1) { 895 int kb; 896 if (sscanf(line, "cache size : %d KB", &kb) == 1) { 897 /* Paranoid check against gargantuan caches */ 898 if (kb <= 1<<20) 899 size = kb * 1024; 900 break; 901 } 902 } 903 free(line); 904 fclose(file); 905 } 906 907 if (size == 0) 908 size = 64 * 1024; 909 910 return size; 911} 912 913static int gem_param(struct kgem *kgem, int name) 914{ 915 drm_i915_getparam_t gp; 916 int v = -1; /* No param uses the sign bit, reserve it for errors */ 917 918 VG_CLEAR(gp); 919 gp.param = name; 920 gp.value = &v; 921 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp)) 922 return -1; 923 924 VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); 925 return v; 926} 927 928static bool test_has_execbuffer2(struct kgem *kgem) 929{ 930 struct drm_i915_gem_execbuffer2 execbuf; 931 932 memset(&execbuf, 0, sizeof(execbuf)); 933 execbuf.buffer_count = 1; 934 935 return do_ioctl(kgem->fd, 936 DRM_IOCTL_I915_GEM_EXECBUFFER2, 937 &execbuf) == -EFAULT; 938} 939 940static bool test_has_no_reloc(struct kgem *kgem) 941{ 942 if (DBG_NO_FAST_RELOC) 943 return false; 944 945 return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; 946} 947 948static bool test_has_handle_lut(struct kgem *kgem) 949{ 950 if (DBG_NO_HANDLE_LUT) 951 return false; 952 953 return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; 954} 955 956static bool test_has_wt(struct kgem *kgem) 957{ 958 if (DBG_NO_WT) 959 return false; 960 961 return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0; 962} 963 964static bool test_has_semaphores_enabled(struct kgem *kgem) 965{ 966 FILE *file; 967 bool detected = false; 968 int ret; 969 970 if (DBG_NO_SEMAPHORES) 971 return false; 972 973 ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES); 974 if (ret != -1) 975 return ret > 0; 976 977 file = fopen("/sys/module/i915/parameters/semaphores", "r"); 978 if (file) { 979 int value; 980 if (fscanf(file, "%d", &value) == 1) 981 detected = value != 0; 982 fclose(file); 983 } 984 985 return detected; 986} 987 988static bool is_hw_supported(struct kgem *kgem, 989 struct pci_device *dev) 990{ 991 if (DBG_NO_HW) 992 return false; 993 994 if (!test_has_execbuffer2(kgem)) 995 return false; 996 997 if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */ 998 return kgem->has_blt; 999 1000 /* Although pre-855gm the GMCH is fubar, it works mostly. So 1001 * let the user decide through "NoAccel" whether or not to risk 1002 * hw acceleration. 
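 *
 * A reading aid, since it is easy to miss: generation numbers in this file
 * are octal and encode major/minor, so 030 is gen3, 040 is gen4, 060 is
 * gen6 (Sandybridge), 075 is gen7.5 (Haswell) and 0100 is gen8; a test such
 * as "gen >= 060" therefore reads "Sandybridge or newer".  A hypothetical
 * decoding helper, purely for illustration:
 */
#if 0
static inline void sketch_decode_gen(unsigned gen, int *major, int *minor)
{
	*major = gen >> 3;	/* 060 -> 6, 075 -> 7, 0100 -> 8 */
	*minor = gen & 7;	/* 075 -> 5 */
}
#endif
/* (comment continues)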
1003 */ 1004 1005 if (kgem->gen == 060 && dev && dev->revision < 8) { 1006 /* pre-production SNB with dysfunctional BLT */ 1007 return false; 1008 } 1009 1010 if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */ 1011 return kgem->has_blt; 1012 1013 return true; 1014} 1015 1016static bool test_has_relaxed_fencing(struct kgem *kgem) 1017{ 1018 if (kgem->gen < 040) { 1019 if (DBG_NO_RELAXED_FENCING) 1020 return false; 1021 1022 return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; 1023 } else 1024 return true; 1025} 1026 1027static bool test_has_llc(struct kgem *kgem) 1028{ 1029 int has_llc = -1; 1030 1031 if (DBG_NO_LLC) 1032 return false; 1033 1034#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */ 1035 has_llc = gem_param(kgem, I915_PARAM_HAS_LLC); 1036#endif 1037 if (has_llc == -1) { 1038 DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); 1039 has_llc = kgem->gen >= 060; 1040 } 1041 1042 return has_llc; 1043} 1044 1045static bool test_has_caching(struct kgem *kgem) 1046{ 1047 uint32_t handle; 1048 bool ret; 1049 1050 if (DBG_NO_CACHE_LEVEL) 1051 return false; 1052 1053 /* Incoherent blt and sampler hangs the GPU */ 1054 if (kgem->gen == 040) 1055 return false; 1056 1057 handle = gem_create(kgem->fd, 1); 1058 if (handle == 0) 1059 return false; 1060 1061 ret = gem_set_caching(kgem->fd, handle, UNCACHED); 1062 gem_close(kgem->fd, handle); 1063 return ret; 1064} 1065 1066static bool test_has_userptr(struct kgem *kgem) 1067{ 1068 uint32_t handle; 1069 void *ptr; 1070 1071 if (DBG_NO_USERPTR) 1072 return false; 1073 1074 /* Incoherent blt and sampler hangs the GPU */ 1075 if (kgem->gen == 040) 1076 return false; 1077 1078 if (kgem->gen >= 0100) 1079 return false; /* FIXME https://bugs.freedesktop.org/show_bug.cgi?id=79053 */ 1080 1081 if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) 1082 return false; 1083 1084 handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); 1085 gem_close(kgem->fd, handle); 1086 free(ptr); 1087 1088 return handle != 0; 1089} 1090 1091static bool test_has_create2(struct kgem *kgem) 1092{ 1093#if defined(USE_CREATE2) 1094 struct local_i915_gem_create2 args; 1095 1096 if (DBG_NO_CREATE2) 1097 return false; 1098 1099 memset(&args, 0, sizeof(args)); 1100 args.size = PAGE_SIZE; 1101 args.caching = DISPLAY; 1102 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0) 1103 gem_close(kgem->fd, args.handle); 1104 1105 return args.handle != 0; 1106#else 1107 return false; 1108#endif 1109} 1110 1111static bool test_has_secure_batches(struct kgem *kgem) 1112{ 1113 if (DBG_NO_SECURE_BATCHES) 1114 return false; 1115 1116 return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; 1117} 1118 1119static bool test_has_pinned_batches(struct kgem *kgem) 1120{ 1121 if (DBG_NO_PINNED_BATCHES) 1122 return false; 1123 1124 return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; 1125} 1126 1127static int kgem_get_screen_index(struct kgem *kgem) 1128{ 1129 struct sna *sna = container_of(kgem, struct sna, kgem); 1130 return sna->scrn->scrnIndex; 1131} 1132 1133static int __find_debugfs(struct kgem *kgem) 1134{ 1135 int i; 1136 1137 for (i = 0; i < DRM_MAX_MINOR; i++) { 1138 char path[80]; 1139 1140 sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); 1141 if (access(path, R_OK) == 0) 1142 return i; 1143 1144 sprintf(path, "/debug/dri/%d/i915_wedged", i); 1145 if (access(path, R_OK) == 0) 1146 return i; 1147 } 1148 1149 return -1; 1150} 1151 1152static int 
kgem_get_minor(struct kgem *kgem) 1153{ 1154 struct stat st; 1155 1156 if (fstat(kgem->fd, &st)) 1157 return __find_debugfs(kgem); 1158 1159 if (!S_ISCHR(st.st_mode)) 1160 return __find_debugfs(kgem); 1161 1162 return st.st_rdev & 0x63; 1163} 1164 1165static bool kgem_init_pinned_batches(struct kgem *kgem) 1166{ 1167 int count[2] = { 16, 4 }; 1168 int size[2] = { 1, 4 }; 1169 int n, i; 1170 1171 if (kgem->wedged) 1172 return true; 1173 1174 for (n = 0; n < ARRAY_SIZE(count); n++) { 1175 for (i = 0; i < count[n]; i++) { 1176 struct drm_i915_gem_pin pin; 1177 struct kgem_bo *bo; 1178 1179 VG_CLEAR(pin); 1180 1181 pin.handle = gem_create(kgem->fd, size[n]); 1182 if (pin.handle == 0) 1183 goto err; 1184 1185 DBG(("%s: new handle=%d, num_pages=%d\n", 1186 __FUNCTION__, pin.handle, size[n])); 1187 1188 bo = __kgem_bo_alloc(pin.handle, size[n]); 1189 if (bo == NULL) { 1190 gem_close(kgem->fd, pin.handle); 1191 goto err; 1192 } 1193 1194 pin.alignment = 0; 1195 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { 1196 gem_close(kgem->fd, pin.handle); 1197 free(bo); 1198 goto err; 1199 } 1200 bo->presumed_offset = pin.offset; 1201 debug_alloc__bo(kgem, bo); 1202 list_add(&bo->list, &kgem->pinned_batches[n]); 1203 } 1204 } 1205 1206 return true; 1207 1208err: 1209 for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) { 1210 while (!list_is_empty(&kgem->pinned_batches[n])) { 1211 kgem_bo_destroy(kgem, 1212 list_first_entry(&kgem->pinned_batches[n], 1213 struct kgem_bo, list)); 1214 } 1215 } 1216 1217 /* For simplicity populate the lists with a single unpinned bo */ 1218 for (n = 0; n < ARRAY_SIZE(count); n++) { 1219 struct kgem_bo *bo; 1220 uint32_t handle; 1221 1222 handle = gem_create(kgem->fd, size[n]); 1223 if (handle == 0) 1224 break; 1225 1226 bo = __kgem_bo_alloc(handle, size[n]); 1227 if (bo == NULL) { 1228 gem_close(kgem->fd, handle); 1229 break; 1230 } 1231 1232 debug_alloc__bo(kgem, bo); 1233 list_add(&bo->list, &kgem->pinned_batches[n]); 1234 } 1235 return false; 1236} 1237 1238static void kgem_init_swizzling(struct kgem *kgem) 1239{ 1240 struct drm_i915_gem_get_tiling tiling; 1241 1242 if (kgem->gen < 050) /* bit17 swizzling :( */ 1243 return; 1244 1245 VG_CLEAR(tiling); 1246 tiling.handle = gem_create(kgem->fd, 1); 1247 if (!tiling.handle) 1248 return; 1249 1250 if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512)) 1251 goto out; 1252 1253 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) 1254 goto out; 1255 1256 choose_memcpy_tiled_x(kgem, tiling.swizzle_mode); 1257out: 1258 gem_close(kgem->fd, tiling.handle); 1259} 1260 1261 1262void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) 1263{ 1264 struct drm_i915_gem_get_aperture aperture; 1265 size_t totalram; 1266 unsigned half_gpu_max; 1267 unsigned int i, j; 1268 1269 DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); 1270 1271 kgem->fd = fd; 1272 kgem->gen = gen; 1273 1274 list_init(&kgem->requests[0]); 1275 list_init(&kgem->requests[1]); 1276 list_init(&kgem->batch_buffers); 1277 list_init(&kgem->active_buffers); 1278 list_init(&kgem->flushing); 1279 list_init(&kgem->large); 1280 list_init(&kgem->large_inactive); 1281 list_init(&kgem->snoop); 1282 list_init(&kgem->scanout); 1283 for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++) 1284 list_init(&kgem->pinned_batches[i]); 1285 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) 1286 list_init(&kgem->inactive[i]); 1287 for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { 1288 for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) 1289 
list_init(&kgem->active[i][j]); 1290 } 1291 for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { 1292 for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) 1293 list_init(&kgem->vma[i].inactive[j]); 1294 } 1295 kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; 1296 kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; 1297 1298 kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; 1299 DBG(("%s: has BLT ring? %d\n", __FUNCTION__, 1300 kgem->has_blt)); 1301 1302 kgem->has_relaxed_delta = 1303 gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; 1304 DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, 1305 kgem->has_relaxed_delta)); 1306 1307 kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); 1308 DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, 1309 kgem->has_relaxed_fencing)); 1310 1311 kgem->has_llc = test_has_llc(kgem); 1312 DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__, 1313 kgem->has_llc)); 1314 1315 kgem->has_wt = test_has_wt(kgem); 1316 DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__, 1317 kgem->has_wt)); 1318 1319 kgem->has_caching = test_has_caching(kgem); 1320 DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, 1321 kgem->has_caching)); 1322 1323 kgem->has_userptr = test_has_userptr(kgem); 1324 DBG(("%s: has userptr? %d\n", __FUNCTION__, 1325 kgem->has_userptr)); 1326 1327 kgem->has_create2 = test_has_create2(kgem); 1328 DBG(("%s: has create2? %d\n", __FUNCTION__, 1329 kgem->has_create2)); 1330 1331 kgem->has_no_reloc = test_has_no_reloc(kgem); 1332 DBG(("%s: has no-reloc? %d\n", __FUNCTION__, 1333 kgem->has_no_reloc)); 1334 1335 kgem->has_handle_lut = test_has_handle_lut(kgem); 1336 DBG(("%s: has handle-lut? %d\n", __FUNCTION__, 1337 kgem->has_handle_lut)); 1338 1339 kgem->has_semaphores = false; 1340 if (kgem->has_blt && test_has_semaphores_enabled(kgem)) 1341 kgem->has_semaphores = true; 1342 DBG(("%s: semaphores enabled? %d\n", __FUNCTION__, 1343 kgem->has_semaphores)); 1344 1345 kgem->can_blt_cpu = gen >= 030; 1346 DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, 1347 kgem->can_blt_cpu)); 1348 1349 kgem->can_render_y = gen != 021 && (gen >> 3) != 4; 1350 DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__, 1351 kgem->can_render_y)); 1352 1353 kgem->has_secure_batches = test_has_secure_batches(kgem); 1354 DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, 1355 kgem->has_secure_batches)); 1356 1357 kgem->has_pinned_batches = test_has_pinned_batches(kgem); 1358 DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? 
%d\n", __FUNCTION__, 1359 kgem->has_pinned_batches)); 1360 1361 if (!is_hw_supported(kgem, dev)) { 1362 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, 1363 "Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); 1364 kgem->wedged = 1; 1365 } else if (__kgem_throttle(kgem, false)) { 1366 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, 1367 "Detected a hung GPU, disabling acceleration.\n"); 1368 kgem->wedged = 1; 1369 } 1370 1371 kgem->batch_size = ARRAY_SIZE(kgem->batch); 1372 if (gen == 020 && !kgem->has_pinned_batches) 1373 /* Limited to what we can pin */ 1374 kgem->batch_size = 4*1024; 1375 if (gen == 022) 1376 /* 865g cannot handle a batch spanning multiple pages */ 1377 kgem->batch_size = PAGE_SIZE / sizeof(uint32_t); 1378 if (gen >= 070) 1379 kgem->batch_size = 16*1024; 1380 if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) 1381 kgem->batch_size = 4*1024; 1382 1383 if (!kgem_init_pinned_batches(kgem) && gen == 020) { 1384 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, 1385 "Unable to reserve memory for GPU, disabling acceleration.\n"); 1386 kgem->wedged = 1; 1387 } 1388 1389 DBG(("%s: maximum batch size? %d\n", __FUNCTION__, 1390 kgem->batch_size)); 1391 1392 kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; 1393 DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n", 1394 __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages)); 1395 1396 kgem->next_request = __kgem_request_alloc(kgem); 1397 1398 DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__, 1399 !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching), 1400 kgem->has_llc, kgem->has_caching, kgem->has_userptr)); 1401 1402 VG_CLEAR(aperture); 1403 aperture.aper_size = 0; 1404 (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); 1405 if (aperture.aper_size == 0) 1406 aperture.aper_size = 64*1024*1024; 1407 1408 DBG(("%s: aperture size %lld, available now %lld\n", 1409 __FUNCTION__, 1410 (long long)aperture.aper_size, 1411 (long long)aperture.aper_available_size)); 1412 1413 kgem->aperture_total = aperture.aper_size; 1414 kgem->aperture_high = aperture.aper_size * 3/4; 1415 kgem->aperture_low = aperture.aper_size * 1/3; 1416 if (gen < 033) { 1417 /* Severe alignment penalties */ 1418 kgem->aperture_high /= 2; 1419 kgem->aperture_low /= 2; 1420 } 1421 DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, 1422 kgem->aperture_low, kgem->aperture_low / (1024*1024), 1423 kgem->aperture_high, kgem->aperture_high / (1024*1024))); 1424 1425 kgem->aperture_mappable = 256 * 1024 * 1024; 1426 if (dev != NULL) 1427 kgem->aperture_mappable = agp_aperture_size(dev, gen); 1428 if (kgem->aperture_mappable == 0 || 1429 kgem->aperture_mappable > aperture.aper_size) 1430 kgem->aperture_mappable = aperture.aper_size; 1431 DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, 1432 kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); 1433 1434 kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable); 1435 DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__, 1436 kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024))); 1437 1438 kgem->buffer_size = 64 * 1024; 1439 while (kgem->buffer_size < kgem->aperture_mappable >> 10) 1440 kgem->buffer_size *= 2; 1441 if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages) 1442 kgem->buffer_size = kgem->half_cpu_cache_pages << 12; 1443 kgem->buffer_size = 1 << __fls(kgem->buffer_size); 1444 DBG(("%s: buffer size=%d [%d KiB]\n", 
	     __FUNCTION__, kgem->buffer_size, kgem->buffer_size / 1024));
	assert(kgem->buffer_size);

	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
	kgem->max_gpu_size = kgem->max_object_size;
	if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
		kgem->max_gpu_size = MAX_CACHE_SIZE;

	totalram = total_ram_size();
	if (totalram == 0) {
		DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
		     __FUNCTION__));
		totalram = kgem->aperture_total;
	}
	DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
	if (kgem->max_object_size > totalram / 2)
		kgem->max_object_size = totalram / 2;
	if (kgem->max_gpu_size > totalram / 4)
		kgem->max_gpu_size = totalram / 4;

	if (kgem->aperture_high > totalram / 2) {
		kgem->aperture_high = totalram / 2;
		kgem->aperture_low = kgem->aperture_high / 4;
		DBG(("%s: reduced aperture watermarks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
		     kgem->aperture_low, kgem->aperture_low / (1024*1024),
		     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
	}

	kgem->max_cpu_size = kgem->max_object_size;

	half_gpu_max = kgem->max_gpu_size / 2;
	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
	if (kgem->max_copy_tile_size > half_gpu_max)
		kgem->max_copy_tile_size = half_gpu_max;

	if (kgem->has_llc)
		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
	else
		kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
	if (kgem->max_upload_tile_size > half_gpu_max)
		kgem->max_upload_tile_size = half_gpu_max;
	if (kgem->max_upload_tile_size > kgem->aperture_high/2)
		kgem->max_upload_tile_size = kgem->aperture_high/2;
	if (kgem->max_upload_tile_size > kgem->aperture_low)
		kgem->max_upload_tile_size = kgem->aperture_low;
	if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
		kgem->max_upload_tile_size = 16*PAGE_SIZE;

	kgem->large_object_size = MAX_CACHE_SIZE;
	if (kgem->large_object_size > half_gpu_max)
		kgem->large_object_size = half_gpu_max;
	if (kgem->max_copy_tile_size > kgem->aperture_high/2)
		kgem->max_copy_tile_size = kgem->aperture_high/2;
	if (kgem->max_copy_tile_size > kgem->aperture_low)
		kgem->max_copy_tile_size = kgem->aperture_low;
	if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
		kgem->max_copy_tile_size = 16*PAGE_SIZE;

	if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
		if (kgem->large_object_size > kgem->max_cpu_size)
			kgem->large_object_size = kgem->max_cpu_size;
	} else
		kgem->max_cpu_size = 0;
	if (DBG_NO_CPU)
		kgem->max_cpu_size = 0;

	DBG(("%s: maximum object size=%d\n",
	     __FUNCTION__, kgem->max_object_size));
	DBG(("%s: large object threshold=%d\n",
	     __FUNCTION__, kgem->large_object_size));
	DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
	     __FUNCTION__,
	     kgem->max_gpu_size, kgem->max_cpu_size,
	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));

	/* Convert the aperture thresholds to pages */
	kgem->aperture_mappable /= PAGE_SIZE;
	kgem->aperture_fenceable /= PAGE_SIZE;
	kgem->aperture_low /= PAGE_SIZE;
	kgem->aperture_high /= PAGE_SIZE;
	kgem->aperture_total /= PAGE_SIZE;

	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
	if ((int)kgem->fence_max < 0)
		kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); 1531 1532 kgem->batch_flags_base = 0; 1533 if (kgem->has_no_reloc) 1534 kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC; 1535 if (kgem->has_handle_lut) 1536 kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; 1537 if (kgem->has_pinned_batches) 1538 kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; 1539 1540 kgem_init_swizzling(kgem); 1541} 1542 1543/* XXX hopefully a good approximation */ 1544static uint32_t kgem_get_unique_id(struct kgem *kgem) 1545{ 1546 uint32_t id; 1547 id = ++kgem->unique_id; 1548 if (id == 0) 1549 id = ++kgem->unique_id; 1550 return id; 1551} 1552 1553inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) 1554{ 1555 if (flags & CREATE_PRIME) 1556 return 256; 1557 if (flags & CREATE_SCANOUT) 1558 return 64; 1559 return 8; 1560} 1561 1562void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch, 1563 int *tile_width, int *tile_height, int *tile_size) 1564{ 1565 if (kgem->gen <= 030) { 1566 if (tiling) { 1567 if (kgem->gen < 030) { 1568 *tile_width = 128; 1569 *tile_height = 16; 1570 *tile_size = 2048; 1571 } else { 1572 *tile_width = 512; 1573 *tile_height = 8; 1574 *tile_size = 4096; 1575 } 1576 } else { 1577 *tile_width = 1; 1578 *tile_height = 1; 1579 *tile_size = 1; 1580 } 1581 } else switch (tiling) { 1582 default: 1583 case I915_TILING_NONE: 1584 *tile_width = 1; 1585 *tile_height = 1; 1586 *tile_size = 1; 1587 break; 1588 case I915_TILING_X: 1589 *tile_width = 512; 1590 *tile_height = 8; 1591 *tile_size = 4096; 1592 break; 1593 case I915_TILING_Y: 1594 *tile_width = 128; 1595 *tile_height = 32; 1596 *tile_size = 4096; 1597 break; 1598 } 1599 1600 /* Force offset alignment to tile-row */ 1601 if (tiling && kgem->gen < 033) 1602 *tile_width = pitch; 1603} 1604 1605static uint32_t kgem_surface_size(struct kgem *kgem, 1606 bool relaxed_fencing, 1607 unsigned flags, 1608 uint32_t width, 1609 uint32_t height, 1610 uint32_t bpp, 1611 uint32_t tiling, 1612 uint32_t *pitch) 1613{ 1614 uint32_t tile_width, tile_height; 1615 uint32_t size; 1616 1617 assert(width <= MAXSHORT); 1618 assert(height <= MAXSHORT); 1619 assert(bpp >= 8); 1620 1621 if (kgem->gen <= 030) { 1622 if (tiling) { 1623 if (kgem->gen < 030) { 1624 tile_width = 128; 1625 tile_height = 16; 1626 } else { 1627 tile_width = 512; 1628 tile_height = 8; 1629 } 1630 } else { 1631 tile_width = 2 * bpp >> 3; 1632 tile_width = ALIGN(tile_width, 1633 kgem_pitch_alignment(kgem, flags)); 1634 tile_height = 1; 1635 } 1636 } else switch (tiling) { 1637 default: 1638 case I915_TILING_NONE: 1639 tile_width = 2 * bpp >> 3; 1640 tile_width = ALIGN(tile_width, 1641 kgem_pitch_alignment(kgem, flags)); 1642 tile_height = 1; 1643 break; 1644 1645 case I915_TILING_X: 1646 tile_width = 512; 1647 tile_height = 8; 1648 break; 1649 case I915_TILING_Y: 1650 tile_width = 128; 1651 tile_height = 32; 1652 break; 1653 } 1654 /* XXX align to an even tile row */ 1655 if (!kgem->has_relaxed_fencing) 1656 tile_height *= 2; 1657 1658 *pitch = ALIGN(width * bpp / 8, tile_width); 1659 height = ALIGN(height, tile_height); 1660 DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n", 1661 __FUNCTION__, tile_width, tile_height, *pitch, height)); 1662 1663 if (kgem->gen >= 040) 1664 return PAGE_ALIGN(*pitch * height); 1665 1666 /* If it is too wide for the blitter, don't even bother. 
*/ 1667 if (tiling != I915_TILING_NONE) { 1668 if (*pitch > 8192) { 1669 DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n", 1670 __FUNCTION__, *pitch, 8192)); 1671 return 0; 1672 } 1673 1674 for (size = tile_width; size < *pitch; size <<= 1) 1675 ; 1676 *pitch = size; 1677 } else { 1678 if (*pitch >= 32768) { 1679 DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n", 1680 __FUNCTION__, *pitch, 32767)); 1681 return 0; 1682 } 1683 } 1684 1685 size = *pitch * height; 1686 if (relaxed_fencing || tiling == I915_TILING_NONE) 1687 return PAGE_ALIGN(size); 1688 1689 /* We need to allocate a pot fence region for a tiled buffer. */ 1690 if (kgem->gen < 030) 1691 tile_width = 512 * 1024; 1692 else 1693 tile_width = 1024 * 1024; 1694 while (tile_width < size) 1695 tile_width *= 2; 1696 return tile_width; 1697} 1698 1699bool kgem_check_surface_size(struct kgem *kgem, 1700 uint32_t width, 1701 uint32_t height, 1702 uint32_t bpp, 1703 uint32_t tiling, 1704 uint32_t pitch, 1705 uint32_t size) 1706{ 1707 uint32_t min_size, min_pitch; 1708 int tile_width, tile_height, tile_size; 1709 1710 DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n", 1711 __FUNCTION__, width, height, bpp, tiling, pitch, size)); 1712 1713 if (pitch & 3) 1714 return false; 1715 1716 min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0, 1717 width, height, bpp, tiling, 1718 &min_pitch); 1719 1720 DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size)); 1721 1722 if (size < min_size) 1723 return false; 1724 1725 if (pitch < min_pitch) 1726 return false; 1727 1728 kgem_get_tile_size(kgem, tiling, min_pitch, 1729 &tile_width, &tile_height, &tile_size); 1730 1731 DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size)); 1732 if (pitch & (tile_width - 1)) 1733 return false; 1734 if (size & (tile_size - 1)) 1735 return false; 1736 1737 return true; 1738} 1739 1740static uint32_t kgem_aligned_height(struct kgem *kgem, 1741 uint32_t height, uint32_t tiling) 1742{ 1743 uint32_t tile_height; 1744 1745 if (kgem->gen <= 030) { 1746 tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1; 1747 } else switch (tiling) { 1748 /* XXX align to an even tile row */ 1749 default: 1750 case I915_TILING_NONE: 1751 tile_height = 1; 1752 break; 1753 case I915_TILING_X: 1754 tile_height = 8; 1755 break; 1756 case I915_TILING_Y: 1757 tile_height = 32; 1758 break; 1759 } 1760 1761 /* XXX align to an even tile row */ 1762 if (!kgem->has_relaxed_fencing) 1763 tile_height *= 2; 1764 1765 return ALIGN(height, tile_height); 1766} 1767 1768static struct drm_i915_gem_exec_object2 * 1769kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) 1770{ 1771 struct drm_i915_gem_exec_object2 *exec; 1772 1773 DBG(("%s: handle=%d, index=%d\n", 1774 __FUNCTION__, bo->handle, kgem->nexec)); 1775 1776 assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); 1777 bo->target_handle = kgem->has_handle_lut ? 
kgem->nexec : bo->handle; 1778 exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); 1779 exec->handle = bo->handle; 1780 exec->offset = bo->presumed_offset; 1781 1782 kgem->aperture += num_pages(bo); 1783 1784 return exec; 1785} 1786 1787static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) 1788{ 1789 assert(bo->refcnt); 1790 assert(bo->proxy == NULL); 1791 1792 bo->exec = kgem_add_handle(kgem, bo); 1793 bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring); 1794 1795 list_move_tail(&bo->request, &kgem->next_request->buffers); 1796 if (bo->io && !list_is_empty(&bo->list)) 1797 list_move(&bo->list, &kgem->batch_buffers); 1798 1799 /* XXX is it worth working around gcc here? */ 1800 kgem->flush |= bo->flush; 1801} 1802 1803static uint32_t kgem_end_batch(struct kgem *kgem) 1804{ 1805 kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; 1806 if (kgem->nbatch & 1) 1807 kgem->batch[kgem->nbatch++] = MI_NOOP; 1808 1809 return kgem->nbatch; 1810} 1811 1812static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) 1813{ 1814 int n; 1815 1816 assert(kgem->nreloc__self <= 256); 1817 if (kgem->nreloc__self == 0) 1818 return; 1819 1820 for (n = 0; n < kgem->nreloc__self; n++) { 1821 int i = kgem->reloc__self[n]; 1822 assert(kgem->reloc[i].target_handle == ~0U); 1823 kgem->reloc[i].target_handle = bo->target_handle; 1824 kgem->reloc[i].presumed_offset = bo->presumed_offset; 1825 kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] = 1826 kgem->reloc[i].delta + bo->presumed_offset; 1827 } 1828 1829 if (n == 256) { 1830 for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) { 1831 if (kgem->reloc[n].target_handle == ~0U) { 1832 kgem->reloc[n].target_handle = bo->target_handle; 1833 kgem->reloc[n].presumed_offset = bo->presumed_offset; 1834 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = 1835 kgem->reloc[n].delta + bo->presumed_offset; 1836 } 1837 } 1838 1839 } 1840} 1841 1842static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) 1843{ 1844 struct kgem_bo_binding *b; 1845 1846 b = bo->binding.next; 1847 while (b) { 1848 struct kgem_bo_binding *next = b->next; 1849 free(b); 1850 b = next; 1851 } 1852} 1853 1854static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) 1855{ 1856 if (bo->scanout && bo->delta) { 1857 DBG(("%s: releasing fb=%d for handle=%d\n", 1858 __FUNCTION__, bo->delta, bo->handle)); 1859 /* XXX will leak if we are not DRM_MASTER. 
*shrug* */ 1860 do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); 1861 bo->delta = 0; 1862 } 1863} 1864 1865static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) 1866{ 1867 DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); 1868 assert(bo->refcnt == 0); 1869 assert(bo->proxy == NULL); 1870 assert(bo->exec == NULL); 1871 assert(!bo->snoop || bo->rq == NULL); 1872 1873#ifdef DEBUG_MEMORY 1874 kgem->debug_memory.bo_allocs--; 1875 kgem->debug_memory.bo_bytes -= bytes(bo); 1876#endif 1877 1878 kgem_bo_binding_free(kgem, bo); 1879 kgem_bo_rmfb(kgem, bo); 1880 1881 if (IS_USER_MAP(bo->map__cpu)) { 1882 assert(bo->rq == NULL); 1883 assert(!__kgem_busy(kgem, bo->handle)); 1884 assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush); 1885 if (!(bo->io || bo->flush)) { 1886 DBG(("%s: freeing snooped base\n", __FUNCTION__)); 1887 assert(bo != MAP(bo->map__cpu)); 1888 free(MAP(bo->map__cpu)); 1889 } 1890 bo->map__cpu = NULL; 1891 } 1892 1893 DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n", 1894 __FUNCTION__, bo->map__gtt, bo->map__cpu, 1895 bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count)); 1896 1897 if (!list_is_empty(&bo->vma)) { 1898 _list_del(&bo->vma); 1899 kgem->vma[bo->map__gtt == NULL].count--; 1900 } 1901 1902 if (bo->map__gtt) 1903 munmap(MAP(bo->map__gtt), bytes(bo)); 1904 if (bo->map__cpu) 1905 munmap(MAP(bo->map__cpu), bytes(bo)); 1906 1907 _list_del(&bo->list); 1908 _list_del(&bo->request); 1909 gem_close(kgem->fd, bo->handle); 1910 1911 if (!bo->io && !DBG_NO_MALLOC_CACHE) { 1912 *(struct kgem_bo **)bo = __kgem_freed_bo; 1913 __kgem_freed_bo = bo; 1914 } else 1915 free(bo); 1916} 1917 1918inline static void kgem_bo_move_to_inactive(struct kgem *kgem, 1919 struct kgem_bo *bo) 1920{ 1921 DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle)); 1922 1923 assert(bo->refcnt == 0); 1924 assert(bo->reusable); 1925 assert(bo->rq == NULL); 1926 assert(bo->exec == NULL); 1927 assert(bo->domain != DOMAIN_GPU); 1928 assert(!bo->proxy); 1929 assert(!bo->io); 1930 assert(!bo->scanout); 1931 assert(!bo->snoop); 1932 assert(!bo->flush); 1933 assert(!bo->needs_flush); 1934 assert(list_is_empty(&bo->vma)); 1935 assert_tiling(kgem, bo); 1936 assert_cacheing(kgem, bo); 1937 ASSERT_IDLE(kgem, bo->handle); 1938 1939 if (bucket(bo) >= NUM_CACHE_BUCKETS) { 1940 if (bo->map__gtt) { 1941 munmap(MAP(bo->map__gtt), bytes(bo)); 1942 bo->map__gtt = NULL; 1943 } 1944 1945 list_move(&bo->list, &kgem->large_inactive); 1946 } else { 1947 assert(bo->flush == false); 1948 list_move(&bo->list, &kgem->inactive[bucket(bo)]); 1949 if (bo->map__gtt) { 1950 if (!kgem_bo_can_map(kgem, bo)) { 1951 munmap(MAP(bo->map__gtt), bytes(bo)); 1952 bo->map__gtt = NULL; 1953 } 1954 if (bo->map__gtt) { 1955 list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]); 1956 kgem->vma[0].count++; 1957 } 1958 } 1959 if (bo->map__cpu && !bo->map__gtt) { 1960 list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]); 1961 kgem->vma[1].count++; 1962 } 1963 } 1964 1965 kgem->need_expire = true; 1966} 1967 1968static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) 1969{ 1970 struct kgem_bo *base; 1971 1972 if (!bo->io) 1973 return bo; 1974 1975 assert(!bo->snoop); 1976 if (__kgem_freed_bo) { 1977 base = __kgem_freed_bo; 1978 __kgem_freed_bo = *(struct kgem_bo **)base; 1979 } else 1980 base = malloc(sizeof(*base)); 1981 if (base) { 1982 DBG(("%s: transferring io handle=%d to bo\n", 1983 __FUNCTION__, bo->handle)); 1984 /* transfer the handle to a minimum bo */ 1985 
memcpy(base, bo, sizeof(*base)); 1986 base->io = false; 1987 list_init(&base->list); 1988 list_replace(&bo->request, &base->request); 1989 list_replace(&bo->vma, &base->vma); 1990 free(bo); 1991 bo = base; 1992 } else 1993 bo->reusable = false; 1994 1995 return bo; 1996} 1997 1998inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, 1999 struct kgem_bo *bo) 2000{ 2001 DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle)); 2002 2003 list_del(&bo->list); 2004 assert(bo->rq == NULL); 2005 assert(bo->exec == NULL); 2006 if (!list_is_empty(&bo->vma)) { 2007 assert(bo->map__gtt || bo->map__cpu); 2008 list_del(&bo->vma); 2009 kgem->vma[bo->map__gtt == NULL].count--; 2010 } 2011} 2012 2013inline static void kgem_bo_remove_from_active(struct kgem *kgem, 2014 struct kgem_bo *bo) 2015{ 2016 DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle)); 2017 2018 list_del(&bo->list); 2019 assert(bo->rq != NULL); 2020 if (RQ(bo->rq) == (void *)kgem) { 2021 assert(bo->exec == NULL); 2022 list_del(&bo->request); 2023 } 2024 assert(list_is_empty(&bo->vma)); 2025} 2026 2027static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo) 2028{ 2029 struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; 2030 2031 DBG(("%s: size=%d, offset=%d, parent used=%d\n", 2032 __FUNCTION__, bo->size.bytes, bo->delta, io->used)); 2033 2034 if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used) 2035 io->used = bo->delta; 2036} 2037 2038static bool check_scanout_size(struct kgem *kgem, 2039 struct kgem_bo *bo, 2040 int width, int height) 2041{ 2042 struct drm_mode_fb_cmd info; 2043 2044 assert(bo->scanout); 2045 2046 VG_CLEAR(info); 2047 info.fb_id = bo->delta; 2048 2049 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info)) 2050 return false; 2051 2052 gem_close(kgem->fd, info.handle); 2053 2054 if (width != info.width || height != info.height) { 2055 DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n", 2056 __FUNCTION__, 2057 info.fb_id, info.width, info.height, 2058 width, height)); 2059 return false; 2060 } 2061 2062 return true; 2063} 2064 2065static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) 2066{ 2067 assert(bo->refcnt == 0); 2068 assert(bo->scanout); 2069 assert(!bo->flush); 2070 assert(!bo->snoop); 2071 assert(!bo->io); 2072 2073 if (bo->purged) { /* for stolen fb */ 2074 if (!bo->exec) { 2075 DBG(("%s: discarding purged scanout - stolen?\n", 2076 __FUNCTION__)); 2077 kgem_bo_free(kgem, bo); 2078 } 2079 return; 2080 } 2081 2082 DBG(("%s: moving %d [fb %d] to scanout cache, active? 
%d\n", 2083 __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL)); 2084 if (bo->rq) 2085 list_move_tail(&bo->list, &kgem->scanout); 2086 else 2087 list_move(&bo->list, &kgem->scanout); 2088 2089 kgem->need_expire = true; 2090 2091} 2092 2093static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) 2094{ 2095 assert(bo->reusable); 2096 assert(!bo->scanout); 2097 assert(!bo->flush); 2098 assert(!bo->needs_flush); 2099 assert(bo->refcnt == 0); 2100 assert(bo->exec == NULL); 2101 2102 if (DBG_NO_SNOOP_CACHE) { 2103 kgem_bo_free(kgem, bo); 2104 return; 2105 } 2106 2107 if (num_pages(bo) > kgem->max_cpu_size >> 13) { 2108 DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n", 2109 __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13)); 2110 kgem_bo_free(kgem, bo); 2111 return; 2112 } 2113 2114 assert(bo->tiling == I915_TILING_NONE); 2115 assert(bo->rq == NULL); 2116 2117 DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle)); 2118 list_add(&bo->list, &kgem->snoop); 2119 kgem->need_expire = true; 2120} 2121 2122static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo) 2123{ 2124 bool retired = false; 2125 2126 DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle)); 2127 2128 if (bo->prime) { 2129 DBG(("%s: discarding imported prime handle=%d\n", 2130 __FUNCTION__, bo->handle)); 2131 kgem_bo_free(kgem, bo); 2132 } else if (bo->snoop) { 2133 kgem_bo_move_to_snoop(kgem, bo); 2134 } else if (bo->scanout) { 2135 kgem_bo_move_to_scanout(kgem, bo); 2136 } else if ((bo = kgem_bo_replace_io(bo))->reusable && 2137 kgem_bo_set_purgeable(kgem, bo)) { 2138 kgem_bo_move_to_inactive(kgem, bo); 2139 retired = true; 2140 } else 2141 kgem_bo_free(kgem, bo); 2142 2143 return retired; 2144} 2145 2146static struct kgem_bo * 2147search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) 2148{ 2149 struct kgem_bo *bo, *first = NULL; 2150 2151 DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags)); 2152 2153 if ((kgem->has_caching | kgem->has_userptr) == 0) 2154 return NULL; 2155 2156 if (list_is_empty(&kgem->snoop)) { 2157 DBG(("%s: inactive and cache empty\n", __FUNCTION__)); 2158 if (!__kgem_throttle_retire(kgem, flags)) { 2159 DBG(("%s: nothing retired\n", __FUNCTION__)); 2160 return NULL; 2161 } 2162 } 2163 2164 list_for_each_entry(bo, &kgem->snoop, list) { 2165 assert(bo->refcnt == 0); 2166 assert(bo->snoop); 2167 assert(!bo->scanout); 2168 assert(!bo->purged); 2169 assert(bo->proxy == NULL); 2170 assert(bo->tiling == I915_TILING_NONE); 2171 assert(bo->rq == NULL); 2172 assert(bo->exec == NULL); 2173 2174 if (num_pages > num_pages(bo)) 2175 continue; 2176 2177 if (num_pages(bo) > 2*num_pages) { 2178 if (first == NULL) 2179 first = bo; 2180 continue; 2181 } 2182 2183 list_del(&bo->list); 2184 bo->pitch = 0; 2185 bo->delta = 0; 2186 2187 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", 2188 __FUNCTION__, bo->handle, num_pages(bo))); 2189 return bo; 2190 } 2191 2192 if (first) { 2193 list_del(&first->list); 2194 first->pitch = 0; 2195 first->delta = 0; 2196 2197 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", 2198 __FUNCTION__, first->handle, num_pages(first))); 2199 return first; 2200 } 2201 2202 return NULL; 2203} 2204 2205void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) 2206{ 2207 if (kgem->nexec != 1 || bo->exec == NULL) 2208 return; 2209 2210 assert(bo); 2211 DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", 2212 __FUNCTION__, bo->handle)); 2213 2214 
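	/* This bo is the sole object referenced by the current batch, so the
	 * queued commands only touch it and the whole batch can be discarded.
	 * Hold a temporary reference across kgem_reset() so that dropping the
	 * request does not also release the bo itself.
	 */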
assert(bo->exec == &kgem->exec[0]); 2215 assert(kgem->exec[0].handle == bo->handle); 2216 assert(RQ(bo->rq) == kgem->next_request); 2217 2218 bo->refcnt++; 2219 kgem_reset(kgem); 2220 bo->refcnt--; 2221 2222 assert(kgem->nreloc == 0); 2223 assert(kgem->nexec == 0); 2224 assert(bo->exec == NULL); 2225} 2226 2227void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b) 2228{ 2229 if (kgem->nexec > 2) 2230 return; 2231 2232 if (kgem->nexec == 1) { 2233 if (a) 2234 kgem_bo_undo(kgem, a); 2235 if (b) 2236 kgem_bo_undo(kgem, b); 2237 return; 2238 } 2239 2240 if (a == NULL || b == NULL) 2241 return; 2242 if (a->exec == NULL || b->exec == NULL) 2243 return; 2244 2245 DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n", 2246 __FUNCTION__, a->handle, b->handle)); 2247 2248 assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]); 2249 assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle); 2250 assert(RQ(a->rq) == kgem->next_request); 2251 assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]); 2252 assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle); 2253 assert(RQ(b->rq) == kgem->next_request); 2254 2255 a->refcnt++; 2256 b->refcnt++; 2257 kgem_reset(kgem); 2258 b->refcnt--; 2259 a->refcnt--; 2260 2261 assert(kgem->nreloc == 0); 2262 assert(kgem->nexec == 0); 2263 assert(a->exec == NULL); 2264 assert(b->exec == NULL); 2265} 2266 2267static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) 2268{ 2269 DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); 2270 2271 assert(list_is_empty(&bo->list)); 2272 assert(bo->refcnt == 0); 2273 assert(bo->proxy == NULL); 2274 assert(bo->active_scanout == 0); 2275 assert_tiling(kgem, bo); 2276 2277 bo->binding.offset = 0; 2278 2279 if (DBG_NO_CACHE) 2280 goto destroy; 2281 2282 if (bo->prime) 2283 goto destroy; 2284 2285 if (bo->snoop && !bo->flush) { 2286 DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle)); 2287 assert(bo->reusable); 2288 assert(list_is_empty(&bo->list)); 2289 if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle)) 2290 __kgem_bo_clear_busy(bo); 2291 if (bo->rq == NULL) 2292 kgem_bo_move_to_snoop(kgem, bo); 2293 return; 2294 } 2295 if (!IS_USER_MAP(bo->map__cpu)) 2296 bo->flush = false; 2297 2298 if (bo->scanout) { 2299 kgem_bo_move_to_scanout(kgem, bo); 2300 return; 2301 } 2302 2303 if (bo->io) 2304 bo = kgem_bo_replace_io(bo); 2305 if (!bo->reusable) { 2306 DBG(("%s: handle=%d, not reusable\n", 2307 __FUNCTION__, bo->handle)); 2308 goto destroy; 2309 } 2310 2311 assert(list_is_empty(&bo->vma)); 2312 assert(list_is_empty(&bo->list)); 2313 assert(bo->flush == false); 2314 assert(bo->snoop == false); 2315 assert(bo->io == false); 2316 assert(bo->scanout == false); 2317 assert_cacheing(kgem, bo); 2318 2319 kgem_bo_undo(kgem, bo); 2320 assert(bo->refcnt == 0); 2321 2322 if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle)) 2323 __kgem_bo_clear_busy(bo); 2324 2325 if (bo->rq) { 2326 struct list *cache; 2327 2328 DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle)); 2329 if (bucket(bo) < NUM_CACHE_BUCKETS) 2330 cache = &kgem->active[bucket(bo)][bo->tiling]; 2331 else 2332 cache = &kgem->large; 2333 list_add(&bo->list, cache); 2334 return; 2335 } 2336 2337 assert(bo->exec == NULL); 2338 assert(list_is_empty(&bo->request)); 2339 2340 if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) { 2341 if (!kgem_bo_set_purgeable(kgem, bo)) 2342 goto destroy; 
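		/* On machines without an LLC, a bo left in the CPU domain is
		 * freed rather than cached, presumably because reusing it on
		 * the GPU would first require flushing it back out of the CPU
		 * caches.
		 */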
2343 2344 if (!kgem->has_llc && bo->domain == DOMAIN_CPU) 2345 goto destroy; 2346 2347 DBG(("%s: handle=%d, purged\n", 2348 __FUNCTION__, bo->handle)); 2349 } 2350 2351 kgem_bo_move_to_inactive(kgem, bo); 2352 return; 2353 2354destroy: 2355 if (!bo->exec) 2356 kgem_bo_free(kgem, bo); 2357} 2358 2359static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo) 2360{ 2361 assert(bo->refcnt); 2362 if (--bo->refcnt == 0) 2363 __kgem_bo_destroy(kgem, bo); 2364} 2365 2366static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo) 2367{ 2368 assert(bo->base.io); 2369 while (!list_is_empty(&bo->base.vma)) { 2370 struct kgem_bo *cached; 2371 2372 cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma); 2373 assert(cached->proxy == &bo->base); 2374 assert(cached != &bo->base); 2375 list_del(&cached->vma); 2376 2377 assert(*(struct kgem_bo **)cached->map__gtt == cached); 2378 *(struct kgem_bo **)cached->map__gtt = NULL; 2379 cached->map__gtt = NULL; 2380 2381 kgem_bo_destroy(kgem, cached); 2382 } 2383} 2384 2385void kgem_retire__buffers(struct kgem *kgem) 2386{ 2387 while (!list_is_empty(&kgem->active_buffers)) { 2388 struct kgem_buffer *bo = 2389 list_last_entry(&kgem->active_buffers, 2390 struct kgem_buffer, 2391 base.list); 2392 2393 DBG(("%s: handle=%d, busy? %d [%d]\n", 2394 __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL)); 2395 2396 assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request); 2397 if (bo->base.rq) 2398 break; 2399 2400 DBG(("%s: releasing upload cache for handle=%d? %d\n", 2401 __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma))); 2402 list_del(&bo->base.list); 2403 kgem_buffer_release(kgem, bo); 2404 kgem_bo_unref(kgem, &bo->base); 2405 } 2406} 2407 2408static bool kgem_retire__flushing(struct kgem *kgem) 2409{ 2410 struct kgem_bo *bo, *next; 2411 bool retired = false; 2412 2413 list_for_each_entry_safe(bo, next, &kgem->flushing, request) { 2414 assert(RQ(bo->rq) == (void *)kgem); 2415 assert(bo->exec == NULL); 2416 2417 if (__kgem_busy(kgem, bo->handle)) 2418 break; 2419 2420 __kgem_bo_clear_busy(bo); 2421 2422 if (bo->refcnt) 2423 continue; 2424 2425 retired |= kgem_bo_move_to_cache(kgem, bo); 2426 } 2427#if HAS_DEBUG_FULL 2428 { 2429 int count = 0; 2430 list_for_each_entry(bo, &kgem->flushing, request) 2431 count++; 2432 DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count)); 2433 } 2434#endif 2435 2436 kgem->need_retire |= !list_is_empty(&kgem->flushing); 2437 2438 return retired; 2439} 2440 2441static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) 2442{ 2443 bool retired = false; 2444 2445 DBG(("%s: request %d complete\n", 2446 __FUNCTION__, rq->bo->handle)); 2447 assert(RQ(rq->bo->rq) == rq); 2448 2449 while (!list_is_empty(&rq->buffers)) { 2450 struct kgem_bo *bo; 2451 2452 bo = list_first_entry(&rq->buffers, 2453 struct kgem_bo, 2454 request); 2455 2456 assert(RQ(bo->rq) == rq); 2457 assert(bo->exec == NULL); 2458 assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); 2459 2460 list_del(&bo->request); 2461 2462 if (bo->needs_flush) 2463 bo->needs_flush = __kgem_busy(kgem, bo->handle); 2464 if (bo->needs_flush) { 2465 DBG(("%s: moving %d to flushing\n", 2466 __FUNCTION__, bo->handle)); 2467 list_add(&bo->request, &kgem->flushing); 2468 bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq)); 2469 kgem->need_retire = true; 2470 continue; 2471 } 2472 2473 bo->domain = DOMAIN_NONE; 2474 bo->rq = NULL; 2475 if (bo->refcnt) 2476 continue; 2477 2478 retired |= 
kgem_bo_move_to_cache(kgem, bo); 2479 } 2480 2481 assert(rq->bo->rq == NULL); 2482 assert(rq->bo->exec == NULL); 2483 assert(list_is_empty(&rq->bo->request)); 2484 assert(rq->bo->refcnt > 0); 2485 2486 if (--rq->bo->refcnt == 0) { 2487 if (kgem_bo_set_purgeable(kgem, rq->bo)) { 2488 kgem_bo_move_to_inactive(kgem, rq->bo); 2489 retired = true; 2490 } else { 2491 DBG(("%s: closing %d\n", 2492 __FUNCTION__, rq->bo->handle)); 2493 kgem_bo_free(kgem, rq->bo); 2494 } 2495 } 2496 2497 __kgem_request_free(rq); 2498 return retired; 2499} 2500 2501static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) 2502{ 2503 bool retired = false; 2504 2505 while (!list_is_empty(&kgem->requests[ring])) { 2506 struct kgem_request *rq; 2507 2508 rq = list_first_entry(&kgem->requests[ring], 2509 struct kgem_request, 2510 list); 2511 assert(rq->ring == ring); 2512 if (__kgem_busy(kgem, rq->bo->handle)) 2513 break; 2514 2515 retired |= __kgem_retire_rq(kgem, rq); 2516 } 2517 2518#if HAS_DEBUG_FULL 2519 { 2520 struct kgem_bo *bo; 2521 int count = 0; 2522 2523 list_for_each_entry(bo, &kgem->requests[ring], request) 2524 count++; 2525 2526 bo = NULL; 2527 if (!list_is_empty(&kgem->requests[ring])) 2528 bo = list_first_entry(&kgem->requests[ring], 2529 struct kgem_request, 2530 list)->bo; 2531 2532 DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n", 2533 __FUNCTION__, ring, count, bo ? bo->handle : 0)); 2534 } 2535#endif 2536 2537 return retired; 2538} 2539 2540static bool kgem_retire__requests(struct kgem *kgem) 2541{ 2542 bool retired = false; 2543 int n; 2544 2545 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { 2546 retired |= kgem_retire__requests_ring(kgem, n); 2547 kgem->need_retire |= !list_is_empty(&kgem->requests[n]); 2548 } 2549 2550 return retired; 2551} 2552 2553bool kgem_retire(struct kgem *kgem) 2554{ 2555 bool retired = false; 2556 2557 DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire)); 2558 2559 kgem->need_retire = false; 2560 2561 retired |= kgem_retire__flushing(kgem); 2562 retired |= kgem_retire__requests(kgem); 2563 2564 DBG(("%s -- retired=%d, need_retire=%d\n", 2565 __FUNCTION__, retired, kgem->need_retire)); 2566 2567 kgem->retire(kgem); 2568 2569 return retired; 2570} 2571 2572bool __kgem_ring_is_idle(struct kgem *kgem, int ring) 2573{ 2574 struct kgem_request *rq; 2575 2576 assert(ring < ARRAY_SIZE(kgem->requests)); 2577 assert(!list_is_empty(&kgem->requests[ring])); 2578 2579 rq = list_last_entry(&kgem->requests[ring], 2580 struct kgem_request, list); 2581 assert(rq->ring == ring); 2582 if (__kgem_busy(kgem, rq->bo->handle)) { 2583 DBG(("%s: last requests handle=%d still busy\n", 2584 __FUNCTION__, rq->bo->handle)); 2585 return false; 2586 } 2587 2588 DBG(("%s: ring=%d idle (handle=%d)\n", 2589 __FUNCTION__, ring, rq->bo->handle)); 2590 2591 kgem_retire__requests_ring(kgem, ring); 2592 2593 assert(list_is_empty(&kgem->requests[ring])); 2594 return true; 2595} 2596 2597#if 0 2598static void kgem_commit__check_reloc(struct kgem *kgem) 2599{ 2600 struct kgem_request *rq = kgem->next_request; 2601 struct kgem_bo *bo; 2602 bool has_64bit = kgem->gen >= 0100; 2603 int i; 2604 2605 for (i = 0; i < kgem->nreloc; i++) { 2606 list_for_each_entry(bo, &rq->buffers, request) { 2607 if (bo->target_handle == kgem->reloc[i].target_handle) { 2608 uint64_t value = 0; 2609 gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 
8 : 4); 2610 assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta); 2611 break; 2612 } 2613 } 2614 } 2615} 2616#else 2617#define kgem_commit__check_reloc(kgem) 2618#endif 2619 2620#ifndef NDEBUG 2621static void kgem_commit__check_buffers(struct kgem *kgem) 2622{ 2623 struct kgem_buffer *bo; 2624 2625 list_for_each_entry(bo, &kgem->active_buffers, base.list) 2626 assert(bo->base.exec == NULL); 2627} 2628#else 2629#define kgem_commit__check_buffers(kgem) 2630#endif 2631 2632static void kgem_commit(struct kgem *kgem) 2633{ 2634 struct kgem_request *rq = kgem->next_request; 2635 struct kgem_bo *bo, *next; 2636 2637 kgem_commit__check_reloc(kgem); 2638 2639 list_for_each_entry_safe(bo, next, &rq->buffers, request) { 2640 assert(next->request.prev == &bo->request); 2641 2642 DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n", 2643 __FUNCTION__, bo->handle, bo->proxy != NULL, 2644 bo->gpu_dirty, bo->needs_flush, bo->snoop, 2645 (unsigned)bo->exec->offset)); 2646 2647 assert(bo->exec); 2648 assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec); 2649 assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq)); 2650 2651 bo->presumed_offset = bo->exec->offset; 2652 bo->exec = NULL; 2653 bo->target_handle = -1; 2654 2655 if (!bo->refcnt && !bo->reusable) { 2656 assert(!bo->snoop); 2657 assert(!bo->proxy); 2658 kgem_bo_free(kgem, bo); 2659 continue; 2660 } 2661 2662 bo->binding.offset = 0; 2663 bo->domain = DOMAIN_GPU; 2664 bo->gpu_dirty = false; 2665 2666 if (bo->proxy) { 2667 /* proxies are not used for domain tracking */ 2668 __kgem_bo_clear_busy(bo); 2669 } 2670 2671 kgem->scanout_busy |= bo->scanout && bo->needs_flush; 2672 } 2673 2674 if (rq == &kgem->static_request) { 2675 struct drm_i915_gem_set_domain set_domain; 2676 2677 DBG(("%s: syncing due to allocation failure\n", __FUNCTION__)); 2678 2679 VG_CLEAR(set_domain); 2680 set_domain.handle = rq->bo->handle; 2681 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 2682 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 2683 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 2684 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 2685 kgem_throttle(kgem); 2686 } 2687 2688 kgem_retire(kgem); 2689 assert(list_is_empty(&rq->buffers)); 2690 2691 assert(rq->bo->map__gtt == NULL); 2692 assert(rq->bo->map__cpu == NULL); 2693 gem_close(kgem->fd, rq->bo->handle); 2694 kgem_cleanup_cache(kgem); 2695 } else { 2696 assert(rq->ring < ARRAY_SIZE(kgem->requests)); 2697 list_add_tail(&rq->list, &kgem->requests[rq->ring]); 2698 kgem->need_throttle = kgem->need_retire = 1; 2699 } 2700 2701 kgem->next_request = NULL; 2702 2703 kgem_commit__check_buffers(kgem); 2704} 2705 2706static void kgem_close_list(struct kgem *kgem, struct list *head) 2707{ 2708 while (!list_is_empty(head)) 2709 kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list)); 2710} 2711 2712static void kgem_close_inactive(struct kgem *kgem) 2713{ 2714 unsigned int i; 2715 2716 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) 2717 kgem_close_list(kgem, &kgem->inactive[i]); 2718} 2719 2720static void kgem_finish_buffers(struct kgem *kgem) 2721{ 2722 struct kgem_buffer *bo, *next; 2723 2724 list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) { 2725 DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n", 2726 __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL, 2727 bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? 
"gtt" : "no", 2728 bo->base.refcnt)); 2729 2730 assert(next->base.list.prev == &bo->base.list); 2731 assert(bo->base.io); 2732 assert(bo->base.refcnt >= 1); 2733 2734 if (bo->base.refcnt > 1 && !bo->base.exec) { 2735 DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n", 2736 __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt)); 2737 continue; 2738 } 2739 2740 if (!bo->write) { 2741 assert(bo->base.exec || bo->base.refcnt > 1); 2742 goto decouple; 2743 } 2744 2745 if (bo->mmapped) { 2746 uint32_t used; 2747 2748 assert(!bo->need_io); 2749 2750 used = ALIGN(bo->used, PAGE_SIZE); 2751 if (!DBG_NO_UPLOAD_ACTIVE && 2752 used + PAGE_SIZE <= bytes(&bo->base) && 2753 (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) { 2754 DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n", 2755 __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt)); 2756 bo->used = used; 2757 list_move(&bo->base.list, 2758 &kgem->active_buffers); 2759 kgem->need_retire = true; 2760 continue; 2761 } 2762 DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n", 2763 __FUNCTION__, bo->used, bo->mmapped)); 2764 goto decouple; 2765 } 2766 2767 if (!bo->used || !bo->base.exec) { 2768 /* Unless we replace the handle in the execbuffer, 2769 * then this bo will become active. So decouple it 2770 * from the buffer list and track it in the normal 2771 * manner. 2772 */ 2773 goto decouple; 2774 } 2775 2776 assert(bo->need_io); 2777 assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); 2778 assert(bo->base.domain != DOMAIN_GPU); 2779 2780 if (bo->base.refcnt == 1 && 2781 bo->base.size.pages.count > 1 && 2782 bo->used < bytes(&bo->base) / 2) { 2783 struct kgem_bo *shrink; 2784 unsigned alloc = NUM_PAGES(bo->used); 2785 2786 shrink = search_snoop_cache(kgem, alloc, 2787 CREATE_INACTIVE | CREATE_NO_RETIRE); 2788 if (shrink) { 2789 void *map; 2790 int n; 2791 2792 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", 2793 __FUNCTION__, 2794 bo->used, bytes(&bo->base), bytes(shrink), 2795 bo->base.handle, shrink->handle)); 2796 2797 assert(bo->used <= bytes(shrink)); 2798 map = kgem_bo_map__cpu(kgem, shrink); 2799 if (map) { 2800 kgem_bo_sync__cpu(kgem, shrink); 2801 memcpy(map, bo->mem, bo->used); 2802 2803 shrink->target_handle = 2804 kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; 2805 for (n = 0; n < kgem->nreloc; n++) { 2806 if (kgem->reloc[n].target_handle == bo->base.target_handle) { 2807 kgem->reloc[n].target_handle = shrink->target_handle; 2808 kgem->reloc[n].presumed_offset = shrink->presumed_offset; 2809 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = 2810 kgem->reloc[n].delta + shrink->presumed_offset; 2811 } 2812 } 2813 2814 bo->base.exec->handle = shrink->handle; 2815 bo->base.exec->offset = shrink->presumed_offset; 2816 shrink->exec = bo->base.exec; 2817 shrink->rq = bo->base.rq; 2818 list_replace(&bo->base.request, 2819 &shrink->request); 2820 list_init(&bo->base.request); 2821 shrink->needs_flush = bo->base.gpu_dirty; 2822 2823 bo->base.exec = NULL; 2824 bo->base.rq = NULL; 2825 bo->base.gpu_dirty = false; 2826 bo->base.needs_flush = false; 2827 bo->used = 0; 2828 2829 goto decouple; 2830 } 2831 2832 __kgem_bo_destroy(kgem, shrink); 2833 } 2834 2835 shrink = search_linear_cache(kgem, alloc, 2836 CREATE_INACTIVE | CREATE_NO_RETIRE); 2837 if (shrink) { 2838 int n; 2839 2840 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", 2841 __FUNCTION__, 2842 bo->used, bytes(&bo->base), bytes(shrink), 2843 bo->base.handle, shrink->handle)); 2844 2845 assert(bo->used <= bytes(shrink)); 2846 if (gem_write__cachealigned(kgem->fd, shrink->handle, 2847 0, bo->used, bo->mem) == 0) { 2848 shrink->target_handle = 2849 kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; 2850 for (n = 0; n < kgem->nreloc; n++) { 2851 if (kgem->reloc[n].target_handle == bo->base.target_handle) { 2852 kgem->reloc[n].target_handle = shrink->target_handle; 2853 kgem->reloc[n].presumed_offset = shrink->presumed_offset; 2854 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = 2855 kgem->reloc[n].delta + shrink->presumed_offset; 2856 } 2857 } 2858 2859 bo->base.exec->handle = shrink->handle; 2860 bo->base.exec->offset = shrink->presumed_offset; 2861 shrink->exec = bo->base.exec; 2862 shrink->rq = bo->base.rq; 2863 list_replace(&bo->base.request, 2864 &shrink->request); 2865 list_init(&bo->base.request); 2866 shrink->needs_flush = bo->base.gpu_dirty; 2867 2868 bo->base.exec = NULL; 2869 bo->base.rq = NULL; 2870 bo->base.gpu_dirty = false; 2871 bo->base.needs_flush = false; 2872 bo->used = 0; 2873 2874 goto decouple; 2875 } 2876 2877 __kgem_bo_destroy(kgem, shrink); 2878 } 2879 } 2880 2881 DBG(("%s: handle=%d, uploading %d/%d\n", 2882 __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); 2883 ASSERT_IDLE(kgem, bo->base.handle); 2884 assert(bo->used <= bytes(&bo->base)); 2885 gem_write__cachealigned(kgem->fd, bo->base.handle, 2886 0, bo->used, bo->mem); 2887 bo->need_io = 0; 2888 2889decouple: 2890 DBG(("%s: releasing handle=%d\n", 2891 __FUNCTION__, bo->base.handle)); 2892 list_del(&bo->base.list); 2893 kgem_bo_unref(kgem, &bo->base); 2894 } 2895} 2896 2897static void kgem_cleanup(struct kgem *kgem) 2898{ 2899 int n; 2900 2901 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { 2902 while (!list_is_empty(&kgem->requests[n])) { 2903 struct kgem_request *rq; 2904 2905 rq = list_first_entry(&kgem->requests[n], 2906 struct kgem_request, 2907 list); 2908 assert(rq->ring == n); 2909 while (!list_is_empty(&rq->buffers)) { 2910 struct kgem_bo *bo; 2911 2912 bo = list_first_entry(&rq->buffers, 2913 struct kgem_bo, 2914 request); 2915 2916 bo->exec = NULL; 2917 bo->gpu_dirty = false; 2918 __kgem_bo_clear_busy(bo); 2919 if (bo->refcnt == 0) 2920 kgem_bo_free(kgem, bo); 2921 } 2922 2923 __kgem_request_free(rq); 2924 } 2925 } 
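	/* All outstanding requests have now been abandoned and their buffers
	 * released; drop the inactive caches as well so that no stale
	 * allocations survive the cleanup.
	 */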
2926 2927 kgem_close_inactive(kgem); 2928} 2929 2930static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size) 2931{ 2932 int ret; 2933 2934 ASSERT_IDLE(kgem, handle); 2935 2936#if DBG_NO_EXEC 2937 { 2938 uint32_t batch[] = { MI_BATCH_BUFFER_END, 0}; 2939 return gem_write(kgem->fd, handle, 0, sizeof(batch), batch); 2940 } 2941#endif 2942 2943 2944retry: 2945 /* If there is no surface data, just upload the batch */ 2946 if (kgem->surface == kgem->batch_size) { 2947 if ((ret = gem_write__cachealigned(kgem->fd, handle, 2948 0, sizeof(uint32_t)*kgem->nbatch, 2949 kgem->batch)) == 0) 2950 return 0; 2951 2952 goto expire; 2953 } 2954 2955 /* Are the batch pages conjoint with the surface pages? */ 2956 if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) { 2957 assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t))); 2958 if ((ret = gem_write__cachealigned(kgem->fd, handle, 2959 0, kgem->batch_size*sizeof(uint32_t), 2960 kgem->batch)) == 0) 2961 return 0; 2962 2963 goto expire; 2964 } 2965 2966 /* Disjoint surface/batch, upload separately */ 2967 if ((ret = gem_write__cachealigned(kgem->fd, handle, 2968 0, sizeof(uint32_t)*kgem->nbatch, 2969 kgem->batch))) 2970 goto expire; 2971 2972 ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size); 2973 ret -= sizeof(uint32_t) * kgem->surface; 2974 assert(size-ret >= kgem->nbatch*sizeof(uint32_t)); 2975 if (gem_write(kgem->fd, handle, 2976 size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t), 2977 kgem->batch + kgem->surface)) 2978 goto expire; 2979 2980 return 0; 2981 2982expire: 2983 assert(ret != EINVAL); 2984 2985 (void)__kgem_throttle_retire(kgem, 0); 2986 if (kgem_expire_cache(kgem)) 2987 goto retry; 2988 2989 if (kgem_cleanup_cache(kgem)) 2990 goto retry; 2991 2992 ERR(("%s: failed to write batch (handle=%d): %d\n", 2993 __FUNCTION__, handle, -ret)); 2994 return ret; 2995} 2996 2997void kgem_reset(struct kgem *kgem) 2998{ 2999 if (kgem->next_request) { 3000 struct kgem_request *rq = kgem->next_request; 3001 3002 while (!list_is_empty(&rq->buffers)) { 3003 struct kgem_bo *bo = 3004 list_first_entry(&rq->buffers, 3005 struct kgem_bo, 3006 request); 3007 list_del(&bo->request); 3008 3009 assert(RQ(bo->rq) == rq); 3010 3011 bo->binding.offset = 0; 3012 bo->exec = NULL; 3013 bo->target_handle = -1; 3014 bo->gpu_dirty = false; 3015 3016 if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) { 3017 assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); 3018 list_add(&bo->request, &kgem->flushing); 3019 bo->rq = (void *)kgem; 3020 kgem->need_retire = true; 3021 } else 3022 __kgem_bo_clear_busy(bo); 3023 3024 if (bo->refcnt || bo->rq) 3025 continue; 3026 3027 kgem_bo_move_to_cache(kgem, bo); 3028 } 3029 3030 if (rq != &kgem->static_request) { 3031 list_init(&rq->list); 3032 __kgem_request_free(rq); 3033 } 3034 } 3035 3036 kgem->nfence = 0; 3037 kgem->nexec = 0; 3038 kgem->nreloc = 0; 3039 kgem->nreloc__self = 0; 3040 kgem->aperture = 0; 3041 kgem->aperture_fenced = 0; 3042 kgem->aperture_max_fence = 0; 3043 kgem->nbatch = 0; 3044 kgem->surface = kgem->batch_size; 3045 kgem->mode = KGEM_NONE; 3046 kgem->needs_semaphore = false; 3047 kgem->needs_reservation = false; 3048 kgem->flush = 0; 3049 kgem->batch_flags = kgem->batch_flags_base; 3050 3051 kgem->next_request = __kgem_request_alloc(kgem); 3052 3053 kgem_sna_reset(kgem); 3054} 3055 3056static int compact_batch_surface(struct kgem *kgem) 3057{ 3058 int size, shrink, n; 3059 3060 if (!kgem->has_relaxed_delta) 3061 return kgem->batch_size * 
sizeof(uint32_t); 3062 3063 /* See if we can pack the contents into one or two pages */ 3064 n = ALIGN(kgem->batch_size, 1024); 3065 size = n - kgem->surface + kgem->nbatch; 3066 size = ALIGN(size, 1024); 3067 3068 shrink = n - size; 3069 if (shrink) { 3070 DBG(("shrinking from %d to %d\n", kgem->batch_size, size)); 3071 3072 shrink *= sizeof(uint32_t); 3073 for (n = 0; n < kgem->nreloc; n++) { 3074 if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION && 3075 kgem->reloc[n].target_handle == ~0U) 3076 kgem->reloc[n].delta -= shrink; 3077 3078 if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) 3079 kgem->reloc[n].offset -= shrink; 3080 } 3081 } 3082 3083 return size * sizeof(uint32_t); 3084} 3085 3086static struct kgem_bo * 3087kgem_create_batch(struct kgem *kgem, int size) 3088{ 3089 struct drm_i915_gem_set_domain set_domain; 3090 struct kgem_bo *bo; 3091 3092 if (size <= 4096) { 3093 bo = list_first_entry(&kgem->pinned_batches[0], 3094 struct kgem_bo, 3095 list); 3096 if (!bo->rq) { 3097out_4096: 3098 assert(bo->refcnt > 0); 3099 list_move_tail(&bo->list, &kgem->pinned_batches[0]); 3100 return kgem_bo_reference(bo); 3101 } 3102 3103 if (!__kgem_busy(kgem, bo->handle)) { 3104 __kgem_retire_rq(kgem, RQ(bo->rq)); 3105 goto out_4096; 3106 } 3107 } 3108 3109 if (size <= 16384) { 3110 bo = list_first_entry(&kgem->pinned_batches[1], 3111 struct kgem_bo, 3112 list); 3113 if (!bo->rq) { 3114out_16384: 3115 assert(bo->refcnt > 0); 3116 list_move_tail(&bo->list, &kgem->pinned_batches[1]); 3117 return kgem_bo_reference(bo); 3118 } 3119 3120 if (!__kgem_busy(kgem, bo->handle)) { 3121 __kgem_retire_rq(kgem, RQ(bo->rq)); 3122 goto out_16384; 3123 } 3124 } 3125 3126 if (kgem->gen == 020) { 3127 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY); 3128 if (bo) 3129 return bo; 3130 3131 /* Nothing available for reuse, rely on the kernel wa */ 3132 if (kgem->has_pinned_batches) { 3133 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY); 3134 if (bo) { 3135 kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED; 3136 return bo; 3137 } 3138 } 3139 3140 if (size < 16384) { 3141 bo = list_first_entry(&kgem->pinned_batches[size > 4096], 3142 struct kgem_bo, 3143 list); 3144 list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); 3145 3146 DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); 3147 3148 VG_CLEAR(set_domain); 3149 set_domain.handle = bo->handle; 3150 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 3151 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 3152 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 3153 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 3154 kgem_throttle(kgem); 3155 return NULL; 3156 } 3157 3158 kgem_retire(kgem); 3159 assert(bo->rq == NULL); 3160 return kgem_bo_reference(bo); 3161 } 3162 } 3163 3164 return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); 3165} 3166 3167#if !NDEBUG 3168static bool dump_file(const char *path) 3169{ 3170 FILE *file; 3171 size_t len = 0; 3172 char *line = NULL; 3173 3174 file = fopen(path, "r"); 3175 if (file == NULL) 3176 return false; 3177 3178 while (getline(&line, &len, file) != -1) 3179 ErrorF("%s", line); 3180 3181 free(line); 3182 fclose(file); 3183 return true; 3184} 3185 3186static void dump_debugfs(struct kgem *kgem, const char *name) 3187{ 3188 char path[80]; 3189 int minor = kgem_get_minor(kgem); 3190 3191 if (minor < 0) 3192 return; 3193 3194 sprintf(path, "/sys/kernel/debug/dri/%d/%s", minor, name); 3195 if (dump_file(path)) 3196 return; 3197 3198 
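	/* Fall back to the alternative mount point used on systems where
	 * debugfs is mounted at /debug rather than /sys/kernel/debug.
	 */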
sprintf(path, "/debug/dri/%d/%s", minor, name); 3199 if (dump_file(path)) 3200 return; 3201} 3202 3203static void dump_gtt_info(struct kgem *kgem) 3204{ 3205 dump_debugfs(kgem, "i915_gem_gtt"); 3206} 3207 3208static void dump_fence_regs(struct kgem *kgem) 3209{ 3210 dump_debugfs(kgem, "i915_gem_fence_regs"); 3211} 3212#endif 3213 3214static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf) 3215{ 3216 int ret; 3217 3218retry: 3219 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); 3220 if (ret == 0) 3221 return 0; 3222 3223 DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret)); 3224 (void)__kgem_throttle_retire(kgem, 0); 3225 if (kgem_expire_cache(kgem)) 3226 goto retry; 3227 3228 if (kgem_cleanup_cache(kgem)) 3229 goto retry; 3230 3231 /* last gasp */ 3232 return do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); 3233} 3234 3235void _kgem_submit(struct kgem *kgem) 3236{ 3237 struct kgem_request *rq; 3238 uint32_t batch_end; 3239 int size; 3240 3241 assert(!DBG_NO_HW); 3242 assert(!kgem->wedged); 3243 3244 assert(kgem->nbatch); 3245 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 3246 assert(kgem->nbatch <= kgem->surface); 3247 3248 batch_end = kgem_end_batch(kgem); 3249 kgem_sna_flush(kgem); 3250 3251 DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n", 3252 kgem->mode, kgem->ring, kgem->batch_flags, 3253 batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, 3254 kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced)); 3255 3256 assert(kgem->nbatch <= kgem->batch_size); 3257 assert(kgem->nbatch <= kgem->surface); 3258 assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc)); 3259 assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); 3260 assert(kgem->nfence <= kgem->fence_max); 3261 3262 kgem_finish_buffers(kgem); 3263 3264#if SHOW_BATCH_BEFORE 3265 __kgem_batch_debug(kgem, batch_end); 3266#endif 3267 3268 rq = kgem->next_request; 3269 if (kgem->surface != kgem->batch_size) 3270 size = compact_batch_surface(kgem); 3271 else 3272 size = kgem->nbatch * sizeof(kgem->batch[0]); 3273 rq->bo = kgem_create_batch(kgem, size); 3274 if (rq->bo) { 3275 uint32_t handle = rq->bo->handle; 3276 int i; 3277 3278 assert(!rq->bo->needs_flush); 3279 3280 i = kgem->nexec++; 3281 kgem->exec[i].handle = handle; 3282 kgem->exec[i].relocation_count = kgem->nreloc; 3283 kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; 3284 kgem->exec[i].alignment = 0; 3285 kgem->exec[i].offset = rq->bo->presumed_offset; 3286 kgem->exec[i].flags = 0; 3287 kgem->exec[i].rsvd1 = 0; 3288 kgem->exec[i].rsvd2 = 0; 3289 3290 rq->bo->target_handle = kgem->has_handle_lut ? 
i : handle; 3291 rq->bo->exec = &kgem->exec[i]; 3292 rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */ 3293 list_add(&rq->bo->request, &rq->buffers); 3294 rq->ring = kgem->ring == KGEM_BLT; 3295 3296 kgem_fixup_self_relocs(kgem, rq->bo); 3297 3298 if (kgem_batch_write(kgem, handle, size) == 0) { 3299 struct drm_i915_gem_execbuffer2 execbuf; 3300 int ret; 3301 3302 memset(&execbuf, 0, sizeof(execbuf)); 3303 execbuf.buffers_ptr = (uintptr_t)kgem->exec; 3304 execbuf.buffer_count = kgem->nexec; 3305 execbuf.batch_len = batch_end*sizeof(uint32_t); 3306 execbuf.flags = kgem->ring | kgem->batch_flags; 3307 3308 if (DBG_DUMP) { 3309 int fd = open("/tmp/i915-batchbuffers.dump", 3310 O_WRONLY | O_CREAT | O_APPEND, 3311 0666); 3312 if (fd != -1) { 3313 ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); 3314 fd = close(fd); 3315 } 3316 } 3317 3318 ret = do_execbuf(kgem, &execbuf); 3319 if (DEBUG_SYNC && ret == 0) { 3320 struct drm_i915_gem_set_domain set_domain; 3321 3322 VG_CLEAR(set_domain); 3323 set_domain.handle = handle; 3324 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 3325 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 3326 3327 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); 3328 } 3329 if (ret < 0) { 3330 kgem_throttle(kgem); 3331 if (!kgem->wedged) { 3332 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, 3333 "Failed to submit rendering commands, disabling acceleration.\n"); 3334 kgem->wedged = true; 3335 } 3336 3337#if !NDEBUG 3338 ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", 3339 kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, 3340 kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); 3341 3342 for (i = 0; i < kgem->nexec; i++) { 3343 struct kgem_bo *bo, *found = NULL; 3344 3345 list_for_each_entry(bo, &kgem->next_request->buffers, request) { 3346 if (bo->handle == kgem->exec[i].handle) { 3347 found = bo; 3348 break; 3349 } 3350 } 3351 ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", 3352 i, 3353 kgem->exec[i].handle, 3354 (int)kgem->exec[i].offset, 3355 found ? kgem_bo_size(found) : -1, 3356 found ? found->tiling : -1, 3357 (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), 3358 found ? found->snoop : -1, 3359 found ? 
found->purged : -1); 3360 } 3361 for (i = 0; i < kgem->nreloc; i++) { 3362 ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", 3363 i, 3364 (int)kgem->reloc[i].offset, 3365 kgem->reloc[i].target_handle, 3366 kgem->reloc[i].delta, 3367 kgem->reloc[i].read_domains, 3368 kgem->reloc[i].write_domain, 3369 (int)kgem->reloc[i].presumed_offset); 3370 } 3371 3372 { 3373 struct drm_i915_gem_get_aperture aperture; 3374 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) 3375 ErrorF("Aperture size %lld, available %lld\n", 3376 (long long)aperture.aper_size, 3377 (long long)aperture.aper_available_size); 3378 } 3379 3380 if (ret == -ENOSPC) 3381 dump_gtt_info(kgem); 3382 if (ret == -EDEADLK) 3383 dump_fence_regs(kgem); 3384 3385 if (DEBUG_SYNC) { 3386 int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); 3387 if (fd != -1) { 3388 int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); 3389 assert(ignored == batch_end*sizeof(uint32_t)); 3390 close(fd); 3391 } 3392 3393 FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); 3394 } 3395#endif 3396 } 3397 } 3398 } 3399#if SHOW_BATCH_AFTER 3400 if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t) == 0)) 3401 __kgem_batch_debug(kgem, batch_end); 3402#endif 3403 kgem_commit(kgem); 3404 if (kgem->wedged) 3405 kgem_cleanup(kgem); 3406 3407 kgem_reset(kgem); 3408 3409 assert(kgem->next_request != NULL); 3410} 3411 3412static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) 3413{ 3414 int minor = kgem_get_minor(kgem); 3415 3416 /* Search for our hang state in a few canonical locations. 3417 * In the unlikely event of having multiple devices, we 3418 * will need to check which minor actually corresponds to ours. 
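	 * The candidates tried below are the per-card sysfs error node and
	 * the i915_error_state file under either debugfs mount point.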
3419 */ 3420 3421 snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); 3422 if (access(path, R_OK) == 0) 3423 return true; 3424 3425 snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); 3426 if (access(path, R_OK) == 0) 3427 return true; 3428 3429 snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); 3430 if (access(path, R_OK) == 0) 3431 return true; 3432 3433 path[0] = '\0'; 3434 return false; 3435} 3436 3437void kgem_throttle(struct kgem *kgem) 3438{ 3439 if (kgem->wedged) 3440 return; 3441 3442 kgem->wedged = __kgem_throttle(kgem, true); 3443 if (kgem->wedged) { 3444 static int once; 3445 char path[128]; 3446 3447 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, 3448 "Detected a hung GPU, disabling acceleration.\n"); 3449 if (!once && find_hang_state(kgem, path, sizeof(path))) { 3450 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, 3451 "When reporting this, please include %s and the full dmesg.\n", 3452 path); 3453 once = 1; 3454 } 3455 3456 kgem->need_throttle = false; 3457 } 3458} 3459 3460int kgem_is_wedged(struct kgem *kgem) 3461{ 3462 return __kgem_throttle(kgem, true); 3463} 3464 3465static void kgem_purge_cache(struct kgem *kgem) 3466{ 3467 struct kgem_bo *bo, *next; 3468 int i; 3469 3470 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { 3471 list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) { 3472 if (!kgem_bo_is_retained(kgem, bo)) { 3473 DBG(("%s: purging %d\n", 3474 __FUNCTION__, bo->handle)); 3475 kgem_bo_free(kgem, bo); 3476 } 3477 } 3478 } 3479 3480 kgem->need_purge = false; 3481} 3482 3483void kgem_clean_scanout_cache(struct kgem *kgem) 3484{ 3485 while (!list_is_empty(&kgem->scanout)) { 3486 struct kgem_bo *bo; 3487 3488 bo = list_first_entry(&kgem->scanout, struct kgem_bo, list); 3489 3490 assert(bo->scanout); 3491 assert(!bo->refcnt); 3492 assert(!bo->prime); 3493 assert(bo->proxy == NULL); 3494 3495 if (bo->exec || __kgem_busy(kgem, bo->handle)) 3496 break; 3497 3498 DBG(("%s: handle=%d, fb=%d (reusable=%d)\n", 3499 __FUNCTION__, bo->handle, bo->delta, bo->reusable)); 3500 list_del(&bo->list); 3501 3502 kgem_bo_rmfb(kgem, bo); 3503 bo->scanout = false; 3504 3505 if (!bo->purged) { 3506 bo->reusable = true; 3507 if (kgem->has_llc && 3508 !gem_set_caching(kgem->fd, bo->handle, SNOOPED)) 3509 bo->reusable = false; 3510 3511 } 3512 3513 __kgem_bo_destroy(kgem, bo); 3514 } 3515} 3516 3517void kgem_clean_large_cache(struct kgem *kgem) 3518{ 3519 while (!list_is_empty(&kgem->large_inactive)) { 3520 kgem_bo_free(kgem, 3521 list_first_entry(&kgem->large_inactive, 3522 struct kgem_bo, list)); 3523 3524 } 3525} 3526 3527bool kgem_expire_cache(struct kgem *kgem) 3528{ 3529 time_t now, expire; 3530 struct kgem_bo *bo; 3531 unsigned int size = 0, count = 0; 3532 bool idle; 3533 unsigned int i; 3534 3535 time(&now); 3536 3537 while (__kgem_freed_bo) { 3538 bo = __kgem_freed_bo; 3539 __kgem_freed_bo = *(struct kgem_bo **)bo; 3540 free(bo); 3541 } 3542 3543 while (__kgem_freed_request) { 3544 struct kgem_request *rq = __kgem_freed_request; 3545 __kgem_freed_request = *(struct kgem_request **)rq; 3546 free(rq); 3547 } 3548 3549 kgem_clean_large_cache(kgem); 3550 if (container_of(kgem, struct sna, kgem)->scrn->vtSema) 3551 kgem_clean_scanout_cache(kgem); 3552 3553 expire = 0; 3554 list_for_each_entry(bo, &kgem->snoop, list) { 3555 if (bo->delta) { 3556 expire = now - MAX_INACTIVE_TIME/2; 3557 break; 3558 } 3559 3560 bo->delta = now; 3561 } 3562 if (expire) { 3563 while (!list_is_empty(&kgem->snoop)) { 3564 bo = 
list_last_entry(&kgem->snoop, struct kgem_bo, list); 3565 3566 if (bo->delta > expire) 3567 break; 3568 3569 kgem_bo_free(kgem, bo); 3570 } 3571 } 3572#ifdef DEBUG_MEMORY 3573 { 3574 long snoop_size = 0; 3575 int snoop_count = 0; 3576 list_for_each_entry(bo, &kgem->snoop, list) 3577 snoop_count++, snoop_size += bytes(bo); 3578 DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n", 3579 __FUNCTION__, snoop_count, snoop_size)); 3580 } 3581#endif 3582 3583 kgem_retire(kgem); 3584 if (kgem->wedged) 3585 kgem_cleanup(kgem); 3586 3587 kgem->expire(kgem); 3588 3589 if (kgem->need_purge) 3590 kgem_purge_cache(kgem); 3591 3592 if (kgem->need_retire) 3593 kgem_retire(kgem); 3594 3595 expire = 0; 3596 idle = true; 3597 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { 3598 idle &= list_is_empty(&kgem->inactive[i]); 3599 list_for_each_entry(bo, &kgem->inactive[i], list) { 3600 if (bo->delta) { 3601 expire = now - MAX_INACTIVE_TIME; 3602 break; 3603 } 3604 3605 bo->delta = now; 3606 } 3607 } 3608 if (expire == 0) { 3609 DBG(("%s: idle? %d\n", __FUNCTION__, idle)); 3610 kgem->need_expire = !idle; 3611 return false; 3612 } 3613 3614 idle = true; 3615 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { 3616 struct list preserve; 3617 3618 list_init(&preserve); 3619 while (!list_is_empty(&kgem->inactive[i])) { 3620 bo = list_last_entry(&kgem->inactive[i], 3621 struct kgem_bo, list); 3622 3623 if (bo->delta > expire) { 3624 idle = false; 3625 break; 3626 } 3627 3628 if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) { 3629 idle = false; 3630 list_move_tail(&bo->list, &preserve); 3631 } else { 3632 count++; 3633 size += bytes(bo); 3634 kgem_bo_free(kgem, bo); 3635 DBG(("%s: expiring %d\n", 3636 __FUNCTION__, bo->handle)); 3637 } 3638 } 3639 if (!list_is_empty(&preserve)) { 3640 preserve.prev->next = kgem->inactive[i].next; 3641 kgem->inactive[i].next->prev = preserve.prev; 3642 kgem->inactive[i].next = preserve.next; 3643 preserve.next->prev = &kgem->inactive[i]; 3644 } 3645 } 3646 3647#ifdef DEBUG_MEMORY 3648 { 3649 long inactive_size = 0; 3650 int inactive_count = 0; 3651 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) 3652 list_for_each_entry(bo, &kgem->inactive[i], list) 3653 inactive_count++, inactive_size += bytes(bo); 3654 DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n", 3655 __FUNCTION__, inactive_count, inactive_size)); 3656 } 3657#endif 3658 3659 DBG(("%s: expired %d objects, %d bytes, idle? 
%d\n", 3660 __FUNCTION__, count, size, idle)); 3661 3662 kgem->need_expire = !idle; 3663 return count; 3664 (void)count; 3665 (void)size; 3666} 3667 3668bool kgem_cleanup_cache(struct kgem *kgem) 3669{ 3670 unsigned int i; 3671 int n; 3672 3673 /* sync to the most recent request */ 3674 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { 3675 if (!list_is_empty(&kgem->requests[n])) { 3676 struct kgem_request *rq; 3677 struct drm_i915_gem_set_domain set_domain; 3678 3679 rq = list_first_entry(&kgem->requests[n], 3680 struct kgem_request, 3681 list); 3682 3683 DBG(("%s: sync on cleanup\n", __FUNCTION__)); 3684 3685 VG_CLEAR(set_domain); 3686 set_domain.handle = rq->bo->handle; 3687 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 3688 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 3689 (void)do_ioctl(kgem->fd, 3690 DRM_IOCTL_I915_GEM_SET_DOMAIN, 3691 &set_domain); 3692 } 3693 } 3694 3695 kgem_retire(kgem); 3696 kgem_cleanup(kgem); 3697 3698 if (!kgem->need_expire) 3699 return false; 3700 3701 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { 3702 while (!list_is_empty(&kgem->inactive[i])) 3703 kgem_bo_free(kgem, 3704 list_last_entry(&kgem->inactive[i], 3705 struct kgem_bo, list)); 3706 } 3707 3708 kgem_clean_large_cache(kgem); 3709 kgem_clean_scanout_cache(kgem); 3710 3711 while (!list_is_empty(&kgem->snoop)) 3712 kgem_bo_free(kgem, 3713 list_last_entry(&kgem->snoop, 3714 struct kgem_bo, list)); 3715 3716 while (__kgem_freed_bo) { 3717 struct kgem_bo *bo = __kgem_freed_bo; 3718 __kgem_freed_bo = *(struct kgem_bo **)bo; 3719 free(bo); 3720 } 3721 3722 kgem->need_purge = false; 3723 kgem->need_expire = false; 3724 return true; 3725} 3726 3727static struct kgem_bo * 3728search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) 3729{ 3730 struct kgem_bo *bo, *first = NULL; 3731 bool use_active = (flags & CREATE_INACTIVE) == 0; 3732 struct list *cache; 3733 3734 DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n", 3735 __FUNCTION__, num_pages, flags, use_active, 3736 num_pages >= MAX_CACHE_SIZE / PAGE_SIZE, 3737 MAX_CACHE_SIZE / PAGE_SIZE)); 3738 3739 assert(num_pages); 3740 3741 if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) { 3742 DBG(("%s: searching large buffers\n", __FUNCTION__)); 3743retry_large: 3744 cache = use_active ? 
&kgem->large : &kgem->large_inactive; 3745 list_for_each_entry_safe(bo, first, cache, list) { 3746 assert(bo->refcnt == 0); 3747 assert(bo->reusable); 3748 assert(!bo->scanout); 3749 3750 if (num_pages > num_pages(bo)) 3751 goto discard; 3752 3753 if (bo->tiling != I915_TILING_NONE) { 3754 if (use_active) 3755 goto discard; 3756 3757 if (!gem_set_tiling(kgem->fd, bo->handle, 3758 I915_TILING_NONE, 0)) 3759 goto discard; 3760 3761 bo->tiling = I915_TILING_NONE; 3762 bo->pitch = 0; 3763 } 3764 3765 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) 3766 goto discard; 3767 3768 list_del(&bo->list); 3769 if (RQ(bo->rq) == (void *)kgem) { 3770 assert(bo->exec == NULL); 3771 list_del(&bo->request); 3772 } 3773 3774 bo->delta = 0; 3775 assert_tiling(kgem, bo); 3776 return bo; 3777 3778discard: 3779 if (!use_active) 3780 kgem_bo_free(kgem, bo); 3781 } 3782 3783 if (use_active) { 3784 use_active = false; 3785 goto retry_large; 3786 } 3787 3788 if (__kgem_throttle_retire(kgem, flags)) 3789 goto retry_large; 3790 3791 return NULL; 3792 } 3793 3794 if (!use_active && list_is_empty(inactive(kgem, num_pages))) { 3795 DBG(("%s: inactive and cache bucket empty\n", 3796 __FUNCTION__)); 3797 3798 if (flags & CREATE_NO_RETIRE) { 3799 DBG(("%s: can not retire\n", __FUNCTION__)); 3800 return NULL; 3801 } 3802 3803 if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) { 3804 DBG(("%s: active cache bucket empty\n", __FUNCTION__)); 3805 return NULL; 3806 } 3807 3808 if (!__kgem_throttle_retire(kgem, flags)) { 3809 DBG(("%s: nothing retired\n", __FUNCTION__)); 3810 return NULL; 3811 } 3812 3813 if (list_is_empty(inactive(kgem, num_pages))) { 3814 DBG(("%s: active cache bucket still empty after retire\n", 3815 __FUNCTION__)); 3816 return NULL; 3817 } 3818 } 3819 3820 if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { 3821 int for_cpu = !!(flags & CREATE_CPU_MAP); 3822 DBG(("%s: searching for inactive %s map\n", 3823 __FUNCTION__, for_cpu ? "cpu" : "gtt")); 3824 cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)]; 3825 list_for_each_entry(bo, cache, vma) { 3826 assert(for_cpu ? bo->map__cpu : bo->map__gtt); 3827 assert(bucket(bo) == cache_bucket(num_pages)); 3828 assert(bo->proxy == NULL); 3829 assert(bo->rq == NULL); 3830 assert(bo->exec == NULL); 3831 assert(!bo->scanout); 3832 3833 if (num_pages > num_pages(bo)) { 3834 DBG(("inactive too small: %d < %d\n", 3835 num_pages(bo), num_pages)); 3836 continue; 3837 } 3838 3839 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { 3840 kgem_bo_free(kgem, bo); 3841 break; 3842 } 3843 3844 if (I915_TILING_NONE != bo->tiling && 3845 !gem_set_tiling(kgem->fd, bo->handle, 3846 I915_TILING_NONE, 0)) 3847 continue; 3848 3849 kgem_bo_remove_from_inactive(kgem, bo); 3850 assert(list_is_empty(&bo->vma)); 3851 assert(list_is_empty(&bo->list)); 3852 3853 bo->tiling = I915_TILING_NONE; 3854 bo->pitch = 0; 3855 bo->delta = 0; 3856 DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", 3857 __FUNCTION__, bo->handle, num_pages(bo))); 3858 assert(use_active || bo->domain != DOMAIN_GPU); 3859 assert(!bo->needs_flush); 3860 assert_tiling(kgem, bo); 3861 ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); 3862 return bo; 3863 } 3864 3865 if (flags & CREATE_EXACT) 3866 return NULL; 3867 3868 if (flags & CREATE_CPU_MAP && !kgem->has_llc) 3869 return NULL; 3870 } 3871 3872 cache = use_active ? 
active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages); 3873 list_for_each_entry(bo, cache, list) { 3874 assert(bo->refcnt == 0); 3875 assert(bo->reusable); 3876 assert(!!bo->rq == !!use_active); 3877 assert(bo->proxy == NULL); 3878 assert(!bo->scanout); 3879 3880 if (num_pages > num_pages(bo)) 3881 continue; 3882 3883 if (use_active && 3884 kgem->gen <= 040 && 3885 bo->tiling != I915_TILING_NONE) 3886 continue; 3887 3888 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { 3889 kgem_bo_free(kgem, bo); 3890 break; 3891 } 3892 3893 if (I915_TILING_NONE != bo->tiling) { 3894 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) 3895 continue; 3896 3897 if (first) 3898 continue; 3899 3900 if (!gem_set_tiling(kgem->fd, bo->handle, 3901 I915_TILING_NONE, 0)) 3902 continue; 3903 3904 bo->tiling = I915_TILING_NONE; 3905 bo->pitch = 0; 3906 } 3907 3908 if (bo->map__gtt || bo->map__cpu) { 3909 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { 3910 int for_cpu = !!(flags & CREATE_CPU_MAP); 3911 if (for_cpu ? bo->map__cpu : bo->map__gtt){ 3912 if (first != NULL) 3913 break; 3914 3915 first = bo; 3916 continue; 3917 } 3918 } else { 3919 if (first != NULL) 3920 break; 3921 3922 first = bo; 3923 continue; 3924 } 3925 } else { 3926 if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo)) 3927 continue; 3928 3929 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { 3930 if (first != NULL) 3931 break; 3932 3933 first = bo; 3934 continue; 3935 } 3936 } 3937 3938 if (use_active) 3939 kgem_bo_remove_from_active(kgem, bo); 3940 else 3941 kgem_bo_remove_from_inactive(kgem, bo); 3942 3943 assert(bo->tiling == I915_TILING_NONE); 3944 bo->pitch = 0; 3945 bo->delta = 0; 3946 DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", 3947 __FUNCTION__, bo->handle, num_pages(bo), 3948 use_active ? "active" : "inactive")); 3949 assert(list_is_empty(&bo->list)); 3950 assert(list_is_empty(&bo->vma)); 3951 assert(use_active || bo->domain != DOMAIN_GPU); 3952 assert(!bo->needs_flush || use_active); 3953 assert_tiling(kgem, bo); 3954 ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); 3955 return bo; 3956 } 3957 3958 if (first) { 3959 assert(first->tiling == I915_TILING_NONE); 3960 3961 if (use_active) 3962 kgem_bo_remove_from_active(kgem, first); 3963 else 3964 kgem_bo_remove_from_inactive(kgem, first); 3965 3966 first->pitch = 0; 3967 first->delta = 0; 3968 DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n", 3969 __FUNCTION__, first->handle, num_pages(first), 3970 use_active ? 
"active" : "inactive")); 3971 assert(list_is_empty(&first->list)); 3972 assert(list_is_empty(&first->vma)); 3973 assert(use_active || first->domain != DOMAIN_GPU); 3974 assert(!first->needs_flush || use_active); 3975 ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active); 3976 return first; 3977 } 3978 3979 return NULL; 3980} 3981 3982struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) 3983{ 3984 struct drm_gem_open open_arg; 3985 struct drm_i915_gem_get_tiling tiling; 3986 struct kgem_bo *bo; 3987 3988 DBG(("%s(name=%d)\n", __FUNCTION__, name)); 3989 3990 VG_CLEAR(open_arg); 3991 open_arg.name = name; 3992 if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) 3993 return NULL; 3994 3995 DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle)); 3996 3997 VG_CLEAR(tiling); 3998 tiling.handle = open_arg.handle; 3999 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) { 4000 DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno)); 4001 gem_close(kgem->fd, open_arg.handle); 4002 return NULL; 4003 } 4004 4005 DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode)); 4006 4007 bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE); 4008 if (bo == NULL) { 4009 gem_close(kgem->fd, open_arg.handle); 4010 return NULL; 4011 } 4012 4013 bo->unique_id = kgem_get_unique_id(kgem); 4014 bo->tiling = tiling.tiling_mode; 4015 bo->reusable = false; 4016 bo->prime = true; 4017 bo->purged = true; /* no coherency guarantees */ 4018 4019 debug_alloc__bo(kgem, bo); 4020 return bo; 4021} 4022 4023struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size) 4024{ 4025#ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE 4026 struct drm_prime_handle args; 4027 struct drm_i915_gem_get_tiling tiling; 4028 struct local_i915_gem_caching caching; 4029 struct kgem_bo *bo; 4030 off_t seek; 4031 4032 DBG(("%s(name=%d)\n", __FUNCTION__, name)); 4033 4034 VG_CLEAR(args); 4035 args.fd = name; 4036 args.flags = 0; 4037 if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) { 4038 DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno)); 4039 return NULL; 4040 } 4041 4042 VG_CLEAR(tiling); 4043 tiling.handle = args.handle; 4044 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) { 4045 DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno)); 4046 gem_close(kgem->fd, args.handle); 4047 return NULL; 4048 } 4049 4050 /* Query actual size, overriding specified if available */ 4051 seek = lseek(args.fd, 0, SEEK_END); 4052 DBG(("%s: estimated size=%ld, actual=%lld\n", 4053 __FUNCTION__, (long)size, (long long)seek)); 4054 if (seek != -1) { 4055 if (size > seek) { 4056 DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek)); 4057 gem_close(kgem->fd, args.handle); 4058 return NULL; 4059 } 4060 size = seek; 4061 } 4062 4063 DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__, 4064 args.handle, tiling.tiling_mode)); 4065 bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size)); 4066 if (bo == NULL) { 4067 gem_close(kgem->fd, args.handle); 4068 return NULL; 4069 } 4070 4071 bo->unique_id = kgem_get_unique_id(kgem); 4072 bo->tiling = tiling.tiling_mode; 4073 bo->reusable = false; 4074 bo->prime = true; 4075 bo->domain = DOMAIN_NONE; 4076 4077 /* is this a special bo (e.g. scanout or CPU coherent)? 
*/ 4078 4079 VG_CLEAR(caching); 4080 caching.handle = args.handle; 4081 caching.caching = kgem->has_llc; 4082 (void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching); 4083 DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching)); 4084 switch (caching.caching) { 4085 case 0: 4086 if (kgem->has_llc) { 4087 DBG(("%s: interpreting handle=%d as a foreign scanout\n", 4088 __FUNCTION__, args.handle)); 4089 bo->scanout = true; 4090 } 4091 break; 4092 case 1: 4093 if (!kgem->has_llc) { 4094 DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n", 4095 __FUNCTION__, args.handle)); 4096 bo->snoop = true; 4097 if (bo->tiling) { 4098 DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__)); 4099 kgem_bo_free(kgem, bo); 4100 return NULL; 4101 } 4102 } 4103 break; 4104 case 2: 4105 DBG(("%s: interpreting handle=%d as a foreign scanout\n", 4106 __FUNCTION__, args.handle)); 4107 bo->scanout = true; 4108 break; 4109 } 4110 4111 debug_alloc__bo(kgem, bo); 4112 return bo; 4113#else 4114 return NULL; 4115#endif 4116} 4117 4118int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo) 4119{ 4120#if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC) 4121 struct drm_prime_handle args; 4122 4123 VG_CLEAR(args); 4124 args.handle = bo->handle; 4125 args.flags = O_CLOEXEC; 4126 4127 if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args)) 4128 return -1; 4129 4130 bo->reusable = false; 4131 return args.fd; 4132#else 4133 return -1; 4134#endif 4135} 4136 4137struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) 4138{ 4139 struct kgem_bo *bo; 4140 uint32_t handle; 4141 4142 DBG(("%s(%d)\n", __FUNCTION__, size)); 4143 assert(size); 4144 4145 if (flags & CREATE_GTT_MAP && kgem->has_llc) { 4146 flags &= ~CREATE_GTT_MAP; 4147 flags |= CREATE_CPU_MAP; 4148 } 4149 4150 size = NUM_PAGES(size); 4151 bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); 4152 if (bo) { 4153 assert(bo->domain != DOMAIN_GPU); 4154 ASSERT_IDLE(kgem, bo->handle); 4155 bo->refcnt = 1; 4156 return bo; 4157 } 4158 4159 if (flags & CREATE_CACHED) 4160 return NULL; 4161 4162 handle = gem_create(kgem->fd, size); 4163 if (handle == 0) 4164 return NULL; 4165 4166 DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size)); 4167 bo = __kgem_bo_alloc(handle, size); 4168 if (bo == NULL) { 4169 gem_close(kgem->fd, handle); 4170 return NULL; 4171 } 4172 4173 debug_alloc__bo(kgem, bo); 4174 return bo; 4175} 4176 4177int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp) 4178{ 4179 if (DBG_NO_TILING) 4180 return tiling < 0 ? 
tiling : I915_TILING_NONE; 4181 4182 if (kgem->gen < 040) { 4183 if (tiling && width * bpp > 8192 * 8) { 4184 DBG(("%s: pitch too large for tiling [%d]\n", 4185 __FUNCTION__, width*bpp/8)); 4186 tiling = I915_TILING_NONE; 4187 goto done; 4188 } 4189 } else { 4190 if (width*bpp > (MAXSHORT-512) * 8) { 4191 if (tiling > 0) 4192 tiling = -tiling; 4193 else if (tiling == 0) 4194 tiling = -I915_TILING_X; 4195 DBG(("%s: large pitch [%d], forcing TILING [%d]\n", 4196 __FUNCTION__, width*bpp/8, tiling)); 4197 } else if (tiling && (width|height) > 8192) { 4198 DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n", 4199 __FUNCTION__, width, height)); 4200 tiling = -I915_TILING_X; 4201 } 4202 4203 /* fences limited to 128k (256k on ivb) */ 4204 assert(width * bpp <= 128 * 1024 * 8); 4205 } 4206 4207 if (tiling < 0) 4208 return tiling; 4209 4210 if (tiling == I915_TILING_Y && !kgem->can_render_y) 4211 tiling = I915_TILING_X; 4212 4213 if (tiling && (height == 1 || width == 1)) { 4214 DBG(("%s: disabling tiling [%dx%d] for single row/col\n", 4215 __FUNCTION__, width, height)); 4216 tiling = I915_TILING_NONE; 4217 goto done; 4218 } 4219 if (tiling == I915_TILING_Y && height <= 16) { 4220 DBG(("%s: too short [%d] for TILING_Y\n", 4221 __FUNCTION__, height)); 4222 tiling = I915_TILING_X; 4223 } 4224 if (tiling && width * bpp > 8 * (4096 - 64)) { 4225 DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n", 4226 __FUNCTION__, 4227 width, height, width*bpp/8, 4228 tiling)); 4229 return -tiling; 4230 } 4231 if (tiling == I915_TILING_X && height < 4) { 4232 DBG(("%s: too short [%d] for TILING_X\n", 4233 __FUNCTION__, height)); 4234 tiling = I915_TILING_NONE; 4235 goto done; 4236 } 4237 4238 if (tiling == I915_TILING_X && width * bpp <= 8*512) { 4239 DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n", 4240 __FUNCTION__, width, bpp)); 4241 tiling = I915_TILING_NONE; 4242 goto done; 4243 } 4244 if (tiling == I915_TILING_Y && width * bpp < 8*128) { 4245 DBG(("%s: too thin [%d] for TILING_Y\n", 4246 __FUNCTION__, width)); 4247 tiling = I915_TILING_NONE; 4248 goto done; 4249 } 4250 4251 if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) { 4252 DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__, 4253 ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8, 4254 tiling == I915_TILING_X ?
'X' : 'Y')); 4255 tiling = I915_TILING_NONE; 4256 goto done; 4257 } 4258 4259 if (tiling && width * bpp >= 8 * 4096 / 2) { 4260 DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n", 4261 __FUNCTION__, 4262 width, height, width*bpp/8, 4263 tiling)); 4264 return -tiling; 4265 } 4266 4267done: 4268 DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling)); 4269 return tiling; 4270} 4271 4272static int bits_per_pixel(int depth) 4273{ 4274 switch (depth) { 4275 case 8: return 8; 4276 case 15: 4277 case 16: return 16; 4278 case 24: 4279 case 30: 4280 case 32: return 32; 4281 default: return 0; 4282 } 4283} 4284 4285unsigned kgem_can_create_2d(struct kgem *kgem, 4286 int width, int height, int depth) 4287{ 4288 uint32_t pitch, size; 4289 unsigned flags = 0; 4290 int tiling; 4291 int bpp; 4292 4293 DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth)); 4294 4295 bpp = bits_per_pixel(depth); 4296 if (bpp == 0) { 4297 DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth)); 4298 return 0; 4299 } 4300 4301 if (width > MAXSHORT || height > MAXSHORT) { 4302 DBG(("%s: unhandled size %dx%d\n", 4303 __FUNCTION__, width, height)); 4304 return 0; 4305 } 4306 4307 size = kgem_surface_size(kgem, false, 0, 4308 width, height, bpp, 4309 I915_TILING_NONE, &pitch); 4310 DBG(("%s: untiled size=%d\n", __FUNCTION__, size)); 4311 if (size > 0) { 4312 if (size <= kgem->max_cpu_size) 4313 flags |= KGEM_CAN_CREATE_CPU; 4314 if (size > 4096 && size <= kgem->max_gpu_size) 4315 flags |= KGEM_CAN_CREATE_GPU; 4316 if (size <= PAGE_SIZE*kgem->aperture_mappable/4) 4317 flags |= KGEM_CAN_CREATE_GTT; 4318 if (size > kgem->large_object_size) 4319 flags |= KGEM_CAN_CREATE_LARGE; 4320 if (size > kgem->max_object_size) { 4321 DBG(("%s: too large (untiled) %d > %d\n", 4322 __FUNCTION__, size, kgem->max_object_size)); 4323 return 0; 4324 } 4325 } 4326 4327 tiling = kgem_choose_tiling(kgem, I915_TILING_X, 4328 width, height, bpp); 4329 if (tiling != I915_TILING_NONE) { 4330 size = kgem_surface_size(kgem, false, 0, 4331 width, height, bpp, tiling, 4332 &pitch); 4333 DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size)); 4334 if (size > 0 && size <= kgem->max_gpu_size) 4335 flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED; 4336 if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4) 4337 flags |= KGEM_CAN_CREATE_GTT; 4338 if (size > PAGE_SIZE*kgem->aperture_mappable/4) 4339 flags &= ~KGEM_CAN_CREATE_GTT; 4340 if (size > kgem->large_object_size) 4341 flags |= KGEM_CAN_CREATE_LARGE; 4342 if (size > kgem->max_object_size) { 4343 DBG(("%s: too large (tiled) %d > %d\n", 4344 __FUNCTION__, size, kgem->max_object_size)); 4345 return 0; 4346 } 4347 if (kgem->gen < 040) { 4348 int fence_size = 1024 * 1024; 4349 while (fence_size < size) 4350 fence_size <<= 1; 4351 if (fence_size > kgem->max_gpu_size) 4352 flags &= ~KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED; 4353 if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4) 4354 flags &= ~KGEM_CAN_CREATE_GTT; 4355 } 4356 } 4357 4358 return flags; 4359} 4360 4361inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) 4362{ 4363 unsigned int size; 4364 4365 assert(bo->tiling); 4366 assert_tiling(kgem, bo); 4367 assert(kgem->gen < 040); 4368 4369 if (kgem->gen < 030) 4370 size = 512 * 1024 / PAGE_SIZE; 4371 else 4372 size = 1024 * 1024 / PAGE_SIZE; 4373 while (size < num_pages(bo)) 4374 size <<= 1; 4375 4376 return size; 4377} 4378 4379static struct kgem_bo * 4380__kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int 
pitch) 4381{ 4382 struct local_i915_gem_create2 args; 4383 struct kgem_bo *bo; 4384 4385 if (!kgem->has_create2) 4386 return NULL; 4387 4388 memset(&args, 0, sizeof(args)); 4389 args.size = size * PAGE_SIZE; 4390 args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN; 4391 args.caching = DISPLAY; 4392 args.tiling_mode = tiling; 4393 args.stride = pitch; 4394 4395 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) { 4396 args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM; 4397 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) 4398 return NULL; 4399 } 4400 4401 bo = __kgem_bo_alloc(args.handle, size); 4402 if (bo == NULL) { 4403 gem_close(kgem->fd, args.handle); 4404 return NULL; 4405 } 4406 4407 bo->unique_id = kgem_get_unique_id(kgem); 4408 bo->tiling = tiling; 4409 bo->pitch = pitch; 4410 if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) { 4411 bo->purged = true; /* for asserts against CPU access */ 4412 } 4413 bo->reusable = false; /* so that unclaimed scanouts are freed */ 4414 bo->domain = DOMAIN_NONE; 4415 4416 if (__kgem_busy(kgem, bo->handle)) { 4417 assert(bo->exec == NULL); 4418 list_add(&bo->request, &kgem->flushing); 4419 bo->rq = (void *)kgem; 4420 kgem->need_retire = true; 4421 } 4422 4423 assert_tiling(kgem, bo); 4424 debug_alloc__bo(kgem, bo); 4425 4426 return bo; 4427} 4428 4429static void __kgem_bo_make_scanout(struct kgem *kgem, 4430 struct kgem_bo *bo, 4431 int width, int height) 4432{ 4433 ScrnInfoPtr scrn = 4434 container_of(kgem, struct sna, kgem)->scrn; 4435 struct drm_mode_fb_cmd arg; 4436 4437 assert(bo->proxy == NULL); 4438 4439 if (!scrn->vtSema) 4440 return; 4441 4442 DBG(("%s: create fb %dx%d@%d/%d\n", 4443 __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel)); 4444 4445 VG_CLEAR(arg); 4446 arg.width = width; 4447 arg.height = height; 4448 arg.pitch = bo->pitch; 4449 arg.bpp = scrn->bitsPerPixel; 4450 arg.depth = scrn->depth; 4451 arg.handle = bo->handle; 4452 4453 /* First move the scanout out of cached memory */ 4454 if (kgem->has_llc) { 4455 if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) && 4456 !gem_set_caching(kgem->fd, bo->handle, UNCACHED)) 4457 return; 4458 } 4459 4460 bo->scanout = true; 4461 4462 /* Then pre-emptively move the object into the mappable 4463 * portion to avoid rebinding later when busy. 
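 * (The dummy write through the freshly created GTT mapping below is what
 * forces the kernel to bind the buffer into the mappable aperture now,
 * while it is still idle.)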
4464 */ 4465 if (bo->map__gtt == NULL) 4466 bo->map__gtt = __kgem_bo_map__gtt(kgem, bo); 4467 if (bo->map__gtt) { 4468 *(uint32_t *)bo->map__gtt = 0; 4469 bo->domain = DOMAIN_GTT; 4470 } 4471 4472 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) { 4473 DBG(("%s: attached fb=%d to handle=%d\n", 4474 __FUNCTION__, arg.fb_id, arg.handle)); 4475 bo->delta = arg.fb_id; 4476 } 4477} 4478 4479struct kgem_bo *kgem_create_2d(struct kgem *kgem, 4480 int width, 4481 int height, 4482 int bpp, 4483 int tiling, 4484 uint32_t flags) 4485{ 4486 struct list *cache; 4487 struct kgem_bo *bo; 4488 uint32_t pitch, tiled_height, size; 4489 uint32_t handle; 4490 int i, bucket, retry; 4491 bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT); 4492 4493 if (tiling < 0) 4494 exact = true, tiling = -tiling; 4495 4496 DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__, 4497 width, height, bpp, tiling, exact, 4498 !!(flags & CREATE_INACTIVE), 4499 !!(flags & CREATE_CPU_MAP), 4500 !!(flags & CREATE_GTT_MAP), 4501 !!(flags & CREATE_SCANOUT), 4502 !!(flags & CREATE_PRIME), 4503 !!(flags & CREATE_TEMPORARY))); 4504 4505 size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags, 4506 width, height, bpp, tiling, &pitch); 4507 if (size == 0) { 4508 DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__)); 4509 return NULL; 4510 } 4511 4512 size /= PAGE_SIZE; 4513 bucket = cache_bucket(size); 4514 4515 if (flags & CREATE_SCANOUT) { 4516 struct kgem_bo *last = NULL; 4517 4518 list_for_each_entry_reverse(bo, &kgem->scanout, list) { 4519 assert(bo->scanout); 4520 assert(!bo->flush); 4521 assert(!bo->refcnt); 4522 assert_tiling(kgem, bo); 4523 4524 if (size > num_pages(bo) || num_pages(bo) > 2*size) 4525 continue; 4526 4527 if (bo->tiling != tiling || bo->pitch != pitch) 4528 /* No tiling/pitch without recreating fb */ 4529 continue; 4530 4531 if (bo->delta && !check_scanout_size(kgem, bo, width, height)) 4532 continue; 4533 4534 if (flags & CREATE_INACTIVE && bo->rq) { 4535 last = bo; 4536 continue; 4537 } 4538 4539 list_del(&bo->list); 4540 4541 bo->unique_id = kgem_get_unique_id(kgem); 4542 DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4543 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4544 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4545 assert_tiling(kgem, bo); 4546 bo->refcnt = 1; 4547 return bo; 4548 } 4549 4550 if (last) { 4551 list_del(&last->list); 4552 4553 last->unique_id = kgem_get_unique_id(kgem); 4554 DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4555 last->pitch, last->tiling, last->handle, last->unique_id)); 4556 assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last)); 4557 assert_tiling(kgem, last); 4558 last->refcnt = 1; 4559 return last; 4560 } 4561 4562 if (container_of(kgem, struct sna, kgem)->scrn->vtSema) { 4563 ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn; 4564 4565 list_for_each_entry_reverse(bo, &kgem->scanout, list) { 4566 struct drm_mode_fb_cmd arg; 4567 4568 assert(bo->scanout); 4569 assert(!bo->refcnt); 4570 4571 if (size > num_pages(bo) || num_pages(bo) > 2*size) 4572 continue; 4573 4574 if (flags & CREATE_INACTIVE && bo->rq) 4575 continue; 4576 4577 list_del(&bo->list); 4578 4579 if (bo->tiling != tiling || bo->pitch != pitch) { 4580 if (bo->delta) { 4581 kgem_bo_rmfb(kgem, bo); 4582 bo->delta = 0; 4583 } 4584 4585 if (gem_set_tiling(kgem->fd, bo->handle, 
4586 tiling, pitch)) { 4587 bo->tiling = tiling; 4588 bo->pitch = pitch; 4589 } else { 4590 kgem_bo_free(kgem, bo); 4591 break; 4592 } 4593 } 4594 4595 VG_CLEAR(arg); 4596 arg.width = width; 4597 arg.height = height; 4598 arg.pitch = bo->pitch; 4599 arg.bpp = scrn->bitsPerPixel; 4600 arg.depth = scrn->depth; 4601 arg.handle = bo->handle; 4602 4603 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) { 4604 kgem_bo_free(kgem, bo); 4605 break; 4606 } 4607 4608 bo->delta = arg.fb_id; 4609 bo->unique_id = kgem_get_unique_id(kgem); 4610 4611 DBG((" 2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4612 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4613 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4614 assert_tiling(kgem, bo); 4615 bo->refcnt = 1; 4616 return bo; 4617 } 4618 } 4619 4620 bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch); 4621 if (bo) 4622 return bo; 4623 4624 flags |= CREATE_INACTIVE; 4625 } 4626 4627 if (bucket >= NUM_CACHE_BUCKETS) { 4628 DBG(("%s: large bo num pages=%d, bucket=%d\n", 4629 __FUNCTION__, size, bucket)); 4630 4631 if (flags & CREATE_INACTIVE) 4632 goto large_inactive; 4633 4634 tiled_height = kgem_aligned_height(kgem, height, tiling); 4635 4636 list_for_each_entry(bo, &kgem->large, list) { 4637 assert(!bo->purged); 4638 assert(!bo->scanout); 4639 assert(bo->refcnt == 0); 4640 assert(bo->reusable); 4641 assert_tiling(kgem, bo); 4642 4643 if (kgem->gen < 040) { 4644 if (bo->pitch < pitch) { 4645 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", 4646 bo->tiling, tiling, 4647 bo->pitch, pitch)); 4648 continue; 4649 } 4650 4651 if (bo->pitch * tiled_height > bytes(bo)) 4652 continue; 4653 } else { 4654 if (num_pages(bo) < size) 4655 continue; 4656 4657 if (bo->pitch != pitch || bo->tiling != tiling) { 4658 if (!gem_set_tiling(kgem->fd, bo->handle, 4659 tiling, pitch)) 4660 continue; 4661 4662 bo->pitch = pitch; 4663 bo->tiling = tiling; 4664 } 4665 } 4666 4667 kgem_bo_remove_from_active(kgem, bo); 4668 4669 bo->unique_id = kgem_get_unique_id(kgem); 4670 bo->delta = 0; 4671 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4672 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4673 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4674 assert_tiling(kgem, bo); 4675 bo->refcnt = 1; 4676 return bo; 4677 } 4678 4679large_inactive: 4680 __kgem_throttle_retire(kgem, flags); 4681 list_for_each_entry(bo, &kgem->large_inactive, list) { 4682 assert(bo->refcnt == 0); 4683 assert(bo->reusable); 4684 assert(!bo->scanout); 4685 assert_tiling(kgem, bo); 4686 4687 if (size > num_pages(bo)) 4688 continue; 4689 4690 if (bo->tiling != tiling || 4691 (tiling != I915_TILING_NONE && bo->pitch != pitch)) { 4692 if (!gem_set_tiling(kgem->fd, bo->handle, 4693 tiling, pitch)) 4694 continue; 4695 4696 bo->tiling = tiling; 4697 bo->pitch = pitch; 4698 } 4699 4700 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { 4701 kgem_bo_free(kgem, bo); 4702 break; 4703 } 4704 4705 list_del(&bo->list); 4706 4707 assert(bo->domain != DOMAIN_GPU); 4708 bo->unique_id = kgem_get_unique_id(kgem); 4709 bo->pitch = pitch; 4710 bo->delta = 0; 4711 DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4712 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4713 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4714 assert_tiling(kgem, bo); 4715 bo->refcnt = 1; 4716 4717 if (flags & CREATE_SCANOUT) 4718 
__kgem_bo_make_scanout(kgem, bo, width, height); 4719 4720 return bo; 4721 } 4722 4723 goto create; 4724 } 4725 4726 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { 4727 int for_cpu = !!(flags & CREATE_CPU_MAP); 4728 if (kgem->has_llc && tiling == I915_TILING_NONE) 4729 for_cpu = 1; 4730 /* We presume that we will need to upload to this bo, 4731 * and so would prefer to have an active VMA. 4732 */ 4733 cache = &kgem->vma[for_cpu].inactive[bucket]; 4734 do { 4735 list_for_each_entry(bo, cache, vma) { 4736 assert(bucket(bo) == bucket); 4737 assert(bo->refcnt == 0); 4738 assert(!bo->scanout); 4739 assert(for_cpu ? bo->map__cpu : bo->map__gtt); 4740 assert(bo->rq == NULL); 4741 assert(bo->exec == NULL); 4742 assert(list_is_empty(&bo->request)); 4743 assert(bo->flush == false); 4744 assert_tiling(kgem, bo); 4745 4746 if (size > num_pages(bo)) { 4747 DBG(("inactive too small: %d < %d\n", 4748 num_pages(bo), size)); 4749 continue; 4750 } 4751 4752 if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU) 4753 continue; 4754 4755 if (bo->tiling != tiling || 4756 (tiling != I915_TILING_NONE && bo->pitch != pitch)) { 4757 if (bo->map__gtt || 4758 !gem_set_tiling(kgem->fd, bo->handle, 4759 tiling, pitch)) { 4760 DBG(("inactive GTT vma with wrong tiling: %d < %d\n", 4761 bo->tiling, tiling)); 4762 continue; 4763 } 4764 bo->tiling = tiling; 4765 bo->pitch = pitch; 4766 } 4767 4768 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { 4769 kgem_bo_free(kgem, bo); 4770 break; 4771 } 4772 4773 assert(bo->tiling == tiling); 4774 bo->pitch = pitch; 4775 bo->delta = 0; 4776 bo->unique_id = kgem_get_unique_id(kgem); 4777 4778 kgem_bo_remove_from_inactive(kgem, bo); 4779 assert(list_is_empty(&bo->list)); 4780 assert(list_is_empty(&bo->vma)); 4781 4782 DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n", 4783 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4784 assert(bo->reusable); 4785 assert(bo->domain != DOMAIN_GPU); 4786 ASSERT_IDLE(kgem, bo->handle); 4787 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4788 assert_tiling(kgem, bo); 4789 bo->refcnt = 1; 4790 return bo; 4791 } 4792 } while (!list_is_empty(cache) && 4793 __kgem_throttle_retire(kgem, flags)); 4794 4795 if (flags & CREATE_CPU_MAP && !kgem->has_llc) { 4796 if (list_is_empty(&kgem->active[bucket][tiling]) && 4797 list_is_empty(&kgem->inactive[bucket])) 4798 flags &= ~CREATE_CACHED; 4799 4800 goto create; 4801 } 4802 } 4803 4804 if (flags & CREATE_INACTIVE) 4805 goto skip_active_search; 4806 4807 /* Best active match */ 4808 retry = NUM_CACHE_BUCKETS - bucket; 4809 if (retry > 3 && (flags & CREATE_TEMPORARY) == 0) 4810 retry = 3; 4811search_active: 4812 assert(bucket < NUM_CACHE_BUCKETS); 4813 cache = &kgem->active[bucket][tiling]; 4814 if (tiling) { 4815 tiled_height = kgem_aligned_height(kgem, height, tiling); 4816 list_for_each_entry(bo, cache, list) { 4817 assert(!bo->purged); 4818 assert(bo->refcnt == 0); 4819 assert(bucket(bo) == bucket); 4820 assert(bo->reusable); 4821 assert(bo->tiling == tiling); 4822 assert(bo->flush == false); 4823 assert(!bo->scanout); 4824 assert_tiling(kgem, bo); 4825 4826 if (kgem->gen < 040) { 4827 if (bo->pitch < pitch) { 4828 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", 4829 bo->tiling, tiling, 4830 bo->pitch, pitch)); 4831 continue; 4832 } 4833 4834 if (bo->pitch * tiled_height > bytes(bo)) 4835 continue; 4836 } else { 4837 if (num_pages(bo) < size) 4838 continue; 4839 4840 if (bo->pitch != pitch) { 4841 if 
(!gem_set_tiling(kgem->fd, 4842 bo->handle, 4843 tiling, pitch)) 4844 continue; 4845 4846 bo->pitch = pitch; 4847 } 4848 } 4849 4850 kgem_bo_remove_from_active(kgem, bo); 4851 4852 bo->unique_id = kgem_get_unique_id(kgem); 4853 bo->delta = 0; 4854 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4855 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4856 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4857 assert_tiling(kgem, bo); 4858 bo->refcnt = 1; 4859 return bo; 4860 } 4861 } else { 4862 list_for_each_entry(bo, cache, list) { 4863 assert(bucket(bo) == bucket); 4864 assert(!bo->purged); 4865 assert(bo->refcnt == 0); 4866 assert(bo->reusable); 4867 assert(!bo->scanout); 4868 assert(bo->tiling == tiling); 4869 assert(bo->flush == false); 4870 assert_tiling(kgem, bo); 4871 4872 if (num_pages(bo) < size) 4873 continue; 4874 4875 kgem_bo_remove_from_active(kgem, bo); 4876 4877 bo->pitch = pitch; 4878 bo->unique_id = kgem_get_unique_id(kgem); 4879 bo->delta = 0; 4880 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4881 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4882 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4883 assert_tiling(kgem, bo); 4884 bo->refcnt = 1; 4885 return bo; 4886 } 4887 } 4888 4889 if (kgem->gen >= 040) { 4890 for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) { 4891 cache = &kgem->active[bucket][i]; 4892 list_for_each_entry(bo, cache, list) { 4893 assert(!bo->purged); 4894 assert(bo->refcnt == 0); 4895 assert(bo->reusable); 4896 assert(!bo->scanout); 4897 assert(bo->flush == false); 4898 assert_tiling(kgem, bo); 4899 4900 if (num_pages(bo) < size) 4901 continue; 4902 4903 if (bo->tiling != tiling || 4904 (tiling != I915_TILING_NONE && bo->pitch != pitch)) { 4905 if (!gem_set_tiling(kgem->fd, 4906 bo->handle, 4907 tiling, pitch)) 4908 continue; 4909 } 4910 4911 kgem_bo_remove_from_active(kgem, bo); 4912 4913 bo->unique_id = kgem_get_unique_id(kgem); 4914 bo->pitch = pitch; 4915 bo->tiling = tiling; 4916 bo->delta = 0; 4917 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4918 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4919 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4920 assert_tiling(kgem, bo); 4921 bo->refcnt = 1; 4922 return bo; 4923 } 4924 } 4925 } else if (!exact) { /* allow an active near-miss? 
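 * With no exact-tiling requirement on these older chipsets, walk from the
 * requested tiling mode down to untiled and reuse any active bo whose
 * existing pitch and allocated size already cover the request, without
 * touching its tiling.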
*/ 4926 for (i = tiling; i >= I915_TILING_NONE; i--) { 4927 tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags, 4928 width, height, bpp, tiling, &pitch); 4929 cache = active(kgem, tiled_height / PAGE_SIZE, i); 4930 tiled_height = kgem_aligned_height(kgem, height, i); 4931 list_for_each_entry(bo, cache, list) { 4932 assert(!bo->purged); 4933 assert(bo->refcnt == 0); 4934 assert(bo->reusable); 4935 assert(!bo->scanout); 4936 assert(bo->flush == false); 4937 assert_tiling(kgem, bo); 4938 4939 if (bo->tiling) { 4940 if (bo->pitch < pitch) { 4941 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", 4942 bo->tiling, tiling, 4943 bo->pitch, pitch)); 4944 continue; 4945 } 4946 } else 4947 bo->pitch = pitch; 4948 4949 if (bo->pitch * tiled_height > bytes(bo)) 4950 continue; 4951 4952 kgem_bo_remove_from_active(kgem, bo); 4953 4954 bo->unique_id = kgem_get_unique_id(kgem); 4955 bo->delta = 0; 4956 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 4957 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 4958 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 4959 assert_tiling(kgem, bo); 4960 bo->refcnt = 1; 4961 return bo; 4962 } 4963 } 4964 } 4965 4966 if (--retry) { 4967 bucket++; 4968 goto search_active; 4969 } 4970 4971skip_active_search: 4972 bucket = cache_bucket(size); 4973 retry = NUM_CACHE_BUCKETS - bucket; 4974 if (retry > 3) 4975 retry = 3; 4976search_inactive: 4977 /* Now just look for a close match and prefer any currently active */ 4978 assert(bucket < NUM_CACHE_BUCKETS); 4979 cache = &kgem->inactive[bucket]; 4980 list_for_each_entry(bo, cache, list) { 4981 assert(bucket(bo) == bucket); 4982 assert(bo->reusable); 4983 assert(!bo->scanout); 4984 assert(bo->flush == false); 4985 assert_tiling(kgem, bo); 4986 4987 if (size > num_pages(bo)) { 4988 DBG(("inactive too small: %d < %d\n", 4989 num_pages(bo), size)); 4990 continue; 4991 } 4992 4993 if (bo->tiling != tiling || 4994 (tiling != I915_TILING_NONE && bo->pitch != pitch)) { 4995 if (!gem_set_tiling(kgem->fd, bo->handle, 4996 tiling, pitch)) 4997 continue; 4998 } 4999 5000 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { 5001 kgem_bo_free(kgem, bo); 5002 break; 5003 } 5004 5005 kgem_bo_remove_from_inactive(kgem, bo); 5006 assert(list_is_empty(&bo->list)); 5007 assert(list_is_empty(&bo->vma)); 5008 5009 bo->pitch = pitch; 5010 bo->tiling = tiling; 5011 5012 bo->delta = 0; 5013 bo->unique_id = kgem_get_unique_id(kgem); 5014 assert(bo->pitch); 5015 DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n", 5016 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 5017 assert(bo->refcnt == 0); 5018 assert(bo->reusable); 5019 assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU); 5020 ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE); 5021 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 5022 assert_tiling(kgem, bo); 5023 bo->refcnt = 1; 5024 5025 if (flags & CREATE_SCANOUT) 5026 __kgem_bo_make_scanout(kgem, bo, width, height); 5027 5028 return bo; 5029 } 5030 5031 if ((flags & CREATE_NO_RETIRE) == 0) { 5032 list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) { 5033 if (bo->exec) 5034 break; 5035 5036 if (size > num_pages(bo)) 5037 continue; 5038 5039 if (__kgem_busy(kgem, bo->handle)) { 5040 if (flags & CREATE_NO_THROTTLE) 5041 goto no_retire; 5042 5043 do { 5044 if (!kgem->need_throttle) { 5045 DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, 
bo->handle)); 5046 goto no_retire; 5047 } 5048 5049 __kgem_throttle(kgem, false); 5050 } while (__kgem_busy(kgem, bo->handle)); 5051 } 5052 5053 DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle)); 5054 5055 kgem_bo_remove_from_active(kgem, bo); 5056 __kgem_bo_clear_busy(bo); 5057 5058 if (tiling != I915_TILING_NONE && bo->pitch != pitch) { 5059 if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) { 5060 kgem_bo_free(kgem, bo); 5061 goto no_retire; 5062 } 5063 } 5064 5065 bo->pitch = pitch; 5066 bo->unique_id = kgem_get_unique_id(kgem); 5067 bo->delta = 0; 5068 DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", 5069 bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 5070 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); 5071 assert_tiling(kgem, bo); 5072 bo->refcnt = 1; 5073 5074 if (flags & CREATE_SCANOUT) 5075 __kgem_bo_make_scanout(kgem, bo, width, height); 5076 5077 return bo; 5078 } 5079no_retire: 5080 flags |= CREATE_NO_RETIRE; 5081 } 5082 5083 if (--retry) { 5084 bucket++; 5085 goto search_inactive; 5086 } 5087 5088create: 5089 if (flags & CREATE_CACHED) { 5090 DBG(("%s: no cached bo found, requested not to create a new bo\n", __FUNCTION__)); 5091 return NULL; 5092 } 5093 5094 if (bucket >= NUM_CACHE_BUCKETS) 5095 size = ALIGN(size, 1024); 5096 handle = gem_create(kgem->fd, size); 5097 if (handle == 0) { 5098 DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__)); 5099 return NULL; 5100 } 5101 5102 bo = __kgem_bo_alloc(handle, size); 5103 if (!bo) { 5104 DBG(("%s: malloc failed\n", __FUNCTION__)); 5105 gem_close(kgem->fd, handle); 5106 return NULL; 5107 } 5108 5109 bo->unique_id = kgem_get_unique_id(kgem); 5110 if (tiling == I915_TILING_NONE || 5111 gem_set_tiling(kgem->fd, handle, tiling, pitch)) { 5112 bo->tiling = tiling; 5113 bo->pitch = pitch; 5114 if (flags & CREATE_SCANOUT) 5115 __kgem_bo_make_scanout(kgem, bo, width, height); 5116 } else { 5117 if (flags & CREATE_EXACT) { 5118 DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); 5119 gem_close(kgem->fd, handle); 5120 free(bo); 5121 return NULL; 5122 } 5123 } 5124 5125 assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); 5126 assert_tiling(kgem, bo); 5127 5128 debug_alloc__bo(kgem, bo); 5129 5130 DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n", 5131 bo->pitch, bo->tiling, bo->handle, bo->unique_id, 5132 size, num_pages(bo), bucket(bo))); 5133 return bo; 5134} 5135 5136struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, 5137 int width, 5138 int height, 5139 int bpp, 5140 uint32_t flags) 5141{ 5142 struct kgem_bo *bo; 5143 int stride, size; 5144 5145 if (DBG_NO_CPU) 5146 return NULL; 5147 5148 DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp)); 5149 5150 if (kgem->has_llc) { 5151 bo = kgem_create_2d(kgem, width, height, bpp, 5152 I915_TILING_NONE, flags); 5153 if (bo == NULL) 5154 return bo; 5155 5156 assert(bo->tiling == I915_TILING_NONE); 5157 assert_tiling(kgem, bo); 5158 5159 if (kgem_bo_map__cpu(kgem, bo) == NULL) { 5160 kgem_bo_destroy(kgem, bo); 5161 return NULL; 5162 } 5163 5164 return bo; 5165 } 5166 5167 assert(width > 0 && height > 0); 5168 stride = ALIGN(width, 2) * bpp >> 3; 5169 stride = ALIGN(stride, 4); 5170 size = stride * ALIGN(height, 2); 5171 assert(size >= PAGE_SIZE); 5172 5173 DBG(("%s: %dx%d, %d bpp, stride=%d\n", 5174 __FUNCTION__, width, height, bpp, stride)); 5175 5176 bo = search_snoop_cache(kgem, NUM_PAGES(size), 0); 5177 if 
(bo) { 5178 assert(bo->tiling == I915_TILING_NONE); 5179 assert_tiling(kgem, bo); 5180 assert(bo->snoop); 5181 bo->refcnt = 1; 5182 bo->pitch = stride; 5183 bo->unique_id = kgem_get_unique_id(kgem); 5184 return bo; 5185 } 5186 5187 if (kgem->has_caching) { 5188 bo = kgem_create_linear(kgem, size, flags); 5189 if (bo == NULL) 5190 return NULL; 5191 5192 assert(bo->tiling == I915_TILING_NONE); 5193 assert_tiling(kgem, bo); 5194 5195 if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) { 5196 kgem_bo_destroy(kgem, bo); 5197 return NULL; 5198 } 5199 bo->snoop = true; 5200 5201 if (kgem_bo_map__cpu(kgem, bo) == NULL) { 5202 kgem_bo_destroy(kgem, bo); 5203 return NULL; 5204 } 5205 5206 bo->pitch = stride; 5207 bo->unique_id = kgem_get_unique_id(kgem); 5208 return bo; 5209 } 5210 5211 if (kgem->has_userptr) { 5212 void *ptr; 5213 5214 /* XXX */ 5215 //if (posix_memalign(&ptr, 64, ALIGN(size, 64))) 5216 if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE))) 5217 return NULL; 5218 5219 bo = kgem_create_map(kgem, ptr, size, false); 5220 if (bo == NULL) { 5221 free(ptr); 5222 return NULL; 5223 } 5224 5225 bo->pitch = stride; 5226 bo->unique_id = kgem_get_unique_id(kgem); 5227 return bo; 5228 } 5229 5230 return NULL; 5231} 5232 5233void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) 5234{ 5235 DBG(("%s: handle=%d, proxy? %d\n", 5236 __FUNCTION__, bo->handle, bo->proxy != NULL)); 5237 5238 if (bo->proxy) { 5239 assert(!bo->reusable); 5240 kgem_bo_binding_free(kgem, bo); 5241 5242 assert(list_is_empty(&bo->list)); 5243 _list_del(&bo->vma); 5244 _list_del(&bo->request); 5245 5246 if (bo->io && bo->domain == DOMAIN_CPU) 5247 _kgem_bo_delete_buffer(kgem, bo); 5248 5249 kgem_bo_unref(kgem, bo->proxy); 5250 5251 if (DBG_NO_MALLOC_CACHE) { 5252 free(bo); 5253 } else { 5254 *(struct kgem_bo **)bo = __kgem_freed_bo; 5255 __kgem_freed_bo = bo; 5256 } 5257 } else 5258 __kgem_bo_destroy(kgem, bo); 5259} 5260 5261static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) 5262{ 5263 assert(bo->rq); 5264 assert(bo->exec == NULL); 5265 assert(bo->needs_flush); 5266 5267 /* The kernel will emit a flush *and* update its own flushing lists. */ 5268 if (!__kgem_busy(kgem, bo->handle)) 5269 __kgem_bo_clear_busy(bo); 5270 5271 DBG(("%s: handle=%d, busy?=%d\n", 5272 __FUNCTION__, bo->handle, bo->rq != NULL)); 5273} 5274 5275void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) 5276{ 5277 if (!bo->needs_flush) 5278 return; 5279 5280 kgem_bo_submit(kgem, bo); 5281 5282 /* If the kernel fails to emit the flush, then it will be forced when 5283 * we assume direct access. And as the usual failure is EIO, we do 5284 * not actually care. 5285 */ 5286 assert(bo->exec == NULL); 5287 if (bo->rq) 5288 __kgem_flush(kgem, bo); 5289 5290 /* Whatever actually happens, we can regard the GTT write domain 5291 * as being flushed. 
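 * Either the kernel emitted the flush above, or it will be forced on the
 * next direct access; in both cases our local dirty tracking can be reset
 * below.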
5292 */ 5293 bo->gtt_dirty = false; 5294 bo->needs_flush = false; 5295 bo->domain = DOMAIN_NONE; 5296} 5297 5298inline static bool nearly_idle(struct kgem *kgem) 5299{ 5300 int ring = kgem->ring == KGEM_BLT; 5301 5302 if (list_is_singular(&kgem->requests[ring])) 5303 return true; 5304 5305 return __kgem_ring_is_idle(kgem, ring); 5306} 5307 5308inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) 5309{ 5310 if (kgem->needs_semaphore) 5311 return false; 5312 5313 if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring) 5314 return false; 5315 5316 kgem->needs_semaphore = true; 5317 return true; 5318} 5319 5320inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo) 5321{ 5322 if (kgem->needs_reservation) 5323 return false; 5324 5325 if (bo->presumed_offset) 5326 return false; 5327 5328 kgem->needs_reservation = true; 5329 return nearly_idle(kgem); 5330} 5331 5332inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo) 5333{ 5334 bool flush = false; 5335 5336 if (needs_semaphore(kgem, bo)) { 5337 DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle)); 5338 flush = true; 5339 } 5340 5341 if (needs_reservation(kgem, bo)) { 5342 DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle)); 5343 flush = true; 5344 } 5345 5346 return kgem->nreloc ? flush : false; 5347} 5348 5349static bool aperture_check(struct kgem *kgem, unsigned num_pages) 5350{ 5351 struct drm_i915_gem_get_aperture aperture; 5352 int reserve; 5353 5354 if (kgem->aperture) 5355 return false; 5356 5357 /* Leave some space in case of alignment issues */ 5358 reserve = kgem->aperture_mappable / 2; 5359 if (kgem->gen < 033 && reserve < kgem->aperture_max_fence) 5360 reserve = kgem->aperture_max_fence; 5361 if (!kgem->has_llc) 5362 reserve += kgem->nexec * PAGE_SIZE * 2; 5363 5364 DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n", 5365 __FUNCTION__, num_pages, reserve, kgem->aperture_total)); 5366 num_pages += reserve; 5367 5368 VG_CLEAR(aperture); 5369 aperture.aper_available_size = kgem->aperture_total; 5370 aperture.aper_available_size *= PAGE_SIZE; 5371 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); 5372 5373 DBG(("%s: aperture required %ld bytes, available %ld bytes\n", 5374 __FUNCTION__, 5375 (long)num_pages * PAGE_SIZE, 5376 (long)aperture.aper_available_size)); 5377 5378 return num_pages <= aperture.aper_available_size / PAGE_SIZE; 5379} 5380 5381static inline bool kgem_flush(struct kgem *kgem, bool flush) 5382{ 5383 if (unlikely(kgem->wedged)) 5384 return false; 5385 5386 if (kgem->nreloc == 0) 5387 return true; 5388 5389 if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE) 5390 return true; 5391 5392 if (kgem->flush == flush && kgem->aperture < kgem->aperture_low) 5393 return true; 5394 5395 DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n", 5396 __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring))); 5397 return !kgem_ring_is_idle(kgem, kgem->ring); 5398} 5399 5400bool kgem_check_bo(struct kgem *kgem, ...) 
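/* Check whether the NULL-terminated list of bo that follows can be added to
 * the current batch without exhausting the exec slots or the aperture
 * high-water mark. A rough sketch of the expected calling pattern (names
 * are illustrative, not a verbatim call site):
 *
 *	if (!kgem_check_bo(kgem, dst_bo, src_bo, NULL))
 *		kgem_submit(kgem);
 */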
5401{ 5402 va_list ap; 5403 struct kgem_bo *bo; 5404 int num_exec = 0; 5405 int num_pages = 0; 5406 bool flush = false; 5407 bool busy = true; 5408 5409 va_start(ap, kgem); 5410 while ((bo = va_arg(ap, struct kgem_bo *))) { 5411 while (bo->proxy) 5412 bo = bo->proxy; 5413 if (bo->exec) 5414 continue; 5415 5416 if (needs_batch_flush(kgem, bo)) { 5417 va_end(ap); 5418 return false; 5419 } 5420 5421 num_pages += num_pages(bo); 5422 num_exec++; 5423 5424 flush |= bo->flush; 5425 busy &= bo->rq != NULL; 5426 } 5427 va_end(ap); 5428 5429 DBG(("%s: num_pages=+%d, num_exec=+%d\n", 5430 __FUNCTION__, num_pages, num_exec)); 5431 5432 if (!num_pages) 5433 return true; 5434 5435 if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { 5436 DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, 5437 kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); 5438 return false; 5439 } 5440 5441 if (num_pages + kgem->aperture > kgem->aperture_high) { 5442 DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n", 5443 __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high)); 5444 return aperture_check(kgem, num_pages); 5445 } 5446 5447 if (busy) 5448 return true; 5449 5450 return kgem_flush(kgem, flush); 5451} 5452 5453bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo) 5454{ 5455 assert(bo->refcnt); 5456 while (bo->proxy) 5457 bo = bo->proxy; 5458 assert(bo->refcnt); 5459 5460 if (bo->exec) { 5461 if (kgem->gen < 040 && 5462 bo->tiling != I915_TILING_NONE && 5463 (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { 5464 uint32_t size; 5465 5466 assert(bo->tiling == I915_TILING_X); 5467 5468 if (kgem->nfence >= kgem->fence_max) 5469 return false; 5470 5471 if (kgem->aperture_fenced) { 5472 size = 3*kgem->aperture_fenced; 5473 if (kgem->aperture_total == kgem->aperture_mappable) 5474 size += kgem->aperture; 5475 if (size > kgem->aperture_fenceable && 5476 kgem_ring_is_idle(kgem, kgem->ring)) { 5477 DBG(("%s: opportunistic fence flush\n", __FUNCTION__)); 5478 return false; 5479 } 5480 } 5481 5482 size = kgem_bo_fenced_size(kgem, bo); 5483 if (size > kgem->aperture_max_fence) 5484 kgem->aperture_max_fence = size; 5485 size += kgem->aperture_fenced; 5486 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence) 5487 size = 2 * kgem->aperture_max_fence; 5488 if (kgem->aperture_total == kgem->aperture_mappable) 5489 size += kgem->aperture; 5490 if (size > kgem->aperture_fenceable) { 5491 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n", 5492 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable)); 5493 return false; 5494 } 5495 } 5496 5497 return true; 5498 } 5499 5500 if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1) 5501 return false; 5502 5503 if (needs_batch_flush(kgem, bo)) 5504 return false; 5505 5506 assert_tiling(kgem, bo); 5507 if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) { 5508 uint32_t size; 5509 5510 assert(bo->tiling == I915_TILING_X); 5511 5512 if (kgem->nfence >= kgem->fence_max) 5513 return false; 5514 5515 if (kgem->aperture_fenced) { 5516 size = 3*kgem->aperture_fenced; 5517 if (kgem->aperture_total == kgem->aperture_mappable) 5518 size += kgem->aperture; 5519 if (size > kgem->aperture_fenceable && 5520 kgem_ring_is_idle(kgem, kgem->ring)) { 5521 DBG(("%s: opportunistic fence flush\n", __FUNCTION__)); 5522 return false; 5523 } 5524 } 5525 5526 size = kgem_bo_fenced_size(kgem, bo); 5527 if (size > kgem->aperture_max_fence) 5528 
kgem->aperture_max_fence = size; 5529 size += kgem->aperture_fenced; 5530 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence) 5531 size = 2 * kgem->aperture_max_fence; 5532 if (kgem->aperture_total == kgem->aperture_mappable) 5533 size += kgem->aperture; 5534 if (size > kgem->aperture_fenceable) { 5535 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n", 5536 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable)); 5537 return false; 5538 } 5539 } 5540 5541 if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) { 5542 DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n", 5543 __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high)); 5544 return aperture_check(kgem, num_pages(bo)); 5545 } 5546 5547 if (bo->rq) 5548 return true; 5549 5550 return kgem_flush(kgem, bo->flush); 5551} 5552 5553bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) 5554{ 5555 va_list ap; 5556 struct kgem_bo *bo; 5557 int num_fence = 0; 5558 int num_exec = 0; 5559 int num_pages = 0; 5560 int fenced_size = 0; 5561 bool flush = false; 5562 bool busy = true; 5563 5564 va_start(ap, kgem); 5565 while ((bo = va_arg(ap, struct kgem_bo *))) { 5566 assert(bo->refcnt); 5567 while (bo->proxy) 5568 bo = bo->proxy; 5569 assert(bo->refcnt); 5570 if (bo->exec) { 5571 if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE) 5572 continue; 5573 5574 if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { 5575 fenced_size += kgem_bo_fenced_size(kgem, bo); 5576 num_fence++; 5577 } 5578 5579 continue; 5580 } 5581 5582 if (needs_batch_flush(kgem, bo)) { 5583 va_end(ap); 5584 return false; 5585 } 5586 5587 assert_tiling(kgem, bo); 5588 num_pages += num_pages(bo); 5589 num_exec++; 5590 if (kgem->gen < 040 && bo->tiling) { 5591 uint32_t size = kgem_bo_fenced_size(kgem, bo); 5592 if (size > kgem->aperture_max_fence) 5593 kgem->aperture_max_fence = size; 5594 fenced_size += size; 5595 num_fence++; 5596 } 5597 5598 flush |= bo->flush; 5599 busy &= bo->rq != NULL; 5600 } 5601 va_end(ap); 5602 5603 if (num_fence) { 5604 uint32_t size; 5605 5606 if (kgem->nfence + num_fence > kgem->fence_max) 5607 return false; 5608 5609 if (kgem->aperture_fenced) { 5610 size = 3*kgem->aperture_fenced; 5611 if (kgem->aperture_total == kgem->aperture_mappable) 5612 size += kgem->aperture; 5613 if (size > kgem->aperture_fenceable && 5614 kgem_ring_is_idle(kgem, kgem->ring)) { 5615 DBG(("%s: opportunistic fence flush\n", __FUNCTION__)); 5616 return false; 5617 } 5618 } 5619 5620 size = kgem->aperture_fenced; 5621 size += fenced_size; 5622 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence) 5623 size = 2 * kgem->aperture_max_fence; 5624 if (kgem->aperture_total == kgem->aperture_mappable) 5625 size += kgem->aperture; 5626 if (size > kgem->aperture_fenceable) { 5627 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n", 5628 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable)); 5629 return false; 5630 } 5631 } 5632 5633 if (num_pages == 0) 5634 return true; 5635 5636 if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) 5637 return false; 5638 5639 if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) { 5640 DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n", 5641 __FUNCTION__, kgem->aperture, 
kgem->aperture_fenced, num_pages, kgem->aperture_high)); 5642 return aperture_check(kgem, num_pages); 5643 } 5644 5645 if (busy) 5646 return true; 5647 5648 return kgem_flush(kgem, flush); 5649} 5650 5651uint32_t kgem_add_reloc(struct kgem *kgem, 5652 uint32_t pos, 5653 struct kgem_bo *bo, 5654 uint32_t read_write_domain, 5655 uint32_t delta) 5656{ 5657 int index; 5658 5659 DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n", 5660 __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain)); 5661 5662 assert(kgem->gen < 0100); 5663 assert((read_write_domain & 0x7fff) == 0 || bo != NULL); 5664 5665 index = kgem->nreloc++; 5666 assert(index < ARRAY_SIZE(kgem->reloc)); 5667 kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); 5668 if (bo) { 5669 assert(kgem->mode != KGEM_NONE); 5670 assert(bo->refcnt); 5671 while (bo->proxy) { 5672 DBG(("%s: adding proxy [delta=%d] for handle=%d\n", 5673 __FUNCTION__, bo->delta, bo->handle)); 5674 delta += bo->delta; 5675 assert(bo->handle == bo->proxy->handle); 5676 /* need to release the cache upon batch submit */ 5677 if (bo->exec == NULL) { 5678 list_move_tail(&bo->request, 5679 &kgem->next_request->buffers); 5680 bo->rq = MAKE_REQUEST(kgem->next_request, 5681 kgem->ring); 5682 bo->exec = &_kgem_dummy_exec; 5683 bo->domain = DOMAIN_GPU; 5684 } 5685 5686 if (read_write_domain & 0x7fff && !bo->gpu_dirty) 5687 __kgem_bo_mark_dirty(bo); 5688 5689 bo = bo->proxy; 5690 assert(bo->refcnt); 5691 } 5692 assert(bo->refcnt); 5693 5694 if (bo->exec == NULL) 5695 kgem_add_bo(kgem, bo); 5696 assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); 5697 assert(RQ_RING(bo->rq) == kgem->ring); 5698 5699 if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) { 5700 if (bo->tiling && 5701 (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { 5702 assert(bo->tiling == I915_TILING_X); 5703 assert(kgem->nfence < kgem->fence_max); 5704 kgem->aperture_fenced += 5705 kgem_bo_fenced_size(kgem, bo); 5706 kgem->nfence++; 5707 } 5708 bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE; 5709 } 5710 5711 kgem->reloc[index].delta = delta; 5712 kgem->reloc[index].target_handle = bo->target_handle; 5713 kgem->reloc[index].presumed_offset = bo->presumed_offset; 5714 5715 if (read_write_domain & 0x7fff && !bo->gpu_dirty) { 5716 assert(!bo->snoop || kgem->can_blt_cpu); 5717 __kgem_bo_mark_dirty(bo); 5718 } 5719 5720 delta += bo->presumed_offset; 5721 } else { 5722 kgem->reloc[index].delta = delta; 5723 kgem->reloc[index].target_handle = ~0U; 5724 kgem->reloc[index].presumed_offset = 0; 5725 if (kgem->nreloc__self < 256) 5726 kgem->reloc__self[kgem->nreloc__self++] = index; 5727 } 5728 kgem->reloc[index].read_domains = read_write_domain >> 16; 5729 kgem->reloc[index].write_domain = read_write_domain & 0x7fff; 5730 5731 return delta; 5732} 5733 5734uint64_t kgem_add_reloc64(struct kgem *kgem, 5735 uint32_t pos, 5736 struct kgem_bo *bo, 5737 uint32_t read_write_domain, 5738 uint64_t delta) 5739{ 5740 int index; 5741 5742 DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n", 5743 __FUNCTION__, bo ? 
bo->handle : 0, pos, (long)delta, read_write_domain)); 5744 5745 assert(kgem->gen >= 0100); 5746 assert((read_write_domain & 0x7fff) == 0 || bo != NULL); 5747 5748 index = kgem->nreloc++; 5749 assert(index < ARRAY_SIZE(kgem->reloc)); 5750 kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); 5751 if (bo) { 5752 assert(kgem->mode != KGEM_NONE); 5753 assert(bo->refcnt); 5754 while (bo->proxy) { 5755 DBG(("%s: adding proxy [delta=%ld] for handle=%d\n", 5756 __FUNCTION__, (long)bo->delta, bo->handle)); 5757 delta += bo->delta; 5758 assert(bo->handle == bo->proxy->handle); 5759 /* need to release the cache upon batch submit */ 5760 if (bo->exec == NULL) { 5761 list_move_tail(&bo->request, 5762 &kgem->next_request->buffers); 5763 bo->rq = MAKE_REQUEST(kgem->next_request, 5764 kgem->ring); 5765 bo->exec = &_kgem_dummy_exec; 5766 bo->domain = DOMAIN_GPU; 5767 } 5768 5769 if (read_write_domain & 0x7fff && !bo->gpu_dirty) 5770 __kgem_bo_mark_dirty(bo); 5771 5772 bo = bo->proxy; 5773 assert(bo->refcnt); 5774 } 5775 assert(bo->refcnt); 5776 5777 if (bo->exec == NULL) 5778 kgem_add_bo(kgem, bo); 5779 assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); 5780 assert(RQ_RING(bo->rq) == kgem->ring); 5781 5782 DBG(("%s[%d] = (delta=%d, target handle=%d, presumed=%llx)\n", 5783 __FUNCTION__, index, delta, bo->target_handle, (long long)bo->presumed_offset)); 5784 kgem->reloc[index].delta = delta; 5785 kgem->reloc[index].target_handle = bo->target_handle; 5786 kgem->reloc[index].presumed_offset = bo->presumed_offset; 5787 5788 if (read_write_domain & 0x7fff && !bo->gpu_dirty) { 5789 assert(!bo->snoop || kgem->can_blt_cpu); 5790 __kgem_bo_mark_dirty(bo); 5791 } 5792 5793 delta += bo->presumed_offset; 5794 } else { 5795 DBG(("%s[%d] = (delta=%d, target handle=batch)\n", 5796 __FUNCTION__, index, delta)); 5797 kgem->reloc[index].delta = delta; 5798 kgem->reloc[index].target_handle = ~0U; 5799 kgem->reloc[index].presumed_offset = 0; 5800 if (kgem->nreloc__self < 256) 5801 kgem->reloc__self[kgem->nreloc__self++] = index; 5802 } 5803 kgem->reloc[index].read_domains = read_write_domain >> 16; 5804 kgem->reloc[index].write_domain = read_write_domain & 0x7fff; 5805 5806 return delta; 5807} 5808 5809static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) 5810{ 5811 int i, j; 5812 5813 DBG(("%s: type=%d, count=%d (bucket: %d)\n", 5814 __FUNCTION__, type, kgem->vma[type].count, bucket)); 5815 if (kgem->vma[type].count <= 0) 5816 return; 5817 5818 if (kgem->need_purge) 5819 kgem_purge_cache(kgem); 5820 5821 /* vma are limited on a per-process basis to around 64k. 5822 * This includes all malloc arenas as well as other file 5823 * mappings. In order to be fair and not hog the cache, 5824 * and more importantly not to exhaust that limit and to 5825 * start failing mappings, we keep our own number of open 5826 * vma to within a conservative value. 5827 */ 5828 i = 0; 5829 while (kgem->vma[type].count > 0) { 5830 struct kgem_bo *bo = NULL; 5831 void **ptr; 5832 5833 for (j = 0; 5834 bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive); 5835 j++) { 5836 struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)]; 5837 if (!list_is_empty(head)) 5838 bo = list_last_entry(head, struct kgem_bo, vma); 5839 } 5840 if (bo == NULL) 5841 break; 5842 5843 DBG(("%s: discarding inactive %s vma cache for %d\n", 5844 __FUNCTION__, type ? "CPU" : "GTT", bo->handle)); 5845 5846 ptr = type ? 
&bo->map__cpu : &bo->map__gtt; 5847 assert(bo->rq == NULL); 5848 5849 VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo))); 5850 munmap(MAP(*ptr), bytes(bo)); 5851 *ptr = NULL; 5852 list_del(&bo->vma); 5853 kgem->vma[type].count--; 5854 5855 if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { 5856 DBG(("%s: freeing unpurgeable old mapping\n", 5857 __FUNCTION__)); 5858 kgem_bo_free(kgem, bo); 5859 } 5860 } 5861} 5862 5863void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) 5864{ 5865 void *ptr; 5866 5867 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, 5868 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); 5869 5870 assert(bo->proxy == NULL); 5871 assert(list_is_empty(&bo->list)); 5872 assert_tiling(kgem, bo); 5873 assert(!bo->purged || bo->reusable); 5874 5875 if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) { 5876 DBG(("%s: converting request for GTT map into CPU map\n", 5877 __FUNCTION__)); 5878 return kgem_bo_map__cpu(kgem, bo); 5879 } 5880 5881 ptr = MAP(bo->map__gtt); 5882 if (ptr == NULL) { 5883 assert(num_pages(bo) <= kgem->aperture_mappable / 2); 5884 5885 kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); 5886 5887 ptr = __kgem_bo_map__gtt(kgem, bo); 5888 if (ptr == NULL) 5889 return NULL; 5890 5891 /* Cache this mapping to avoid the overhead of an 5892 * excruciatingly slow GTT pagefault. This is more an 5893 * issue with compositing managers which need to frequently 5894 * flush CPU damage to their GPU bo. 5895 */ 5896 bo->map__gtt = ptr; 5897 DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); 5898 } 5899 5900 return ptr; 5901} 5902 5903void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) 5904{ 5905 void *ptr; 5906 5907 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, 5908 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); 5909 5910 assert(bo->proxy == NULL); 5911 assert(list_is_empty(&bo->list)); 5912 assert(bo->exec == NULL); 5913 assert_tiling(kgem, bo); 5914 assert(!bo->purged || bo->reusable); 5915 5916 if (bo->tiling == I915_TILING_NONE && !bo->scanout && 5917 (kgem->has_llc || bo->domain == DOMAIN_CPU)) { 5918 DBG(("%s: converting request for GTT map into CPU map\n", 5919 __FUNCTION__)); 5920 ptr = kgem_bo_map__cpu(kgem, bo); 5921 if (ptr) 5922 kgem_bo_sync__cpu(kgem, bo); 5923 return ptr; 5924 } 5925 5926 ptr = MAP(bo->map__gtt); 5927 if (ptr == NULL) { 5928 assert(num_pages(bo) <= kgem->aperture_mappable / 2); 5929 assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); 5930 5931 kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); 5932 5933 ptr = __kgem_bo_map__gtt(kgem, bo); 5934 if (ptr == NULL) 5935 return NULL; 5936 5937 /* Cache this mapping to avoid the overhead of an 5938 * excruciatingly slow GTT pagefault. This is more an 5939 * issue with compositing managers which need to frequently 5940 * flush CPU damage to their GPU bo. 5941 */ 5942 bo->map__gtt = ptr; 5943 DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); 5944 } 5945 5946 if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) { 5947 struct drm_i915_gem_set_domain set_domain; 5948 5949 DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, 5950 bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); 5951 5952 /* XXX use PROT_READ to avoid the write flush? 
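 * (Left open: callers of kgem_bo_map() expect to write through the
 * returned pointer, so the full read/write set-domain below is kept for
 * now.)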
*/ 5953 5954 VG_CLEAR(set_domain); 5955 set_domain.handle = bo->handle; 5956 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 5957 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 5958 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 5959 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 5960 kgem_throttle(kgem); 5961 } 5962 kgem_bo_retire(kgem, bo); 5963 bo->domain = DOMAIN_GTT; 5964 bo->gtt_dirty = true; 5965 } 5966 5967 return ptr; 5968} 5969 5970void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) 5971{ 5972 void *ptr; 5973 5974 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, 5975 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); 5976 5977 assert(bo->proxy == NULL); 5978 assert(bo->exec == NULL); 5979 assert(list_is_empty(&bo->list)); 5980 assert_tiling(kgem, bo); 5981 assert(!bo->purged || bo->reusable); 5982 5983 ptr = MAP(bo->map__gtt); 5984 if (ptr == NULL) { 5985 assert(num_pages(bo) <= kgem->aperture_mappable / 4); 5986 5987 kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); 5988 5989 ptr = __kgem_bo_map__gtt(kgem, bo); 5990 if (ptr == NULL) 5991 return NULL; 5992 5993 /* Cache this mapping to avoid the overhead of an 5994 * excruciatingly slow GTT pagefault. This is more an 5995 * issue with compositing managers which need to frequently 5996 * flush CPU damage to their GPU bo. 5997 */ 5998 bo->map__gtt = ptr; 5999 DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); 6000 } 6001 6002 return ptr; 6003} 6004 6005void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) 6006{ 6007 return kgem_bo_map__async(kgem, bo); 6008} 6009 6010void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) 6011{ 6012 struct drm_i915_gem_mmap mmap_arg; 6013 int err; 6014 6015 DBG(("%s(handle=%d, size=%d, map=%p:%p)\n", 6016 __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu)); 6017 assert(!bo->purged); 6018 assert(list_is_empty(&bo->list)); 6019 assert(bo->proxy == NULL); 6020 assert_tiling(kgem, bo); 6021 6022 if (bo->map__cpu) 6023 return MAP(bo->map__cpu); 6024 6025 kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo)); 6026 6027retry: 6028 VG_CLEAR(mmap_arg); 6029 mmap_arg.handle = bo->handle; 6030 mmap_arg.offset = 0; 6031 mmap_arg.size = bytes(bo); 6032 if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { 6033 assert(err != EINVAL); 6034 6035 if (__kgem_throttle_retire(kgem, 0)) 6036 goto retry; 6037 6038 if (kgem_cleanup_cache(kgem)) 6039 goto retry; 6040 6041 ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", 6042 __FUNCTION__, bo->handle, bytes(bo), -err)); 6043 return NULL; 6044 } 6045 6046 VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); 6047 6048 DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); 6049 return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; 6050} 6051 6052uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) 6053{ 6054 struct drm_gem_flink flink; 6055 6056 VG_CLEAR(flink); 6057 flink.handle = bo->handle; 6058 if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink)) 6059 return 0; 6060 6061 DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n", 6062 __FUNCTION__, flink.handle, flink.name)); 6063 6064 /* Ordinarily giving the name away makes the buffer non-reusable. 6065 * However, we track the lifetime of all clients and their hold 6066 * on the buffer, and *presuming* they do not pass it on to a third 6067 * party, we track the lifetime accurately.
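 * Nevertheless, we err on the side of caution below: the bo is marked
 * non-reusable and handed to kgem_bo_unclean().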
6068 */ 6069 bo->reusable = false; 6070 6071 kgem_bo_unclean(kgem, bo); 6072 6073 return flink.name; 6074} 6075 6076struct kgem_bo *kgem_create_map(struct kgem *kgem, 6077 void *ptr, uint32_t size, 6078 bool read_only) 6079{ 6080 struct kgem_bo *bo; 6081 uintptr_t first_page, last_page; 6082 uint32_t handle; 6083 6084 assert(MAP(ptr) == ptr); 6085 6086 DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__, 6087 ptr, size, read_only, kgem->has_userptr)); 6088 if (!kgem->has_userptr) 6089 return NULL; 6090 6091 first_page = (uintptr_t)ptr; 6092 last_page = first_page + size + PAGE_SIZE - 1; 6093 6094 first_page &= ~(PAGE_SIZE-1); 6095 last_page &= ~(PAGE_SIZE-1); 6096 assert(last_page > first_page); 6097 6098 handle = gem_userptr(kgem->fd, 6099 (void *)first_page, last_page-first_page, 6100 read_only); 6101 if (handle == 0) { 6102 DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); 6103 return NULL; 6104 } 6105 6106 bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE); 6107 if (bo == NULL) { 6108 gem_close(kgem->fd, handle); 6109 return NULL; 6110 } 6111 6112 bo->unique_id = kgem_get_unique_id(kgem); 6113 bo->snoop = !kgem->has_llc; 6114 debug_alloc__bo(kgem, bo); 6115 6116 if (first_page != (uintptr_t)ptr) { 6117 struct kgem_bo *proxy; 6118 6119 proxy = kgem_create_proxy(kgem, bo, 6120 (uintptr_t)ptr - first_page, size); 6121 kgem_bo_destroy(kgem, bo); 6122 if (proxy == NULL) 6123 return NULL; 6124 6125 bo = proxy; 6126 } 6127 6128 bo->map__cpu = MAKE_USER_MAP(ptr); 6129 6130 DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n", 6131 __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL)); 6132 return bo; 6133} 6134 6135void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) 6136{ 6137 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); 6138 assert(!bo->scanout); 6139 assert_tiling(kgem, bo); 6140 6141 kgem_bo_submit(kgem, bo); 6142 6143 /* SHM pixmaps use proxies for subpage offsets */ 6144 assert(!bo->purged); 6145 while (bo->proxy) 6146 bo = bo->proxy; 6147 assert(!bo->purged); 6148 6149 if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { 6150 struct drm_i915_gem_set_domain set_domain; 6151 6152 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n", 6153 __FUNCTION__, bo->handle, 6154 bo->needs_flush, bo->domain, 6155 __kgem_busy(kgem, bo->handle))); 6156 6157 VG_CLEAR(set_domain); 6158 set_domain.handle = bo->handle; 6159 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 6160 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 6161 6162 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 6163 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 6164 kgem_throttle(kgem); 6165 } 6166 kgem_bo_retire(kgem, bo); 6167 bo->domain = DOMAIN_CPU; 6168 } 6169} 6170 6171void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) 6172{ 6173 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); 6174 assert(!bo->scanout || !write); 6175 assert_tiling(kgem, bo); 6176 6177 if (write || bo->needs_flush) 6178 kgem_bo_submit(kgem, bo); 6179 6180 /* SHM pixmaps use proxies for subpage offsets */ 6181 assert(!bo->purged); 6182 assert(bo->refcnt); 6183 while (bo->proxy) 6184 bo = bo->proxy; 6185 assert(bo->refcnt); 6186 assert(!bo->purged); 6187 6188 if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { 6189 struct drm_i915_gem_set_domain set_domain; 6190 6191 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? 
%d\n", 6192 __FUNCTION__, bo->handle, 6193 bo->needs_flush, bo->domain, 6194 __kgem_busy(kgem, bo->handle))); 6195 6196 VG_CLEAR(set_domain); 6197 set_domain.handle = bo->handle; 6198 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 6199 set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0; 6200 6201 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 6202 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 6203 kgem_throttle(kgem); 6204 } 6205 if (write) { 6206 kgem_bo_retire(kgem, bo); 6207 bo->domain = DOMAIN_CPU; 6208 } else { 6209 if (bo->exec == NULL) 6210 kgem_bo_maybe_retire(kgem, bo); 6211 bo->domain = DOMAIN_NONE; 6212 } 6213 } 6214} 6215 6216void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) 6217{ 6218 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); 6219 assert(bo->refcnt); 6220 assert(bo->proxy == NULL); 6221 assert_tiling(kgem, bo); 6222 6223 kgem_bo_submit(kgem, bo); 6224 6225 if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) { 6226 struct drm_i915_gem_set_domain set_domain; 6227 6228 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n", 6229 __FUNCTION__, bo->handle, 6230 bo->needs_flush, bo->domain, 6231 __kgem_busy(kgem, bo->handle))); 6232 6233 VG_CLEAR(set_domain); 6234 set_domain.handle = bo->handle; 6235 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 6236 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 6237 6238 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 6239 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 6240 kgem_throttle(kgem); 6241 } 6242 kgem_bo_retire(kgem, bo); 6243 bo->domain = DOMAIN_GTT; 6244 bo->gtt_dirty = true; 6245 } 6246} 6247 6248void kgem_clear_dirty(struct kgem *kgem) 6249{ 6250 struct list * const buffers = &kgem->next_request->buffers; 6251 struct kgem_bo *bo; 6252 6253 list_for_each_entry(bo, buffers, request) { 6254 if (!bo->gpu_dirty) 6255 break; 6256 6257 bo->gpu_dirty = false; 6258 } 6259} 6260 6261struct kgem_bo *kgem_create_proxy(struct kgem *kgem, 6262 struct kgem_bo *target, 6263 int offset, int length) 6264{ 6265 struct kgem_bo *bo; 6266 6267 DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n", 6268 __FUNCTION__, target->handle, target->proxy ? 
target->proxy->delta : -1, 6269 offset, length, target->io)); 6270 6271 bo = __kgem_bo_alloc(target->handle, length); 6272 if (bo == NULL) 6273 return NULL; 6274 6275 bo->unique_id = kgem_get_unique_id(kgem); 6276 bo->reusable = false; 6277 bo->size.bytes = length; 6278 6279 bo->io = target->io && target->proxy == NULL; 6280 bo->gpu_dirty = target->gpu_dirty; 6281 bo->tiling = target->tiling; 6282 bo->pitch = target->pitch; 6283 bo->flush = target->flush; 6284 bo->snoop = target->snoop; 6285 6286 assert(!bo->scanout); 6287 bo->proxy = kgem_bo_reference(target); 6288 bo->delta = offset; 6289 6290 /* Proxies are only tracked for busyness on the current rq */ 6291 if (target->exec && !bo->io) { 6292 assert(RQ(target->rq) == kgem->next_request); 6293 list_move_tail(&bo->request, &kgem->next_request->buffers); 6294 bo->exec = &_kgem_dummy_exec; 6295 bo->rq = target->rq; 6296 } 6297 6298 return bo; 6299} 6300 6301static struct kgem_buffer * 6302buffer_alloc(void) 6303{ 6304 struct kgem_buffer *bo; 6305 6306 bo = malloc(sizeof(*bo)); 6307 if (bo == NULL) 6308 return NULL; 6309 6310 bo->mem = NULL; 6311 bo->need_io = false; 6312 bo->mmapped = MMAPPED_CPU; 6313 6314 return bo; 6315} 6316 6317static struct kgem_buffer * 6318buffer_alloc_with_data(int num_pages) 6319{ 6320 struct kgem_buffer *bo; 6321 6322 bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE); 6323 if (bo == NULL) 6324 return NULL; 6325 6326 bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT); 6327 bo->mmapped = false; 6328 return bo; 6329} 6330 6331static inline bool 6332use_snoopable_buffer(struct kgem *kgem, uint32_t flags) 6333{ 6334 if ((flags & KGEM_BUFFER_WRITE) == 0) 6335 return kgem->gen >= 030; 6336 6337 return true; 6338} 6339 6340static void 6341init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old) 6342{ 6343 DBG(("%s: reusing handle=%d for buffer\n", 6344 __FUNCTION__, old->handle)); 6345 6346 assert(old->proxy == NULL); 6347 6348 memcpy(&bo->base, old, sizeof(*old)); 6349 if (old->rq) 6350 list_replace(&old->request, &bo->base.request); 6351 else 6352 list_init(&bo->base.request); 6353 list_replace(&old->vma, &bo->base.vma); 6354 list_init(&bo->base.list); 6355 free(old); 6356 6357 assert(bo->base.tiling == I915_TILING_NONE); 6358 6359 bo->base.refcnt = 1; 6360} 6361 6362static struct kgem_buffer * 6363search_snoopable_buffer(struct kgem *kgem, unsigned alloc) 6364{ 6365 struct kgem_buffer *bo; 6366 struct kgem_bo *old; 6367 6368 old = search_snoop_cache(kgem, alloc, 0); 6369 if (old) { 6370 if (!old->io) { 6371 bo = buffer_alloc(); 6372 if (bo == NULL) 6373 return NULL; 6374 6375 init_buffer_from_bo(bo, old); 6376 } else { 6377 bo = (struct kgem_buffer *)old; 6378 bo->base.refcnt = 1; 6379 } 6380 6381 DBG(("%s: created CPU handle=%d for buffer, size %d\n", 6382 __FUNCTION__, bo->base.handle, num_pages(&bo->base))); 6383 6384 assert(bo->base.snoop); 6385 assert(bo->base.tiling == I915_TILING_NONE); 6386 assert(num_pages(&bo->base) >= alloc); 6387 assert(bo->mmapped == MMAPPED_CPU); 6388 assert(bo->need_io == false); 6389 6390 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 6391 if (bo->mem == NULL) { 6392 bo->base.refcnt = 0; 6393 kgem_bo_free(kgem, &bo->base); 6394 bo = NULL; 6395 } 6396 6397 return bo; 6398 } 6399 6400 return NULL; 6401} 6402 6403static struct kgem_buffer * 6404create_snoopable_buffer(struct kgem *kgem, unsigned alloc) 6405{ 6406 struct kgem_buffer *bo; 6407 uint32_t handle; 6408 6409 if (kgem->has_llc) { 6410 struct kgem_bo *old; 6411 6412 bo = 
buffer_alloc(); 6413 if (bo == NULL) 6414 return NULL; 6415 6416 old = search_linear_cache(kgem, alloc, 6417 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT); 6418 if (old) { 6419 init_buffer_from_bo(bo, old); 6420 } else { 6421 handle = gem_create(kgem->fd, alloc); 6422 if (handle == 0) { 6423 free(bo); 6424 return NULL; 6425 } 6426 6427 __kgem_bo_init(&bo->base, handle, alloc); 6428 debug_alloc__bo(kgem, &bo->base); 6429 DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n", 6430 __FUNCTION__, bo->base.handle, alloc)); 6431 } 6432 6433 assert(bo->base.refcnt == 1); 6434 assert(bo->mmapped == MMAPPED_CPU); 6435 assert(bo->need_io == false); 6436 6437 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 6438 if (bo->mem != NULL) 6439 return bo; 6440 6441 bo->base.refcnt = 0; /* for valgrind */ 6442 kgem_bo_free(kgem, &bo->base); 6443 } 6444 6445 if (kgem->has_caching) { 6446 struct kgem_bo *old; 6447 6448 bo = buffer_alloc(); 6449 if (bo == NULL) 6450 return NULL; 6451 6452 old = search_linear_cache(kgem, alloc, 6453 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT); 6454 if (old) { 6455 init_buffer_from_bo(bo, old); 6456 } else { 6457 handle = gem_create(kgem->fd, alloc); 6458 if (handle == 0) { 6459 free(bo); 6460 return NULL; 6461 } 6462 6463 __kgem_bo_init(&bo->base, handle, alloc); 6464 debug_alloc__bo(kgem, &bo->base); 6465 DBG(("%s: created CPU handle=%d for buffer, size %d\n", 6466 __FUNCTION__, bo->base.handle, alloc)); 6467 } 6468 6469 assert(bo->base.refcnt == 1); 6470 assert(bo->mmapped == MMAPPED_CPU); 6471 assert(bo->need_io == false); 6472 6473 if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED)) 6474 goto free_caching; 6475 6476 bo->base.snoop = true; 6477 6478 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 6479 if (bo->mem == NULL) 6480 goto free_caching; 6481 6482 return bo; 6483 6484free_caching: 6485 bo->base.refcnt = 0; /* for valgrind */ 6486 kgem_bo_free(kgem, &bo->base); 6487 } 6488 6489 if (kgem->has_userptr) { 6490 bo = buffer_alloc(); 6491 if (bo == NULL) 6492 return NULL; 6493 6494 //if (posix_memalign(&ptr, 64, ALIGN(size, 64))) 6495 if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) { 6496 free(bo); 6497 return NULL; 6498 } 6499 6500 handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false); 6501 if (handle == 0) { 6502 free(bo->mem); 6503 free(bo); 6504 return NULL; 6505 } 6506 6507 __kgem_bo_init(&bo->base, handle, alloc); 6508 debug_alloc__bo(kgem, &bo->base); 6509 DBG(("%s: created snoop handle=%d for buffer\n", 6510 __FUNCTION__, bo->base.handle)); 6511 6512 assert(bo->mmapped == MMAPPED_CPU); 6513 assert(bo->need_io == false); 6514 6515 bo->base.refcnt = 1; 6516 bo->base.snoop = true; 6517 bo->base.map__cpu = MAKE_USER_MAP(bo->mem); 6518 6519 return bo; 6520 } 6521 6522 return NULL; 6523} 6524 6525struct kgem_bo *kgem_create_buffer(struct kgem *kgem, 6526 uint32_t size, uint32_t flags, 6527 void **ret) 6528{ 6529 struct kgem_buffer *bo; 6530 unsigned offset, alloc; 6531 struct kgem_bo *old; 6532 6533 DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n", 6534 __FUNCTION__, size, flags, 6535 !!(flags & KGEM_BUFFER_WRITE), 6536 !!(flags & KGEM_BUFFER_INPLACE), 6537 !!(flags & KGEM_BUFFER_LAST))); 6538 assert(size); 6539 /* we should never be asked to create anything TOO large */ 6540 assert(size <= kgem->max_object_size); 6541 6542#if !DBG_NO_UPLOAD_CACHE 6543 list_for_each_entry(bo, &kgem->batch_buffers, base.list) { 6544 assert(bo->base.io); 6545 assert(bo->base.refcnt >= 1); 6546 6547 /* We can reuse any write buffer which 
we can fit */ 6548 if (flags == KGEM_BUFFER_LAST && 6549 bo->write == KGEM_BUFFER_WRITE && 6550 bo->base.refcnt == 1 && 6551 bo->mmapped == MMAPPED_NONE && 6552 size <= bytes(&bo->base)) { 6553 DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n", 6554 __FUNCTION__, size, bo->used, bytes(&bo->base))); 6555 gem_write__cachealigned(kgem->fd, bo->base.handle, 6556 0, bo->used, bo->mem); 6557 assert(list_is_empty(&bo->base.vma)); 6558 bo->need_io = 0; 6559 bo->write = 0; 6560 offset = 0; 6561 bo->used = size; 6562 goto done; 6563 } 6564 6565 if (flags & KGEM_BUFFER_WRITE) { 6566 if ((bo->write & KGEM_BUFFER_WRITE) == 0 || 6567 (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) && 6568 !bo->base.snoop)) { 6569 DBG(("%s: skip write %x buffer, need %x\n", 6570 __FUNCTION__, bo->write, flags)); 6571 continue; 6572 } 6573 assert(bo->mmapped || bo->need_io); 6574 } else { 6575 if (bo->write & KGEM_BUFFER_WRITE) { 6576 DBG(("%s: skip write %x buffer, need %x\n", 6577 __FUNCTION__, bo->write, flags)); 6578 continue; 6579 } 6580 } 6581 6582 if (bo->used + size <= bytes(&bo->base)) { 6583 DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", 6584 __FUNCTION__, bo->used, size, bytes(&bo->base))); 6585 offset = bo->used; 6586 bo->used += size; 6587 goto done; 6588 } 6589 } 6590 6591 if (flags & KGEM_BUFFER_WRITE) { 6592 list_for_each_entry(bo, &kgem->active_buffers, base.list) { 6593 assert(bo->base.io); 6594 assert(bo->base.refcnt >= 1); 6595 assert(bo->base.exec == NULL); 6596 assert(bo->mmapped); 6597 assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop); 6598 6599 if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) { 6600 DBG(("%s: skip write %x buffer, need %x\n", 6601 __FUNCTION__, bo->write, flags)); 6602 continue; 6603 } 6604 6605 if (bo->used + size <= bytes(&bo->base)) { 6606 DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", 6607 __FUNCTION__, bo->used, size, bytes(&bo->base))); 6608 offset = bo->used; 6609 bo->used += size; 6610 list_move(&bo->base.list, &kgem->batch_buffers); 6611 goto done; 6612 } 6613 6614 if (bo->base.refcnt == 1 && 6615 size <= bytes(&bo->base) && 6616 (bo->base.rq == NULL || 6617 !__kgem_busy(kgem, bo->base.handle))) { 6618 DBG(("%s: reusing whole buffer? 
size=%d, total=%d\n", 6619 __FUNCTION__, size, bytes(&bo->base))); 6620 __kgem_bo_clear_busy(&bo->base); 6621 assert(list_is_empty(&bo->base.vma)); 6622 6623 switch (bo->mmapped) { 6624 case MMAPPED_CPU: 6625 kgem_bo_sync__cpu(kgem, &bo->base); 6626 break; 6627 case MMAPPED_GTT: 6628 kgem_bo_sync__gtt(kgem, &bo->base); 6629 break; 6630 } 6631 6632 offset = 0; 6633 bo->used = size; 6634 list_move(&bo->base.list, &kgem->batch_buffers); 6635 goto done; 6636 } 6637 } 6638 } 6639#endif 6640 6641#if !DBG_NO_MAP_UPLOAD 6642 /* Be a little more generous and hope to hold fewer mmappings */ 6643 alloc = ALIGN(2*size, kgem->buffer_size); 6644 if (alloc > MAX_CACHE_SIZE) 6645 alloc = ALIGN(size, kgem->buffer_size); 6646 if (alloc > MAX_CACHE_SIZE) 6647 alloc = PAGE_ALIGN(size); 6648 assert(alloc); 6649 6650 alloc /= PAGE_SIZE; 6651 if (alloc > kgem->aperture_mappable / 4) 6652 flags &= ~KGEM_BUFFER_INPLACE; 6653 6654 if (kgem->has_llc && 6655 (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { 6656 bo = buffer_alloc(); 6657 if (bo == NULL) 6658 goto skip_llc; 6659 6660 old = NULL; 6661 if ((flags & KGEM_BUFFER_WRITE) == 0) 6662 old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP); 6663 if (old == NULL) 6664 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP); 6665 if (old == NULL) 6666 old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP); 6667 if (old) { 6668 DBG(("%s: found LLC handle=%d for buffer\n", 6669 __FUNCTION__, old->handle)); 6670 6671 init_buffer_from_bo(bo, old); 6672 } else { 6673 uint32_t handle = gem_create(kgem->fd, alloc); 6674 if (handle == 0) { 6675 free(bo); 6676 goto skip_llc; 6677 } 6678 __kgem_bo_init(&bo->base, handle, alloc); 6679 debug_alloc__bo(kgem, &bo->base); 6680 DBG(("%s: created LLC handle=%d for buffer\n", 6681 __FUNCTION__, bo->base.handle)); 6682 } 6683 6684 assert(bo->mmapped); 6685 assert(!bo->need_io); 6686 6687 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 6688 if (bo->mem) { 6689 if (flags & KGEM_BUFFER_WRITE) 6690 kgem_bo_sync__cpu(kgem, &bo->base); 6691 flags &= ~KGEM_BUFFER_INPLACE; 6692 goto init; 6693 } else { 6694 bo->base.refcnt = 0; /* for valgrind */ 6695 kgem_bo_free(kgem, &bo->base); 6696 } 6697 } 6698skip_llc: 6699 6700 if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { 6701 /* The issue with using a GTT upload buffer is that we may 6702 * cause eviction-stalls in order to free up some GTT space. 6703 * An is-mappable? ioctl could help us detect when we are 6704 * about to block, or some per-page magic in the kernel. 6705 * 6706 * XXX This is especially noticeable on memory constrained 6707 * devices like gen2 or with relatively slow gpu like i3. 
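 *
 * Lacking such an ioctl, the pragmatic heuristic used a few lines above
 * (when deciding whether to drop KGEM_BUFFER_INPLACE) is to compare the
 * allocation against a fraction of the mappable aperture; a sketch,
 * with a made-up helper name:
 *
 *	static bool likely_mappable(struct kgem *kgem, int num_pages)
 *	{
 *		return num_pages <= kgem->aperture_mappable / 4;
 *	}
 *
 * erring towards the pwrite paths further below rather than risking an
 * eviction stall on the GTT map.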
6708 */ 6709 DBG(("%s: searching for an inactive GTT map for upload\n", 6710 __FUNCTION__)); 6711 old = search_linear_cache(kgem, alloc, 6712 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); 6713#if HAVE_I915_GEM_BUFFER_INFO 6714 if (old) { 6715 struct drm_i915_gem_buffer_info info; 6716 6717 /* An example of such a non-blocking ioctl might work */ 6718 6719 VG_CLEAR(info); 6720 info.handle = handle; 6721 if (do_ioctl(kgem->fd, 6722 DRM_IOCTL_I915_GEM_BUFFER_INFO, 6723 &fino) == 0) { 6724 old->presumed_offset = info.addr; 6725 if ((info.flags & I915_GEM_MAPPABLE) == 0) { 6726 kgem_bo_move_to_inactive(kgem, old); 6727 old = NULL; 6728 } 6729 } 6730 } 6731#endif 6732 if (old == NULL) 6733 old = search_linear_cache(kgem, NUM_PAGES(size), 6734 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); 6735 if (old == NULL) { 6736 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); 6737 if (old && !kgem_bo_can_map(kgem, old)) { 6738 _kgem_bo_destroy(kgem, old); 6739 old = NULL; 6740 } 6741 } 6742 if (old) { 6743 DBG(("%s: reusing handle=%d for buffer\n", 6744 __FUNCTION__, old->handle)); 6745 assert(kgem_bo_can_map(kgem, old)); 6746 assert(!old->snoop); 6747 assert(old->rq == NULL); 6748 6749 bo = buffer_alloc(); 6750 if (bo == NULL) 6751 return NULL; 6752 6753 init_buffer_from_bo(bo, old); 6754 assert(num_pages(&bo->base) >= NUM_PAGES(size)); 6755 6756 assert(bo->mmapped); 6757 assert(bo->base.refcnt == 1); 6758 6759 bo->mem = kgem_bo_map(kgem, &bo->base); 6760 if (bo->mem) { 6761 if (bo->mem == MAP(bo->base.map__cpu)) 6762 flags &= ~KGEM_BUFFER_INPLACE; 6763 else 6764 bo->mmapped = MMAPPED_GTT; 6765 goto init; 6766 } else { 6767 bo->base.refcnt = 0; 6768 kgem_bo_free(kgem, &bo->base); 6769 } 6770 } 6771 } 6772#else 6773 flags &= ~KGEM_BUFFER_INPLACE; 6774#endif 6775 /* Be more parsimonious with pwrite/pread/cacheable buffers */ 6776 if ((flags & KGEM_BUFFER_INPLACE) == 0) 6777 alloc = NUM_PAGES(size); 6778 6779 if (use_snoopable_buffer(kgem, flags)) { 6780 bo = search_snoopable_buffer(kgem, alloc); 6781 if (bo) { 6782 if (flags & KGEM_BUFFER_WRITE) 6783 kgem_bo_sync__cpu(kgem, &bo->base); 6784 flags &= ~KGEM_BUFFER_INPLACE; 6785 goto init; 6786 } 6787 6788 if ((flags & KGEM_BUFFER_INPLACE) == 0) { 6789 bo = create_snoopable_buffer(kgem, alloc); 6790 if (bo) 6791 goto init; 6792 } 6793 } 6794 6795 flags &= ~KGEM_BUFFER_INPLACE; 6796 6797 old = NULL; 6798 if ((flags & KGEM_BUFFER_WRITE) == 0) 6799 old = search_linear_cache(kgem, alloc, 0); 6800 if (old == NULL) 6801 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); 6802 if (old) { 6803 DBG(("%s: reusing ordinary handle %d for io\n", 6804 __FUNCTION__, old->handle)); 6805 bo = buffer_alloc_with_data(num_pages(old)); 6806 if (bo == NULL) 6807 return NULL; 6808 6809 init_buffer_from_bo(bo, old); 6810 bo->need_io = flags & KGEM_BUFFER_WRITE; 6811 } else { 6812 unsigned hint; 6813 6814 if (use_snoopable_buffer(kgem, flags)) { 6815 bo = create_snoopable_buffer(kgem, alloc); 6816 if (bo) 6817 goto init; 6818 } 6819 6820 bo = buffer_alloc(); 6821 if (bo == NULL) 6822 return NULL; 6823 6824 hint = CREATE_INACTIVE; 6825 if (flags & KGEM_BUFFER_WRITE) 6826 hint |= CREATE_CPU_MAP; 6827 old = search_linear_cache(kgem, alloc, hint); 6828 if (old) { 6829 DBG(("%s: reusing handle=%d for buffer\n", 6830 __FUNCTION__, old->handle)); 6831 6832 init_buffer_from_bo(bo, old); 6833 } else { 6834 uint32_t handle = gem_create(kgem->fd, alloc); 6835 if (handle == 0) { 6836 free(bo); 6837 return NULL; 6838 } 6839 6840 DBG(("%s: created handle=%d for buffer\n", 6841 
__FUNCTION__, handle)); 6842 6843 __kgem_bo_init(&bo->base, handle, alloc); 6844 debug_alloc__bo(kgem, &bo->base); 6845 } 6846 6847 assert(bo->mmapped); 6848 assert(!bo->need_io); 6849 assert(bo->base.refcnt == 1); 6850 6851 if (flags & KGEM_BUFFER_WRITE) { 6852 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 6853 if (bo->mem != NULL) { 6854 kgem_bo_sync__cpu(kgem, &bo->base); 6855 goto init; 6856 } 6857 } 6858 6859 DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__)); 6860 old = &bo->base; 6861 bo = buffer_alloc_with_data(num_pages(old)); 6862 if (bo == NULL) { 6863 old->refcnt= 0; 6864 kgem_bo_free(kgem, old); 6865 return NULL; 6866 } 6867 6868 init_buffer_from_bo(bo, old); 6869 6870 assert(bo->mem); 6871 assert(!bo->mmapped); 6872 assert(bo->base.refcnt == 1); 6873 6874 bo->need_io = flags & KGEM_BUFFER_WRITE; 6875 } 6876init: 6877 bo->base.io = true; 6878 assert(bo->base.refcnt == 1); 6879 assert(num_pages(&bo->base) >= NUM_PAGES(size)); 6880 assert(!bo->need_io || !bo->base.needs_flush); 6881 assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); 6882 assert(bo->mem); 6883 assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem); 6884 assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem); 6885 6886 bo->used = size; 6887 bo->write = flags & KGEM_BUFFER_WRITE_INPLACE; 6888 offset = 0; 6889 6890 assert(list_is_empty(&bo->base.list)); 6891 list_add(&bo->base.list, &kgem->batch_buffers); 6892 6893 DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n", 6894 __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write)); 6895 6896done: 6897 bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); 6898 assert(bo->used && bo->used <= bytes(&bo->base)); 6899 assert(bo->mem); 6900 *ret = (char *)bo->mem + offset; 6901 return kgem_create_proxy(kgem, &bo->base, offset, size); 6902} 6903 6904bool kgem_buffer_is_inplace(struct kgem_bo *_bo) 6905{ 6906 struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy; 6907 return bo->write & KGEM_BUFFER_WRITE_INPLACE; 6908} 6909 6910struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, 6911 int width, int height, int bpp, 6912 uint32_t flags, 6913 void **ret) 6914{ 6915 struct kgem_bo *bo; 6916 int stride; 6917 6918 assert(width > 0 && height > 0); 6919 assert(ret != NULL); 6920 stride = ALIGN(width, 2) * bpp >> 3; 6921 stride = ALIGN(stride, 4); 6922 6923 DBG(("%s: %dx%d, %d bpp, stride=%d\n", 6924 __FUNCTION__, width, height, bpp, stride)); 6925 6926 bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret); 6927 if (bo == NULL) { 6928 DBG(("%s: allocation failure for upload buffer\n", 6929 __FUNCTION__)); 6930 return NULL; 6931 } 6932 assert(*ret != NULL); 6933 assert(bo->proxy != NULL); 6934 6935 if (height & 1) { 6936 struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; 6937 int min; 6938 6939 assert(io->used); 6940 6941 /* Having padded this surface to ensure that accesses to 6942 * the last pair of rows is valid, remove the padding so 6943 * that it can be allocated to other pixmaps. 
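 *
 * Worked example (illustrative numbers only): a 7-row upload with a
 * 256-byte stride was allocated as stride * ALIGN(7, 2) = 2048 bytes;
 * the trim below resets
 *
 *	io->used = ALIGN(bo->delta + 7 * 256, UPLOAD_ALIGNMENT)
 *
 * and shrinks bo->size.bytes by one stride, so that the padding row can
 * be handed out again by kgem_create_buffer().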
6944 */ 6945 min = bo->delta + height * stride; 6946 min = ALIGN(min, UPLOAD_ALIGNMENT); 6947 if (io->used != min) { 6948 DBG(("%s: trimming buffer from %d to %d\n", 6949 __FUNCTION__, io->used, min)); 6950 io->used = min; 6951 } 6952 bo->size.bytes -= stride; 6953 } 6954 6955 bo->map__cpu = *ret; 6956 bo->pitch = stride; 6957 bo->unique_id = kgem_get_unique_id(kgem); 6958 return bo; 6959} 6960 6961struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, 6962 const void *data, 6963 const BoxRec *box, 6964 int stride, int bpp) 6965{ 6966 int width = box->x2 - box->x1; 6967 int height = box->y2 - box->y1; 6968 struct kgem_bo *bo; 6969 void *dst; 6970 6971 if (!kgem_can_create_2d(kgem, width, height, bpp)) 6972 return NULL; 6973 6974 DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n", 6975 __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp)); 6976 6977 assert(data); 6978 assert(width > 0); 6979 assert(height > 0); 6980 assert(stride); 6981 assert(bpp); 6982 6983 bo = kgem_create_buffer_2d(kgem, 6984 width, height, bpp, 6985 KGEM_BUFFER_WRITE_INPLACE, &dst); 6986 if (bo == NULL) 6987 return NULL; 6988 6989 if (sigtrap_get()) { 6990 kgem_bo_destroy(kgem, bo); 6991 return NULL; 6992 } 6993 6994 memcpy_blt(data, dst, bpp, 6995 stride, bo->pitch, 6996 box->x1, box->y1, 6997 0, 0, 6998 width, height); 6999 7000 sigtrap_put(); 7001 return bo; 7002} 7003 7004void kgem_proxy_bo_attach(struct kgem_bo *bo, 7005 struct kgem_bo **ptr) 7006{ 7007 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); 7008 assert(bo->map__gtt == NULL); 7009 assert(bo->proxy); 7010 list_add(&bo->vma, &bo->proxy->vma); 7011 bo->map__gtt = ptr; 7012 *ptr = kgem_bo_reference(bo); 7013} 7014 7015void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) 7016{ 7017 struct kgem_buffer *bo; 7018 uint32_t offset = _bo->delta, length = _bo->size.bytes; 7019 7020 /* We expect the caller to have already submitted the batch */ 7021 assert(_bo->io); 7022 assert(_bo->exec == NULL); 7023 assert(_bo->rq == NULL); 7024 assert(_bo->proxy); 7025 7026 _bo = _bo->proxy; 7027 assert(_bo->proxy == NULL); 7028 assert(_bo->exec == NULL); 7029 7030 bo = (struct kgem_buffer *)_bo; 7031 7032 DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__, 7033 offset, length, bo->base.snoop)); 7034 7035 if (bo->mmapped) { 7036 struct drm_i915_gem_set_domain set_domain; 7037 7038 DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", 7039 __FUNCTION__, 7040 bo->base.needs_flush, 7041 bo->base.domain, 7042 __kgem_busy(kgem, bo->base.handle))); 7043 7044 assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc); 7045 7046 VG_CLEAR(set_domain); 7047 set_domain.handle = bo->base.handle; 7048 set_domain.write_domain = 0; 7049 set_domain.read_domains = 7050 bo->mmapped == MMAPPED_CPU ? 
I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT; 7051 7052 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 7053 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 7054 kgem_throttle(kgem); 7055 } 7056 } else { 7057 if (gem_read(kgem->fd, 7058 bo->base.handle, (char *)bo->mem+offset, 7059 offset, length)) 7060 return; 7061 } 7062 kgem_bo_maybe_retire(kgem, &bo->base); 7063 bo->base.domain = DOMAIN_NONE; 7064} 7065 7066uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) 7067{ 7068 struct kgem_bo_binding *b; 7069 7070 assert(bo->refcnt); 7071 7072 for (b = &bo->binding; b && b->offset; b = b->next) 7073 if (format == b->format) 7074 return b->offset; 7075 7076 return 0; 7077} 7078 7079void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) 7080{ 7081 struct kgem_bo_binding *b; 7082 7083 assert(bo->refcnt); 7084 7085 for (b = &bo->binding; b; b = b->next) { 7086 if (b->offset) 7087 continue; 7088 7089 b->offset = offset; 7090 b->format = format; 7091 7092 if (b->next) 7093 b->next->offset = 0; 7094 7095 return; 7096 } 7097 7098 b = malloc(sizeof(*b)); 7099 if (b) { 7100 b->next = bo->binding.next; 7101 b->format = format; 7102 b->offset = offset; 7103 bo->binding.next = b; 7104 } 7105} 7106 7107struct kgem_bo * 7108kgem_replace_bo(struct kgem *kgem, 7109 struct kgem_bo *src, 7110 uint32_t width, 7111 uint32_t height, 7112 uint32_t pitch, 7113 uint32_t bpp) 7114{ 7115 struct kgem_bo *dst; 7116 uint32_t br00, br13; 7117 uint32_t handle; 7118 uint32_t size; 7119 uint32_t *b; 7120 7121 DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n", 7122 __FUNCTION__, src->handle, width, height, src->pitch, pitch)); 7123 7124 /* We only expect to be called to fixup small buffers, hence why 7125 * we only attempt to allocate a linear bo. 
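 *
 * For example (illustrative numbers only): replacing a 64x64, 32bpp
 * source with a new pitch of 256 bytes requests
 *
 *	NUM_PAGES(64 * 256) = 4
 *
 * pages from the linear cache (or a fresh gem_create) and emits a
 * single XY_SRC_COPY_BLT below to copy the old contents across at the
 * new pitch.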
7126 */ 7127 assert(src->tiling == I915_TILING_NONE); 7128 assert(kgem_bo_can_blt(kgem, src)); 7129 7130 size = height * pitch; 7131 size = NUM_PAGES(size); 7132 7133 dst = search_linear_cache(kgem, size, 0); 7134 if (dst == NULL) 7135 dst = search_linear_cache(kgem, size, CREATE_INACTIVE); 7136 if (dst == NULL) { 7137 handle = gem_create(kgem->fd, size); 7138 if (handle == 0) 7139 return NULL; 7140 7141 dst = __kgem_bo_alloc(handle, size); 7142 if (dst == NULL) { 7143 gem_close(kgem->fd, handle); 7144 return NULL; 7145 } 7146 7147 debug_alloc__bo(kgem, dst); 7148 } 7149 dst->pitch = pitch; 7150 dst->unique_id = kgem_get_unique_id(kgem); 7151 dst->refcnt = 1; 7152 assert(dst->tiling == I915_TILING_NONE); 7153 assert(kgem_bo_can_blt(kgem, dst)); 7154 7155 kgem_set_mode(kgem, KGEM_BLT, dst); 7156 if (!kgem_check_batch(kgem, 10) || 7157 !kgem_check_reloc(kgem, 2) || 7158 !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 7159 kgem_submit(kgem); 7160 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { 7161 kgem_bo_destroy(kgem, dst); 7162 return NULL; 7163 } 7164 _kgem_set_mode(kgem, KGEM_BLT); 7165 } 7166 7167 br00 = XY_SRC_COPY_BLT_CMD; 7168 br13 = pitch; 7169 pitch = src->pitch; 7170 if (kgem->gen >= 040 && src->tiling) { 7171 br00 |= BLT_SRC_TILED; 7172 pitch >>= 2; 7173 } 7174 7175 br13 |= 0xcc << 16; 7176 switch (bpp) { 7177 default: 7178 case 32: br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 7179 br13 |= 1 << 25; /* RGB8888 */ 7180 case 16: br13 |= 1 << 24; /* RGB565 */ 7181 case 8: break; 7182 } 7183 7184 b = kgem->batch + kgem->nbatch; 7185 if (kgem->gen >= 0100) { 7186 b[0] = br00 | 8; 7187 b[1] = br13; 7188 b[2] = 0; 7189 b[3] = height << 16 | width; 7190 *(uint64_t *)(b+4) = 7191 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst, 7192 I915_GEM_DOMAIN_RENDER << 16 | 7193 I915_GEM_DOMAIN_RENDER | 7194 KGEM_RELOC_FENCED, 7195 0); 7196 b[6] = 0; 7197 b[7] = pitch; 7198 *(uint64_t *)(b+8) = 7199 kgem_add_reloc64(kgem, kgem->nbatch + 8, src, 7200 I915_GEM_DOMAIN_RENDER << 16 | 7201 KGEM_RELOC_FENCED, 7202 0); 7203 kgem->nbatch += 10; 7204 } else { 7205 b[0] = br00 | 6; 7206 b[1] = br13; 7207 b[2] = 0; 7208 b[3] = height << 16 | width; 7209 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst, 7210 I915_GEM_DOMAIN_RENDER << 16 | 7211 I915_GEM_DOMAIN_RENDER | 7212 KGEM_RELOC_FENCED, 7213 0); 7214 b[5] = 0; 7215 b[6] = pitch; 7216 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src, 7217 I915_GEM_DOMAIN_RENDER << 16 | 7218 KGEM_RELOC_FENCED, 7219 0); 7220 kgem->nbatch += 8; 7221 } 7222 7223 return dst; 7224} 7225 7226bool kgem_bo_convert_to_gpu(struct kgem *kgem, 7227 struct kgem_bo *bo, 7228 unsigned flags) 7229{ 7230 DBG(("%s: converting handle=%d from CPU to GPU, flags=%x\n", __FUNCTION__, bo->handle)); 7231 assert(bo->tiling == I915_TILING_NONE); 7232 7233 if (kgem->has_llc) 7234 return true; 7235 7236 if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo)) 7237 return false; 7238 7239 assert(bo->snoop); 7240 7241 kgem_bo_submit(kgem, bo); 7242 7243 if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED)) 7244 return false; 7245 7246 bo->snoop = false; 7247 return true; 7248} 7249