1/* 2 * Copyright (c) 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Authors: 24 * Chris Wilson <chris@chris-wilson.co.uk> 25 * 26 */ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "sna.h" 33#include "sna_reg.h" 34 35#include <unistd.h> 36#include <sys/ioctl.h> 37#include <sys/mman.h> 38#include <sys/stat.h> 39#include <time.h> 40#include <sched.h> 41#include <errno.h> 42#include <fcntl.h> 43 44#include <xf86drm.h> 45 46#ifdef HAVE_VALGRIND 47#include <valgrind.h> 48#include <memcheck.h> 49#endif 50 51#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM 52#include <sys/sysinfo.h> 53#endif 54 55#include "sna_cpuid.h" 56 57static struct kgem_bo * 58search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); 59 60static struct kgem_bo * 61search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); 62 63#define DBG_NO_HW 0 64#define DBG_NO_EXEC 0 65#define DBG_NO_TILING 0 66#define DBG_NO_CACHE 0 67#define DBG_NO_SNOOP_CACHE 0 68#define DBG_NO_CACHE_LEVEL 0 69#define DBG_NO_CPU 0 70#define DBG_NO_CREATE2 0 71#define DBG_NO_USERPTR 0 72#define DBG_NO_UNSYNCHRONIZED_USERPTR 0 73#define DBG_NO_LLC 0 74#define DBG_NO_SEMAPHORES 0 75#define DBG_NO_MADV 0 76#define DBG_NO_UPLOAD_CACHE 0 77#define DBG_NO_UPLOAD_ACTIVE 0 78#define DBG_NO_MAP_UPLOAD 0 79#define DBG_NO_RELAXED_FENCING 0 80#define DBG_NO_SECURE_BATCHES 0 81#define DBG_NO_PINNED_BATCHES 0 82#define DBG_NO_SHRINK_BATCHES 0 83#define DBG_NO_FAST_RELOC 0 84#define DBG_NO_HANDLE_LUT 0 85#define DBG_NO_WT 0 86#define DBG_NO_WC_MMAP 0 87#define DBG_DUMP 0 88#define DBG_NO_MALLOC_CACHE 0 89 90#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */ 91 92#ifndef DEBUG_SYNC 93#define DEBUG_SYNC 0 94#endif 95 96#define SHOW_BATCH_BEFORE 0 97#define SHOW_BATCH_AFTER 0 98 99#if !USE_WC_MMAP 100#undef DBG_NO_WC_MMAP 101#define DBG_NO_WC_MMAP 1 102#endif 103 104#if 0 105#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) 106#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || 
!__kgem_busy(kgem__, handle__)) 107#else 108#define ASSERT_IDLE(kgem__, handle__) 109#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) 110#endif 111 112/* Worst case seems to be 965gm where we cannot write within a cacheline that 113 * is being simultaneously being read by the GPU, or within the sampler 114 * prefetch. In general, the chipsets seem to have a requirement that sampler 115 * offsets be aligned to a cacheline (64 bytes). 116 * 117 * Actually, it turns out the BLT color pattern (BR15) has the most severe 118 * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256 119 * bytes for 32-bpp. 120 */ 121#define UPLOAD_ALIGNMENT 256 122 123#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) 124#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE) 125 126#define MAX_GTT_VMA_CACHE 512 127#define MAX_CPU_VMA_CACHE INT16_MAX 128#define MAP_PRESERVE_TIME 10 129 130#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) 131#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1) 132 133#define LOCAL_I915_PARAM_HAS_BLT 11 134#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 135#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 136#define LOCAL_I915_PARAM_HAS_LLC 17 137#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 138#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 139#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 140#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 141#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 142#define LOCAL_I915_PARAM_HAS_WT 27 143#define LOCAL_I915_PARAM_MMAP_VERSION 30 144 145#define LOCAL_I915_EXEC_IS_PINNED (1<<10) 146#define LOCAL_I915_EXEC_NO_RELOC (1<<11) 147#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) 148 149#define LOCAL_I915_GEM_CREATE2 0x34 150#define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2) 151struct local_i915_gem_create2 { 152 uint64_t size; 153 uint32_t placement; 154#define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0 155#define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU 
mmaps or pread/pwrite */ 156 uint32_t domain; 157 uint32_t caching; 158 uint32_t tiling_mode; 159 uint32_t stride; 160 uint32_t flags; 161 uint32_t pad; 162 uint32_t handle; 163}; 164 165#define LOCAL_I915_GEM_USERPTR 0x33 166#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) 167struct local_i915_gem_userptr { 168 uint64_t user_ptr; 169 uint64_t user_size; 170 uint32_t flags; 171#define I915_USERPTR_READ_ONLY 0x1 172#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 173 uint32_t handle; 174}; 175 176#define UNCACHED 0 177#define SNOOPED 1 178#define DISPLAY 2 179 180struct local_i915_gem_caching { 181 uint32_t handle; 182 uint32_t caching; 183}; 184 185#define LOCAL_I915_GEM_SET_CACHING 0x2f 186#define LOCAL_I915_GEM_GET_CACHING 0x30 187#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) 188#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) 189 190struct local_i915_gem_mmap2 { 191 uint32_t handle; 192 uint32_t pad; 193 uint64_t offset; 194 uint64_t size; 195 uint64_t addr_ptr; 196 uint64_t flags; 197#define I915_MMAP_WC 0x1 198}; 199#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2) 200 201struct kgem_buffer { 202 struct kgem_bo base; 203 void *mem; 204 uint32_t used; 205 uint32_t need_io : 1; 206 uint32_t write : 2; 207 uint32_t mmapped : 2; 208}; 209enum { 210 MMAPPED_NONE, 211 MMAPPED_GTT, 212 MMAPPED_CPU 213}; 214 215static struct kgem_bo *__kgem_freed_bo; 216static struct kgem_request *__kgem_freed_request; 217static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; 218 219static inline int bytes(struct kgem_bo *bo) 220{ 221 return __kgem_bo_size(bo); 222} 223 224#define bucket(B) (B)->size.pages.bucket 225#define num_pages(B) (B)->size.pages.count 226 227static int do_ioctl(int fd, 
unsigned long req, void *arg) 228{ 229 int err; 230 231restart: 232 if (ioctl(fd, req, arg) == 0) 233 return 0; 234 235 err = errno; 236 237 if (err == EINTR) 238 goto restart; 239 240 if (err == EAGAIN) { 241 sched_yield(); 242 goto restart; 243 } 244 245 return -err; 246} 247 248#ifdef DEBUG_MEMORY 249static void debug_alloc(struct kgem *kgem, size_t size) 250{ 251 kgem->debug_memory.bo_allocs++; 252 kgem->debug_memory.bo_bytes += size; 253} 254static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo) 255{ 256 debug_alloc(kgem, bytes(bo)); 257} 258#else 259#define debug_alloc__bo(k, b) 260#endif 261 262#ifndef NDEBUG 263static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) 264{ 265 struct drm_i915_gem_get_tiling tiling; 266 267 assert(bo); 268 269 VG_CLEAR(tiling); 270 tiling.handle = bo->handle; 271 tiling.tiling_mode = bo->tiling; 272 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); 273 assert(tiling.tiling_mode == bo->tiling); 274} 275 276static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo) 277{ 278 struct local_i915_gem_caching arg; 279 int expect = kgem->has_llc ? SNOOPED : UNCACHED; 280 281 VG_CLEAR(arg); 282 arg.handle = bo->handle; 283 arg.caching = expect; 284 285 (void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg); 286 287 assert(arg.caching == expect); 288} 289 290static void assert_bo_retired(struct kgem_bo *bo) 291{ 292 DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, 293 bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); 294 assert(bo->refcnt); 295 assert(bo->rq == NULL); 296 assert(bo->exec == NULL); 297 assert(list_is_empty(&bo->request)); 298} 299#else 300#define assert_tiling(kgem, bo) 301#define assert_cacheing(kgem, bo) 302#define assert_bo_retired(bo) 303#endif 304 305static void 306__kgem_set_wedged(struct kgem *kgem) 307{ 308 kgem->wedged = true; 309 sna_render_mark_wedged(container_of(kgem, struct sna, kgem)); 310} 311 312static void kgem_sna_reset(struct kgem *kgem) 313{ 314 struct sna *sna = container_of(kgem, struct sna, kgem); 315 316 sna->render.reset(sna); 317 sna->blt_state.fill_bo = 0; 318} 319 320static void kgem_sna_flush(struct kgem *kgem) 321{ 322 struct sna *sna = container_of(kgem, struct sna, kgem); 323 324 sna->render.flush(sna); 325 326 if (sna->render.solid_cache.dirty) 327 sna_render_flush_solid(sna); 328} 329 330static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) 331{ 332 struct drm_i915_gem_set_tiling set_tiling; 333 int err; 334 335 if (DBG_NO_TILING) 336 return false; 337 338 VG_CLEAR(set_tiling); 339restart: 340 set_tiling.handle = handle; 341 set_tiling.tiling_mode = tiling; 342 set_tiling.stride = stride; 343 344 if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) 345 return true; 346 347 err = errno; 348 if (err == EINTR) 349 goto restart; 350 351 if (err == EAGAIN) { 352 sched_yield(); 353 goto restart; 354 } 355 356 return false; 357} 358 359static bool gem_set_caching(int fd, uint32_t handle, int caching) 360{ 361 struct local_i915_gem_caching arg; 362 363 VG_CLEAR(arg); 364 arg.handle = handle; 365 arg.caching = caching; 366 return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; 367} 368 369static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only) 370{ 371 struct local_i915_gem_userptr arg; 372 373 VG_CLEAR(arg); 374 arg.user_ptr = (uintptr_t)ptr; 375 arg.user_size = size; 376 arg.flags = 
I915_USERPTR_UNSYNCHRONIZED; 377 if (read_only) 378 arg.flags |= I915_USERPTR_READ_ONLY; 379 380 if (DBG_NO_UNSYNCHRONIZED_USERPTR || 381 do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { 382 arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; 383 if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { 384 DBG(("%s: failed to map %p + %d bytes: %d\n", 385 __FUNCTION__, ptr, size, errno)); 386 return 0; 387 } 388 } 389 390 return arg.handle; 391} 392 393static bool __kgem_throttle(struct kgem *kgem, bool harder) 394{ 395 /* Let this be woken up by sigtimer so that we don't block here 396 * too much and completely starve X. We will sleep again shortly, 397 * and so catch up or detect the hang. 398 */ 399 do { 400 if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) { 401 kgem->need_throttle = 0; 402 return false; 403 } 404 405 if (errno == EIO) 406 return true; 407 } while (harder); 408 409 return false; 410} 411 412static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) 413{ 414 if (flags & CREATE_NO_RETIRE || !kgem->need_retire) { 415 DBG(("%s: not retiring\n", __FUNCTION__)); 416 return false; 417 } 418 419 if (kgem_retire(kgem)) 420 return true; 421 422 if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) { 423 DBG(("%s: not throttling\n", __FUNCTION__)); 424 return false; 425 } 426 427 __kgem_throttle(kgem, false); 428 return kgem_retire(kgem); 429} 430 431static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) 432{ 433 struct drm_i915_gem_mmap_gtt gtt; 434 void *ptr; 435 int err; 436 437 DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, 438 bo->handle, bytes(bo))); 439 440 VG_CLEAR(gtt); 441retry_gtt: 442 gtt.handle = bo->handle; 443 if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, >t))) { 444 assert(err != EINVAL); 445 446 (void)__kgem_throttle_retire(kgem, 0); 447 if (kgem_expire_cache(kgem)) 448 goto retry_gtt; 449 450 if (kgem_cleanup_cache(kgem)) 451 goto retry_gtt; 452 453 ERR(("%s: failed to retrieve GTT offset for 
handle=%d: %d\n", 454 __FUNCTION__, bo->handle, -err)); 455 return NULL; 456 } 457 458retry_mmap: 459 ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, 460 kgem->fd, gtt.offset); 461 if (ptr == MAP_FAILED) { 462 err = errno; 463 assert(err != EINVAL); 464 465 if (__kgem_throttle_retire(kgem, 0)) 466 goto retry_mmap; 467 468 if (kgem_cleanup_cache(kgem)) 469 goto retry_mmap; 470 471 ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n", 472 __FUNCTION__, bo->handle, bytes(bo), err)); 473 ptr = NULL; 474 } 475 476 /* Cache this mapping to avoid the overhead of an 477 * excruciatingly slow GTT pagefault. This is more an 478 * issue with compositing managers which need to 479 * frequently flush CPU damage to their GPU bo. 480 */ 481 return bo->map__gtt = ptr; 482} 483 484static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) 485{ 486 struct local_i915_gem_mmap2 wc; 487 int err; 488 489 DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, 490 bo->handle, bytes(bo))); 491 assert(kgem->has_wc_mmap); 492 493 VG_CLEAR(wc); 494 495retry_wc: 496 wc.handle = bo->handle; 497 wc.offset = 0; 498 wc.size = bytes(bo); 499 wc.flags = I915_MMAP_WC; 500 if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) { 501 assert(err != EINVAL); 502 503 if (__kgem_throttle_retire(kgem, 0)) 504 goto retry_wc; 505 506 if (kgem_cleanup_cache(kgem)) 507 goto retry_wc; 508 509 ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n", 510 __FUNCTION__, bo->handle, bytes(bo), -err)); 511 return NULL; 512 } 513 514 VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo))); 515 516 DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle)); 517 return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr; 518} 519 520static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) 521{ 522 struct drm_i915_gem_mmap mmap_arg; 523 int err; 524 525retry: 526 VG_CLEAR(mmap_arg); 527 mmap_arg.handle = bo->handle; 528 mmap_arg.offset = 0; 529 
mmap_arg.size = bytes(bo); 530 mmap_arg.flags = 0; 531 if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { 532 assert(err != EINVAL); 533 534 if (__kgem_throttle_retire(kgem, 0)) 535 goto retry; 536 537 if (kgem_cleanup_cache(kgem)) 538 goto retry; 539 540 ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", 541 __FUNCTION__, bo->handle, bytes(bo), -err)); 542 return NULL; 543 } 544 545 VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); 546 547 DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); 548 return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; 549} 550 551static int gem_write(int fd, uint32_t handle, 552 int offset, int length, 553 const void *src) 554{ 555 struct drm_i915_gem_pwrite pwrite; 556 557 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, 558 handle, offset, length)); 559 560 VG_CLEAR(pwrite); 561 pwrite.handle = handle; 562 pwrite.offset = offset; 563 pwrite.size = length; 564 pwrite.data_ptr = (uintptr_t)src; 565 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); 566} 567 568static int gem_write__cachealigned(int fd, uint32_t handle, 569 int offset, int length, 570 const void *src) 571{ 572 struct drm_i915_gem_pwrite pwrite; 573 574 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, 575 handle, offset, length)); 576 577 VG_CLEAR(pwrite); 578 pwrite.handle = handle; 579 /* align the transfer to cachelines; fortuitously this is safe! 
*/ 580 if ((offset | length) & 63) { 581 pwrite.offset = offset & ~63; 582 pwrite.size = ALIGN(offset+length, 64) - pwrite.offset; 583 pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset; 584 } else { 585 pwrite.offset = offset; 586 pwrite.size = length; 587 pwrite.data_ptr = (uintptr_t)src; 588 } 589 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); 590} 591 592static int gem_read(int fd, uint32_t handle, const void *dst, 593 int offset, int length) 594{ 595 struct drm_i915_gem_pread pread; 596 int ret; 597 598 DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__, 599 handle, length)); 600 601 VG_CLEAR(pread); 602 pread.handle = handle; 603 pread.offset = offset; 604 pread.size = length; 605 pread.data_ptr = (uintptr_t)dst; 606 ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread); 607 if (ret) { 608 DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret)); 609 return ret; 610 } 611 612 VG(VALGRIND_MAKE_MEM_DEFINED(dst, length)); 613 return 0; 614} 615 616bool __kgem_busy(struct kgem *kgem, int handle) 617{ 618 struct drm_i915_gem_busy busy; 619 620 VG_CLEAR(busy); 621 busy.handle = handle; 622 busy.busy = !kgem->wedged; 623 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 624 DBG(("%s: handle=%d, busy=%d, wedged=%d\n", 625 __FUNCTION__, handle, busy.busy, kgem->wedged)); 626 627 return busy.busy; 628} 629 630static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) 631{ 632 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? 
%d [busy?=%d]\n", 633 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL, 634 __kgem_busy(kgem, bo->handle))); 635 assert(bo->exec == NULL); 636 assert(list_is_empty(&bo->vma)); 637 638 if (bo->rq) { 639 __kgem_bo_clear_busy(bo); 640 kgem_retire(kgem); 641 assert_bo_retired(bo); 642 } else { 643 assert(bo->exec == NULL); 644 assert(list_is_empty(&bo->request)); 645 assert(!bo->needs_flush); 646 ASSERT_IDLE(kgem, bo->handle); 647 } 648} 649 650static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) 651{ 652 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n", 653 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL, 654 __kgem_busy(kgem, bo->handle))); 655 assert(bo->exec == NULL); 656 assert(list_is_empty(&bo->vma)); 657 658 if (bo->rq) { 659 if (!__kgem_busy(kgem, bo->handle)) { 660 __kgem_bo_clear_busy(bo); 661 kgem_retire(kgem); 662 } 663 } else { 664 assert(!bo->needs_flush); 665 ASSERT_IDLE(kgem, bo->handle); 666 } 667} 668 669bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, 670 const void *data, int length) 671{ 672 void *ptr; 673 int err; 674 675 assert(bo->refcnt); 676 assert(bo->proxy == NULL); 677 ASSERT_IDLE(kgem, bo->handle); 678 679 assert(length <= bytes(bo)); 680retry: 681 ptr = NULL; 682 if (bo->domain == DOMAIN_CPU || (kgem->has_llc && !bo->scanout)) { 683 ptr = bo->map__cpu; 684 if (ptr == NULL) 685 ptr = __kgem_bo_map__cpu(kgem, bo); 686 } else if (kgem->has_wc_mmap) { 687 ptr = bo->map__wc; 688 if (ptr == NULL) 689 ptr = __kgem_bo_map__wc(kgem, bo); 690 } 691 if (ptr) { 692 /* XXX unsynchronized? 
*/ 693 memcpy(ptr, data, length); 694 return true; 695 } 696 697 if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) { 698 assert(err != EINVAL); 699 700 (void)__kgem_throttle_retire(kgem, 0); 701 if (kgem_expire_cache(kgem)) 702 goto retry; 703 704 if (kgem_cleanup_cache(kgem)) 705 goto retry; 706 707 ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n", 708 __FUNCTION__, length, bo->handle, -err)); 709 return false; 710 } 711 712 DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain)); 713 if (bo->exec == NULL) 714 kgem_bo_maybe_retire(kgem, bo); 715 bo->domain = DOMAIN_NONE; 716 bo->gtt_dirty = true; 717 return true; 718} 719 720static uint32_t gem_create(int fd, int num_pages) 721{ 722 struct drm_i915_gem_create create; 723 724 VG_CLEAR(create); 725 create.handle = 0; 726 create.size = PAGE_SIZE * num_pages; 727 (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); 728 729 return create.handle; 730} 731 732static bool 733kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) 734{ 735#if DBG_NO_MADV 736 return true; 737#else 738 struct drm_i915_gem_madvise madv; 739 740 assert(bo->exec == NULL); 741 assert(!bo->purged); 742 743 VG_CLEAR(madv); 744 madv.handle = bo->handle; 745 madv.madv = I915_MADV_DONTNEED; 746 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { 747 bo->purged = 1; 748 kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; 749 return madv.retained; 750 } 751 752 return true; 753#endif 754} 755 756static bool 757kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo) 758{ 759#if DBG_NO_MADV 760 return true; 761#else 762 struct drm_i915_gem_madvise madv; 763 764 if (!bo->purged) 765 return true; 766 767 VG_CLEAR(madv); 768 madv.handle = bo->handle; 769 madv.madv = I915_MADV_DONTNEED; 770 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) 771 return madv.retained; 772 773 return false; 774#endif 775} 776 777static bool 778kgem_bo_clear_purgeable(struct kgem *kgem, 
struct kgem_bo *bo) 779{ 780#if DBG_NO_MADV 781 return true; 782#else 783 struct drm_i915_gem_madvise madv; 784 785 assert(bo->purged); 786 787 VG_CLEAR(madv); 788 madv.handle = bo->handle; 789 madv.madv = I915_MADV_WILLNEED; 790 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { 791 bo->purged = !madv.retained; 792 kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; 793 return madv.retained; 794 } 795 796 return false; 797#endif 798} 799 800static void gem_close(int fd, uint32_t handle) 801{ 802 struct drm_gem_close close; 803 804 VG_CLEAR(close); 805 close.handle = handle; 806 (void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); 807} 808 809constant inline static unsigned long __fls(unsigned long word) 810{ 811#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__)) 812 asm("bsr %1,%0" 813 : "=r" (word) 814 : "rm" (word)); 815 return word; 816#else 817 unsigned int v = 0; 818 819 while (word >>= 1) 820 v++; 821 822 return v; 823#endif 824} 825 826constant inline static int cache_bucket(int num_pages) 827{ 828 return __fls(num_pages); 829} 830 831static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, 832 int handle, int num_pages) 833{ 834 DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages)); 835 836 assert(num_pages); 837 memset(bo, 0, sizeof(*bo)); 838 839 bo->refcnt = 1; 840 bo->handle = handle; 841 bo->target_handle = -1; 842 num_pages(bo) = num_pages; 843 bucket(bo) = cache_bucket(num_pages); 844 bo->reusable = true; 845 bo->domain = DOMAIN_CPU; 846 list_init(&bo->request); 847 list_init(&bo->list); 848 list_init(&bo->vma); 849 850 return bo; 851} 852 853static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) 854{ 855 struct kgem_bo *bo; 856 857 if (__kgem_freed_bo) { 858 bo = __kgem_freed_bo; 859 __kgem_freed_bo = *(struct kgem_bo **)bo; 860 } else { 861 bo = malloc(sizeof(*bo)); 862 if (bo == NULL) 863 return NULL; 864 } 865 866 return __kgem_bo_init(bo, handle, 
num_pages); 867} 868 869static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) 870{ 871 struct kgem_request *rq; 872 873 rq = __kgem_freed_request; 874 if (rq) { 875 __kgem_freed_request = *(struct kgem_request **)rq; 876 } else { 877 rq = malloc(sizeof(*rq)); 878 if (rq == NULL) 879 rq = &kgem->static_request; 880 } 881 882 list_init(&rq->buffers); 883 rq->bo = NULL; 884 rq->ring = 0; 885 886 return rq; 887} 888 889static void __kgem_request_free(struct kgem_request *rq) 890{ 891 _list_del(&rq->list); 892 if (DBG_NO_MALLOC_CACHE) { 893 free(rq); 894 } else { 895 *(struct kgem_request **)rq = __kgem_freed_request; 896 __kgem_freed_request = rq; 897 } 898} 899 900static struct list *inactive(struct kgem *kgem, int num_pages) 901{ 902 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); 903 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); 904 return &kgem->inactive[cache_bucket(num_pages)]; 905} 906 907static struct list *active(struct kgem *kgem, int num_pages, int tiling) 908{ 909 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE); 910 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS); 911 return &kgem->active[cache_bucket(num_pages)][tiling]; 912} 913 914static size_t 915agp_aperture_size(struct pci_device *dev, unsigned gen) 916{ 917 /* XXX assume that only future chipsets are unknown and follow 918 * the post gen2 PCI layout. 919 */ 920 return dev->regions[gen < 030 ? 
0 : 2].size; 921} 922 923static size_t 924total_ram_size(void) 925{ 926#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM 927 struct sysinfo info; 928 if (sysinfo(&info) == 0) 929 return info.totalram * info.mem_unit; 930#endif 931 932#ifdef _SC_PHYS_PAGES 933 return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); 934#endif 935 936 return 0; 937} 938 939static unsigned 940cpu_cache_size__cpuid4(void) 941{ 942 /* Deterministic Cache Parameters (Function 04h)": 943 * When EAX is initialized to a value of 4, the CPUID instruction 944 * returns deterministic cache information in the EAX, EBX, ECX 945 * and EDX registers. This function requires ECX be initialized 946 * with an index which indicates which cache to return information 947 * about. The OS is expected to call this function (CPUID.4) with 948 * ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches. 949 * The order in which the caches are returned is not specified 950 * and may change at Intel's discretion. 951 * 952 * Calculating the Cache Size in bytes: 953 * = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1) 954 */ 955 956 unsigned int eax, ebx, ecx, edx; 957 unsigned int llc_size = 0; 958 int cnt; 959 960 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4) 961 return 0; 962 963 cnt = 0; 964 do { 965 unsigned associativity, line_partitions, line_size, sets; 966 967 __cpuid_count(4, cnt++, eax, ebx, ecx, edx); 968 969 if ((eax & 0x1f) == 0) 970 break; 971 972 associativity = ((ebx >> 22) & 0x3ff) + 1; 973 line_partitions = ((ebx >> 12) & 0x3ff) + 1; 974 line_size = (ebx & 0xfff) + 1; 975 sets = ecx + 1; 976 977 llc_size = associativity * line_partitions * line_size * sets; 978 } while (1); 979 980 return llc_size; 981} 982 983static unsigned 984cpu_cache_size(void) 985{ 986 unsigned size; 987 FILE *file; 988 989 size = cpu_cache_size__cpuid4(); 990 if (size) 991 return size; 992 993 file = fopen("/proc/cpuinfo", "r"); 994 if (file) { 995 size_t len = 0; 996 char *line = NULL; 997 while (getline(&line, &len, file) 
!= -1) { 998 int kb; 999 if (sscanf(line, "cache size : %d KB", &kb) == 1) { 1000 /* Paranoid check against gargantuan caches */ 1001 if (kb <= 1<<20) 1002 size = kb * 1024; 1003 break; 1004 } 1005 } 1006 free(line); 1007 fclose(file); 1008 } 1009 1010 if (size == 0) 1011 size = 64 * 1024; 1012 1013 return size; 1014} 1015 1016static int gem_param(struct kgem *kgem, int name) 1017{ 1018 drm_i915_getparam_t gp; 1019 int v = -1; /* No param uses the sign bit, reserve it for errors */ 1020 1021 VG_CLEAR(gp); 1022 gp.param = name; 1023 gp.value = &v; 1024 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp)) 1025 return -1; 1026 1027 VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); 1028 return v; 1029} 1030 1031static bool test_has_execbuffer2(struct kgem *kgem) 1032{ 1033 struct drm_i915_gem_execbuffer2 execbuf; 1034 1035 memset(&execbuf, 0, sizeof(execbuf)); 1036 execbuf.buffer_count = 1; 1037 1038 return do_ioctl(kgem->fd, 1039 DRM_IOCTL_I915_GEM_EXECBUFFER2, 1040 &execbuf) == -EFAULT; 1041} 1042 1043static bool test_has_no_reloc(struct kgem *kgem) 1044{ 1045 if (DBG_NO_FAST_RELOC) 1046 return false; 1047 1048 return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; 1049} 1050 1051static bool test_has_handle_lut(struct kgem *kgem) 1052{ 1053 if (DBG_NO_HANDLE_LUT) 1054 return false; 1055 1056 return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; 1057} 1058 1059static bool test_has_wt(struct kgem *kgem) 1060{ 1061 if (DBG_NO_WT) 1062 return false; 1063 1064 return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0; 1065} 1066 1067static bool test_has_semaphores_enabled(struct kgem *kgem) 1068{ 1069 FILE *file; 1070 bool detected = false; 1071 int ret; 1072 1073 if (DBG_NO_SEMAPHORES) 1074 return false; 1075 1076 ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES); 1077 if (ret != -1) 1078 return ret > 0; 1079 1080 file = fopen("/sys/module/i915/parameters/semaphores", "r"); 1081 if (file) { 1082 int value; 1083 if (fscanf(file, "%d", &value) == 1) 1084 detected = 
value != 0; 1085 fclose(file); 1086 } 1087 1088 return detected; 1089} 1090 1091static bool is_hw_supported(struct kgem *kgem, 1092 struct pci_device *dev) 1093{ 1094 if (DBG_NO_HW) 1095 return false; 1096 1097 if (!test_has_execbuffer2(kgem)) 1098 return false; 1099 1100 if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */ 1101 return kgem->has_blt; 1102 1103 /* Although pre-855gm the GMCH is fubar, it works mostly. So 1104 * let the user decide through "NoAccel" whether or not to risk 1105 * hw acceleration. 1106 */ 1107 1108 if (kgem->gen == 060 && dev && dev->revision < 8) { 1109 /* pre-production SNB with dysfunctional BLT */ 1110 return false; 1111 } 1112 1113 if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */ 1114 return kgem->has_blt; 1115 1116 return true; 1117} 1118 1119static bool test_has_relaxed_fencing(struct kgem *kgem) 1120{ 1121 if (kgem->gen < 040) { 1122 if (DBG_NO_RELAXED_FENCING) 1123 return false; 1124 1125 return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; 1126 } else 1127 return true; 1128} 1129 1130static bool test_has_llc(struct kgem *kgem) 1131{ 1132 int has_llc = -1; 1133 1134 if (DBG_NO_LLC) 1135 return false; 1136 1137 has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC); 1138 if (has_llc == -1) { 1139 DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); 1140 has_llc = kgem->gen >= 060; 1141 } 1142 1143 return has_llc; 1144} 1145 1146static bool test_has_wc_mmap(struct kgem *kgem) 1147{ 1148 struct local_i915_gem_mmap2 wc; 1149 bool ret; 1150 1151 if (DBG_NO_WC_MMAP) 1152 return false; 1153 1154 if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) 1155 return false; 1156 1157 VG_CLEAR(wc); 1158 wc.handle = gem_create(kgem->fd, 1); 1159 wc.offset = 0; 1160 wc.size = 4096; 1161 wc.flags = I915_MMAP_WC; 1162 ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0; 1163 gem_close(kgem->fd, wc.handle); 1164 1165 return 
ret; 1166} 1167 1168static bool test_has_caching(struct kgem *kgem) 1169{ 1170 uint32_t handle; 1171 bool ret; 1172 1173 if (DBG_NO_CACHE_LEVEL) 1174 return false; 1175 1176 /* Incoherent blt and sampler hangs the GPU */ 1177 if (kgem->gen == 040) 1178 return false; 1179 1180 handle = gem_create(kgem->fd, 1); 1181 if (handle == 0) 1182 return false; 1183 1184 ret = gem_set_caching(kgem->fd, handle, UNCACHED); 1185 gem_close(kgem->fd, handle); 1186 return ret; 1187} 1188 1189static bool test_has_userptr(struct kgem *kgem) 1190{ 1191 uint32_t handle; 1192 void *ptr; 1193 1194 if (DBG_NO_USERPTR) 1195 return false; 1196 1197 /* Incoherent blt and sampler hangs the GPU */ 1198 if (kgem->gen == 040) 1199 return false; 1200 1201 if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) 1202 return false; 1203 1204 handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); 1205 gem_close(kgem->fd, handle); 1206 free(ptr); 1207 1208 return handle != 0; 1209} 1210 1211static bool test_has_create2(struct kgem *kgem) 1212{ 1213#if defined(USE_CREATE2) 1214 struct local_i915_gem_create2 args; 1215 1216 if (DBG_NO_CREATE2) 1217 return false; 1218 1219 memset(&args, 0, sizeof(args)); 1220 args.size = PAGE_SIZE; 1221 args.caching = DISPLAY; 1222 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0) 1223 gem_close(kgem->fd, args.handle); 1224 1225 return args.handle != 0; 1226#else 1227 return false; 1228#endif 1229} 1230 1231static bool test_has_secure_batches(struct kgem *kgem) 1232{ 1233 if (DBG_NO_SECURE_BATCHES) 1234 return false; 1235 1236 return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; 1237} 1238 1239static bool test_has_pinned_batches(struct kgem *kgem) 1240{ 1241 if (DBG_NO_PINNED_BATCHES) 1242 return false; 1243 1244 return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; 1245} 1246 1247static int kgem_get_screen_index(struct kgem *kgem) 1248{ 1249 struct sna *sna = container_of(kgem, struct sna, kgem); 1250 return sna->scrn->scrnIndex; 1251} 1252 
1253static int __find_debugfs(struct kgem *kgem) 1254{ 1255 int i; 1256 1257 for (i = 0; i < DRM_MAX_MINOR; i++) { 1258 char path[80]; 1259 1260 sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); 1261 if (access(path, R_OK) == 0) 1262 return i; 1263 1264 sprintf(path, "/debug/dri/%d/i915_wedged", i); 1265 if (access(path, R_OK) == 0) 1266 return i; 1267 } 1268 1269 return -1; 1270} 1271 1272static int kgem_get_minor(struct kgem *kgem) 1273{ 1274 struct stat st; 1275 1276 if (fstat(kgem->fd, &st)) 1277 return __find_debugfs(kgem); 1278 1279 if (!S_ISCHR(st.st_mode)) 1280 return __find_debugfs(kgem); 1281 1282 return st.st_rdev & 0x63; 1283} 1284 1285static bool kgem_init_pinned_batches(struct kgem *kgem) 1286{ 1287 int count[2] = { 16, 4 }; 1288 int size[2] = { 1, 4 }; 1289 int n, i; 1290 1291 if (kgem->wedged) 1292 return true; 1293 1294 for (n = 0; n < ARRAY_SIZE(count); n++) { 1295 for (i = 0; i < count[n]; i++) { 1296 struct drm_i915_gem_pin pin; 1297 struct kgem_bo *bo; 1298 1299 VG_CLEAR(pin); 1300 1301 pin.handle = gem_create(kgem->fd, size[n]); 1302 if (pin.handle == 0) 1303 goto err; 1304 1305 DBG(("%s: new handle=%d, num_pages=%d\n", 1306 __FUNCTION__, pin.handle, size[n])); 1307 1308 bo = __kgem_bo_alloc(pin.handle, size[n]); 1309 if (bo == NULL) { 1310 gem_close(kgem->fd, pin.handle); 1311 goto err; 1312 } 1313 1314 pin.alignment = 0; 1315 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { 1316 gem_close(kgem->fd, pin.handle); 1317 free(bo); 1318 goto err; 1319 } 1320 bo->presumed_offset = pin.offset; 1321 debug_alloc__bo(kgem, bo); 1322 list_add(&bo->list, &kgem->pinned_batches[n]); 1323 } 1324 } 1325 1326 return true; 1327 1328err: 1329 for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) { 1330 while (!list_is_empty(&kgem->pinned_batches[n])) { 1331 kgem_bo_destroy(kgem, 1332 list_first_entry(&kgem->pinned_batches[n], 1333 struct kgem_bo, list)); 1334 } 1335 } 1336 1337 /* For simplicity populate the lists with a single unpinned bo */ 
	for (n = 0; n < ARRAY_SIZE(count); n++) {
		struct kgem_bo *bo;
		uint32_t handle;

		handle = gem_create(kgem->fd, size[n]);
		if (handle == 0)
			break;

		bo = __kgem_bo_alloc(handle, size[n]);
		if (bo == NULL) {
			gem_close(kgem->fd, handle);
			break;
		}

		debug_alloc__bo(kgem, bo);
		list_add(&bo->list, &kgem->pinned_batches[n]);
	}
	return false;
}

/* Query the hardware swizzle mode via a scratch X-tiled bo and, if the
 * physical swizzling is usable, select the matching tiled memcpy.
 */
static void kgem_init_swizzling(struct kgem *kgem)
{
	struct local_i915_gem_get_tiling_v2 {
		uint32_t handle;
		uint32_t tiling_mode;
		uint32_t swizzle_mode;
		uint32_t phys_swizzle_mode;
	} tiling;
#define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2)

	VG_CLEAR(tiling);
	tiling.handle = gem_create(kgem->fd, 1);
	if (!tiling.handle)
		return;

	if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
		goto out;

	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling))
		goto out;

	/* NOTE(review): gen thresholds elsewhere in this file are octal
	 * (030, 040, 070); `50` here is decimal, which falls between 060
	 * and 070 — verify 050 was not intended.
	 */
	if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode)
		goto out;

	choose_memcpy_tiled_x(kgem, tiling.swizzle_mode);
out:
	gem_close(kgem->fd, tiling.handle);
}

/* Patch up self-relocations (batch references to itself) now that the
 * batch bo and its presumed offset are known; also rebase relocation
 * deltas/offsets when the batch has been shrunk by `shrink` bytes.
 */
static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
{
	int n;

	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;

	assert(kgem->nreloc__self <= 256);
	if (kgem->nreloc__self == 0)
		return;

	DBG(("%s: fixing up %d%s self-relocations to handle=%p, presumed-offset=%llx\n",
	     __FUNCTION__, kgem->nreloc__self,
	     kgem->nreloc__self == 256 ?
	     "+" : "",
	     bo->handle, (long long)bo->presumed_offset));
	for (n = 0; n < kgem->nreloc__self; n++) {
		int i = kgem->reloc__self[n];

		assert(kgem->reloc[i].target_handle == ~0U);
		kgem->reloc[i].target_handle = bo->target_handle;
		kgem->reloc[i].presumed_offset = bo->presumed_offset;

		if (kgem->reloc[i].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
			DBG(("%s: moving base of self-reloc[%d:%d] %d -> %d\n",
			     __FUNCTION__, n, i,
			     kgem->reloc[i].delta,
			     kgem->reloc[i].delta - shrink));

			kgem->reloc[i].delta -= shrink;
		}
		kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] =
			kgem->reloc[i].delta + bo->presumed_offset;
	}

	/* reloc__self holds at most 256 entries; if it overflowed, scan the
	 * remaining relocations for the ~0U sentinel target instead. */
	if (n == 256) {
		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
			if (kgem->reloc[n].target_handle == ~0U) {
				kgem->reloc[n].target_handle = bo->target_handle;
				kgem->reloc[n].presumed_offset = bo->presumed_offset;

				if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
					DBG(("%s: moving base of reloc[%d] %d -> %d\n",
					     __FUNCTION__, n,
					     kgem->reloc[n].delta,
					     kgem->reloc[n].delta - shrink));
					kgem->reloc[n].delta -= shrink;
				}
				kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] =
					kgem->reloc[n].delta + bo->presumed_offset;
			}
		}
	}

	if (shrink) {
		DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
		for (n = 0; n < kgem->nreloc; n++) {
			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
				kgem->reloc[n].offset -= shrink;
		}
	}
}

/* Retire the current batch bo (fixing up its self-relocations) and map a
 * fresh one; falls back to a malloc'ed batch if mapping fails, and wedges
 * the device if even that allocation fails. Returns the previous batch bo
 * (or NULL).
 */
static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
{
	struct kgem_bo *last;
	unsigned flags;

	last = kgem->batch_bo;
	if (last) {
		kgem_fixup_relocs(kgem, last, 0);
		kgem->batch = NULL;
	}

	/* A non-NULL batch with no batch_bo is the malloc fallback; reuse it */
	if (kgem->batch) {
		assert(last == NULL);
		return NULL;
	}

	flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
	if (!kgem->has_llc)
		flags |= CREATE_UNCACHED;

	kgem->batch_bo = kgem_create_linear(kgem,
					    sizeof(uint32_t)*kgem->batch_size,
					    flags);
	if (kgem->batch_bo)
		kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
	if (kgem->batch == NULL) {
		DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
		     __FUNCTION__,
		     sizeof(uint32_t)*kgem->batch_size));
		if (kgem->batch_bo) {
			kgem_bo_destroy(kgem, kgem->batch_bo);
			kgem->batch_bo = NULL;
		}

		if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
				   ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
			ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
			__kgem_set_wedged(kgem);
		}
	} else {
		DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
		     __FUNCTION__, kgem->batch_bo->handle,
		     sizeof(uint32_t)*kgem->batch_size));
		kgem_bo_sync__cpu(kgem, kgem->batch_bo);
	}

	DBG(("%s: using last batch handle=%d\n",
	     __FUNCTION__, last ?
	last->handle : 0));
	return last;
}

/* One-time initialisation of the kgem buffer manager for this device:
 * set up the cache lists, probe kernel/hardware features, size the batch
 * buffer, and derive the aperture/object-size watermarks used by the
 * allocation policies.
 */
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
	struct drm_i915_gem_get_aperture aperture;
	size_t totalram;
	unsigned half_gpu_max;
	unsigned int i, j;

	DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));

	kgem->fd = fd;
	kgem->gen = gen;

	/* Initialise every bo cache/tracking list before any allocation */
	list_init(&kgem->requests[0]);
	list_init(&kgem->requests[1]);
	list_init(&kgem->batch_buffers);
	list_init(&kgem->active_buffers);
	list_init(&kgem->flushing);
	list_init(&kgem->large);
	list_init(&kgem->large_inactive);
	list_init(&kgem->snoop);
	list_init(&kgem->scanout);
	for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
		list_init(&kgem->pinned_batches[i]);
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
		list_init(&kgem->inactive[i]);
	for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
		for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
			list_init(&kgem->active[i][j]);
	}
	for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
		for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
			list_init(&kgem->vma[i].inactive[j]);
	}
	/* Negative counts give each vma cache an initial allowance */
	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;

	/* Probe kernel features; each has a DBG_NO_* override above */
	kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
	DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
	     kgem->has_blt));

	kgem->has_relaxed_delta =
		gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
	DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
	     kgem->has_relaxed_delta));

	kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
	DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
	     kgem->has_relaxed_fencing));

	kgem->has_llc = test_has_llc(kgem);
	DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
	     kgem->has_llc));

	kgem->has_wt = test_has_wt(kgem);
	DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
	     kgem->has_wt));

	kgem->has_wc_mmap = test_has_wc_mmap(kgem);
	DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
	     kgem->has_wc_mmap));

	kgem->has_caching = test_has_caching(kgem);
	DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
	     kgem->has_caching));

	kgem->has_userptr = test_has_userptr(kgem);
	DBG(("%s: has userptr? %d\n", __FUNCTION__,
	     kgem->has_userptr));

	kgem->has_create2 = test_has_create2(kgem);
	DBG(("%s: has create2? %d\n", __FUNCTION__,
	     kgem->has_create2));

	kgem->has_no_reloc = test_has_no_reloc(kgem);
	DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
	     kgem->has_no_reloc));

	kgem->has_handle_lut = test_has_handle_lut(kgem);
	DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
	     kgem->has_handle_lut));

	/* Semaphores are only useful for inter-ring sync, i.e. with BLT */
	kgem->has_semaphores = false;
	if (kgem->has_blt && test_has_semaphores_enabled(kgem))
		kgem->has_semaphores = true;
	DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
	     kgem->has_semaphores));

	kgem->can_blt_cpu = gen >= 030;
	DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
	     kgem->can_blt_cpu));

	/* No Y-tiled rendering on gen2.1 nor gen4 (gen>>3 == 4) */
	kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
	DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
	     kgem->can_render_y));

	kgem->has_secure_batches = test_has_secure_batches(kgem);
	DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
	     kgem->has_secure_batches));

	kgem->has_pinned_batches = test_has_pinned_batches(kgem);
	DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
	     kgem->has_pinned_batches));

	if (!is_hw_supported(kgem, dev)) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	} else if (__kgem_throttle(kgem, false)) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Detected a hung GPU, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	}

	/* Select the batch size in dwords, capped by per-gen quirks */
	kgem->batch_size = UINT16_MAX & ~7;
	if (gen == 020 && !kgem->has_pinned_batches)
		/* Limited to what we can pin */
		kgem->batch_size = 4*1024;
	if (gen == 022)
		/* 865g cannot handle a batch spanning multiple pages */
		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
	if (gen >= 070)
		kgem->batch_size = 16*1024;
	if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
		kgem->batch_size = 4*1024;

	/* Pinned batches are mandatory on gen2; wedge if unavailable */
	if (!kgem_init_pinned_batches(kgem) && gen == 020) {
		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
			   "Unable to reserve memory for GPU, disabling acceleration.\n");
		__kgem_set_wedged(kgem);
	}

	DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
	     kgem->batch_size));
	kgem_new_batch(kgem);

	/* Half the LLC size expressed in pages (>>12 pages, /2) */
	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
	     __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));

	kgem->next_request = __kgem_request_alloc(kgem);

	DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
	     !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
	     kgem->has_llc, kgem->has_caching, kgem->has_userptr));

	/* Query the GTT aperture; assume a small 64MiB one on failure */
	VG_CLEAR(aperture);
	aperture.aper_size = 0;
	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
	if (aperture.aper_size == 0)
		aperture.aper_size = 64*1024*1024;

	DBG(("%s: aperture size %lld, available now %lld\n",
	     __FUNCTION__,
	     (long long)aperture.aper_size,
	     (long long)aperture.aper_available_size));

	kgem->aperture_total = aperture.aper_size;
	kgem->aperture_high = aperture.aper_size * 3/4;
	kgem->aperture_low = aperture.aper_size * 1/3;
	if (gen < 033) {
		/* Severe alignment penalties */
		kgem->aperture_high /= 2;
		kgem->aperture_low /= 2;
	}
	DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
	     kgem->aperture_low, kgem->aperture_low / (1024*1024),
	     kgem->aperture_high, kgem->aperture_high / (1024*1024)));

	kgem->aperture_mappable = 256 * 1024 * 1024;
	if (dev != NULL)
		kgem->aperture_mappable = agp_aperture_size(dev, gen);
	if (kgem->aperture_mappable == 0 ||
	    kgem->aperture_mappable > aperture.aper_size)
		kgem->aperture_mappable = aperture.aper_size;
	DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
	     kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));

	kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable);
	DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__,
	     kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024)));

	/* Upload buffer size: pow2, ~1/1024 of the mappable aperture,
	 * capped at half the CPU cache. */
	kgem->buffer_size = 64 * 1024;
	while (kgem->buffer_size < kgem->aperture_mappable >> 10)
		kgem->buffer_size *= 2;
	if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
		kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
	kgem->buffer_size = 1 << __fls(kgem->buffer_size);
	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
	     kgem->buffer_size, kgem->buffer_size / 1024));
	assert(kgem->buffer_size);

	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
	kgem->max_gpu_size = kgem->max_object_size;
	if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
		kgem->max_gpu_size = MAX_CACHE_SIZE;

	/* Clamp the object/watermark sizes so we cannot exhaust system RAM */
	totalram = total_ram_size();
	if (totalram == 0) {
		DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
		     __FUNCTION__));
		totalram = kgem->aperture_total;
	}
	DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
	if (kgem->max_object_size > totalram / 2)
		kgem->max_object_size = totalram / 2;
	if (kgem->max_gpu_size > totalram / 4)
		kgem->max_gpu_size = totalram / 4;

	if (kgem->aperture_high > totalram / 2) {
		kgem->aperture_high = totalram / 2;
		kgem->aperture_low = kgem->aperture_high / 4;
		DBG(("%s: reduced aperture watermaks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
		     kgem->aperture_low, kgem->aperture_low / (1024*1024),
		     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
	}

	kgem->max_cpu_size = kgem->max_object_size;

	half_gpu_max = kgem->max_gpu_size / 2;
	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
	if (kgem->max_copy_tile_size > half_gpu_max)
		kgem->max_copy_tile_size = half_gpu_max;

	if (kgem->has_llc)
		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
	else
		kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
	if (kgem->max_upload_tile_size > half_gpu_max)
		kgem->max_upload_tile_size = half_gpu_max;
	if (kgem->max_upload_tile_size > kgem->aperture_high/2)
		kgem->max_upload_tile_size = kgem->aperture_high/2;
	if (kgem->max_upload_tile_size > kgem->aperture_low)
		kgem->max_upload_tile_size = kgem->aperture_low;
	if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
		kgem->max_upload_tile_size = 16*PAGE_SIZE;

	kgem->large_object_size = MAX_CACHE_SIZE;
	if (kgem->large_object_size > half_gpu_max)
		kgem->large_object_size = half_gpu_max;
	if (kgem->max_copy_tile_size > kgem->aperture_high/2)
		kgem->max_copy_tile_size = kgem->aperture_high/2;
	if (kgem->max_copy_tile_size > kgem->aperture_low)
		kgem->max_copy_tile_size = kgem->aperture_low;
	if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
		kgem->max_copy_tile_size = 16*PAGE_SIZE;

	/* CPU bo require llc, set-caching or userptr support */
	if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
		if (kgem->large_object_size > kgem->max_cpu_size)
			kgem->large_object_size = kgem->max_cpu_size;
	} else
		kgem->max_cpu_size = 0;
	if (DBG_NO_CPU)
		kgem->max_cpu_size = 0;

	DBG(("%s: maximum object size=%d\n",
	     __FUNCTION__, kgem->max_object_size));
	DBG(("%s: large object thresold=%d\n",
	     __FUNCTION__, kgem->large_object_size));
	DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
	     __FUNCTION__,
	     kgem->max_gpu_size, kgem->max_cpu_size,
	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));

	/* Convert the aperture thresholds to pages */
	kgem->aperture_mappable /= PAGE_SIZE;
	kgem->aperture_fenceable /= PAGE_SIZE;
	kgem->aperture_low /= PAGE_SIZE;
	kgem->aperture_high /= PAGE_SIZE;
	kgem->aperture_total /= PAGE_SIZE;

	/* Keep two fences in reserve for the kernel/other users */
	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
	if ((int)kgem->fence_max < 0)
		kgem->fence_max = 5; /* minimum safe value for all hw */
	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));

	kgem->batch_flags_base = 0;
	if (kgem->has_no_reloc)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
	if (kgem->has_handle_lut)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
	if (kgem->has_pinned_batches)
		kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;

	kgem_init_swizzling(kgem);
}

/* XXX hopefully a
good approximation */
/* Hand out a non-zero id, wrapping past zero on overflow. */
static uint32_t kgem_get_unique_id(struct kgem *kgem)
{
	uint32_t id;
	id = ++kgem->unique_id;
	if (id == 0)
		id = ++kgem->unique_id;
	return id;
}

/* Minimum pitch alignment in bytes for a linear surface, depending on
 * how the bo will be used (prime export, scanout) and the hw gen.
 */
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
	if (flags & CREATE_PRIME)
		return 256;
	if (flags & CREATE_SCANOUT)
		return 64;
	if (kgem->gen >= 0100)
		return 32;
	return 8;
}

/* Report the tile dimensions (bytes x rows) and tile size in bytes for
 * the given tiling mode on this hw gen; 1x1x1 for linear surfaces.
 */
void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
			int *tile_width, int *tile_height, int *tile_size)
{
	if (kgem->gen <= 030) {
		if (tiling) {
			if (kgem->gen < 030) {
				*tile_width = 128;
				*tile_height = 16;
				*tile_size = 2048;
			} else {
				*tile_width = 512;
				*tile_height = 8;
				*tile_size = 4096;
			}
		} else {
			*tile_width = 1;
			*tile_height = 1;
			*tile_size = 1;
		}
	} else switch (tiling) {
	default:
	case I915_TILING_NONE:
		*tile_width = 1;
		*tile_height = 1;
		*tile_size = 1;
		break;
	case I915_TILING_X:
		*tile_width = 512;
		*tile_height = 8;
		*tile_size = 4096;
		break;
	case I915_TILING_Y:
		*tile_width = 128;
		*tile_height = 32;
		*tile_size = 4096;
		break;
	}

	/* Force offset alignment to tile-row */
	if (tiling && kgem->gen < 033)
		*tile_width = pitch;
}

/* Compute the allocation size in bytes for a surface of the given
 * dimensions and tiling, writing the aligned pitch through *pitch.
 * Returns 0 when the surface exceeds the blitter limits (pre-gen4).
 */
static uint32_t kgem_surface_size(struct kgem *kgem,
				  bool relaxed_fencing,
				  unsigned flags,
				  uint32_t width,
				  uint32_t height,
				  uint32_t bpp,
				  uint32_t tiling,
				  uint32_t *pitch)
{
	uint32_t tile_width, tile_height;
	uint32_t size;

	assert(width <= MAXSHORT);
	assert(height <= MAXSHORT);
	assert(bpp >= 8);

	if (kgem->gen <= 030) {
		if (tiling) {
			if (kgem->gen < 030) {
				tile_width = 128;
				tile_height = 16;
			} else {
				tile_width = 512;
				tile_height = 8;
			}
		} else {
			tile_width = 2 * bpp >> 3;
			tile_width = ALIGN(tile_width,
					   kgem_pitch_alignment(kgem, flags));
			tile_height = 1;
		}
	} else switch (tiling) {
	default:
	case I915_TILING_NONE:
		tile_width = 2 * bpp >> 3;
		tile_width = ALIGN(tile_width,
				   kgem_pitch_alignment(kgem, flags));
		tile_height = 1;
		break;

	case I915_TILING_X:
		tile_width = 512;
		tile_height = 8;
		break;
	case I915_TILING_Y:
		tile_width = 128;
		tile_height = 32;
		break;
	}
	/* XXX align to an even tile row */
	if (!kgem->has_relaxed_fencing)
		tile_height *= 2;

	*pitch = ALIGN(width * bpp / 8, tile_width);
	height = ALIGN(height, tile_height);
	DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n",
	     __FUNCTION__, tile_width, tile_height, *pitch, height));

	if (kgem->gen >= 040)
		return PAGE_ALIGN(*pitch * height);

	/* If it is too wide for the blitter, don't even bother.  */
	if (tiling != I915_TILING_NONE) {
		if (*pitch > 8192) {
			DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n",
			     __FUNCTION__, *pitch, 8192));
			return 0;
		}

		/* Pre-gen4 tiled pitches must be a power of two */
		for (size = tile_width; size < *pitch; size <<= 1)
			;
		*pitch = size;
	} else {
		/* NOTE(review): the check rejects pitch >= 32768 while the
		 * message reports limit 32767 — the accepted maximum. */
		if (*pitch >= 32768) {
			DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n",
			     __FUNCTION__, *pitch, 32767));
			return 0;
		}
	}

	size = *pitch * height;
	if (relaxed_fencing || tiling == I915_TILING_NONE)
		return PAGE_ALIGN(size);

	/* We need to allocate a pot fence region for a tiled buffer.
	 */
	if (kgem->gen < 030)
		tile_width = 512 * 1024;
	else
		tile_width = 1024 * 1024;
	while (tile_width < size)
		tile_width *= 2;
	return tile_width;
}

/* Validate externally supplied surface parameters (e.g. from a prime
 * import) against the minimum pitch/size and tile alignment constraints.
 */
bool kgem_check_surface_size(struct kgem *kgem,
			     uint32_t width,
			     uint32_t height,
			     uint32_t bpp,
			     uint32_t tiling,
			     uint32_t pitch,
			     uint32_t size)
{
	uint32_t min_size, min_pitch;
	int tile_width, tile_height, tile_size;

	DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n",
	     __FUNCTION__, width, height, bpp, tiling, pitch, size));

	if (pitch & 3)
		return false;

	min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0,
				     width, height, bpp, tiling,
				     &min_pitch);

	DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size));

	if (size < min_size)
		return false;

	if (pitch < min_pitch)
		return false;

	kgem_get_tile_size(kgem, tiling, min_pitch,
			   &tile_width, &tile_height, &tile_size);

	DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size));
	if (pitch & (tile_width - 1))
		return false;
	if (size & (tile_size - 1))
		return false;

	return true;
}

/* Round a surface height up to the tile-row multiple required by the
 * tiling mode on this hw gen.
 */
static uint32_t kgem_aligned_height(struct kgem *kgem,
				    uint32_t height, uint32_t tiling)
{
	uint32_t tile_height;

	if (kgem->gen <= 030) {
		tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1;
	} else switch (tiling) {
	/* XXX align to an even tile row */
	default:
	case I915_TILING_NONE:
		tile_height = 1;
		break;
	case I915_TILING_X:
		tile_height = 8;
		break;
	case I915_TILING_Y:
		tile_height = 32;
		break;
	}

	/* XXX align to an even tile row */
	if (!kgem->has_relaxed_fencing)
		tile_height *= 2;

	return ALIGN(height, tile_height);
}

/* Append the bo to the execbuffer object array for the current batch,
 * returning its (zeroed) exec entry; also accounts its aperture usage.
 */
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_exec_object2 *exec;

	DBG(("%s: handle=%d, index=%d\n",
	     __FUNCTION__, bo->handle, kgem->nexec));

	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
	/* With handle-lut the kernel indexes by exec slot, not handle */
	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
	exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
	exec->handle = bo->handle;
	exec->offset = bo->presumed_offset;

	kgem->aperture += num_pages(bo);

	return exec;
}

/* Attach the bo to the batch under construction: add it to the exec
 * list and track it on the next request.
 */
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt);
	assert(bo->proxy == NULL);

	bo->exec = kgem_add_handle(kgem, bo);
	bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);

	list_move_tail(&bo->request, &kgem->next_request->buffers);
	if (bo->io && !list_is_empty(&bo->list))
		list_move(&bo->list, &kgem->batch_buffers);

	/* XXX is it worth working around gcc here?
	 */
	kgem->flush |= bo->flush;
}

/* Terminate the batch with MI_BATCH_BUFFER_END, padding with MI_NOOP to
 * an even dword count; returns the final batch length in dwords.
 */
static uint32_t kgem_end_batch(struct kgem *kgem)
{
	kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
	if (kgem->nbatch & 1)
		kgem->batch[kgem->nbatch++] = MI_NOOP;

	return kgem->nbatch;
}

/* Free the chained list of surface-binding records attached to the bo. */
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_bo_binding *b;

	b = bo->binding.next;
	while (b) {
		struct kgem_bo_binding *next = b->next;
		free(b);
		b = next;
	}
}

/* Release the KMS framebuffer (stored in bo->delta) if this scanout bo
 * has one attached.
 */
static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo)
{
	if (bo->scanout && bo->delta) {
		DBG(("%s: releasing fb=%d for handle=%d\n",
		     __FUNCTION__, bo->delta, bo->handle));
		/* XXX will leak if we are not DRM_MASTER. *shrug* */
		do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta);
		bo->delta = 0;
	}
}

/* Final destruction of a bo: release bindings, fb, mappings and the GEM
 * handle, then recycle the struct onto the freed-bo cache (or free it).
 */
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(!bo->snoop || bo->rq == NULL);

#ifdef DEBUG_MEMORY
	kgem->debug_memory.bo_allocs--;
	kgem->debug_memory.bo_bytes -= bytes(bo);
#endif

	kgem_bo_binding_free(kgem, bo);
	kgem_bo_rmfb(kgem, bo);

	/* userptr bo: the cpu "map" is the user allocation itself */
	if (IS_USER_MAP(bo->map__cpu)) {
		assert(bo->rq == NULL);
		assert(!__kgem_busy(kgem, bo->handle));
		assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
		if (!(bo->io || bo->flush)) {
			DBG(("%s: freeing snooped base\n", __FUNCTION__));
			assert(bo != MAP(bo->map__cpu));
			free(MAP(bo->map__cpu));
		}
		bo->map__cpu = NULL;
	}

	DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
	     __FUNCTION__, bo->map__gtt, bo->map__cpu,
	     bo->handle, list_is_empty(&bo->vma) ?
	     0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));

	/* vma cache index 0 covers gtt/wc maps, index 1 cpu maps */
	if (!list_is_empty(&bo->vma)) {
		_list_del(&bo->vma);
		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
	}

	if (bo->map__gtt)
		munmap(bo->map__gtt, bytes(bo));
	if (bo->map__wc) {
		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
		munmap(bo->map__wc, bytes(bo));
	}
	if (bo->map__cpu) {
		VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
		munmap(MAP(bo->map__cpu), bytes(bo));
	}

	_list_del(&bo->list);
	_list_del(&bo->request);
	gem_close(kgem->fd, bo->handle);

	/* Keep the struct on a freelist, chained through its first word */
	if (!bo->io && !DBG_NO_MALLOC_CACHE) {
		*(struct kgem_bo **)bo = __kgem_freed_bo;
		__kgem_freed_bo = bo;
	} else
		free(bo);
}

/* Retire an idle, reusable bo into the inactive cache, keeping its
 * mapping in the appropriate vma cache when still useful.
 */
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
					    struct kgem_bo *bo)
{
	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));

	assert(bo->refcnt == 0);
	assert(bo->reusable);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	assert(bo->domain != DOMAIN_GPU);
	assert(!bo->proxy);
	assert(!bo->io);
	assert(!bo->scanout);
	assert(!bo->snoop);
	assert(!bo->flush);
	assert(!bo->needs_flush);
	assert(list_is_empty(&bo->vma));
	assert_tiling(kgem, bo);
	assert_cacheing(kgem, bo);
	ASSERT_IDLE(kgem, bo->handle);

	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
		/* Too big for the bucketed caches; drop the mapping */
		if (bo->map__gtt) {
			munmap(bo->map__gtt, bytes(bo));
			bo->map__gtt = NULL;
		}

		list_move(&bo->list, &kgem->large_inactive);
	} else {
		assert(bo->flush == false);
		assert(list_is_empty(&bo->vma));
		list_move(&bo->list, &kgem->inactive[bucket(bo)]);
		if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
			munmap(bo->map__gtt, bytes(bo));
			bo->map__gtt = NULL;
		}
		if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
			list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
			kgem->vma[0].count++;
		}
		if (bo->map__cpu && list_is_empty(&bo->vma)) {
			list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
			kgem->vma[1].count++;
		}
	}

	kgem->need_expire = true;
}

/* If the bo is an io (upload buffer) proxy parent, transfer its handle
 * to a plain minimal struct kgem_bo so it may be cached for reuse;
 * marks the bo unreusable if no replacement can be allocated.
 */
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
	struct kgem_bo *base;

	if (!bo->io)
		return bo;

	assert(!bo->snoop);
	if (__kgem_freed_bo) {
		base = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)base;
	} else
		base = malloc(sizeof(*base));
	if (base) {
		DBG(("%s: transferring io handle=%d to bo\n",
		     __FUNCTION__, bo->handle));
		/* transfer the handle to a minimum bo */
		memcpy(base, bo, sizeof(*base));
		base->io = false;
		list_init(&base->list);
		list_replace(&bo->request, &base->request);
		list_replace(&bo->vma, &base->vma);
		free(bo);
		bo = base;
	} else
		bo->reusable = false;

	return bo;
}

/* Unlink a bo from the inactive cache, including its vma-cache entry. */
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
						struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	if (!list_is_empty(&bo->vma)) {
		assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
		list_del(&bo->vma);
		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
	}
}

/* Unlink a bo from the active cache; also drop it from its request's
 * buffer list when the request is the static wedged marker (RQ == kgem).
 */
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
					      struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq != NULL);
	if (RQ(bo->rq) == (void *)kgem) {
		assert(bo->exec == NULL);
		list_del(&bo->request);
	}
	assert(list_is_empty(&bo->vma));
}

/* Return the space used by this upload-buffer proxy to its parent when
 * it was the most recent allocation from it.
 */
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
	struct
	kgem_buffer *io = (struct kgem_buffer *)bo->proxy;

	DBG(("%s: size=%d, offset=%d, parent used=%d\n",
	     __FUNCTION__, bo->size.bytes, bo->delta, io->used));

	if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
		io->used = bo->delta;
}

/* Check via GETFB that the framebuffer attached to this scanout bo
 * still matches the requested dimensions.
 */
static bool check_scanout_size(struct kgem *kgem,
			       struct kgem_bo *bo,
			       int width, int height)
{
	struct drm_mode_fb_cmd info;

	assert(bo->scanout);

	VG_CLEAR(info);
	info.fb_id = bo->delta;

	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info))
		return false;

	/* GETFB returned a fresh handle reference we do not need */
	gem_close(kgem->fd, info.handle);

	if (width != info.width || height != info.height) {
		DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n",
		     __FUNCTION__,
		     info.fb_id, info.width, info.height,
		     width, height));
		return false;
	}

	return true;
}

/* Retire an unreferenced scanout bo onto the scanout cache; busy bos go
 * to the tail so idle ones are preferred for reuse.
 */
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt == 0);
	assert(bo->scanout);
	assert(!bo->flush);
	assert(!bo->snoop);
	assert(!bo->io);

	if (bo->purged) { /* for stolen fb */
		if (!bo->exec) {
			DBG(("%s: discarding purged scanout - stolen?\n",
			     __FUNCTION__));
			kgem_bo_free(kgem, bo);
		}
		return;
	}

	DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
	     __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
	if (bo->rq)
		list_move_tail(&bo->list, &kgem->scanout);
	else
		list_move(&bo->list, &kgem->scanout);

	kgem->need_expire = true;

}

/* Retire an idle snooped (CPU-coherent) bo onto the snoop cache, unless
 * caching is disabled or the bo is too large to be worth keeping.
 */
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->reusable);
	assert(!bo->scanout);
	assert(!bo->flush);
	assert(!bo->needs_flush);
	assert(bo->refcnt == 0);
	assert(bo->exec == NULL);

	if (DBG_NO_SNOOP_CACHE) {
		kgem_bo_free(kgem, bo);
		return;
	}

	/* max_cpu_size>>13 is the threshold expressed in half-pages */
	if (num_pages(bo) > kgem->max_cpu_size >> 13) {
		DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
		     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
		kgem_bo_free(kgem, bo);
		return;
	}

	assert(bo->tiling == I915_TILING_NONE);
	assert(bo->rq == NULL);

	DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
	list_add(&bo->list, &kgem->snoop);
	kgem->need_expire = true;
}

/* Dispatch a released bo to the appropriate cache (snoop/scanout/
 * inactive) or free it; returns true if the bo was successfully
 * retired into the inactive cache.
 */
static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo)
{
	bool retired = false;

	DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle));

	if (bo->prime) {
		DBG(("%s: discarding imported prime handle=%d\n",
		     __FUNCTION__, bo->handle));
		kgem_bo_free(kgem, bo);
	} else if (bo->snoop) {
		kgem_bo_move_to_snoop(kgem, bo);
	} else if (bo->scanout) {
		kgem_bo_move_to_scanout(kgem, bo);
	} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
		   kgem_bo_set_purgeable(kgem, bo)) {
		kgem_bo_move_to_inactive(kgem, bo);
		retired = true;
	} else
		kgem_bo_free(kgem, bo);

	return retired;
}

/* Find a cached snooped bo of at least num_pages; a bo more than twice
 * the requested size is only used as a last resort.
 */
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;

	DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages,
flags)); 2371 2372 if ((kgem->has_caching | kgem->has_userptr) == 0) 2373 return NULL; 2374 2375 if (list_is_empty(&kgem->snoop)) { 2376 DBG(("%s: inactive and cache empty\n", __FUNCTION__)); 2377 if (!__kgem_throttle_retire(kgem, flags)) { 2378 DBG(("%s: nothing retired\n", __FUNCTION__)); 2379 return NULL; 2380 } 2381 } 2382 2383 list_for_each_entry(bo, &kgem->snoop, list) { 2384 assert(bo->refcnt == 0); 2385 assert(bo->snoop); 2386 assert(!bo->scanout); 2387 assert(!bo->purged); 2388 assert(bo->proxy == NULL); 2389 assert(bo->tiling == I915_TILING_NONE); 2390 assert(bo->rq == NULL); 2391 assert(bo->exec == NULL); 2392 2393 if (num_pages > num_pages(bo)) 2394 continue; 2395 2396 if (num_pages(bo) > 2*num_pages) { 2397 if (first == NULL) 2398 first = bo; 2399 continue; 2400 } 2401 2402 list_del(&bo->list); 2403 bo->pitch = 0; 2404 bo->delta = 0; 2405 2406 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", 2407 __FUNCTION__, bo->handle, num_pages(bo))); 2408 return bo; 2409 } 2410 2411 if (first) { 2412 list_del(&first->list); 2413 first->pitch = 0; 2414 first->delta = 0; 2415 2416 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n", 2417 __FUNCTION__, first->handle, num_pages(first))); 2418 return first; 2419 } 2420 2421 return NULL; 2422} 2423 2424void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) 2425{ 2426 if (kgem->nexec != 1 || bo->exec == NULL) 2427 return; 2428 2429 assert(bo); 2430 DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", 2431 __FUNCTION__, bo->handle)); 2432 2433 assert(bo->exec == &kgem->exec[0]); 2434 assert(kgem->exec[0].handle == bo->handle); 2435 assert(RQ(bo->rq) == kgem->next_request); 2436 2437 bo->refcnt++; 2438 kgem_reset(kgem); 2439 bo->refcnt--; 2440 2441 assert(kgem->nreloc == 0); 2442 assert(kgem->nexec == 0); 2443 assert(bo->exec == NULL); 2444} 2445 2446void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b) 2447{ 2448 if (kgem->nexec > 2) 2449 return; 2450 
	if (kgem->nexec == 1) {
		/* Only one exec entry: at most one of the pair is in it */
		if (a)
			kgem_bo_undo(kgem, a);
		if (b)
			kgem_bo_undo(kgem, b);
		return;
	}

	if (a == NULL || b == NULL)
		return;
	if (a->exec == NULL || b->exec == NULL)
		return;

	DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n",
	     __FUNCTION__, a->handle, b->handle));

	assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]);
	assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle);
	assert(RQ(a->rq) == kgem->next_request);
	assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]);
	assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle);
	assert(RQ(b->rq) == kgem->next_request);

	/* Pin both bo across the reset so neither is freed by it */
	a->refcnt++;
	b->refcnt++;
	kgem_reset(kgem);
	b->refcnt--;
	a->refcnt--;

	assert(kgem->nreloc == 0);
	assert(kgem->nexec == 0);
	assert(a->exec == NULL);
	assert(b->exec == NULL);
}

/* Final disposition of a bo whose last reference was just dropped: route
 * it to the snoop/scanout/active/inactive caches as appropriate, or free
 * it outright when caching is disabled or the bo is not reusable.
 */
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));

	assert(list_is_empty(&bo->list));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->active_scanout == 0);
	assert_tiling(kgem, bo);

	bo->binding.offset = 0;

	if (DBG_NO_CACHE)
		goto destroy;

	if (bo->prime)
		goto destroy;

	if (bo->snoop && !bo->flush) {
		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
		assert(bo->reusable);
		assert(list_is_empty(&bo->list));
		/* Demote from busy if the kernel says the bo is now idle */
		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
			__kgem_bo_clear_busy(bo);
		if (bo->rq == NULL)
			kgem_bo_move_to_snoop(kgem, bo);
		return;
	}
	if (!IS_USER_MAP(bo->map__cpu))
		bo->flush = false;

	if (bo->scanout) {
		kgem_bo_move_to_scanout(kgem, bo);
		return;
	}

	if (bo->io)
		bo = kgem_bo_replace_io(bo);
	if (!bo->reusable) {
		DBG(("%s: handle=%d, not reusable\n",
		     __FUNCTION__, bo->handle));
		goto destroy;
	}

	assert(list_is_empty(&bo->vma));
	assert(list_is_empty(&bo->list));
	assert(bo->flush == false);
	assert(bo->snoop == false);
	assert(bo->io == false);
	assert(bo->scanout == false);
	assert_cacheing(kgem, bo);

	/* Drop the pending batch if this bo was its only payload */
	kgem_bo_undo(kgem, bo);
	assert(bo->refcnt == 0);

	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
		__kgem_bo_clear_busy(bo);

	if (bo->rq) {
		struct list *cache;

		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
		if (bucket(bo) < NUM_CACHE_BUCKETS)
			cache = &kgem->active[bucket(bo)][bo->tiling];
		else
			cache = &kgem->large;
		list_add(&bo->list, cache);
		return;
	}

	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->request));

	if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
		if (!kgem_bo_set_purgeable(kgem, bo))
			goto destroy;

		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
			goto destroy;

		DBG(("%s: handle=%d, purged\n",
		     __FUNCTION__, bo->handle));
	}

	kgem_bo_move_to_inactive(kgem, bo);
	return;

destroy:
	if (!bo->exec)
		kgem_bo_free(kgem, bo);
}

/* Drop a reference; destroy the bo when the count reaches zero. */
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt);
	if (--bo->refcnt == 0)
		__kgem_bo_destroy(kgem, bo);
}

/* Tear down all cached proxy mappings hanging off an upload buffer. */
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
	assert(bo->base.io);
	while (!list_is_empty(&bo->base.vma)) {
		struct kgem_bo *cached;

		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
		assert(cached->proxy == &bo->base);
		assert(cached != &bo->base);
		list_del(&cached->vma);

		/* map__gtt of a cached proxy points back at its owner slot */
		assert(*(struct kgem_bo **)cached->map__gtt ==
		       cached);
		*(struct kgem_bo **)cached->map__gtt = NULL;
		cached->map__gtt = NULL;

		kgem_bo_destroy(kgem, cached);
	}
}

/* Release upload buffers whose GPU requests have completed, oldest last. */
void kgem_retire__buffers(struct kgem *kgem)
{
	while (!list_is_empty(&kgem->active_buffers)) {
		struct kgem_buffer *bo =
			list_last_entry(&kgem->active_buffers,
					struct kgem_buffer,
					base.list);

		DBG(("%s: handle=%d, busy? %d [%d]\n",
		     __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));

		assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
		if (bo->base.rq)
			break; /* still busy; later entries are busier */

		DBG(("%s: releasing upload cache for handle=%d? %d\n",
		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
		list_del(&bo->base.list);
		kgem_buffer_release(kgem, bo);
		kgem_bo_unref(kgem, &bo->base);
	}
}

/* Scan the flushing list for bo that the kernel reports idle and move the
 * unreferenced ones back into the caches.  Returns true if any retired.
 */
static bool kgem_retire__flushing(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	bool retired = false;

	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
		assert(RQ(bo->rq) == (void *)kgem);
		assert(bo->exec == NULL);

		if (__kgem_busy(kgem, bo->handle))
			break; /* list is in submission order; rest are busy */

		__kgem_bo_clear_busy(bo);

		if (bo->refcnt)
			continue;

		retired |= kgem_bo_move_to_cache(kgem, bo);
	}
#if HAS_DEBUG_FULL
	{
		int count = 0;
		list_for_each_entry(bo, &kgem->flushing, request)
			count++;
		DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count));
	}
#endif

	kgem->need_retire |= !list_is_empty(&kgem->flushing);

	return retired;
}

/* Process a completed request: detach each bo (demoting still-dirty ones to
 * the flushing list), then release the batch bo and the request itself.
 * Returns true if any bo made it back to the inactive cache.
 */
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
	bool retired = false;

	DBG(("%s: request %d complete\n",
	     __FUNCTION__, rq->bo->handle));
	assert(RQ(rq->bo->rq) == rq);

	if (rq == kgem->fence[rq->ring])
		kgem->fence[rq->ring] = NULL;

	while (!list_is_empty(&rq->buffers)) {
		struct kgem_bo *bo;

		bo = list_first_entry(&rq->buffers,
				      struct kgem_bo,
				      request);

		assert(RQ(bo->rq) == rq);
		assert(bo->exec == NULL);
		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);

		list_del(&bo->request);

		/* Re-query: the flush may have completed in the meantime */
		if (bo->needs_flush)
			bo->needs_flush = __kgem_busy(kgem, bo->handle);
		if (bo->needs_flush) {
			DBG(("%s: moving %d to flushing\n",
			     __FUNCTION__, bo->handle));
			list_add(&bo->request, &kgem->flushing);
			bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq));
			kgem->need_retire = true;
			continue;
		}

		bo->domain = DOMAIN_NONE;
		bo->gtt_dirty = false;
		bo->rq = NULL;
		if (bo->refcnt)
			continue;

		retired |= kgem_bo_move_to_cache(kgem, bo);
	}

	assert(rq->bo->rq == NULL);
	assert(rq->bo->exec == NULL);
	assert(list_is_empty(&rq->bo->request));
	assert(rq->bo->refcnt > 0);

	if (--rq->bo->refcnt == 0) {
		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
			kgem_bo_move_to_inactive(kgem, rq->bo);
			retired = true;
		} else {
			DBG(("%s: closing %d\n",
			     __FUNCTION__, rq->bo->handle));
			kgem_bo_free(kgem, rq->bo);
		}
	}

	__kgem_request_free(rq);
	return retired;
}

/* Retire completed requests on one ring, in submission order, stopping at
 * the first request whose batch bo is still busy.
 */
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
	bool retired = false;

	while (!list_is_empty(&kgem->requests[ring])) {
		struct kgem_request *rq;

		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		assert(rq->ring == ring);
		if (__kgem_busy(kgem, rq->bo->handle))
			break;

		retired |= __kgem_retire_rq(kgem, rq);
	}

#if HAS_DEBUG_FULL
	{
		struct kgem_bo *bo;
		int count = 0;

		list_for_each_entry(bo, &kgem->requests[ring], request)
			count++;

		bo = NULL;
		if (!list_is_empty(&kgem->requests[ring]))
			bo = list_first_entry(&kgem->requests[ring],
					      struct kgem_request,
					      list)->bo;

		DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n",
		     __FUNCTION__, ring, count, bo ? bo->handle : 0));
	}
#endif

	return retired;
}

/* Retire completed requests on every ring. */
static bool kgem_retire__requests(struct kgem *kgem)
{
	bool retired = false;
	int n;

	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		retired |= kgem_retire__requests_ring(kgem, n);
		kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
	}

	return retired;
}

/* Top-level retire: reap the flushing list and all request rings, then
 * invoke the backend retire callback.  Returns true if anything retired.
 */
bool kgem_retire(struct kgem *kgem)
{
	bool retired = false;

	DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));

	kgem->need_retire = false;

	retired |= kgem_retire__flushing(kgem);
	retired |= kgem_retire__requests(kgem);

	DBG(("%s -- retired=%d, need_retire=%d\n",
	     __FUNCTION__, retired, kgem->need_retire));

	kgem->retire(kgem);

	return retired;
}

/* Check whether a ring has gone idle, retiring everything it has completed
 * along the way; caches the most recent still-busy request in kgem->fence.
 */
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
	struct kgem_request *rq;

	assert(ring < ARRAY_SIZE(kgem->requests));
	assert(!list_is_empty(&kgem->requests[ring]));

	rq = kgem->fence[ring];
	if (rq) {
		struct kgem_request *tmp;

		if (__kgem_busy(kgem, rq->bo->handle)) {
			DBG(("%s: last fence handle=%d still busy\n",
			     __FUNCTION__, rq->bo->handle));
			return false;
		}

		/* The fence is idle, so everything up to it is too */
		do {
			tmp = list_first_entry(&kgem->requests[ring],
					       struct kgem_request,
					       list);
			assert(tmp->ring == ring);
			__kgem_retire_rq(kgem, tmp);
		} while (tmp != rq);

		assert(kgem->fence[ring] == NULL);
		if (list_is_empty(&kgem->requests[ring]))
			return true;
	}

	rq = list_last_entry(&kgem->requests[ring],
			     struct kgem_request, list);
	assert(rq->ring == ring);
	if (__kgem_busy(kgem, rq->bo->handle)) {
		DBG(("%s: last requests handle=%d still busy\n",
		     __FUNCTION__,
		     rq->bo->handle));
		kgem->fence[ring] = rq;
		return false;
	}

	DBG(("%s: ring=%d idle (handle=%d)\n",
	     __FUNCTION__, ring, rq->bo->handle));

	/* Newest request is idle, so the whole ring can be retired */
	while (!list_is_empty(&kgem->requests[ring])) {
		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		assert(rq->ring == ring);
		__kgem_retire_rq(kgem, rq);
	}

	return true;
}

/* Retire every request on the bo's ring up to and including the request the
 * bo belongs to (caller knows that request has completed).
 */
void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_request *rq = bo->rq, *tmp;
	struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT];

	rq = RQ(rq);
	assert(rq != &kgem->static_request);
	if (rq == (struct kgem_request *)kgem) {
		/* bo is on the flushing list, not attached to a request */
		__kgem_bo_clear_busy(bo);
		return;
	}

	do {
		tmp = list_first_entry(requests, struct kgem_request, list);
		assert(tmp->ring == rq->ring);
		__kgem_retire_rq(kgem, tmp);
	} while (tmp != rq);
}

#if 0
/* Debug-only: read back each relocation from the submitted batch and check
 * it matches the offset the kernel reported for the target bo.
 */
static void kgem_commit__check_reloc(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo;
	bool has_64bit = kgem->gen >= 0100;
	int i;

	for (i = 0; i < kgem->nreloc; i++) {
		list_for_each_entry(bo, &rq->buffers, request) {
			if (bo->target_handle == kgem->reloc[i].target_handle) {
				uint64_t value = 0;
				gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 8 : 4);
				assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta);
				break;
			}
		}
	}
}
#else
#define kgem_commit__check_reloc(kgem)
#endif

#ifndef NDEBUG
/* Debug-only: no active upload buffer should still be on an exec list. */
static void kgem_commit__check_buffers(struct kgem *kgem)
{
	struct kgem_buffer *bo;

	list_for_each_entry(bo, &kgem->active_buffers, base.list)
		assert(bo->base.exec == NULL);
}
#else
#define kgem_commit__check_buffers(kgem)
#endif

/* After a successful execbuffer, transfer bookkeeping from the exec list to
 * the request: record presumed offsets, mark bo as GPU-owned, and queue the
 * request for retirement (or synchronously drain the static request used on
 * allocation failure).
 */
static void kgem_commit(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo, *next;

	kgem_commit__check_reloc(kgem);

	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
		assert(next->request.prev == &bo->request);

		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
		     __FUNCTION__, bo->handle, bo->proxy != NULL,
		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
		     (unsigned)bo->exec->offset));

		assert(bo->exec);
		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));

		bo->presumed_offset = bo->exec->offset;
		bo->exec = NULL;
		bo->target_handle = -1;

		if (!bo->refcnt && !bo->reusable) {
			assert(!bo->snoop);
			assert(!bo->proxy);
			kgem_bo_free(kgem, bo);
			continue;
		}

		bo->binding.offset = 0;
		bo->domain = DOMAIN_GPU;
		bo->gpu_dirty = false;

		if (bo->proxy) {
			/* proxies are not used for domain tracking */
			__kgem_bo_clear_busy(bo);
		}

		kgem->scanout_busy |= bo->scanout && bo->needs_flush;
	}

	if (rq == &kgem->static_request) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));

		/* SET_DOMAIN(GTT) blocks until the batch has executed */
		VG_CLEAR(set_domain);
		set_domain.handle = rq->bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}

		kgem_retire(kgem);
		assert(list_is_empty(&rq->buffers));

		assert(rq->bo->map__gtt == NULL);
		assert(rq->bo->map__wc == NULL);
		assert(rq->bo->map__cpu == NULL);
		gem_close(kgem->fd, rq->bo->handle);
		kgem_cleanup_cache(kgem);
	} else {
		assert(rq->ring < ARRAY_SIZE(kgem->requests));
		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
		kgem->need_throttle = kgem->need_retire = 1;

		if (kgem->fence[rq->ring] == NULL &&
		    __kgem_busy(kgem, rq->bo->handle))
			kgem->fence[rq->ring] = rq;
	}

	kgem->next_request = NULL;

	kgem_commit__check_buffers(kgem);
}

/* Free every bo on the given cache list. */
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
	while (!list_is_empty(head))
		kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
}

/* Free all buckets of the inactive cache. */
static void kgem_close_inactive(struct kgem *kgem)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
		kgem_close_list(kgem, &kgem->inactive[i]);
}

/* Flush the per-batch upload buffers before submission: retain reusable
 * mmapped buffers, shrink oversized one-shot buffers into snoop/linear
 * cache bo where profitable, and upload any pending CPU data.
 */
static void kgem_finish_buffers(struct kgem *kgem)
{
	struct kgem_buffer *bo, *next;

	list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
		     __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ?
"gtt" : "no", 3004 bo->base.refcnt)); 3005 3006 assert(next->base.list.prev == &bo->base.list); 3007 assert(bo->base.io); 3008 assert(bo->base.refcnt >= 1); 3009 3010 if (bo->base.refcnt > 1 && !bo->base.exec) { 3011 DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n", 3012 __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt)); 3013 continue; 3014 } 3015 3016 if (!bo->write) { 3017 assert(bo->base.exec || bo->base.refcnt > 1); 3018 goto decouple; 3019 } 3020 3021 if (bo->mmapped) { 3022 uint32_t used; 3023 3024 assert(!bo->need_io); 3025 3026 used = ALIGN(bo->used, PAGE_SIZE); 3027 if (!DBG_NO_UPLOAD_ACTIVE && 3028 used + PAGE_SIZE <= bytes(&bo->base) && 3029 (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) { 3030 DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n", 3031 __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt)); 3032 bo->used = used; 3033 list_move(&bo->base.list, 3034 &kgem->active_buffers); 3035 kgem->need_retire = true; 3036 continue; 3037 } 3038 DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n", 3039 __FUNCTION__, bo->used, bo->mmapped)); 3040 goto decouple; 3041 } 3042 3043 if (!bo->used || !bo->base.exec) { 3044 /* Unless we replace the handle in the execbuffer, 3045 * then this bo will become active. So decouple it 3046 * from the buffer list and track it in the normal 3047 * manner. 
3048 */ 3049 goto decouple; 3050 } 3051 3052 assert(bo->need_io); 3053 assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); 3054 assert(bo->base.domain != DOMAIN_GPU); 3055 3056 if (bo->base.refcnt == 1 && 3057 bo->base.size.pages.count > 1 && 3058 bo->used < bytes(&bo->base) / 2) { 3059 struct kgem_bo *shrink; 3060 unsigned alloc = NUM_PAGES(bo->used); 3061 3062 shrink = search_snoop_cache(kgem, alloc, 3063 CREATE_INACTIVE | CREATE_NO_RETIRE); 3064 if (shrink) { 3065 void *map; 3066 int n; 3067 3068 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", 3069 __FUNCTION__, 3070 bo->used, bytes(&bo->base), bytes(shrink), 3071 bo->base.handle, shrink->handle)); 3072 3073 assert(bo->used <= bytes(shrink)); 3074 map = kgem_bo_map__cpu(kgem, shrink); 3075 if (map) { 3076 kgem_bo_sync__cpu(kgem, shrink); 3077 memcpy(map, bo->mem, bo->used); 3078 3079 shrink->target_handle = 3080 kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; 3081 for (n = 0; n < kgem->nreloc; n++) { 3082 if (kgem->reloc[n].target_handle == bo->base.target_handle) { 3083 kgem->reloc[n].target_handle = shrink->target_handle; 3084 kgem->reloc[n].presumed_offset = shrink->presumed_offset; 3085 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = 3086 kgem->reloc[n].delta + shrink->presumed_offset; 3087 } 3088 } 3089 3090 bo->base.exec->handle = shrink->handle; 3091 bo->base.exec->offset = shrink->presumed_offset; 3092 shrink->exec = bo->base.exec; 3093 shrink->rq = bo->base.rq; 3094 list_replace(&bo->base.request, 3095 &shrink->request); 3096 list_init(&bo->base.request); 3097 shrink->needs_flush = bo->base.gpu_dirty; 3098 3099 bo->base.exec = NULL; 3100 bo->base.rq = NULL; 3101 bo->base.gpu_dirty = false; 3102 bo->base.needs_flush = false; 3103 bo->used = 0; 3104 3105 goto decouple; 3106 } 3107 3108 __kgem_bo_destroy(kgem, shrink); 3109 } 3110 3111 shrink = search_linear_cache(kgem, alloc, 3112 CREATE_INACTIVE | CREATE_NO_RETIRE); 3113 if (shrink) { 3114 int n; 
3115 3116 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", 3117 __FUNCTION__, 3118 bo->used, bytes(&bo->base), bytes(shrink), 3119 bo->base.handle, shrink->handle)); 3120 3121 assert(bo->used <= bytes(shrink)); 3122 if (gem_write__cachealigned(kgem->fd, shrink->handle, 3123 0, bo->used, bo->mem) == 0) { 3124 shrink->target_handle = 3125 kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; 3126 for (n = 0; n < kgem->nreloc; n++) { 3127 if (kgem->reloc[n].target_handle == bo->base.target_handle) { 3128 kgem->reloc[n].target_handle = shrink->target_handle; 3129 kgem->reloc[n].presumed_offset = shrink->presumed_offset; 3130 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = 3131 kgem->reloc[n].delta + shrink->presumed_offset; 3132 } 3133 } 3134 3135 bo->base.exec->handle = shrink->handle; 3136 bo->base.exec->offset = shrink->presumed_offset; 3137 shrink->exec = bo->base.exec; 3138 shrink->rq = bo->base.rq; 3139 list_replace(&bo->base.request, 3140 &shrink->request); 3141 list_init(&bo->base.request); 3142 shrink->needs_flush = bo->base.gpu_dirty; 3143 3144 bo->base.exec = NULL; 3145 bo->base.rq = NULL; 3146 bo->base.gpu_dirty = false; 3147 bo->base.needs_flush = false; 3148 bo->used = 0; 3149 3150 goto decouple; 3151 } 3152 3153 __kgem_bo_destroy(kgem, shrink); 3154 } 3155 } 3156 3157 DBG(("%s: handle=%d, uploading %d/%d\n", 3158 __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); 3159 ASSERT_IDLE(kgem, bo->base.handle); 3160 assert(bo->used <= bytes(&bo->base)); 3161 gem_write__cachealigned(kgem->fd, bo->base.handle, 3162 0, bo->used, bo->mem); 3163 bo->need_io = 0; 3164 3165decouple: 3166 DBG(("%s: releasing handle=%d\n", 3167 __FUNCTION__, bo->base.handle)); 3168 list_del(&bo->base.list); 3169 kgem_bo_unref(kgem, &bo->base); 3170 } 3171} 3172 3173static void kgem_cleanup(struct kgem *kgem) 3174{ 3175 int n; 3176 3177 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { 3178 while (!list_is_empty(&kgem->requests[n])) { 3179 struct 
kgem_request *rq; 3180 3181 rq = list_first_entry(&kgem->requests[n], 3182 struct kgem_request, 3183 list); 3184 assert(rq->ring == n); 3185 while (!list_is_empty(&rq->buffers)) { 3186 struct kgem_bo *bo; 3187 3188 bo = list_first_entry(&rq->buffers, 3189 struct kgem_bo, 3190 request); 3191 3192 bo->exec = NULL; 3193 bo->gpu_dirty = false; 3194 __kgem_bo_clear_busy(bo); 3195 if (bo->refcnt == 0) 3196 kgem_bo_free(kgem, bo); 3197 } 3198 3199 __kgem_request_free(rq); 3200 } 3201 } 3202 3203 kgem_close_inactive(kgem); 3204} 3205 3206static int 3207kgem_batch_write(struct kgem *kgem, 3208 struct kgem_bo *bo, 3209 uint32_t size) 3210{ 3211 char *ptr; 3212 int ret; 3213 3214 ASSERT_IDLE(kgem, bo->handle); 3215 3216#if DBG_NO_EXEC 3217 { 3218 uint32_t batch[] = { MI_BATCH_BUFFER_END, 0}; 3219 return gem_write(kgem->fd, bo->handle, 0, sizeof(batch), batch); 3220 } 3221#endif 3222 3223 assert(!bo->scanout); 3224retry: 3225 ptr = NULL; 3226 if (bo->domain == DOMAIN_CPU || kgem->has_llc) { 3227 ptr = bo->map__cpu; 3228 if (ptr == NULL) 3229 ptr = __kgem_bo_map__cpu(kgem, bo); 3230 } else if (kgem->has_wc_mmap) { 3231 ptr = bo->map__wc; 3232 if (ptr == NULL) 3233 ptr = __kgem_bo_map__wc(kgem, bo); 3234 } 3235 if (ptr) { 3236 memcpy(ptr, kgem->batch, sizeof(uint32_t)*kgem->nbatch); 3237 if (kgem->surface != kgem->batch_size) { 3238 ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size); 3239 ret -= sizeof(uint32_t) * kgem->surface; 3240 ptr += size - ret; 3241 memcpy(ptr, kgem->batch + kgem->surface, 3242 (kgem->batch_size - kgem->surface)*sizeof(uint32_t)); 3243 } 3244 return 0; 3245 } 3246 3247 /* If there is no surface data, just upload the batch */ 3248 if (kgem->surface == kgem->batch_size) { 3249 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle, 3250 0, sizeof(uint32_t)*kgem->nbatch, 3251 kgem->batch)) == 0) 3252 return 0; 3253 3254 goto expire; 3255 } 3256 3257 /* Are the batch pages conjoint with the surface pages? 
*/ 3258 if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) { 3259 assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t))); 3260 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle, 3261 0, kgem->batch_size*sizeof(uint32_t), 3262 kgem->batch)) == 0) 3263 return 0; 3264 3265 goto expire; 3266 } 3267 3268 /* Disjoint surface/batch, upload separately */ 3269 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle, 3270 0, sizeof(uint32_t)*kgem->nbatch, 3271 kgem->batch))) 3272 goto expire; 3273 3274 ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size); 3275 ret -= sizeof(uint32_t) * kgem->surface; 3276 assert(size-ret >= kgem->nbatch*sizeof(uint32_t)); 3277 if (gem_write(kgem->fd, bo->handle, 3278 size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t), 3279 kgem->batch + kgem->surface)) 3280 goto expire; 3281 3282 return 0; 3283 3284expire: 3285 assert(ret != EINVAL); 3286 3287 (void)__kgem_throttle_retire(kgem, 0); 3288 if (kgem_expire_cache(kgem)) 3289 goto retry; 3290 3291 if (kgem_cleanup_cache(kgem)) 3292 goto retry; 3293 3294 ERR(("%s: failed to write batch (handle=%d): %d\n", 3295 __FUNCTION__, bo->handle, -ret)); 3296 return ret; 3297} 3298 3299void kgem_reset(struct kgem *kgem) 3300{ 3301 if (kgem->next_request) { 3302 struct kgem_request *rq = kgem->next_request; 3303 3304 while (!list_is_empty(&rq->buffers)) { 3305 struct kgem_bo *bo = 3306 list_first_entry(&rq->buffers, 3307 struct kgem_bo, 3308 request); 3309 list_del(&bo->request); 3310 3311 assert(RQ(bo->rq) == rq); 3312 3313 bo->binding.offset = 0; 3314 bo->exec = NULL; 3315 bo->target_handle = -1; 3316 bo->gpu_dirty = false; 3317 3318 if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) { 3319 assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); 3320 list_add(&bo->request, &kgem->flushing); 3321 bo->rq = (void *)kgem; 3322 kgem->need_retire = true; 3323 } else 3324 __kgem_bo_clear_busy(bo); 3325 3326 if (bo->refcnt || bo->rq) 3327 continue; 3328 3329 
kgem_bo_move_to_cache(kgem, bo); 3330 } 3331 3332 if (rq != &kgem->static_request) { 3333 list_init(&rq->list); 3334 __kgem_request_free(rq); 3335 } 3336 } 3337 3338 kgem->nfence = 0; 3339 kgem->nexec = 0; 3340 kgem->nreloc = 0; 3341 kgem->nreloc__self = 0; 3342 kgem->aperture = 0; 3343 kgem->aperture_fenced = 0; 3344 kgem->aperture_max_fence = 0; 3345 kgem->nbatch = 0; 3346 kgem->surface = kgem->batch_size; 3347 kgem->mode = KGEM_NONE; 3348 kgem->needs_semaphore = false; 3349 kgem->needs_reservation = false; 3350 kgem->flush = 0; 3351 kgem->batch_flags = kgem->batch_flags_base; 3352 assert(kgem->batch); 3353 3354 kgem->next_request = __kgem_request_alloc(kgem); 3355 3356 kgem_sna_reset(kgem); 3357} 3358 3359static int compact_batch_surface(struct kgem *kgem, int *shrink) 3360{ 3361 int size, n; 3362 3363 if (!kgem->has_relaxed_delta) 3364 return kgem->batch_size * sizeof(uint32_t); 3365 3366 /* See if we can pack the contents into one or two pages */ 3367 n = ALIGN(kgem->batch_size, 1024); 3368 size = n - kgem->surface + kgem->nbatch; 3369 size = ALIGN(size, 1024); 3370 3371 *shrink = (n - size) * sizeof(uint32_t); 3372 return size * sizeof(uint32_t); 3373} 3374 3375static struct kgem_bo * 3376kgem_create_batch(struct kgem *kgem) 3377{ 3378#if !DBG_NO_SHRINK_BATCHES 3379 struct drm_i915_gem_set_domain set_domain; 3380 struct kgem_bo *bo; 3381 int shrink = 0; 3382 int size; 3383 3384 if (kgem->surface != kgem->batch_size) 3385 size = compact_batch_surface(kgem, &shrink); 3386 else 3387 size = kgem->nbatch * sizeof(uint32_t); 3388 3389 if (size <= 4096) { 3390 bo = list_first_entry(&kgem->pinned_batches[0], 3391 struct kgem_bo, 3392 list); 3393 if (!bo->rq) { 3394out_4096: 3395 assert(bo->refcnt > 0); 3396 list_move_tail(&bo->list, &kgem->pinned_batches[0]); 3397 bo = kgem_bo_reference(bo); 3398 goto write; 3399 } 3400 3401 if (!__kgem_busy(kgem, bo->handle)) { 3402 assert(RQ(bo->rq)->bo == bo); 3403 __kgem_retire_rq(kgem, RQ(bo->rq)); 3404 goto out_4096; 3405 } 
3406 } 3407 3408 if (size <= 16384) { 3409 bo = list_first_entry(&kgem->pinned_batches[1], 3410 struct kgem_bo, 3411 list); 3412 if (!bo->rq) { 3413out_16384: 3414 assert(bo->refcnt > 0); 3415 list_move_tail(&bo->list, &kgem->pinned_batches[1]); 3416 bo = kgem_bo_reference(bo); 3417 goto write; 3418 } 3419 3420 if (!__kgem_busy(kgem, bo->handle)) { 3421 __kgem_retire_rq(kgem, RQ(bo->rq)); 3422 goto out_16384; 3423 } 3424 } 3425 3426 if (kgem->gen == 020) { 3427 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY); 3428 if (bo) 3429 goto write; 3430 3431 /* Nothing available for reuse, rely on the kernel wa */ 3432 if (kgem->has_pinned_batches) { 3433 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY); 3434 if (bo) { 3435 kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED; 3436 goto write; 3437 } 3438 } 3439 3440 if (size < 16384) { 3441 bo = list_first_entry(&kgem->pinned_batches[size > 4096], 3442 struct kgem_bo, 3443 list); 3444 list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); 3445 3446 DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); 3447 3448 VG_CLEAR(set_domain); 3449 set_domain.handle = bo->handle; 3450 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 3451 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 3452 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { 3453 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); 3454 kgem_throttle(kgem); 3455 return NULL; 3456 } 3457 3458 kgem_retire(kgem); 3459 assert(bo->rq == NULL); 3460 bo = kgem_bo_reference(bo); 3461 goto write; 3462 } 3463 } 3464 3465 bo = NULL; 3466 if (!kgem->has_llc) { 3467 bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); 3468 if (bo) { 3469write: 3470 kgem_fixup_relocs(kgem, bo, shrink); 3471 if (kgem_batch_write(kgem, bo, size)) { 3472 kgem_bo_destroy(kgem, bo); 3473 return NULL; 3474 } 3475 } 3476 } 3477 if (bo == NULL) 3478 bo = kgem_new_batch(kgem); 3479 return bo; 3480#else 3481 return kgem_new_batch(kgem); 
3482#endif 3483} 3484 3485#if !NDEBUG 3486static bool dump_file(const char *path) 3487{ 3488 FILE *file; 3489 size_t len = 0; 3490 char *line = NULL; 3491 3492 file = fopen(path, "r"); 3493 if (file == NULL) 3494 return false; 3495 3496 while (getline(&line, &len, file) != -1) 3497 ErrorF("%s", line); 3498 3499 free(line); 3500 fclose(file); 3501 return true; 3502} 3503 3504static void dump_debugfs(struct kgem *kgem, const char *name) 3505{ 3506 char path[80]; 3507 int minor = kgem_get_minor(kgem); 3508 3509 if (minor < 0) 3510 return; 3511 3512 sprintf(path, "/sys/kernel/debug/dri/%d/%s", minor, name); 3513 if (dump_file(path)) 3514 return; 3515 3516 sprintf(path, "/debug/dri/%d/%s", minor, name); 3517 if (dump_file(path)) 3518 return; 3519} 3520 3521static void dump_gtt_info(struct kgem *kgem) 3522{ 3523 dump_debugfs(kgem, "i915_gem_gtt"); 3524} 3525 3526static void dump_fence_regs(struct kgem *kgem) 3527{ 3528 dump_debugfs(kgem, "i915_gem_fence_regs"); 3529} 3530#endif 3531 3532static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf) 3533{ 3534 int ret, err; 3535 3536retry: 3537 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); 3538 if (ret == 0) 3539 return 0; 3540 3541 DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret)); 3542 (void)__kgem_throttle_retire(kgem, 0); 3543 if (kgem_expire_cache(kgem)) 3544 goto retry; 3545 3546 if (kgem_cleanup_cache(kgem)) 3547 goto retry; 3548 3549 /* last gasp */ 3550 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); 3551 if (ret == 0) 3552 return 0; 3553 3554 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, 3555 "Failed to submit rendering commands, trying again with outputs disabled.\n"); 3556 3557 /* One last trick up our sleeve for when we run out of space. 3558 * We turn everything off to free up our pinned framebuffers, 3559 * sprites and cursors, and try one last time. 
3560 */ 3561 err = errno; 3562 if (sna_mode_disable(container_of(kgem, struct sna, kgem))) { 3563 kgem_cleanup_cache(kgem); 3564 ret = do_ioctl(kgem->fd, 3565 DRM_IOCTL_I915_GEM_EXECBUFFER2, 3566 execbuf); 3567 DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret)); 3568 sna_mode_enable(container_of(kgem, struct sna, kgem)); 3569 } 3570 errno = err; 3571 3572 return ret; 3573} 3574 3575void _kgem_submit(struct kgem *kgem) 3576{ 3577 struct kgem_request *rq; 3578 uint32_t batch_end; 3579 3580 assert(!DBG_NO_HW); 3581 assert(!kgem->wedged); 3582 3583 assert(kgem->nbatch); 3584 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); 3585 assert(kgem->nbatch <= kgem->surface); 3586 3587 batch_end = kgem_end_batch(kgem); 3588 kgem_sna_flush(kgem); 3589 3590 DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n", 3591 kgem->mode, kgem->ring, kgem->batch_flags, 3592 batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, 3593 kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced)); 3594 3595 assert(kgem->nbatch <= kgem->batch_size); 3596 assert(kgem->nbatch <= kgem->surface); 3597 assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc)); 3598 assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); 3599 assert(kgem->nfence <= kgem->fence_max); 3600 3601 kgem_finish_buffers(kgem); 3602 3603#if SHOW_BATCH_BEFORE 3604 __kgem_batch_debug(kgem, batch_end); 3605#endif 3606 3607 rq = kgem->next_request; 3608 assert(rq->bo == NULL); 3609 3610 rq->bo = kgem_create_batch(kgem); 3611 if (rq->bo) { 3612 struct drm_i915_gem_execbuffer2 execbuf; 3613 int i, ret; 3614 3615 assert(!rq->bo->needs_flush); 3616 3617 i = kgem->nexec++; 3618 kgem->exec[i].handle = rq->bo->handle; 3619 kgem->exec[i].relocation_count = kgem->nreloc; 3620 kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; 3621 kgem->exec[i].alignment = 0; 3622 kgem->exec[i].offset = rq->bo->presumed_offset; 3623 kgem->exec[i].flags = 0; 3624 kgem->exec[i].rsvd1 = 0; 3625 
		kgem->exec[i].rsvd2 = 0;

		rq->bo->exec = &kgem->exec[i];
		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
		list_add(&rq->bo->request, &rq->buffers);
		rq->ring = kgem->ring == KGEM_BLT;

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)kgem->exec;
		execbuf.buffer_count = kgem->nexec;
		execbuf.batch_len = batch_end*sizeof(uint32_t);
		execbuf.flags = kgem->ring | kgem->batch_flags;

		/* Optionally append the raw batch to a dump file for
		 * offline inspection (compile-time debug aid). */
		if (DBG_DUMP) {
			int fd = open("/tmp/i915-batchbuffers.dump",
				      O_WRONLY | O_CREAT | O_APPEND,
				      0666);
			if (fd != -1) {
				ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
				fd = close(fd);
			}
		}

		ret = do_execbuf(kgem, &execbuf);
		if (DEBUG_SYNC && ret == 0) {
			struct drm_i915_gem_set_domain set_domain;

			/* Synchronous debugging: stall until the batch
			 * completes so errors surface immediately. */
			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;

			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
		}
		if (ret < 0) {
			kgem_throttle(kgem);
			if (!kgem->wedged) {
				xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
					   "Failed to submit rendering commands, disabling acceleration.\n");
				__kgem_set_wedged(kgem);
			}

#if !NDEBUG
			/* Post-mortem: dump the exec list, relocations and
			 * aperture state to the log for bug reports. */
			ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
			       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
			       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);

			for (i = 0; i < kgem->nexec; i++) {
				struct kgem_bo *bo, *found = NULL;

				list_for_each_entry(bo, &kgem->next_request->buffers, request) {
					if (bo->handle == kgem->exec[i].handle) {
						found = bo;
						break;
					}
				}
				ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
				       i,
				       kgem->exec[i].handle,
				       (int)kgem->exec[i].offset,
				       found ? kgem_bo_size(found) : -1,
				       found ? found->tiling : -1,
				       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
				       found ? found->snoop : -1,
				       found ? found->purged : -1);
			}
			for (i = 0; i < kgem->nreloc; i++) {
				ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
				       i,
				       (int)kgem->reloc[i].offset,
				       kgem->reloc[i].target_handle,
				       kgem->reloc[i].delta,
				       kgem->reloc[i].read_domains,
				       kgem->reloc[i].write_domain,
				       (int)kgem->reloc[i].presumed_offset);
			}

			{
				struct drm_i915_gem_get_aperture aperture;
				if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
					ErrorF("Aperture size %lld, available %lld\n",
					       (long long)aperture.aper_size,
					       (long long)aperture.aper_available_size);
			}

			if (ret == -ENOSPC)
				dump_gtt_info(kgem);
			if (ret == -EDEADLK)
				dump_fence_regs(kgem);

			if (DEBUG_SYNC) {
				int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
				if (fd != -1) {
					int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
					assert(ignored == batch_end*sizeof(uint32_t));
					close(fd);
				}

				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
			}
#endif
		}
	}
#if SHOW_BATCH_AFTER
	if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
		__kgem_batch_debug(kgem, batch_end);
#endif
	kgem_commit(kgem);
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem_reset(kgem);

	assert(kgem->next_request != NULL);
}

/* Locate the kernel's GPU hang/error report for this device, writing
 * its filename into path (emptied on failure). Returns true if a
 * readable report was found. */
static bool find_hang_state(struct kgem *kgem, char *path, int maxlen)
{
	int minor = kgem_get_minor(kgem);

	/* Search for our hang state in a few canonical locations.
	 * In the unlikely event of having multiple devices, we
	 * will need to check which minor actually corresponds to ours.
	 */

	snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor);
	if (access(path, R_OK) == 0)
		return true;

	snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor);
	if (access(path, R_OK) == 0)
		return true;

	snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor);
	if (access(path, R_OK) == 0)
		return true;

	path[0] = '\0';
	return false;
}

/* Throttle the GPU; if a hang is detected, log where the error state
 * can be found (once) and permanently mark the device as wedged. */
void kgem_throttle(struct kgem *kgem)
{
	if (kgem->wedged)
		return;

	if (__kgem_throttle(kgem, true)) {
		static int once;
		char path[128];

		xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
			   "Detected a hung GPU, disabling acceleration.\n");
		if (!once && find_hang_state(kgem, path, sizeof(path))) {
			xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
				   "When reporting this, please include %s and the full dmesg.\n",
				   path);
			once = 1;
		}

		__kgem_set_wedged(kgem);
		kgem->need_throttle = false;
	}
}

/* Query the kernel (via the throttle ioctl) whether the GPU is hung. */
int kgem_is_wedged(struct kgem *kgem)
{
	return __kgem_throttle(kgem, true);
}

/* Free every inactive bo whose purgeable backing pages the kernel has
 * reclaimed, then clear the pending-purge flag. */
static void kgem_purge_cache(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	int i;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
			if (!kgem_bo_is_retained(kgem, bo)) {
				DBG(("%s: purging %d\n",
				     __FUNCTION__, bo->handle));
				kgem_bo_free(kgem, bo);
			}
		}
	}

	kgem->need_purge = false;
}

/* Release cached scanout bos that are no longer busy: detach their
 * framebuffers and hand them back to the ordinary bo caches. */
void kgem_clean_scanout_cache(struct kgem *kgem)
{
	while (!list_is_empty(&kgem->scanout)) {
		struct kgem_bo *bo;

		bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);

		assert(bo->scanout);
		assert(!bo->refcnt);
		assert(!bo->prime);
		assert(bo->proxy == NULL);

		/* List is in LRU-ish order: stop at the first busy bo. */
		if (bo->exec || __kgem_busy(kgem, bo->handle))
			break;

		DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
		     __FUNCTION__, bo->handle, bo->delta, bo->reusable));
		list_del(&bo->list);

		kgem_bo_rmfb(kgem, bo);
		bo->scanout = false;

		if (!bo->purged) {
			bo->reusable = true;
			if (kgem->has_llc &&
			    !gem_set_caching(kgem->fd, bo->handle, SNOOPED))
				bo->reusable = false;

		}

		__kgem_bo_destroy(kgem, bo);
	}
}

/* Immediately free everything held in the large-inactive bo cache. */
void kgem_clean_large_cache(struct kgem *kgem)
{
	while (!list_is_empty(&kgem->large_inactive)) {
		kgem_bo_free(kgem,
			     list_first_entry(&kgem->large_inactive,
					      struct kgem_bo, list));

	}
}

/* Periodic housekeeping: free deferred bo/request allocations and
 * evict cache entries that have been idle past their deadline.
 * Returns nonzero if anything was expired. */
bool kgem_expire_cache(struct kgem *kgem)
{
	time_t now, expire;
	struct kgem_bo *bo;
	unsigned int size = 0, count = 0;
	bool idle;
	unsigned int i;

	time(&now);

	/* Drain the deferred-free lists (bos/requests released from
	 * contexts where free() was not safe). */
	while (__kgem_freed_bo) {
		bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	while (__kgem_freed_request) {
		struct kgem_request *rq = __kgem_freed_request;
		__kgem_freed_request = *(struct kgem_request **)rq;
		free(rq);
	}

	kgem_clean_large_cache(kgem);
	if (container_of(kgem, struct sna, kgem)->scrn->vtSema)
		kgem_clean_scanout_cache(kgem);

	/* First pass: timestamp fresh snoop entries; if any entry was
	 * already stamped, compute an expiry deadline. */
	expire = 0;
	list_for_each_entry(bo, &kgem->snoop, list) {
		if (bo->delta) {
			expire = now - MAX_INACTIVE_TIME/2;
			break;
		}

		bo->delta = now;
	}
	if (expire) {
		while (!list_is_empty(&kgem->snoop)) {
			bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);

			if (bo->delta > expire)
				break;

			kgem_bo_free(kgem, bo);
		}
	}
#ifdef DEBUG_MEMORY
	{
		long snoop_size = 0;
		int snoop_count = 0;
		list_for_each_entry(bo, &kgem->snoop, list)
			snoop_count++, snoop_size += bytes(bo);
		DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
		     __FUNCTION__, snoop_count, snoop_size));
	}
#endif

	kgem_retire(kgem);
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem->expire(kgem);

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	if (kgem->need_retire)
		kgem_retire(kgem);

	/* Timestamp fresh inactive entries; an already-stamped entry
	 * means a previous pass ran, so compute the expiry deadline. */
	expire = 0;
	idle = true;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		idle &= list_is_empty(&kgem->inactive[i]);
		list_for_each_entry(bo, &kgem->inactive[i], list) {
			if (bo->delta) {
				expire = now - MAX_INACTIVE_TIME;
				break;
			}

			bo->delta = now;
		}
	}
	if (expire == 0) {
		DBG(("%s: idle? %d\n", __FUNCTION__, idle));
		kgem->need_expire = !idle;
		return false;
	}

	/* Evict expired bos, but keep CPU-mapped ones a little longer
	 * (MAP_PRESERVE_TIME) since remapping is expensive. */
	idle = true;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		struct list preserve;

		list_init(&preserve);
		while (!list_is_empty(&kgem->inactive[i])) {
			bo = list_last_entry(&kgem->inactive[i],
					     struct kgem_bo, list);

			if (bo->delta > expire) {
				idle = false;
				break;
			}

			if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
				idle = false;
				list_move_tail(&bo->list, &preserve);
			} else {
				count++;
				size += bytes(bo);
				kgem_bo_free(kgem, bo);
				DBG(("%s: expiring %d\n",
				     __FUNCTION__, bo->handle));
			}
		}
		/* Splice the preserved bos back onto the front of the
		 * inactive list (manual list concatenation). */
		if (!list_is_empty(&preserve)) {
			preserve.prev->next = kgem->inactive[i].next;
			kgem->inactive[i].next->prev = preserve.prev;
			kgem->inactive[i].next = preserve.next;
			preserve.next->prev = &kgem->inactive[i];
		}
	}

#ifdef DEBUG_MEMORY
	{
		long inactive_size = 0;
		int inactive_count = 0;
		for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
			list_for_each_entry(bo, &kgem->inactive[i], list)
				inactive_count++, inactive_size += bytes(bo);
		DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
		     __FUNCTION__, inactive_count, inactive_size));
	}
#endif

	DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
	     __FUNCTION__, count, size, idle));

	kgem->need_expire = !idle;
	/* count != 0 converts to true when anything was expired; the
	 * trailing (void) casts are unreachable and appear to exist
	 * only to silence unused warnings in non-DBG builds — confirm. */
	return count;
	(void)count;
	(void)size;
}

/* Aggressive cache teardown: synchronously wait for the most recent
 * request on each ring, then free every cached bo. Returns true if a
 * cleanup was actually performed. */
bool kgem_cleanup_cache(struct kgem *kgem)
{
	unsigned int i;
	int n;

	/* sync to the most recent request */
	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		if (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;
			struct drm_i915_gem_set_domain set_domain;

			rq = list_first_entry(&kgem->requests[n],
					      struct kgem_request,
					      list);

			DBG(("%s: sync on cleanup\n", __FUNCTION__));

			/* set-domain blocks until the request's batch
			 * completes; errors are intentionally ignored. */
			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
			(void)do_ioctl(kgem->fd,
				       DRM_IOCTL_I915_GEM_SET_DOMAIN,
				       &set_domain);
		}
	}

	kgem_retire(kgem);
	kgem_cleanup(kgem);

	if (!kgem->need_expire)
		return false;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		while (!list_is_empty(&kgem->inactive[i]))
			kgem_bo_free(kgem,
				     list_last_entry(&kgem->inactive[i],
						     struct kgem_bo, list));
	}

	kgem_clean_large_cache(kgem);
	kgem_clean_scanout_cache(kgem);

	while (!list_is_empty(&kgem->snoop))
		kgem_bo_free(kgem,
			     list_last_entry(&kgem->snoop,
					     struct kgem_bo, list));

	while (__kgem_freed_bo) {
		struct kgem_bo *bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	kgem->need_purge = false;
	kgem->need_expire = false;
	return true;
}

/* Search the linear (untiled) bo caches for a buffer of at least
 * num_pages, preferring an exact fit and falling back to a near-miss
 * ("first"). CREATE_INACTIVE restricts the search to idle buffers. */
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;
	bool use_active = (flags &
CREATE_INACTIVE) == 0;
	struct list *cache;

	DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
	     __FUNCTION__, num_pages, flags, use_active,
	     num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
	     MAX_CACHE_SIZE / PAGE_SIZE));

	assert(num_pages);

	/* Oversized requests go through the dedicated large-bo caches. */
	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
		DBG(("%s: searching large buffers\n", __FUNCTION__));
retry_large:
		cache = use_active ? &kgem->large : &kgem->large_inactive;
		list_for_each_entry_safe(bo, first, cache, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo))
				goto discard;

			if (bo->tiling != I915_TILING_NONE) {
				if (use_active)
					goto discard;

				if (!gem_set_tiling(kgem->fd, bo->handle,
						    I915_TILING_NONE, 0))
					goto discard;

				bo->tiling = I915_TILING_NONE;
				bo->pitch = 0;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
				goto discard;

			list_del(&bo->list);
			if (RQ(bo->rq) == (void *)kgem) {
				assert(bo->exec == NULL);
				list_del(&bo->request);
			}

			bo->delta = 0;
			assert_tiling(kgem, bo);
			return bo;

discard:
			if (!use_active)
				kgem_bo_free(kgem, bo);
		}

		/* Widen the search: active list first, then retire. */
		if (use_active) {
			use_active = false;
			goto retry_large;
		}

		if (__kgem_throttle_retire(kgem, flags))
			goto retry_large;

		return NULL;
	}

	/* Inactive-only search with an empty bucket: try to refill the
	 * bucket by retiring completed work before giving up. */
	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
		DBG(("%s: inactive and cache bucket empty\n",
		     __FUNCTION__));

		if (flags & CREATE_NO_RETIRE) {
			DBG(("%s: can not retire\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
			DBG(("%s: active cache bucket empty\n", __FUNCTION__));
			return NULL;
		}

		if (!__kgem_throttle_retire(kgem, flags)) {
			DBG(("%s: nothing retired\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(inactive(kgem, num_pages))) {
			DBG(("%s: active cache bucket still empty after retire\n",
			     __FUNCTION__));
			return NULL;
		}
	}

	/* Prefer a bo that is already mapped the way the caller wants. */
	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		DBG(("%s: searching for inactive %s map\n",
		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
		list_for_each_entry(bo, cache, vma) {
			assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
			assert(bucket(bo) == cache_bucket(num_pages));
			assert(bo->proxy == NULL);
			assert(bo->rq == NULL);
			assert(bo->exec == NULL);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo)) {
				DBG(("inactive too small: %d < %d\n",
				     num_pages(bo), num_pages));
				continue;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			if (I915_TILING_NONE != bo->tiling &&
			    !gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			kgem_bo_remove_from_inactive(kgem, bo);
			assert(list_is_empty(&bo->vma));
			assert(list_is_empty(&bo->list));

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
			bo->delta = 0;
			DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
			     __FUNCTION__, bo->handle, num_pages(bo)));
			assert(use_active || bo->domain != DOMAIN_GPU);
			assert(!bo->needs_flush);
			assert_tiling(kgem, bo);
			ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
			return bo;
		}

		if (flags & CREATE_EXACT)
			return NULL;

		if (flags & CREATE_CPU_MAP && !kgem->has_llc)
			return NULL;
	}

	/* General scan of the chosen bucket; bos with an unwanted map
	 * are remembered in "first" as a near-miss fallback. */
	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
	list_for_each_entry(bo, cache, list) {
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert(!!bo->rq == !!use_active);
		assert(bo->proxy == NULL);
		assert(!bo->scanout);

		if (num_pages > num_pages(bo))
			continue;

		if (use_active &&
		    kgem->gen <= 040 &&
		    bo->tiling != I915_TILING_NONE)
			continue;

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		if (I915_TILING_NONE != bo->tiling) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
				continue;

			if (first)
				continue;

			if (!gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
		}

		if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				int for_cpu = !!(flags & CREATE_CPU_MAP);
				if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
					if (first != NULL)
						break;

					first = bo;
					continue;
				}
			} else {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		} else {
			if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
				continue;

			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		}

		if (use_active)
			kgem_bo_remove_from_active(kgem, bo);
		else
			kgem_bo_remove_from_inactive(kgem, bo);

		assert(bo->tiling == I915_TILING_NONE);
		bo->pitch = 0;
		bo->delta = 0;
		DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, bo->handle, num_pages(bo),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&bo->list));
		assert(list_is_empty(&bo->vma));
		assert(use_active || bo->domain != DOMAIN_GPU);
		assert(!bo->needs_flush || use_active);
		assert_tiling(kgem, bo);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
		return bo;
	}

	/* No ideal candidate: settle for the near-miss, if any. */
	if (first) {
		assert(first->tiling == I915_TILING_NONE);

		if (use_active)
			kgem_bo_remove_from_active(kgem, first);
		else
			kgem_bo_remove_from_inactive(kgem, first);

		first->pitch = 0;
		first->delta = 0;
		DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, first->handle, num_pages(first),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&first->list));
		assert(list_is_empty(&first->vma));
		assert(use_active || first->domain != DOMAIN_GPU);
		assert(!first->needs_flush || use_active);
		ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
		return first;
	}

	return NULL;
}

/* Import a bo shared via a GEM flink name; the result is marked
 * non-reusable and purged (no coherency guarantees across clients). */
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
{
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling tiling;
	struct kgem_bo *bo;

	DBG(("%s(name=%d)\n", __FUNCTION__, name));

	VG_CLEAR(open_arg);
	open_arg.name = name;
	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
		return NULL;

	DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));

	VG_CLEAR(tiling);
	tiling.handle = open_arg.handle;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
		gem_close(kgem->fd, open_arg.handle);
		return NULL;
	}

	DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode));

	bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE);
	if (bo == NULL) {
		gem_close(kgem->fd,
			  open_arg.handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->tiling = tiling.tiling_mode;
	bo->reusable = false;
	bo->prime = true;
	bo->purged = true; /* no coherency guarantees */

	debug_alloc__bo(kgem, bo);
	return bo;
}

/* Import a dma-buf (PRIME) fd as a bo. The fd's actual size (probed
 * with lseek) overrides the caller's estimate, and the kernel's
 * caching mode is queried to classify the import as a foreign
 * scanout or a snooped CPU buffer. */
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size)
{
#ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE
	struct drm_prime_handle args;
	struct drm_i915_gem_get_tiling tiling;
	struct local_i915_gem_caching caching;
	struct kgem_bo *bo;
	off_t seek;

	DBG(("%s(name=%d)\n", __FUNCTION__, name));

	VG_CLEAR(args);
	args.fd = name;
	args.flags = 0;
	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) {
		DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno));
		return NULL;
	}

	VG_CLEAR(tiling);
	tiling.handle = args.handle;
	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	/* Query actual size, overriding specified if available */
	seek = lseek(args.fd, 0, SEEK_END);
	DBG(("%s: estimated size=%ld, actual=%lld\n",
	     __FUNCTION__, (long)size, (long long)seek));
	if (seek != -1) {
		if (size > seek) {
			DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek));
			gem_close(kgem->fd, args.handle);
			return NULL;
		}
		size = seek;
	}

	DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__,
	     args.handle, tiling.tiling_mode));
	bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size));
	if (bo == NULL) {
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->tiling = tiling.tiling_mode;
	bo->reusable = false;
	bo->prime = true;
	bo->domain = DOMAIN_NONE;

	/* is this a special bo (e.g. scanout or CPU coherent)? */

	VG_CLEAR(caching);
	caching.handle = args.handle;
	caching.caching = kgem->has_llc;
	(void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching);
	DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching));
	switch (caching.caching) {
	case 0:
		/* Uncached on an LLC machine implies a scanout. */
		if (kgem->has_llc) {
			DBG(("%s: interpreting handle=%d as a foreign scanout\n",
			     __FUNCTION__, args.handle));
			bo->scanout = true;
		}
		break;
	case 1:
		/* Snooped on a non-LLC machine: CPU-coherent, and
		 * tiling would be illegal for such a buffer. */
		if (!kgem->has_llc) {
			DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n",
			     __FUNCTION__, args.handle));
			bo->snoop = true;
			if (bo->tiling) {
				DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__));
				kgem_bo_free(kgem, bo);
				return NULL;
			}
		}
		break;
	case 2:
		DBG(("%s: interpreting handle=%d as a foreign scanout\n",
		     __FUNCTION__, args.handle));
		bo->scanout = true;
		break;
	}

	debug_alloc__bo(kgem, bo);
	return bo;
#else
	return NULL;
#endif
}

/* Export a bo as a dma-buf fd (CLOEXEC); the bo becomes non-reusable
 * since another process may now hold a reference. Returns the fd or
 * -1 on failure/lack of kernel support. */
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo)
{
#if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC)
	struct drm_prime_handle args;

	VG_CLEAR(args);
	args.handle = bo->handle;
	args.flags = O_CLOEXEC;

	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
		return -1;

	bo->reusable = false;
	return args.fd;
#else
	return -1;
#endif
}

/* Allocate (or recycle from the cache) an untiled bo of at least
 * size bytes; flags select mapping/cache behaviour. Returns NULL on
 * failure, or when CREATE_CACHED finds no cached match. */
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
	struct kgem_bo *bo;
	uint32_t handle;

	DBG(("%s(%d)\n", __FUNCTION__, size));
	assert(size);

	/* With an LLC a CPU map is as good as (and cheaper than) GTT. */
	if (flags & CREATE_GTT_MAP && kgem->has_llc) {
		flags &= ~CREATE_GTT_MAP;
		flags |= CREATE_CPU_MAP;
	}

	size = NUM_PAGES(size);
	if ((flags & CREATE_UNCACHED) == 0) {
		bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
		if (bo) {
			assert(bo->domain != DOMAIN_GPU);
			ASSERT_IDLE(kgem, bo->handle);
			bo->refcnt = 1;
			return bo;
		}

		/* CREATE_CACHED means "cache hit or nothing". */
		if (flags & CREATE_CACHED)
			return NULL;
	}

	handle = gem_create(kgem->fd, size);
	if (handle == 0)
		return NULL;

	DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
	bo = __kgem_bo_alloc(handle, size);
	if (bo == NULL) {
		gem_close(kgem->fd, handle);
		return NULL;
	}

	debug_alloc__bo(kgem, bo);
	return bo;
}

/* Choose a tiling mode for a width x height x bpp surface, honouring
 * per-generation pitch/fence limits. A negative tiling (in or out)
 * means "mandatory": the caller must not downgrade it. */
int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
{
	if (DBG_NO_TILING)
		return tiling < 0 ? tiling : I915_TILING_NONE;

	if (kgem->gen < 040) {
		if (tiling && width * bpp > 8192 * 8) {
			DBG(("%s: pitch too large for tliing [%d]\n",
			     __FUNCTION__, width*bpp/8));
			tiling = I915_TILING_NONE;
			goto done;
		}
	} else {
		if (width*bpp > (MAXSHORT-512) * 8) {
			if (tiling > 0)
				tiling = -tiling;
			else if (tiling == 0)
				tiling = -I915_TILING_X;
			DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
			     __FUNCTION__, width*bpp/8, tiling));
		} else if (tiling && (width|height) > 8192) {
			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
			     __FUNCTION__, width, height));
			tiling = -I915_TILING_X;
		}

		/* fences limited to 128k (256k on ivb) */
		assert(width * bpp <= 128 * 1024 * 8);
	}

	if (tiling < 0)
		return tiling;

	if (tiling == I915_TILING_Y && !kgem->can_render_y)
		tiling = I915_TILING_X;

	if (tiling && (height == 1 || width == 1)) {
		DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
		     __FUNCTION__,width, height));
		tiling = I915_TILING_NONE;
		goto done;
	}
	if (tiling == I915_TILING_Y && height <= 16) {
		DBG(("%s: too short [%d] for TILING_Y\n",
		     __FUNCTION__,height));
		tiling = I915_TILING_X;
	}
	if (tiling && width * bpp > 8 * (4096 - 64)) {
		DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
		     __FUNCTION__,
		     width, height, width*bpp/8,
		     tiling));
		return -tiling;
	}
	if (tiling == I915_TILING_X && height < 4) {
		DBG(("%s: too short [%d] for TILING_X\n",
		     __FUNCTION__, height));
		tiling = I915_TILING_NONE;
		goto done;
	}

	if (tiling == I915_TILING_X && width * bpp <= 8*512) {
		DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
		     __FUNCTION__, width, bpp));
		tiling = I915_TILING_NONE;
		goto done;
	}
	if (tiling == I915_TILING_Y && width * bpp < 8*128) {
		DBG(("%s: too thin [%d] for TILING_Y\n",
		     __FUNCTION__, width));
		tiling = I915_TILING_NONE;
		goto done;
	}

	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
		DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__,
		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
		     tiling == I915_TILING_X ? 'X' : 'Y'));
		tiling = I915_TILING_NONE;
		goto done;
	}

	if (tiling && width * bpp >= 8 * 4096 / 2) {
		DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
		     __FUNCTION__,
		     width, height, width*bpp/8,
		     tiling));
		return -tiling;
	}

done:
	DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
	return tiling;
}

/* Map an X pixel depth to a supported bit-per-pixel value (0 if the
 * depth is not handled). */
static int bits_per_pixel(int depth)
{
	switch (depth) {
	case 8: return 8;
	case 15:
	case 16: return 16;
	case 24:
	case 30:
	case 32: return 32;
	default: return 0;
	}
}

/* Report as KGEM_CAN_CREATE_* flags which placements (CPU, GPU, GTT,
 * large, tiled) are possible for a width x height surface at the
 * given depth; returns 0 if the surface cannot be created at all. */
unsigned kgem_can_create_2d(struct kgem *kgem,
			    int width, int height, int depth)
{
	uint32_t pitch, size;
	unsigned flags = 0;
	int tiling;
	int bpp;

	DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));

	bpp = bits_per_pixel(depth);
	if (bpp == 0) {
		DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
		return 0;
	}

	if (width > MAXSHORT || height > MAXSHORT) {
		DBG(("%s: unhandled size %dx%d\n",
		     __FUNCTION__, width, height));
		return 0;
	}

	size = kgem_surface_size(kgem, false, 0,
				 width, height, bpp,
				 I915_TILING_NONE, &pitch);
	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
	if (size > 0) {
		if (size <= kgem->max_cpu_size)
			flags |= KGEM_CAN_CREATE_CPU;
		if (size > 4096 && size <= kgem->max_gpu_size)
			flags |= KGEM_CAN_CREATE_GPU;
		if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
			flags |= KGEM_CAN_CREATE_GTT;
		if (size > kgem->large_object_size)
			flags |= KGEM_CAN_CREATE_LARGE;
		if (size > kgem->max_object_size) {
			DBG(("%s: too large (untiled) %d > %d\n",
			     __FUNCTION__, size, kgem->max_object_size));
			return 0;
		}
	}

	tiling = kgem_choose_tiling(kgem, I915_TILING_X,
				    width, height, bpp);
	if (tiling !=
I915_TILING_NONE) {
		size = kgem_surface_size(kgem, false, 0,
					 width, height, bpp, tiling,
					 &pitch);
		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
		if (size > 0 && size <= kgem->max_gpu_size)
			flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
		if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
			flags |= KGEM_CAN_CREATE_GTT;
		if (size > PAGE_SIZE*kgem->aperture_mappable/4)
			flags &= ~KGEM_CAN_CREATE_GTT;
		if (size > kgem->large_object_size)
			flags |= KGEM_CAN_CREATE_LARGE;
		if (size > kgem->max_object_size) {
			DBG(("%s: too large (tiled) %d > %d\n",
			     __FUNCTION__, size, kgem->max_object_size));
			return 0;
		}
		/* Pre-gen4 fence regions are power-of-two sized, so the
		 * effective footprint is the next power of two. */
		if (kgem->gen < 040) {
			int fence_size = 1024 * 1024;
			while (fence_size < size)
				fence_size <<= 1;
			if (fence_size > kgem->max_gpu_size)
				flags &= ~KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
			if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4)
				flags &= ~KGEM_CAN_CREATE_GTT;
		}
	}

	return flags;
}

/* Number of pages a tiled bo occupies once rounded up to its fence
 * region (power-of-two, minimum 512KiB/1MiB); pre-gen4 only. */
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
	unsigned int size;

	assert(bo->tiling);
	assert_tiling(kgem, bo);
	assert(kgem->gen < 040);

	if (kgem->gen < 030)
		size = 512 * 1024 / PAGE_SIZE;
	else
		size = 1024 * 1024 / PAGE_SIZE;
	while (size < num_pages(bo))
		size <<= 1;

	return size;
}

/* Allocate a display-cached bo via the create2 ioctl, preferring
 * stolen memory and falling back to system pages. Returns NULL if
 * create2 is unavailable or both placements fail. */
static struct kgem_bo *
__kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int pitch)
{
	struct local_i915_gem_create2 args;
	struct kgem_bo *bo;

	if (!kgem->has_create2)
		return NULL;

	memset(&args, 0, sizeof(args));
	args.size = size * PAGE_SIZE;
	args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN;
	args.caching = DISPLAY;
	args.tiling_mode = tiling;
	args.stride = pitch;

	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) {
		args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM;
		if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args))
			return NULL;
	}

	bo = __kgem_bo_alloc(args.handle, size);
	if (bo == NULL) {
		gem_close(kgem->fd, args.handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->tiling = tiling;
	bo->pitch = pitch;
	if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) {
		bo->purged = true; /* for asserts against CPU access */
	}
	bo->reusable = false; /* so that unclaimed scanouts are freed */
	bo->domain = DOMAIN_NONE;

	/* Track an already-busy bo on the flushing list so it is
	 * retired when the kernel finishes with it. */
	if (__kgem_busy(kgem, bo->handle)) {
		assert(bo->exec == NULL);
		list_add(&bo->request, &kgem->flushing);
		bo->rq = (void *)kgem;
		kgem->need_retire = true;
	}

	assert_tiling(kgem, bo);
	debug_alloc__bo(kgem, bo);

	return bo;
}

/* Turn a bo into a scanout: move it out of cached memory, fault it
 * into the mappable GTT, and attach a DRM framebuffer (id stored in
 * bo->delta). No-op while switched away (vtSema clear). */
static void __kgem_bo_make_scanout(struct kgem *kgem,
				   struct kgem_bo *bo,
				   int width, int height)
{
	ScrnInfoPtr scrn =
		container_of(kgem, struct sna, kgem)->scrn;
	struct drm_mode_fb_cmd arg;

	assert(bo->proxy == NULL);

	if (!scrn->vtSema)
		return;

	DBG(("%s: create fb %dx%d@%d/%d\n",
	     __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));

	VG_CLEAR(arg);
	arg.width = width;
	arg.height = height;
	arg.pitch = bo->pitch;
	arg.bpp = scrn->bitsPerPixel;
	arg.depth = scrn->depth;
	arg.handle = bo->handle;

	/* First move the scanout out of cached memory */
	if (kgem->has_llc) {
		if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) &&
		    !gem_set_caching(kgem->fd, bo->handle, UNCACHED))
			return;
	}

	bo->scanout = true;

	/* Then pre-emptively move the object into the mappable
	 * portion to avoid rebinding later when busy.
	 */
	if (bo->map__gtt == NULL)
		bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
	if (bo->map__gtt) {
		/* Touch the first page through the GTT under SIGBUS/SIGSEGV
		 * protection to force the binding now.
		 */
		if (sigtrap_get() == 0) {
			*(uint32_t *)bo->map__gtt = 0;
			sigtrap_put();
		}
		bo->domain = DOMAIN_GTT;
	}

	/* Attach a framebuffer; on success remember its id in bo->delta */
	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) {
		DBG(("%s: attached fb=%d to handle=%d\n",
		     __FUNCTION__, arg.fb_id, arg.handle));
		bo->delta = arg.fb_id;
	}
}

/* Allocate (or recycle) a 2D buffer object of width x height at bpp with
 * the requested tiling. flags select the search strategy (CREATE_SCANOUT,
 * CREATE_INACTIVE, CREATE_CPU_MAP/GTT_MAP, CREATE_EXACT, ...). A negative
 * tiling requests that exact tiling. Searches, in order: the scanout
 * cache, stolen/display allocation, the large/active/inactive bucket
 * caches, and finally creates a fresh object. Returns NULL on failure
 * (or when CREATE_CACHED finds no cached bo). The returned bo has
 * refcnt == 1.
 */
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
			       int width,
			       int height,
			       int bpp,
			       int tiling,
			       uint32_t flags)
{
	struct list *cache;
	struct kgem_bo *bo;
	uint32_t pitch, tiled_height, size;
	uint32_t handle;
	int i, bucket, retry;
	bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);

	if (tiling < 0)
		exact = true, tiling = -tiling;

	DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
	     width, height, bpp, tiling, exact,
	     !!(flags & CREATE_INACTIVE),
	     !!(flags & CREATE_CPU_MAP),
	     !!(flags & CREATE_GTT_MAP),
	     !!(flags & CREATE_SCANOUT),
	     !!(flags & CREATE_PRIME),
	     !!(flags & CREATE_TEMPORARY)));

	size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
				 width, height, bpp, tiling, &pitch);
	if (size == 0) {
		DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__));
		return NULL;
	}

	/* From here on, size is in pages */
	size /= PAGE_SIZE;
	bucket = cache_bucket(size);

	if (flags & CREATE_SCANOUT) {
		struct kgem_bo *last = NULL;

		/* Reuse a cached scanout with matching geometry if possible */
		list_for_each_entry_reverse(bo, &kgem->scanout, list) {
			assert(bo->scanout);
			assert(!bo->flush);
			assert(!bo->refcnt);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo) || num_pages(bo) > 2*size)
				continue;

			if (bo->tiling != tiling || bo->pitch != pitch)
				/* No tiling/pitch without recreating fb */
				continue;

			if (bo->delta && !check_scanout_size(kgem, bo, width, height))
				continue;

			/* Still busy; remember it as a fallback */
			if (flags & CREATE_INACTIVE && bo->rq) {
				last = bo;
				continue;
			}

			list_del(&bo->list);

			bo->unique_id = kgem_get_unique_id(kgem);
			DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

		if (last) {
			list_del(&last->list);

			last->unique_id = kgem_get_unique_id(kgem);
			DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     last->pitch, last->tiling, last->handle, last->unique_id));
			assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
			assert_tiling(kgem, last);
			last->refcnt = 1;
			return last;
		}

		/* While we own the VT, try re-purposing a cached scanout by
		 * re-tiling it and attaching a fresh framebuffer.
		 */
		if (container_of(kgem, struct sna, kgem)->scrn->vtSema) {
			ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn;

			list_for_each_entry_reverse(bo, &kgem->scanout, list) {
				struct drm_mode_fb_cmd arg;

				assert(bo->scanout);
				assert(!bo->refcnt);

				if (size > num_pages(bo) || num_pages(bo) > 2*size)
					continue;

				if (flags & CREATE_INACTIVE && bo->rq)
					continue;

				list_del(&bo->list);

				if (bo->tiling != tiling || bo->pitch != pitch) {
					/* Must drop the old fb before changing tiling */
					if (bo->delta) {
						kgem_bo_rmfb(kgem, bo);
						bo->delta = 0;
					}

					if (gem_set_tiling(kgem->fd, bo->handle,
							   tiling, pitch)) {
						bo->tiling = tiling;
						bo->pitch = pitch;
					} else {
						kgem_bo_free(kgem, bo);
						break;
					}
				}

				VG_CLEAR(arg);
				arg.width = width;
				arg.height = height;
				arg.pitch = bo->pitch;
				arg.bpp = scrn->bitsPerPixel;
				arg.depth = scrn->depth;
				arg.handle = bo->handle;

				if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) {
					kgem_bo_free(kgem, bo);
					break;
				}

				bo->delta = arg.fb_id;
				bo->unique_id = kgem_get_unique_id(kgem);

				DBG((" 2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}

		/* Try a fresh display/stolen allocation before falling back
		 * to the generic caches.
		 */
		bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch);
		if (bo)
			return bo;

		flags |= CREATE_INACTIVE;
	}

	if (bucket >= NUM_CACHE_BUCKETS) {
		/* Too large for the bucketed caches; use the large lists */
		DBG(("%s: large bo num pages=%d, bucket=%d\n",
		     __FUNCTION__, size, bucket));

		if (flags & CREATE_INACTIVE)
			goto large_inactive;

		tiled_height = kgem_aligned_height(kgem, height, tiling);

		list_for_each_entry(bo, &kgem->large, list) {
			assert(!bo->purged);
			assert(!bo->scanout);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				/* Pre-gen4 cannot change tiling parameters
				 * cheaply; require a compatible pitch.
				 */
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch || bo->tiling != tiling) {
					if (!gem_set_tiling(kgem->fd, bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
					bo->tiling = tiling;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

large_inactive:
		__kgem_throttle_retire(kgem, flags);
		list_for_each_entry(bo, &kgem->large_inactive, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo))
				continue;

			if (bo->tiling != tiling ||
			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
				if (!gem_set_tiling(kgem->fd, bo->handle,
						    tiling, pitch))
					continue;

				bo->tiling = tiling;
				bo->pitch = pitch;
			}

			/* Kernel may have discarded the pages whilst inactive */
			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			list_del(&bo->list);

			assert(bo->domain != DOMAIN_GPU);
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->pitch = pitch;
			bo->delta = 0;
			DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;

			if (flags & CREATE_SCANOUT)
				__kgem_bo_make_scanout(kgem, bo, width, height);

			return bo;
		}

		goto create;
	}

	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		if (kgem->has_llc && tiling == I915_TILING_NONE)
			for_cpu = 1;
		/* We presume that we will need to upload to this bo,
		 * and so would prefer to have an active VMA.
		 */
		cache = &kgem->vma[for_cpu].inactive[bucket];
		do {
			list_for_each_entry(bo, cache, vma) {
				assert(bucket(bo) == bucket);
				assert(bo->refcnt == 0);
				assert(!bo->scanout);
				assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
				assert(bo->rq == NULL);
				assert(bo->exec == NULL);
				assert(list_is_empty(&bo->request));
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (size > num_pages(bo)) {
					DBG(("inactive too small: %d < %d\n",
					     num_pages(bo), size));
					continue;
				}

				if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU)
					continue;

				if (bo->tiling != tiling ||
				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
					/* Cannot re-tile whilst a GTT mmap exists */
					if (bo->map__gtt ||
					    !gem_set_tiling(kgem->fd, bo->handle,
							    tiling, pitch)) {
						DBG(("inactive GTT vma with wrong tiling: %d < %d\n",
						     bo->tiling, tiling));
						continue;
					}
					bo->tiling = tiling;
					bo->pitch = pitch;
				}

				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
					kgem_bo_free(kgem, bo);
					break;
				}

				assert(bo->tiling == tiling);
				bo->pitch = pitch;
				bo->delta = 0;
				bo->unique_id = kgem_get_unique_id(kgem);

				kgem_bo_remove_from_inactive(kgem, bo);
				assert(list_is_empty(&bo->list));
				assert(list_is_empty(&bo->vma));

				DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->reusable);
				assert(bo->domain != DOMAIN_GPU);
				ASSERT_IDLE(kgem, bo->handle);
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
			/* Retire pending work and rescan until the cache is empty */
		} while (!list_is_empty(cache) &&
			 __kgem_throttle_retire(kgem, flags));

		if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
			if (list_is_empty(&kgem->active[bucket][tiling]) &&
			    list_is_empty(&kgem->inactive[bucket]))
				flags &= ~CREATE_CACHED;

			goto create;
		}
	}

	if (flags & CREATE_INACTIVE)
		goto skip_active_search;

	/* Best active match */
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
		retry = 3;
search_active:
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->active[bucket][tiling];
	if (tiling) {
		tiled_height = kgem_aligned_height(kgem, height, tiling);
		list_for_each_entry(bo, cache, list) {
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bucket(bo) == bucket);
			assert(bo->reusable);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch) {
					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	} else {
		list_for_each_entry(bo, cache, list) {
			assert(bucket(bo) == bucket);
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert_tiling(kgem, bo);

			if (num_pages(bo) < size)
				continue;

			kgem_bo_remove_from_active(kgem, bo);

			bo->pitch = pitch;
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	}

	if (kgem->gen >= 040) {
		/* gen4+ can freely re-tile, so scan the other tiling lists */
		for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) {
			cache = &kgem->active[bucket][i];
			list_for_each_entry(bo, cache, list) {
				assert(!bo->purged);
				assert(bo->refcnt == 0);
				assert(bo->reusable);
				assert(!bo->scanout);
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (num_pages(bo) < size)
					continue;

				if (bo->tiling != tiling ||
				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;
				}

				kgem_bo_remove_from_active(kgem, bo);

				bo->unique_id = kgem_get_unique_id(kgem);
				bo->pitch = pitch;
				bo->tiling = tiling;
				bo->delta = 0;
				DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}
	} else if (!exact) { /* allow an active near-miss? */
		for (i = tiling; i >= I915_TILING_NONE; i--) {
			tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
							 width, height, bpp, tiling, &pitch);
			cache = active(kgem, tiled_height / PAGE_SIZE, i);
			tiled_height = kgem_aligned_height(kgem, height, i);
			list_for_each_entry(bo, cache, list) {
				assert(!bo->purged);
				assert(bo->refcnt == 0);
				assert(bo->reusable);
				assert(!bo->scanout);
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (bo->tiling) {
					if (bo->pitch < pitch) {
						DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
						     bo->tiling, tiling,
						     bo->pitch, pitch));
						continue;
					}
				} else
					bo->pitch = pitch;

				if (bo->pitch * tiled_height > bytes(bo))
					continue;

				kgem_bo_remove_from_active(kgem, bo);

				bo->unique_id = kgem_get_unique_id(kgem);
				bo->delta = 0;
				DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}
	}

	/* Try progressively larger buckets before giving up */
	if (--retry) {
		bucket++;
		goto search_active;
	}

skip_active_search:
	bucket = cache_bucket(size);
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3)
		retry = 3;
search_inactive:
	/* Now just look for a close match and prefer any currently active */
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->inactive[bucket];
	list_for_each_entry(bo, cache, list) {
		assert(bucket(bo) == bucket);
		assert(bo->reusable);
		assert(!bo->scanout);
		assert(bo->flush == false);
		assert_tiling(kgem, bo);

		if (size > num_pages(bo)) {
			DBG(("inactive too small: %d < %d\n",
			     num_pages(bo), size));
			continue;
		}

		if (bo->tiling != tiling ||
		    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
			if (!gem_set_tiling(kgem->fd, bo->handle,
					    tiling, pitch))
				continue;
		}

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		kgem_bo_remove_from_inactive(kgem, bo);
		assert(list_is_empty(&bo->list));
		assert(list_is_empty(&bo->vma));

		bo->pitch = pitch;
		bo->tiling = tiling;

		bo->delta = 0;
		bo->unique_id = kgem_get_unique_id(kgem);
		assert(bo->pitch);
		DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
		assert_tiling(kgem, bo);
		bo->refcnt = 1;

		if (flags & CREATE_SCANOUT)
			__kgem_bo_make_scanout(kgem, bo, width, height);

		return bo;
	}

	if ((flags & CREATE_NO_RETIRE) == 0) {
		/* As a last resort, wait for an active bo to become idle
		 * (oldest first), optionally throttling the GPU.
		 */
		list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) {
			if (bo->exec)
				break;

			if (size > num_pages(bo))
				continue;

			if (__kgem_busy(kgem, bo->handle)) {
				if (flags & CREATE_NO_THROTTLE)
					goto no_retire;

				do {
					if (!kgem->need_throttle) {
						DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, bo->handle));
						goto no_retire;
					}

					__kgem_throttle(kgem, false);
				} while (__kgem_busy(kgem, bo->handle));
			}

			DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle));

			kgem_bo_remove_from_active(kgem, bo);
			__kgem_bo_clear_busy(bo);

			if (tiling != I915_TILING_NONE && bo->pitch != pitch) {
				if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) {
					kgem_bo_free(kgem, bo);
					goto no_retire;
				}
			}

			bo->pitch = pitch;
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;

			if (flags & CREATE_SCANOUT)
				__kgem_bo_make_scanout(kgem, bo, width, height);

			return bo;
		}
no_retire:
		flags |= CREATE_NO_RETIRE;
	}

	if (--retry) {
		bucket++;
		goto search_inactive;
	}

create:
	if (flags & CREATE_CACHED) {
		DBG(("%s: no cached bo found, requested not to create a new bo\n", __FUNCTION__));
		return NULL;
	}

	if (bucket >= NUM_CACHE_BUCKETS)
		size = ALIGN(size, 1024);
	handle = gem_create(kgem->fd, size);
	if (handle == 0) {
		DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__));
		return NULL;
	}

	bo = __kgem_bo_alloc(handle, size);
	if (!bo) {
		DBG(("%s: malloc failed\n", __FUNCTION__));
		gem_close(kgem->fd, handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	if (tiling == I915_TILING_NONE ||
	    gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
		bo->tiling = tiling;
		bo->pitch = pitch;
		if (flags & CREATE_SCANOUT)
			__kgem_bo_make_scanout(kgem, bo, width, height);
	} else {
		if (flags & CREATE_EXACT) {
			DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__));
			gem_close(kgem->fd, handle);
			free(bo);
			return NULL;
		}
	}

	assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
	assert_tiling(kgem, bo);

	debug_alloc__bo(kgem, bo);

	DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
	     bo->pitch, bo->tiling, bo->handle, bo->unique_id,
	     size, num_pages(bo), bucket(bo)));
	return bo;
}

/* Allocate an untiled 2D buffer suitable for CPU access: either an LLC
 * bo with a CPU mmap, a snooped bo from the snoop cache or via
 * set-caching, or (last resort) a userptr wrapping of malloc'ed pages.
 * Returns NULL if no CPU-accessible path is available.
 */
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
				   int width,
				   int height,
				   int bpp,
				   uint32_t flags)
{
	struct kgem_bo *bo;
	int stride, size;

	if (DBG_NO_CPU)
		return NULL;

	DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));

	if (kgem->has_llc) {
		bo = kgem_create_2d(kgem, width, height, bpp,
				    I915_TILING_NONE, flags);
		if (bo == NULL)
			return bo;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		return bo;
	}

	assert(width > 0 && height > 0);
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, 4);
	size = stride * ALIGN(height, 2);
	assert(size >= PAGE_SIZE);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
	if (bo) {
		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);
		assert(bo->snoop);
		bo->refcnt = 1;
		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	if (kgem->has_caching) {
		bo = kgem_create_linear(kgem, size, flags);
		if (bo == NULL)
			return NULL;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		assert(!__kgem_busy(kgem, bo->handle));
		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}
		bo->snoop = true;

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	if (kgem->has_userptr) {
		void *ptr;

		/* XXX */
		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
		/* userptr requires page-aligned memory of whole pages */
		if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
			return NULL;

		bo = kgem_create_map(kgem, ptr, size, false);
		if (bo == NULL) {
			free(ptr);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	return NULL;
}

/* Drop a reference taken on a bo; proxies (sub-allocations) are torn
 * down here and their backing bo unreferenced, everything else is passed
 * to __kgem_bo_destroy for caching/freeing.
 */
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, proxy? %d\n",
	     __FUNCTION__, bo->handle, bo->proxy != NULL));

	if (bo->proxy) {
		assert(!bo->reusable);
		kgem_bo_binding_free(kgem, bo);

		assert(list_is_empty(&bo->list));
		_list_del(&bo->vma);
		_list_del(&bo->request);

		if (bo->io && bo->domain == DOMAIN_CPU)
			_kgem_bo_delete_buffer(kgem, bo);

		kgem_bo_unref(kgem, bo->proxy);

		if (DBG_NO_MALLOC_CACHE) {
			free(bo);
		} else {
			/* Push onto the freed-bo cache for quick reuse */
			*(struct kgem_bo **)bo = __kgem_freed_bo;
			__kgem_freed_bo = bo;
		}
	} else
		__kgem_bo_destroy(kgem, bo);
}

/* Poll a bo on the flushing list; if the kernel reports it idle,
 * clear its busy tracking.
 */
static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->rq);
	assert(bo->exec == NULL);
	assert(bo->needs_flush);

	/* The kernel will emit a flush *and* update its own flushing lists. */
	if (!__kgem_busy(kgem, bo->handle))
		__kgem_bo_clear_busy(bo);

	DBG(("%s: handle=%d, busy?=%d\n",
	     __FUNCTION__, bo->handle, bo->rq != NULL));
}

/* Submit any batch referencing the scanout and mark its GTT writes as
 * flushed so the display observes the rendering.
 */
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	if (!bo->needs_flush)
		return;

	kgem_bo_submit(kgem, bo);

	/* If the kernel fails to emit the flush, then it will be forced when
	 * we assume direct access. And as the usual failure is EIO, we do
	 * not actually care.
	 */
	assert(bo->exec == NULL);
	if (bo->rq)
		__kgem_flush(kgem, bo);

	/* Whatever actually happens, we can regard the GTT write domain
	 * as being flushed.
	 */
	bo->gtt_dirty = false;
	bo->needs_flush = false;
	bo->domain = DOMAIN_NONE;
}

/* True when the current ring has at most one outstanding request or the
 * kernel reports it idle.
 */
inline static bool nearly_idle(struct kgem *kgem)
{
	int ring = kgem->ring == KGEM_BLT;

	if (list_is_singular(&kgem->requests[ring]))
		return true;

	return __kgem_ring_is_idle(kgem, ring);
}

/* Note when bo was last written on a different ring, requiring an
 * inter-ring semaphore. Sets the sticky kgem->needs_semaphore flag;
 * returns true only on the first such bo per batch.
 */
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
	if (kgem->needs_semaphore)
		return false;

	if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring)
		return false;

	kgem->needs_semaphore = true;
	return true;
}

/* Note when bo has no presumed GTT offset yet (kernel must find space).
 * Sets the sticky kgem->needs_reservation flag; returns true only when
 * this first happens while the ring is nearly idle.
 */
inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo)
{
	if (kgem->needs_reservation)
		return false;

	if (bo->presumed_offset)
		return false;

	kgem->needs_reservation = true;
	return nearly_idle(kgem);
}

/* Should the current batch be flushed before adding bo? Only relevant
 * once the batch has relocations (kgem->nreloc != 0). NB: both helpers
 * are always called for their flag side effects.
 */
inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	bool flush = false;

	if (needs_semaphore(kgem, bo)) {
		DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle));
		flush = true;
	}

	if (needs_reservation(kgem, bo)) {
		DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle));
		flush = true;
	}

	return kgem->nreloc ? flush : false;
}

/* Ask the kernel whether num_pages (plus a safety reserve) still fit in
 * the available aperture. Only consulted when the batch itself is empty
 * (kgem->aperture == 0); otherwise returns false to force a flush.
 */
static bool aperture_check(struct kgem *kgem, unsigned num_pages)
{
	struct drm_i915_gem_get_aperture aperture;
	int reserve;

	if (kgem->aperture)
		return false;

	/* Leave some space in case of alignment issues */
	reserve = kgem->aperture_mappable / 2;
	if (kgem->gen < 033 && reserve < kgem->aperture_max_fence)
		reserve = kgem->aperture_max_fence;
	if (!kgem->has_llc)
		reserve += kgem->nexec * PAGE_SIZE * 2;

	DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n",
	     __FUNCTION__, num_pages, reserve, kgem->aperture_total));
	num_pages += reserve;

	/* Seed with the total in case the ioctl fails */
	VG_CLEAR(aperture);
	aperture.aper_available_size = kgem->aperture_total;
	aperture.aper_available_size *= PAGE_SIZE;
	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

	DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
	     __FUNCTION__,
	     (long)num_pages * PAGE_SIZE,
	     (long)aperture.aper_available_size));

	return num_pages <= aperture.aper_available_size / PAGE_SIZE;
}

/* May the current batch continue to grow (true), or should the caller
 * flush it opportunistically (false)?
 */
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
	if (unlikely(kgem->wedged))
		return false;

	if (kgem->nreloc == 0)
		return true;

	if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE)
		return true;

	if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
		return true;

	DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
	     __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
	return !kgem_ring_is_idle(kgem, kgem->ring);
}

/* Check whether the NULL-terminated varargs list of bos fits into the
 * current batch (exec slots and aperture); returns false if the batch
 * must be flushed first.
 */
bool kgem_check_bo(struct kgem *kgem, ...)
{
	va_list ap;
	struct kgem_bo *bo;
	int num_exec = 0;
	int num_pages = 0;
	bool flush = false;
	bool busy = true;

	/* NOTE(review): this is the tail of a NULL-terminated varargs
	 * bo-list space check whose signature lies above this chunk.
	 * It sums the extra exec slots and aperture pages the listed bo
	 * would add to the current batch. */
	va_start(ap, kgem);
	while ((bo = va_arg(ap, struct kgem_bo *))) {
		/* Chase proxies down to the real backing object. */
		while (bo->proxy)
			bo = bo->proxy;
		if (bo->exec)
			continue;	/* already in this batch: no new cost */

		if (needs_batch_flush(kgem, bo)) {
			va_end(ap);
			return false;
		}

		num_pages += num_pages(bo);
		num_exec++;

		flush |= bo->flush;
		busy &= bo->rq != NULL;
	}
	va_end(ap);

	DBG(("%s: num_pages=+%d, num_exec=+%d\n",
	     __FUNCTION__, num_pages, num_exec));

	if (!num_pages)
		return true;

	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
		DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
		     kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
		return false;
	}

	if (num_pages + kgem->aperture > kgem->aperture_high) {
		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high));
		return aperture_check(kgem, num_pages);
	}

	if (busy)
		return true;

	return kgem_flush(kgem, flush);
}

/* Check whether @bo (after proxy resolution) fits into the current batch
 * without exceeding the exec-slot, fence-register or aperture limits.
 * On gen < 4 an X-tiled bo consumes a fence register and fenceable
 * aperture, which is estimated conservatively here.  Returns false when
 * the caller must submit the batch before using @bo.
 */
bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt);
	while (bo->proxy)
		bo = bo->proxy;
	assert(bo->refcnt);

	if (bo->exec) {
		/* Already in the batch: the only new cost would be a
		 * fence register for a tiled bo on gen < 4. */
		if (kgem->gen < 040 &&
		    bo->tiling != I915_TILING_NONE &&
		    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
			uint32_t size;

			assert(bo->tiling == I915_TILING_X);

			if (kgem->nfence >= kgem->fence_max)
				return false;

			/* Heuristic: if fenced usage is already high and
			 * the ring is idle, prefer flushing now rather
			 * than over-committing the fenceable aperture. */
			if (kgem->aperture_fenced) {
				size = 3*kgem->aperture_fenced;
				if (kgem->aperture_total == kgem->aperture_mappable)
					size += kgem->aperture;
				if (size > kgem->aperture_fenceable &&
				    kgem_ring_is_idle(kgem, kgem->ring)) {
					DBG(("%s: opportunistic fence flush\n",
					     __FUNCTION__));
					return false;
				}
			}

			size = kgem_bo_fenced_size(kgem, bo);
			if (size > kgem->aperture_max_fence)
				kgem->aperture_max_fence = size;
			size += kgem->aperture_fenced;
			/* NOTE(review): gen < 033 doubles the estimate to
			 * the largest fence — presumably a fence alignment
			 * or pairing constraint on gen2; confirm. */
			if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
				size = 2 * kgem->aperture_max_fence;
			if (kgem->aperture_total == kgem->aperture_mappable)
				size += kgem->aperture;
			if (size > kgem->aperture_fenceable) {
				DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
				     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
				return false;
			}
		}

		return true;
	}

	if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
		return false;

	if (needs_batch_flush(kgem, bo))
		return false;

	assert_tiling(kgem, bo);
	if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
		uint32_t size;

		assert(bo->tiling == I915_TILING_X);

		if (kgem->nfence >= kgem->fence_max)
			return false;

		if (kgem->aperture_fenced) {
			size = 3*kgem->aperture_fenced;
			if (kgem->aperture_total == kgem->aperture_mappable)
				size += kgem->aperture;
			if (size > kgem->aperture_fenceable &&
			    kgem_ring_is_idle(kgem, kgem->ring)) {
				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
				return false;
			}
		}

		size = kgem_bo_fenced_size(kgem, bo);
		if (size > kgem->aperture_max_fence)
			kgem->aperture_max_fence = size;
		size += kgem->aperture_fenced;
		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
			size = 2 * kgem->aperture_max_fence;
		if (kgem->aperture_total == kgem->aperture_mappable)
			size += kgem->aperture;
		if (size > kgem->aperture_fenceable) {
			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
			return false;
		}
	}

	if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high));
		return aperture_check(kgem, num_pages(bo));
	}

	if (bo->rq)
		return true;

	return kgem_flush(kgem, bo->flush);
}

/* As kgem_check_bo_fenced(), but for a NULL-terminated varargs list of bo:
 * accumulates the combined exec, fence and aperture demand before deciding
 * whether the batch must be flushed first. */
bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
{
	va_list ap;
	struct kgem_bo *bo;
	int num_fence = 0;
	int num_exec = 0;
	int num_pages = 0;
	int fenced_size = 0;
	bool flush = false;
	bool busy = true;

	va_start(ap, kgem);
	while ((bo = va_arg(ap, struct kgem_bo *))) {
		assert(bo->refcnt);
		while (bo->proxy)
			bo = bo->proxy;
		assert(bo->refcnt);
		if (bo->exec) {
			/* Already in the batch: only an extra fence (for a
			 * tiled bo on gen < 4 without one) costs anything. */
			if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
				continue;

			if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
				fenced_size += kgem_bo_fenced_size(kgem, bo);
				num_fence++;
			}

			continue;
		}

		if (needs_batch_flush(kgem, bo)) {
			va_end(ap);
			return false;
		}

		assert_tiling(kgem, bo);
		num_pages += num_pages(bo);
		num_exec++;
		if (kgem->gen < 040 && bo->tiling) {
			uint32_t size = kgem_bo_fenced_size(kgem, bo);
			if (size > kgem->aperture_max_fence)
				kgem->aperture_max_fence = size;
			fenced_size += size;
			num_fence++;
		}

		flush |= bo->flush;
		busy &= bo->rq != NULL;
	}
	va_end(ap);

	if (num_fence) {
		uint32_t size;

		if (kgem->nfence + num_fence > kgem->fence_max)
			return false;

		if (kgem->aperture_fenced) {
			size = 3*kgem->aperture_fenced;
			if (kgem->aperture_total == kgem->aperture_mappable)
				size +=
kgem->aperture;
			if (size > kgem->aperture_fenceable &&
			    kgem_ring_is_idle(kgem, kgem->ring)) {
				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
				return false;
			}
		}

		size = kgem->aperture_fenced;
		size += fenced_size;
		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
			size = 2 * kgem->aperture_max_fence;
		if (kgem->aperture_total == kgem->aperture_mappable)
			size += kgem->aperture;
		if (size > kgem->aperture_fenceable) {
			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
			return false;
		}
	}

	if (num_pages == 0)
		return true;

	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
		return false;

	if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) {
		DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, kgem->aperture_fenced, num_pages, kgem->aperture_high));
		return aperture_check(kgem, num_pages);
	}

	if (busy)
		return true;

	return kgem_flush(kgem, flush);
}

/* Append a relocation entry (32bit presumed addresses, pre-gen8) for the
 * batch dword at @pos targeting @bo+@delta.  Resolves proxies, adds the
 * target bo to the batch on first use, accounts fence usage on gen < 4,
 * marks the target dirty when a write domain is requested, and returns
 * the presumed address to write into the batch.
 */
uint32_t kgem_add_reloc(struct kgem *kgem,
			uint32_t pos,
			struct kgem_bo *bo,
			uint32_t read_write_domain,
			uint32_t delta)
{
	int index;

	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
	     __FUNCTION__, bo ?
	     bo->handle : 0, pos, delta, read_write_domain));

	assert(kgem->gen < 0100);
	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);

	index = kgem->nreloc++;
	assert(index < ARRAY_SIZE(kgem->reloc));
	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
	if (bo) {
		assert(kgem->mode != KGEM_NONE);
		assert(bo->refcnt);
		/* Walk the proxy chain, folding each sub-offset into delta
		 * and attaching every proxy to the current request. */
		while (bo->proxy) {
			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
			     __FUNCTION__, bo->delta, bo->handle));
			delta += bo->delta;
			assert(bo->handle == bo->proxy->handle);
			/* need to release the cache upon batch submit */
			if (bo->exec == NULL) {
				list_move_tail(&bo->request,
					       &kgem->next_request->buffers);
				bo->rq = MAKE_REQUEST(kgem->next_request,
						      kgem->ring);
				bo->exec = &_kgem_dummy_exec;
				bo->domain = DOMAIN_GPU;
			}

			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
				__kgem_bo_mark_dirty(bo);

			bo = bo->proxy;
			assert(bo->refcnt);
		}
		assert(bo->refcnt);

		if (bo->exec == NULL)
			kgem_add_bo(kgem, bo);
		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
		assert(RQ_RING(bo->rq) == kgem->ring);

		/* Reserve a fence register for tiled targets on gen < 4. */
		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
			if (bo->tiling &&
			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
				assert(bo->tiling == I915_TILING_X);
				assert(kgem->nfence < kgem->fence_max);
				kgem->aperture_fenced +=
					kgem_bo_fenced_size(kgem, bo);
				kgem->nfence++;
			}
			bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
		}

		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = bo->target_handle;
		kgem->reloc[index].presumed_offset = bo->presumed_offset;

		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
			assert(!bo->snoop || kgem->can_blt_cpu);
			__kgem_bo_mark_dirty(bo);
		}

		delta += bo->presumed_offset;
	} else {
		/* Self-relocation into the batch itself; recorded so the
		 * offsets can be fixed up at submit. */
		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = ~0U;
		kgem->reloc[index].presumed_offset = 0;
		if (kgem->nreloc__self < 256)
			kgem->reloc__self[kgem->nreloc__self++] = index;
	}
	kgem->reloc[index].read_domains = read_write_domain >> 16;
	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;

	return delta;
}

/* 64bit variant of kgem_add_reloc() for gen8+ (full 64bit presumed
 * addresses); same bookkeeping minus the fence handling. */
uint64_t kgem_add_reloc64(struct kgem *kgem,
			  uint32_t pos,
			  struct kgem_bo *bo,
			  uint32_t read_write_domain,
			  uint64_t delta)
{
	int index;

	DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
	     __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));

	assert(kgem->gen >= 0100);
	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);

	index = kgem->nreloc++;
	assert(index < ARRAY_SIZE(kgem->reloc));
	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
	if (bo) {
		assert(kgem->mode != KGEM_NONE);
		assert(bo->refcnt);
		while (bo->proxy) {
			DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
			     __FUNCTION__, (long)bo->delta, bo->handle));
			delta += bo->delta;
			assert(bo->handle == bo->proxy->handle);
			/* need to release the cache upon batch submit */
			if (bo->exec == NULL) {
				list_move_tail(&bo->request,
					       &kgem->next_request->buffers);
				bo->rq = MAKE_REQUEST(kgem->next_request,
						      kgem->ring);
				bo->exec = &_kgem_dummy_exec;
				bo->domain = DOMAIN_GPU;
			}

			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
				__kgem_bo_mark_dirty(bo);

			bo = bo->proxy;
			assert(bo->refcnt);
		}
		assert(bo->refcnt);

		if (bo->exec == NULL)
			kgem_add_bo(kgem, bo);
		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
		assert(RQ_RING(bo->rq) == kgem->ring);

		/* NOTE(review): %d does not match the uint64_t delta here;
		 * DBG-only, compiled out of normal builds. */
		DBG(("%s[%d] = (delta=%d, target handle=%d, presumed=%llx)\n",
		     __FUNCTION__, index, delta, bo->target_handle, (long long)bo->presumed_offset));
kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = bo->target_handle;
		kgem->reloc[index].presumed_offset = bo->presumed_offset;

		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
			assert(!bo->snoop || kgem->can_blt_cpu);
			__kgem_bo_mark_dirty(bo);
		}

		delta += bo->presumed_offset;
	} else {
		/* Self-relocation into the batch itself. */
		DBG(("%s[%d] = (delta=%d, target handle=batch)\n",
		     __FUNCTION__, index, delta));
		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = ~0U;
		kgem->reloc[index].presumed_offset = 0;
		if (kgem->nreloc__self < 256)
			kgem->reloc__self[kgem->nreloc__self++] = index;
	}
	kgem->reloc[index].read_domains = read_write_domain >> 16;
	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;

	return delta;
}

/* Evict idle mmappings of inactive bo so the process' open-vma count stays
 * within bounds.  @type selects the CPU or GTT/WC cache; iteration cycles
 * through the inactive buckets for fairness.  @bucket is currently only
 * reported in the debug trace.
 */
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
	int i, j;

	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
	     __FUNCTION__, type, kgem->vma[type].count, bucket));
	if (kgem->vma[type].count <= 0)
		return;

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	/* vma are limited on a per-process basis to around 64k.
	 * This includes all malloc arenas as well as other file
	 * mappings. In order to be fair and not hog the cache,
	 * and more importantly not to exhaust that limit and to
	 * start failing mappings, we keep our own number of open
	 * vma to within a conservative value.
	 */
	i = 0;
	while (kgem->vma[type].count > 0) {
		struct kgem_bo *bo = NULL;

		/* Pick the oldest mapping from the first non-empty bucket,
		 * starting where the previous scan left off. */
		for (j = 0;
		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
		     j++) {
			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
			if (!list_is_empty(head))
				bo = list_last_entry(head, struct kgem_bo, vma);
		}
		if (bo == NULL)
			break;

		DBG(("%s: discarding inactive %s vma cache for %d\n",
		     __FUNCTION__, type ? "CPU" : "GTT", bo->handle));

		assert(bo->rq == NULL);
		if (type) {
			VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
			munmap(MAP(bo->map__cpu), bytes(bo));
			bo->map__cpu = NULL;
		} else {
			if (bo->map__wc) {
				VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
				munmap(bo->map__wc, bytes(bo));
				bo->map__wc = NULL;
			}
			if (bo->map__gtt) {
				munmap(bo->map__gtt, bytes(bo));
				bo->map__gtt = NULL;
			}
		}

		list_del(&bo->vma);
		kgem->vma[type].count--;

		/* An unmapped bo that the kernel refuses to mark purgeable
		 * is already lost; free it immediately. */
		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
			DBG(("%s: freeing unpurgeable old mapping\n",
			     __FUNCTION__));
			kgem_bo_free(kgem, bo);
		}
	}
}

/* Map @bo through the aperture: a GTT (detiling) mapping for tiled bo or
 * when WC mmaps are unavailable, otherwise a linear WC mapping.  Reuses a
 * cached mapping when one exists. */
static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));

	assert(bo->proxy == NULL);
	assert(!bo->snoop);

	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

	if (bo->tiling || !kgem->has_wc_mmap) {
		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);

		ptr = bo->map__gtt;
		if (ptr == NULL)
			ptr = __kgem_bo_map__gtt(kgem, bo);
	} else {
		ptr = bo->map__wc;
		if (ptr == NULL)
			ptr = __kgem_bo_map__wc(kgem, bo);
	}

	return ptr;
}

/* Map @bo without performing any domain transition (no GPU sync); prefers
 * a coherent CPU map on LLC machines for untiled, non-scanout bo. */
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		return kgem_bo_map__cpu(kgem, bo);
	}

	return __kgem_bo_map__gtt_or_wc(kgem, bo);
}

/* Map @bo for immediate CPU access, moving it into the GTT domain (or the
 * CPU domain for untiled bo on LLC / already CPU-coherent bo) and waiting
 * for any outstanding GPU work.  Detects GPU hangs via the set-domain
 * ioctl and throttles in response. */
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert(bo->exec == NULL);
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		ptr = kgem_bo_map__cpu(kgem, bo);
		if (ptr)
			kgem_bo_sync__cpu(kgem, bo);
		return ptr;
	}

	ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);

	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));

		/* XXX use PROT_READ to avoid the write flush?
		 */

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_GTT;
		bo->gtt_dirty = true;
	}

	return ptr;
}

/* Map @bo through the GTT/WC aperture without any domain management. */
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->list));
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	return __kgem_bo_map__gtt_or_wc(kgem, bo);
}

/* Return (creating on demand) a write-combining mapping of @bo; no domain
 * management is performed. */
void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));

	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->list));
	assert_tiling(kgem, bo);
	assert(!bo->purged || bo->reusable);

	if (bo->map__wc)
		return bo->map__wc;

	return __kgem_bo_map__wc(kgem, bo);
}

/* Return (creating on demand) a cacheable CPU mapping of @bo; callers are
 * responsible for the CPU-domain transition (kgem_bo_sync__cpu). */
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
	     __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
	assert(!bo->purged);
	assert(list_is_empty(&bo->list));
	assert(bo->proxy == NULL);
	assert_tiling(kgem, bo);

	if (bo->map__cpu)
		return MAP(bo->map__cpu);

	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));

	return __kgem_bo_map__cpu(kgem, bo);
}

/* Debug helper: map @bo by whichever means is appropriate for its tiling,
 * with no domain tracking or synchronisation at all. */
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	if (bo->tiling == I915_TILING_NONE && kgem->has_llc) {
		ptr = MAP(bo->map__cpu);
		if (ptr == NULL)
			ptr = __kgem_bo_map__cpu(kgem, bo);
	} else if (bo->tiling || !kgem->has_wc_mmap) {
		ptr = bo->map__gtt;
		if (ptr == NULL)
			ptr = __kgem_bo_map__gtt(kgem, bo);
	} else {
		ptr = bo->map__wc;
		if (ptr == NULL)
			ptr = __kgem_bo_map__wc(kgem, bo);
	}

	return ptr;
}


/* Export @bo as a global GEM (flink) name for sharing with other clients;
 * returns 0 on failure. */
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_gem_flink flink;

	VG_CLEAR(flink);
	flink.handle = bo->handle;
	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink))
		return 0;

	DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n",
	     __FUNCTION__, flink.handle, flink.name));

	/* Ordinarily giving the name away makes the buffer non-reusable.
	 * However, we track the lifetime of all clients and their hold
	 * on the buffer, and *presuming* they do not pass it on to a third
	 * party, we track the lifetime accurately.
 */
	bo->reusable = false;

	kgem_bo_unclean(kgem, bo);

	return flink.name;
}

/* Wrap an existing user allocation @ptr (length @size) in a userptr bo.
 * The range is page-aligned internally; if @ptr is not page-aligned, a
 * proxy bo at the sub-page offset is returned instead.  Returns NULL when
 * userptr is unsupported or the kernel rejects the range. */
struct kgem_bo *kgem_create_map(struct kgem *kgem,
				void *ptr, uint32_t size,
				bool read_only)
{
	struct kgem_bo *bo;
	uintptr_t first_page, last_page;
	uint32_t handle;

	assert(MAP(ptr) == ptr);

	DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__,
	     ptr, size, read_only, kgem->has_userptr));
	if (!kgem->has_userptr)
		return NULL;

	/* Round the range out to whole pages for the kernel. */
	first_page = (uintptr_t)ptr;
	last_page = first_page + size + PAGE_SIZE - 1;

	first_page &= ~(PAGE_SIZE-1);
	last_page &= ~(PAGE_SIZE-1);
	assert(last_page > first_page);

	handle = gem_userptr(kgem->fd,
			     (void *)first_page, last_page-first_page,
			     read_only);
	if (handle == 0) {
		DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno));
		return NULL;
	}

	bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
	if (bo == NULL) {
		gem_close(kgem->fd, handle);
		return NULL;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->snoop = !kgem->has_llc;
	debug_alloc__bo(kgem, bo);

	if (first_page != (uintptr_t)ptr) {
		struct kgem_bo *proxy;

		/* Hand back a proxy covering just [ptr, ptr+size); the
		 * proxy keeps the page-aligned parent alive. */
		proxy = kgem_create_proxy(kgem, bo,
					  (uintptr_t)ptr - first_page, size);
		kgem_bo_destroy(kgem, bo);
		if (proxy == NULL)
			return NULL;

		bo = proxy;
	}

	bo->map__cpu = MAKE_USER_MAP(ptr);

	DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
	     __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
	return bo;
}

/* Move @bo into the CPU read/write domain, submitting and waiting for any
 * outstanding GPU work first.  Detects GPU hangs and throttles. */
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout);
	assert_tiling(kgem, bo);

	kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	while (bo->proxy)
		bo = bo->proxy;
	assert(!bo->purged);

	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;

		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_CPU;
	}
}

/* As kgem_bo_sync__cpu(), but submits only when writing (or a flush is
 * pending) and requests a CPU write domain only when @write is set; a
 * read-only sync leaves the bo in DOMAIN_NONE. */
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout || !write);
	assert_tiling(kgem, bo);

	if (write || bo->needs_flush)
		kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	assert(bo->refcnt);
	while (bo->proxy)
		bo = bo->proxy;
	assert(bo->refcnt);
	assert(!bo->purged);

	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;

		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		if (write) {
			kgem_bo_retire(kgem, bo);
			bo->domain = DOMAIN_CPU;
		} else {
			if (bo->exec == NULL)
				kgem_bo_maybe_retire(kgem, bo);
			bo->domain = DOMAIN_NONE;
		}
	}
}

/* Move @bo into the GTT read/write domain, submitting and waiting for any
 * outstanding GPU work first. */
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->refcnt);
	assert(bo->proxy == NULL);
	assert_tiling(kgem, bo);

	kgem_bo_submit(kgem, bo);

	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;

		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_GTT;
		bo->gtt_dirty = true;
	}
}

/* Clear the gpu_dirty flag on the leading dirty bo of the next request.
 * NOTE(review): iteration stops at the first clean bo, which relies on
 * dirty buffers being kept at the head of the request list — confirm
 * against __kgem_bo_mark_dirty's insertion policy. */
void kgem_clear_dirty(struct kgem *kgem)
{
	struct list * const buffers = &kgem->next_request->buffers;
	struct kgem_bo *bo;

	list_for_each_entry(bo, buffers, request) {
		if (!bo->gpu_dirty)
			break;

		bo->gpu_dirty = false;
	}
}

/* Create a proxy bo aliasing [offset, offset+length) of @target, sharing
 * its GEM handle.  The proxy holds a reference on @target and inherits
 * its tiling, pitch, flush and snoop state; it is never reusable. */
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
				  struct kgem_bo *target,
				  int offset, int length)
{
	struct kgem_bo *bo;

	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
	     __FUNCTION__, target->handle, target->proxy ?
target->proxy->delta : -1,
	     offset, length, target->io));

	bo = __kgem_bo_alloc(target->handle, length);
	if (bo == NULL)
		return NULL;

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->reusable = false;
	bo->size.bytes = length;

	bo->io = target->io && target->proxy == NULL;
	bo->gpu_dirty = target->gpu_dirty;
	bo->tiling = target->tiling;
	bo->pitch = target->pitch;
	bo->flush = target->flush;
	bo->snoop = target->snoop;

	assert(!bo->scanout);
	bo->proxy = kgem_bo_reference(target);
	bo->delta = offset;

	/* Proxies are only tracked for busyness on the current rq */
	if (target->exec && !bo->io) {
		assert(RQ(target->rq) == kgem->next_request);
		list_move_tail(&bo->request, &kgem->next_request->buffers);
		bo->exec = &_kgem_dummy_exec;
		bo->rq = target->rq;
	}

	return bo;
}

/* Allocate an empty kgem_buffer shell, defaulting to a CPU mmapping and
 * no backing storage yet. */
static struct kgem_buffer *
buffer_alloc(void)
{
	struct kgem_buffer *bo;

	bo = malloc(sizeof(*bo));
	if (bo == NULL)
		return NULL;

	bo->mem = NULL;
	bo->need_io = false;
	bo->mmapped = MMAPPED_CPU;

	return bo;
}

/* Allocate a kgem_buffer with inline (malloc'ed) storage for @num_pages
 * pages; used for pwrite-style uploads rather than mmapped access. */
static struct kgem_buffer *
buffer_alloc_with_data(int num_pages)
{
	struct kgem_buffer *bo;

	bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
	if (bo == NULL)
		return NULL;

	/* Point mem at the aligned payload following the header. */
	bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
	bo->mmapped = false;
	return bo;
}

/* Snoopable (CPU-cached) buffers are used for read-backs on all gen, but
 * only for read-only transfers on gen >= 3. */
static inline bool
use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
{
	if ((flags & KGEM_BUFFER_WRITE) == 0)
		return kgem->gen >= 030;

	return true;
}

/* Take ownership of @old, an existing kgem_bo, as the base of buffer @bo;
 * @old's struct is copied into bo->base and then freed. */
static void
init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
{
	DBG(("%s: reusing handle=%d for buffer\n",
	     __FUNCTION__, old->handle));

	assert(old->proxy == NULL);
	assert(list_is_empty(&old->list));

	memcpy(&bo->base, old, sizeof(*old));
	if (old->rq)
		list_replace(&old->request, &bo->base.request);
	else
		list_init(&bo->base.request);
	list_replace(&old->vma, &bo->base.vma);
	list_init(&bo->base.list);
	free(old);

	assert(bo->base.tiling == I915_TILING_NONE);

	bo->base.refcnt = 1;
}

/* Look for a cached snoopable bo of at least @alloc pages and wrap it in a
 * kgem_buffer with a CPU mapping; returns NULL on a cache miss or if the
 * mapping fails. */
static struct kgem_buffer *
search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	struct kgem_bo *old;

	old = search_snoop_cache(kgem, alloc, 0);
	if (old) {
		if (!old->io) {
			bo = buffer_alloc();
			if (bo == NULL)
				return NULL;

			init_buffer_from_bo(bo, old);
		} else {
			/* Already a kgem_buffer: just revive it. */
			bo = (struct kgem_buffer *)old;
			bo->base.refcnt = 1;
		}

		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));

		assert(bo->base.snoop);
		assert(bo->base.tiling == I915_TILING_NONE);
		assert(num_pages(&bo->base) >= alloc);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL) {
			bo->base.refcnt = 0;
			kgem_bo_free(kgem, &bo->base);
			bo = NULL;
		}

		return bo;
	}

	return NULL;
}

/* Create a fresh snoopable buffer of @alloc pages, trying in order:
 * a plain CPU-mapped bo (coherent on LLC), a set-caching(SNOOPED) bo,
 * and finally a userptr wrap of posix_memalign'ed memory. */
static struct kgem_buffer *
create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	uint32_t handle;

	if (kgem->has_llc) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		old = search_linear_cache(kgem, alloc,
					  CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			__kgem_bo_init(&bo->base, handle, alloc);
			debug_alloc__bo(kgem, &bo->base);
			DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem != NULL)
			return bo;

		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_caching) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		old = search_linear_cache(kgem, alloc,
					  CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			__kgem_bo_init(&bo->base, handle, alloc);
			debug_alloc__bo(kgem, &bo->base);
			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == MMAPPED_CPU);
		assert(bo->need_io == false);
		assert(!__kgem_busy(kgem, bo->base.handle));

		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
			goto free_caching;

		bo->base.snoop = true;

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL)
			goto free_caching;

		return bo;

free_caching:
		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_userptr) {
		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
			free(bo);
			return NULL;
		}

		handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
		if (handle == 0) {
			free(bo->mem);
			free(bo);
6827 return NULL; 6828 } 6829 6830 __kgem_bo_init(&bo->base, handle, alloc); 6831 debug_alloc__bo(kgem, &bo->base); 6832 DBG(("%s: created snoop handle=%d for buffer\n", 6833 __FUNCTION__, bo->base.handle)); 6834 6835 assert(bo->mmapped == MMAPPED_CPU); 6836 assert(bo->need_io == false); 6837 6838 bo->base.refcnt = 1; 6839 bo->base.snoop = true; 6840 bo->base.map__cpu = MAKE_USER_MAP(bo->mem); 6841 6842 return bo; 6843 } 6844 6845 return NULL; 6846} 6847 6848struct kgem_bo *kgem_create_buffer(struct kgem *kgem, 6849 uint32_t size, uint32_t flags, 6850 void **ret) 6851{ 6852 struct kgem_buffer *bo; 6853 unsigned offset, alloc; 6854 struct kgem_bo *old; 6855 6856 DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n", 6857 __FUNCTION__, size, flags, 6858 !!(flags & KGEM_BUFFER_WRITE), 6859 !!(flags & KGEM_BUFFER_INPLACE), 6860 !!(flags & KGEM_BUFFER_LAST))); 6861 assert(size); 6862 /* we should never be asked to create anything TOO large */ 6863 assert(size <= kgem->max_object_size); 6864 6865#if !DBG_NO_UPLOAD_CACHE 6866 list_for_each_entry(bo, &kgem->batch_buffers, base.list) { 6867 assert(bo->base.io); 6868 assert(bo->base.refcnt >= 1); 6869 6870 /* We can reuse any write buffer which we can fit */ 6871 if (flags == KGEM_BUFFER_LAST && 6872 bo->write == KGEM_BUFFER_WRITE && 6873 bo->base.refcnt == 1 && 6874 bo->mmapped == MMAPPED_NONE && 6875 size <= bytes(&bo->base)) { 6876 DBG(("%s: reusing write buffer for read of %d bytes? 
used=%d, total=%d\n", 6877 __FUNCTION__, size, bo->used, bytes(&bo->base))); 6878 gem_write__cachealigned(kgem->fd, bo->base.handle, 6879 0, bo->used, bo->mem); 6880 assert(list_is_empty(&bo->base.vma)); 6881 bo->need_io = 0; 6882 bo->write = 0; 6883 offset = 0; 6884 bo->used = size; 6885 goto done; 6886 } 6887 6888 if (flags & KGEM_BUFFER_WRITE) { 6889 if ((bo->write & KGEM_BUFFER_WRITE) == 0 || 6890 (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) && 6891 !bo->base.snoop)) { 6892 DBG(("%s: skip write %x buffer, need %x\n", 6893 __FUNCTION__, bo->write, flags)); 6894 continue; 6895 } 6896 assert(bo->mmapped || bo->need_io); 6897 } else { 6898 if (bo->write & KGEM_BUFFER_WRITE) { 6899 DBG(("%s: skip write %x buffer, need %x\n", 6900 __FUNCTION__, bo->write, flags)); 6901 continue; 6902 } 6903 } 6904 6905 if (bo->used + size <= bytes(&bo->base)) { 6906 DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", 6907 __FUNCTION__, bo->used, size, bytes(&bo->base))); 6908 offset = bo->used; 6909 bo->used += size; 6910 goto done; 6911 } 6912 } 6913 6914 if (flags & KGEM_BUFFER_WRITE) { 6915 list_for_each_entry(bo, &kgem->active_buffers, base.list) { 6916 assert(bo->base.io); 6917 assert(bo->base.refcnt >= 1); 6918 assert(bo->base.exec == NULL); 6919 assert(bo->mmapped); 6920 assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop); 6921 6922 if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) { 6923 DBG(("%s: skip write %x buffer, need %x\n", 6924 __FUNCTION__, bo->write, flags)); 6925 continue; 6926 } 6927 6928 if (bo->used + size <= bytes(&bo->base)) { 6929 DBG(("%s: reusing buffer? 
used=%d + size=%d, total=%d\n", 6930 __FUNCTION__, bo->used, size, bytes(&bo->base))); 6931 offset = bo->used; 6932 bo->used += size; 6933 list_move(&bo->base.list, &kgem->batch_buffers); 6934 goto done; 6935 } 6936 6937 if (bo->base.refcnt == 1 && 6938 size <= bytes(&bo->base) && 6939 (bo->base.rq == NULL || 6940 !__kgem_busy(kgem, bo->base.handle))) { 6941 DBG(("%s: reusing whole buffer? size=%d, total=%d\n", 6942 __FUNCTION__, size, bytes(&bo->base))); 6943 __kgem_bo_clear_busy(&bo->base); 6944 assert(list_is_empty(&bo->base.vma)); 6945 6946 switch (bo->mmapped) { 6947 case MMAPPED_CPU: 6948 kgem_bo_sync__cpu(kgem, &bo->base); 6949 break; 6950 case MMAPPED_GTT: 6951 kgem_bo_sync__gtt(kgem, &bo->base); 6952 break; 6953 } 6954 6955 offset = 0; 6956 bo->used = size; 6957 list_move(&bo->base.list, &kgem->batch_buffers); 6958 goto done; 6959 } 6960 } 6961 } 6962#endif 6963 6964#if !DBG_NO_MAP_UPLOAD 6965 /* Be a little more generous and hope to hold fewer mmappings */ 6966 alloc = ALIGN(2*size, kgem->buffer_size); 6967 if (alloc > MAX_CACHE_SIZE) 6968 alloc = ALIGN(size, kgem->buffer_size); 6969 if (alloc > MAX_CACHE_SIZE) 6970 alloc = PAGE_ALIGN(size); 6971 assert(alloc); 6972 6973 alloc /= PAGE_SIZE; 6974 if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap) 6975 flags &= ~KGEM_BUFFER_INPLACE; 6976 6977 if (kgem->has_llc && 6978 (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { 6979 bo = buffer_alloc(); 6980 if (bo == NULL) 6981 goto skip_llc; 6982 6983 old = NULL; 6984 if ((flags & KGEM_BUFFER_WRITE) == 0) 6985 old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP); 6986 if (old == NULL) 6987 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP); 6988 if (old == NULL) 6989 old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP); 6990 if (old) { 6991 DBG(("%s: found LLC handle=%d for buffer\n", 6992 __FUNCTION__, old->handle)); 6993 6994 init_buffer_from_bo(bo, old); 6995 } else { 6996 
uint32_t handle = gem_create(kgem->fd, alloc); 6997 if (handle == 0) { 6998 free(bo); 6999 goto skip_llc; 7000 } 7001 __kgem_bo_init(&bo->base, handle, alloc); 7002 debug_alloc__bo(kgem, &bo->base); 7003 DBG(("%s: created LLC handle=%d for buffer\n", 7004 __FUNCTION__, bo->base.handle)); 7005 } 7006 7007 assert(bo->mmapped); 7008 assert(!bo->need_io); 7009 7010 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 7011 if (bo->mem) { 7012 if (flags & KGEM_BUFFER_WRITE) 7013 kgem_bo_sync__cpu(kgem, &bo->base); 7014 flags &= ~KGEM_BUFFER_INPLACE; 7015 goto init; 7016 } else { 7017 bo->base.refcnt = 0; /* for valgrind */ 7018 kgem_bo_free(kgem, &bo->base); 7019 } 7020 } 7021skip_llc: 7022 7023 if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { 7024 /* The issue with using a GTT upload buffer is that we may 7025 * cause eviction-stalls in order to free up some GTT space. 7026 * An is-mappable? ioctl could help us detect when we are 7027 * about to block, or some per-page magic in the kernel. 7028 * 7029 * XXX This is especially noticeable on memory constrained 7030 * devices like gen2 or with relatively slow gpu like i3. 
7031 */ 7032 DBG(("%s: searching for an inactive GTT map for upload\n", 7033 __FUNCTION__)); 7034 old = search_linear_cache(kgem, alloc, 7035 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); 7036#if HAVE_I915_GEM_BUFFER_INFO 7037 if (old) { 7038 struct drm_i915_gem_buffer_info info; 7039 7040 /* An example of such a non-blocking ioctl might work */ 7041 7042 VG_CLEAR(info); 7043 info.handle = handle; 7044 if (do_ioctl(kgem->fd, 7045 DRM_IOCTL_I915_GEM_BUFFER_INFO, 7046 &fino) == 0) { 7047 old->presumed_offset = info.addr; 7048 if ((info.flags & I915_GEM_MAPPABLE) == 0) { 7049 kgem_bo_move_to_inactive(kgem, old); 7050 old = NULL; 7051 } 7052 } 7053 } 7054#endif 7055 if (old == NULL) 7056 old = search_linear_cache(kgem, NUM_PAGES(size), 7057 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); 7058 if (old == NULL) { 7059 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); 7060 if (old && !kgem_bo_can_map(kgem, old)) { 7061 _kgem_bo_destroy(kgem, old); 7062 old = NULL; 7063 } 7064 } 7065 if (old) { 7066 DBG(("%s: reusing handle=%d for buffer\n", 7067 __FUNCTION__, old->handle)); 7068 assert(kgem_bo_can_map(kgem, old)); 7069 assert(!old->snoop); 7070 assert(old->rq == NULL); 7071 7072 bo = buffer_alloc(); 7073 if (bo == NULL) 7074 return NULL; 7075 7076 init_buffer_from_bo(bo, old); 7077 assert(num_pages(&bo->base) >= NUM_PAGES(size)); 7078 7079 assert(bo->mmapped); 7080 assert(bo->base.refcnt == 1); 7081 7082 bo->mem = kgem_bo_map(kgem, &bo->base); 7083 if (bo->mem) { 7084 if (bo->mem == MAP(bo->base.map__cpu)) 7085 flags &= ~KGEM_BUFFER_INPLACE; 7086 else 7087 bo->mmapped = MMAPPED_GTT; 7088 goto init; 7089 } else { 7090 bo->base.refcnt = 0; 7091 kgem_bo_free(kgem, &bo->base); 7092 } 7093 } 7094 } 7095#else 7096 flags &= ~KGEM_BUFFER_INPLACE; 7097#endif 7098 /* Be more parsimonious with pwrite/pread/cacheable buffers */ 7099 if ((flags & KGEM_BUFFER_INPLACE) == 0) 7100 alloc = NUM_PAGES(size); 7101 7102 if (use_snoopable_buffer(kgem, flags)) { 7103 bo = 
search_snoopable_buffer(kgem, alloc); 7104 if (bo) { 7105 if (flags & KGEM_BUFFER_WRITE) 7106 kgem_bo_sync__cpu(kgem, &bo->base); 7107 flags &= ~KGEM_BUFFER_INPLACE; 7108 goto init; 7109 } 7110 7111 if ((flags & KGEM_BUFFER_INPLACE) == 0) { 7112 bo = create_snoopable_buffer(kgem, alloc); 7113 if (bo) 7114 goto init; 7115 } 7116 } 7117 7118 flags &= ~KGEM_BUFFER_INPLACE; 7119 7120 old = NULL; 7121 if ((flags & KGEM_BUFFER_WRITE) == 0) 7122 old = search_linear_cache(kgem, alloc, 0); 7123 if (old == NULL) 7124 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); 7125 if (old) { 7126 DBG(("%s: reusing ordinary handle %d for io\n", 7127 __FUNCTION__, old->handle)); 7128 bo = buffer_alloc_with_data(num_pages(old)); 7129 if (bo == NULL) 7130 return NULL; 7131 7132 init_buffer_from_bo(bo, old); 7133 bo->need_io = flags & KGEM_BUFFER_WRITE; 7134 } else { 7135 unsigned hint; 7136 7137 if (use_snoopable_buffer(kgem, flags)) { 7138 bo = create_snoopable_buffer(kgem, alloc); 7139 if (bo) 7140 goto init; 7141 } 7142 7143 bo = buffer_alloc(); 7144 if (bo == NULL) 7145 return NULL; 7146 7147 hint = CREATE_INACTIVE; 7148 if (flags & KGEM_BUFFER_WRITE) 7149 hint |= CREATE_CPU_MAP; 7150 old = search_linear_cache(kgem, alloc, hint); 7151 if (old) { 7152 DBG(("%s: reusing handle=%d for buffer\n", 7153 __FUNCTION__, old->handle)); 7154 7155 init_buffer_from_bo(bo, old); 7156 } else { 7157 uint32_t handle = gem_create(kgem->fd, alloc); 7158 if (handle == 0) { 7159 free(bo); 7160 return NULL; 7161 } 7162 7163 DBG(("%s: created handle=%d for buffer\n", 7164 __FUNCTION__, handle)); 7165 7166 __kgem_bo_init(&bo->base, handle, alloc); 7167 debug_alloc__bo(kgem, &bo->base); 7168 } 7169 7170 assert(bo->mmapped); 7171 assert(!bo->need_io); 7172 assert(bo->base.refcnt == 1); 7173 7174 if (flags & KGEM_BUFFER_WRITE) { 7175 bo->mem = kgem_bo_map__cpu(kgem, &bo->base); 7176 if (bo->mem != NULL) { 7177 kgem_bo_sync__cpu(kgem, &bo->base); 7178 goto init; 7179 } 7180 } 7181 7182 DBG(("%s: failing 
back to new pwrite buffer\n", __FUNCTION__));
		/* CPU mapping failed: fall back to a malloc'ed staging
		 * buffer whose contents are uploaded with pwrite on submit.
		 */
		old = &bo->base;
		bo = buffer_alloc_with_data(num_pages(old));
		if (bo == NULL) {
			old->refcnt = 0; /* drop the bookkeeping ref before freeing */
			kgem_bo_free(kgem, old);
			return NULL;
		}

		init_buffer_from_bo(bo, old);

		assert(bo->mem);
		assert(!bo->mmapped);
		assert(bo->base.refcnt == 1);

		bo->need_io = flags & KGEM_BUFFER_WRITE;
	}
init:
	/* Common tail for a freshly prepared upload buffer: mark it as an
	 * io buffer, record the caller's size/write mode, and track it on
	 * the list of buffers belonging to the current batch.
	 */
	bo->base.io = true;
	assert(bo->base.refcnt == 1);
	assert(num_pages(&bo->base) >= NUM_PAGES(size));
	assert(!bo->need_io || !bo->base.needs_flush);
	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
	assert(bo->mem);
	assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
	assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);

	bo->used = size;
	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
	offset = 0;

	assert(list_is_empty(&bo->base.list));
	list_add(&bo->base.list, &kgem->batch_buffers);

	DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
	     __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));

done:
	/* Round the watermark up so the next sub-allocation stays aligned */
	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
	assert(bo->used && bo->used <= bytes(&bo->base));
	assert(bo->mem);
	*ret = (char *)bo->mem + offset;
	/* Hand back a proxy bo so the caller sees only its sub-allocation */
	return kgem_create_proxy(kgem, &bo->base, offset, size);
}

/* Report whether the upload buffer backing this proxy was set up for
 * in-place (mmapped) writes rather than staged pwrite uploads.
 */
bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
{
	struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
	return bo->write & KGEM_BUFFER_WRITE_INPLACE;
}

/* Allocate a 2D upload buffer of width x height at the given bpp.
 * On success *ret points at the CPU-visible pixels and the returned
 * proxy bo has pitch/unique_id initialised for use as a blit source.
 * Returns NULL on allocation failure.
 */
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
				      int width, int height, int bpp,
				      uint32_t flags,
				      void **ret)
{
	struct kgem_bo *bo;
	int stride;

	assert(width > 0 && height > 0);
	assert(ret != NULL);
	/* Pad width to an even number of pixels, then align the stride;
	 * gen is encoded in octal here, so 0100 presumably means gen8+,
	 * which gets the wider 32-byte stride alignment — TODO confirm.
	 */
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, kgem->gen >= 0100 ? 32 : 4);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	/* Height is padded to a pair of rows; any excess is trimmed below */
	bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
	if (bo == NULL) {
		DBG(("%s: allocation failure for upload buffer\n",
		     __FUNCTION__));
		return NULL;
	}
	assert(*ret != NULL);
	assert(bo->proxy != NULL);

	if (height & 1) {
		struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
		int min;

		assert(io->used);

		/* Having padded this surface to ensure that accesses to
		 * the last pair of rows is valid, remove the padding so
		 * that it can be allocated to other pixmaps.
		 */
		min = bo->delta + height * stride;
		min = ALIGN(min, UPLOAD_ALIGNMENT);
		if (io->used != min) {
			DBG(("%s: trimming buffer from %d to %d\n",
			     __FUNCTION__, io->used, min));
			io->used = min;
		}
		bo->size.bytes -= stride;
	}

	bo->map__cpu = *ret;
	bo->pitch = stride;
	bo->unique_id = kgem_get_unique_id(kgem);
	return bo;
}

/* Copy a box of client pixels into a freshly allocated in-place upload
 * buffer and return the proxy bo, ready for use as a blit/render source.
 * Returns NULL if the size is unsupported or allocation/copy fails.
 */
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
					 const void *data,
					 const BoxRec *box,
					 int stride, int bpp)
{
	int width = box->x2 - box->x1;
	int height = box->y2 - box->y1;
	struct kgem_bo *bo;
	void *dst;

	if (!kgem_can_create_2d(kgem, width, height, bpp))
		return NULL;

	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));

	assert(data);
	assert(width > 0);
	assert(height > 0);
	assert(stride);
	assert(bpp);

	bo = kgem_create_buffer_2d(kgem,
				   width, height, bpp,
				   KGEM_BUFFER_WRITE_INPLACE, &dst);
	if (bo == NULL)
		return NULL;

	/* Perform the copy under sigtrap protection; presumably this
	 * catches faults on the mmapped destination — see sigtrap_get().
	 */
	if (sigtrap_get()) {
		kgem_bo_destroy(kgem, bo);
		return NULL;
	}

	memcpy_blt(data, dst, bpp,
		   stride, bo->pitch,
		   box->x1, box->y1,
		   0, 0,
		   width, height);

	sigtrap_put();
	return bo;
}

/* Register a back-pointer for a proxy bo: chain it onto its parent's vma
 * list and stash the caller's slot (reusing map__gtt for storage) so the
 * reference handed out via *ptr can later be found and dropped.
 */
void kgem_proxy_bo_attach(struct kgem_bo *bo,
			  struct kgem_bo **ptr)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->map__gtt == NULL);
	assert(bo->proxy);
	list_add(&bo->vma, &bo->proxy->vma);
	bo->map__gtt = ptr;
	*ptr = kgem_bo_reference(bo);
}

/* Make the CPU view of a readback buffer coherent after the GPU has
 * written it. The caller must already have submitted the batch. For a
 * mmapped buffer we move it to the appropriate read domain via
 * SET_DOMAIN (blocking on the GPU); otherwise we pread the bytes back.
 */
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
	struct kgem_buffer *bo;
	uint32_t offset = _bo->delta, length = _bo->size.bytes;

	/* We expect the caller to have already submitted the batch */
	assert(_bo->io);
	assert(_bo->exec == NULL);
	assert(_bo->rq == NULL);
	assert(_bo->proxy);

	/* Operate on the parent buffer backing the proxy */
	_bo = _bo->proxy;
	assert(_bo->proxy == NULL);
	assert(_bo->exec == NULL);

	bo = (struct kgem_buffer *)_bo;

	DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
	     offset, length, bo->base.snoop));

	if (bo->mmapped) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__,
		     bo->base.needs_flush,
		     bo->base.domain,
		     __kgem_busy(kgem, bo->base.handle)));

		assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);

		VG_CLEAR(set_domain);
		set_domain.handle = bo->base.handle;
		set_domain.write_domain = 0; /* read-only sync */
		set_domain.read_domains =
			bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;

		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}
	} else {
		/* Not mapped: read the bytes back through the kernel.
		 * On pread failure we bail without touching retire state.
		 */
		if (gem_read(kgem->fd,
			     bo->base.handle, (char *)bo->mem+offset,
			     offset, length))
			return;
	}
	kgem_bo_maybe_retire(kgem, &bo->base);
	bo->base.domain = DOMAIN_NONE;
}

/* Look up the cached surface-state offset for the given format on this
 * bo. The binding chain is terminated by the first entry with offset==0.
 * Returns 0 when no binding for the format has been recorded.
 */
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
	struct kgem_bo_binding *b;

	assert(bo->refcnt);

	for (b = &bo->binding; b && b->offset; b = b->next)
		if (format == b->format)
			return b->offset;

	return 0;
}

/* Record a (format, surface-state offset) binding for this bo. Reuses
 * the first free slot (offset==0) and re-terminates the chain by zeroing
 * the following entry; only allocates a new link when the chain is full.
 * Allocation failure is silently ignored — the binding is merely a cache.
 */
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
	struct kgem_bo_binding *b;

	assert(bo->refcnt);

	for (b = &bo->binding; b; b = b->next) {
		if (b->offset)
			continue;

		b->offset = offset;
		b->format = format;

		/* Keep the chain terminated at the entry after this one */
		if (b->next)
			b->next->offset = 0;

		return;
	}

	b = malloc(sizeof(*b));
	if (b) {
		b->next = bo->binding.next;
		b->format = format;
		b->offset = offset;
		bo->binding.next = b;
	}
}

/* Blit the contents of src into a new linear bo of the requested
 * geometry/pitch and return it (with a fresh reference), or NULL on
 * allocation/batch failure. Intended only for fixing up small linear
 * buffers, hence the linear-only allocation.
 */
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
		struct kgem_bo *src,
		uint32_t width,
		uint32_t height,
		uint32_t pitch,
		uint32_t bpp)
{
	struct kgem_bo *dst;
	uint32_t br00, br13;
	uint32_t handle;
	uint32_t size;
	uint32_t *b;

	DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n",
	     __FUNCTION__, src->handle, width, height, src->pitch, pitch));

	/* We only expect to be called to fixup small buffers, hence why
	 * we only attempt to allocate a linear bo.
	 */
	assert(src->tiling == I915_TILING_NONE);
	assert(kgem_bo_can_blt(kgem, src));

	size = height * pitch;
	size = NUM_PAGES(size);

	/* Prefer recycling a cached linear bo before creating a new one */
	dst = search_linear_cache(kgem, size, 0);
	if (dst == NULL)
		dst = search_linear_cache(kgem, size, CREATE_INACTIVE);
	if (dst == NULL) {
		handle = gem_create(kgem->fd, size);
		if (handle == 0)
			return NULL;

		dst = __kgem_bo_alloc(handle, size);
		if (dst == NULL) {
			gem_close(kgem->fd, handle);
			return NULL;
		}

		debug_alloc__bo(kgem, dst);
	}
	dst->pitch = pitch;
	dst->unique_id = kgem_get_unique_id(kgem);
	dst->refcnt = 1;
	assert(dst->tiling == I915_TILING_NONE);
	assert(kgem_bo_can_blt(kgem, dst));

	/* Ensure there is room in the batch for the copy; flush and retry
	 * once if not, giving up if the pair still cannot be fenced.
	 */
	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
			kgem_bo_destroy(kgem, dst);
			return NULL;
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	br00 = XY_SRC_COPY_BLT_CMD;
	br13 = pitch;
	pitch = src->pitch;
	/* NOTE(review): src->tiling is asserted NONE above, so this tiled
	 * branch looks unreachable here — kept as written.
	 */
	if (kgem->gen >= 040 && src->tiling) {
		br00 |= BLT_SRC_TILED;
		pitch >>= 2;
	}

	br13 |= 0xcc << 16; /* ROP 0xcc: straight source copy */
	switch (bpp) {
	default:
	case 32: br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits (25|24) */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	/* Emit XY_SRC_COPY_BLT; gen8+ (octal 0100) uses 64-bit relocs and
	 * a 10-dword packet, earlier gens a 32-bit 8-dword packet.
	 */
	b = kgem->batch + kgem->nbatch;
	if (kgem->gen >= 0100) {
		b[0] = br00 | 8;
		b[1] = br13;
		b[2] = 0;
		b[3] = height << 16 | width;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = 0;
		b[7] = pitch;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[0] = br00 | 6;
		b[1] = br13;
		b[2] = 0;
		b[3] = height << 16 | width;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = 0;
		b[6] = pitch;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}

	return dst;
}

/* Transition a linear, snooped (CPU-cached) bo into a state usable by
 * the GPU. Trivial on LLC hardware; otherwise flush pending work and
 * switch the bo to uncached via set_caching. With MOVE_ASYNC_HINT we
 * refuse rather than stall on a busy bo. Returns true on success.
 */
bool kgem_bo_convert_to_gpu(struct kgem *kgem,
			    struct kgem_bo *bo,
			    unsigned flags)
{
	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x, busy?=%d\n",
	     __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo)));
	assert(bo->tiling == I915_TILING_NONE);

	/* With LLC the CPU and GPU share the cache: nothing to do */
	if (kgem->has_llc)
		return true;

	if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo))
		return false;

	assert(bo->snoop);

	kgem_bo_submit(kgem, bo);

	if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED))
		return false;

	bo->snoop = false;
	return true;
}