/*	$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2014-2016 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

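/**
 * Flushes an object out to memory if it is being used for display.
 * @obj: object to act on
 *
 * No-op for objects that are not currently attached to a framebuffer;
 * otherwise takes the object lock and forces any dirty cachelines out so
 * that scanout observes coherent data.
 */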
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout, it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

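/**
 * Reports the current caching mode of an object to userspace.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * Maps the object's cache level onto the I915_CACHING_* values returned in
 * the ioctl argument; the lookup is performed under RCU without taking a
 * reference on the object.
 */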
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

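/**
 * Changes the caching mode of an object on behalf of userspace.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * Translates the requested I915_CACHING_* value into a cache level and
 * applies it via i915_gem_object_set_cache_level(). Proxy objects are
 * rejected, as their caching mode is owned by their generator.
 */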
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}

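/*
 * Bump the object's GGTT vma and shrinker entries to the tail of their LRU
 * lists, marking the object as recently used so that it is considered last
 * for eviction and shrinking. Expects the object's pages to be pinned by
 * the caller.
 */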
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	if (!atomic_read(&obj->bind_count))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the requested read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

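/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses in preparation for a CPU write. Sets needs_clflush to the
 * combination of CLFLUSH_BEFORE/CLFLUSH_AFTER flags the caller should
 * honour around its write to the object's backing store.
 */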
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}