/*	$NetBSD: i915_gem_shmem.c,v 1.12 2021/12/24 15:07:47 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2014-2016 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_shmem.c,v 1.12 2021/12/24 15:07:47 riastradh Exp $");

#include <linux/pagevec.h>
#include <linux/swap.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gemfs.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
#ifndef __NetBSD__
static void check_release_pagevec(struct pagevec *pvec)
{
	check_move_unevictable_pages(pvec);
	__pagevec_release(pvec);
	cond_resched();
}
#endif

static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_memory_region *mem = obj->mm.region;
	const unsigned long page_count = obj->base.size / PAGE_SIZE;
	unsigned long i;
#ifdef __NetBSD__
	struct uvm_object *mapping;
#else
	struct address_space *mapping;
#endif
	struct sg_table *st;
	struct scatterlist *sg;
	struct sgt_iter sgt_iter;
	struct page *page;
	unsigned long last_pfn = 0;	/* suppress gcc warning */
	unsigned int max_segment = i915_sg_segment_size();
	unsigned int sg_page_sizes;
#ifndef __NetBSD__
	struct pagevec pvec;
#endif
	gfp_t noreclaim;
	int ret;

	/*
	 * Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

	/*
	 * If there's no chance of allocating enough pages for the whole
	 * object, bail early.
	 */
	if (obj->base.size > resource_size(&mem->region))
		return -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

rebuild_st:
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	/*
	 * Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
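	/*
	 * NetBSD note: obj->base.filp is a struct uvm_object * (the UVM
	 * anonymous object backing the GEM object), not a file with an
	 * address_space, so the unevictable marking and gfp constraints in
	 * the Linux branch below have no NetBSD counterpart.
	 */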
#ifdef __NetBSD__
	mapping = obj->base.filp;
	noreclaim = GFP_KERNEL;
#else
	mapping = obj->base.filp->f_mapping;
	mapping_set_unevictable(mapping);
	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
#endif
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	for (i = 0; i < page_count; i++) {
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			cond_resched();
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (!IS_ERR(page))
				break;

			if (!*s) {
				ret = PTR_ERR(page);
				goto err_sg;
			}

			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);

			/*
			 * We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
#ifndef __NetBSD__
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/*
				 * Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL;
			}
#endif
		} while (1);

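		/*
		 * NetBSD note: each page gets its own scatterlist entry;
		 * contiguous runs are not coalesced (see the XXX below), so
		 * sg_page_sizes only ever records PAGE_SIZE here.
		 */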
#ifdef __NetBSD__
		__USE(last_pfn);
		KASSERT(st->nents == i);
		sg->sg_pgs[st->nents++] = page;
		sg_page_sizes |= PAGE_SIZE; /* XXX compress contiguous pages */
#else
		if (!i ||
		    sg->length >= max_segment ||
		    page_to_pfn(page) != last_pfn + 1) {
			if (i) {
				sg_page_sizes |= sg->length;
				sg = sg_next(sg);
			}
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);

		/* Check that the i965g/gm workaround works. */
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
#endif
	}
#ifndef __NetBSD__
	if (sg) { /* loop terminated early; short sg table */
		sg_page_sizes |= sg->length;
		sg_mark_end(sg);
	}
#endif

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
#ifdef __NetBSD__
			__USE(sgt_iter);
			uvm_obj_unwirepages(mapping, 0, obj->base.size);
#else
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
#endif
			sg_free_table(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(i915->drm.dev,
				 "Failed to DMA remap %lu pages\n",
				 page_count);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err_sg:
#ifndef __NetBSD__
	sg_mark_end(sg);
#endif
err_pages:
#ifdef __NetBSD__
	uvm_obj_unwirepages(mapping, 0, obj->base.size);
#else
	mapping_clear_unevictable(mapping);
	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, st) {
		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
#endif
	sg_free_table(st);
	kfree(st);

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}

static void
shmem_truncate(struct drm_i915_gem_object *obj)
{
	/*
	 * Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
#ifndef __NetBSD__
	struct address_space *mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	unsigned long i;

	/*
	 * Leave mmapings intact (GTT will have been revoked on unbinding,
	 * leaving only CPU mmapings around) and add those pages to the LRU
	 * instead of invoking writeback so they are aged and paged out
	 * as normal.
	 */
	mapping = obj->base.filp->f_mapping;

	/* Begin writeback on each dirty page */
	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
		struct page *page;

		page = find_lock_entry(mapping, i);
		if (!page || xa_is_value(page))
			continue;

		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
			int ret;

			SetPageReclaim(page);
			ret = mapping->a_ops->writepage(page, &wbc);
			if (!PageWriteback(page))
				ClearPageReclaim(page);
			if (!ret)
				goto put;
		}
		unlock_page(page);
put:
		put_page(page);
	}
#endif
}

void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
#ifndef __NetBSD__
	struct pagevec pvec;
	struct page *page;
#endif

	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

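	/*
	 * NetBSD note: if the object is dirty, mark each backing page dirty
	 * under the object lock so UVM will eventually page it out, then
	 * unwire the whole range in one call; the Linux branch instead
	 * dirties/ages pages individually and releases them via a pagevec.
	 */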
#ifdef __NetBSD__
	__USE(sgt_iter);
	if (obj->mm.dirty) {
		unsigned i;

		rw_enter(obj->base.filp->vmobjlock, RW_WRITER);
		for (i = 0; i < pages->sgl->sg_npgs; i++) {
			uvm_pagemarkdirty(&pages->sgl->sg_pgs[i]->p_vmp,
			    UVM_PAGE_STATUS_DIRTY);
		}
		rw_exit(obj->base.filp->vmobjlock);
	}
	uvm_obj_unwirepages(obj->base.filp, 0, obj->base.size);
#else
	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
#endif
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
#ifdef __NetBSD__
	struct uvm_object *mapping = obj->base.filp;
#else
	struct address_space *mapping = obj->base.filp->f_mapping;
#endif
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));

	/*
	 * Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation; corruption will ensue -
	 * that is userspace's prerogative!
	 */

	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

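	/*
	 * NetBSD note: ubc_uiomove() copies the user data straight into the
	 * backing uvm_object through the UBC mapping, playing the role of
	 * the Linux pagecache_write_begin()/write_end() loop below.
	 */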
#ifdef __NetBSD__
	__USE(pg);
	struct iovec iov = { .iov_base = user_data, .iov_len = remain };
	struct uio uio = {
		.uio_iov = &iov,
		.uio_iovcnt = 1,
		.uio_offset = offset,
		.uio_resid = remain,
		.uio_rw = UIO_WRITE,
		.uio_vmspace = curproc->p_vmspace,
	};
	int ret;

	/* XXX errno NetBSD->Linux */
	ret = -ubc_uiomove(mapping, &uio, remain, UVM_ADV_NORMAL, UBC_WRITE);
	if (ret)
		return ret;
#else
	do {
		unsigned int len, unwritten;
		struct page *page;
		void *data, *vaddr;
		int err;
		char c;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;

		err = pagecache_write_begin(obj->base.filp, mapping,
					    offset, len, 0,
					    &page, &data);
		if (err < 0)
			return err;

		vaddr = kmap_atomic(page);
		unwritten = __copy_from_user_inatomic(vaddr + pg,
						      user_data,
						      len);
		kunmap_atomic(vaddr);

		err = pagecache_write_end(obj->base.filp, mapping,
					  offset, len, len - unwritten,
					  page, data);
		if (err < 0)
			return err;

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;
	} while (remain);
#endif

	return 0;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
	i915_gem_object_release_memory_region(obj);

#ifdef __NetBSD__
	/* XXX Who acquires the reference?  */
	uao_detach(obj->base.filp);
#else
	fput(obj->base.filp);
#endif
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = shmem_get_pages,
	.put_pages = shmem_put_pages,
	.truncate = shmem_truncate,
	.writeback = shmem_writeback,

	.pwrite = shmem_pwrite,

	.release = shmem_release,
};

static int __create_shmem(struct drm_i915_private *i915,
			  struct drm_gem_object *obj,
			  resource_size_t size)
{
#ifdef __NetBSD__
	return drm_gem_object_init(&i915->drm, obj, size);
#else
	unsigned long flags = VM_NORESERVE;
	struct file *filp;

	drm_gem_private_object_init(&i915->drm, obj, size);

	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
	return 0;
#endif
}

static struct drm_i915_gem_object *
create_shmem(struct intel_memory_region *mem,
	     resource_size_t size,
	     unsigned int flags)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	ret = __create_shmem(i915, &obj->base, size);
	if (ret)
		goto fail;

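	/*
	 * NetBSD note: uao_set_pgfl() restricts which page freelist backs
	 * the aobj, presumably so its pages stay within reach of the GGTT;
	 * this stands in for the gfp-mask (DMA32) handling on Linux.
	 */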
609 */ 610 cache_level = I915_CACHE_LLC; 611 else 612 cache_level = I915_CACHE_NONE; 613 614 i915_gem_object_set_cache_coherency(obj, cache_level); 615 616 i915_gem_object_init_memory_region(obj, mem, 0); 617 618 return obj; 619 620 fail: 621 i915_gem_object_free(obj); 622 return ERR_PTR(ret); 623 } 624 625 struct drm_i915_gem_object * 626 i915_gem_object_create_shmem(struct drm_i915_private *i915, 627 resource_size_t size) 628 { 629 return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], 630 size, 0); 631 } 632 633 /* Allocate a new GEM object and fill it with the supplied data */ 634 struct drm_i915_gem_object * 635 i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, 636 const void *data, resource_size_t size) 637 { 638 struct drm_i915_gem_object *obj; 639 #ifdef __NetBSD__ 640 struct iovec iov = { .iov_base = __UNCONST(data), .iov_len = size }; 641 struct uio uio = { 642 .uio_iov = &iov, 643 .uio_iovcnt = 1, 644 .uio_offset = 0, 645 .uio_resid = size, 646 .uio_rw = UIO_WRITE, 647 }; 648 #else 649 struct file *file; 650 resource_size_t offset; 651 #endif 652 int err; 653 654 obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); 655 if (IS_ERR(obj)) 656 return obj; 657 658 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 659 660 #ifdef __NetBSD__ 661 UIO_SETUP_SYSSPACE(&uio); 662 /* XXX errno NetBSD->Linux */ 663 err = -ubc_uiomove(obj->base.filp, &uio, size, UVM_ADV_NORMAL, 664 UBC_WRITE); 665 if (err) 666 goto fail; 667 #else 668 file = obj->base.filp; 669 offset = 0; 670 do { 671 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 672 struct page *page; 673 void *pgdata, *vaddr; 674 675 err = pagecache_write_begin(file, file->f_mapping, 676 offset, len, 0, 677 &page, &pgdata); 678 if (err < 0) 679 goto fail; 680 681 vaddr = kmap(page); 682 memcpy(vaddr, data, len); 683 kunmap(page); 684 685 err = pagecache_write_end(file, file->f_mapping, 686 offset, len, len, 687 page, pgdata); 688 if (err < 0) 689 goto fail; 690 691 size -= len; 692 data += len; 693 offset += len; 694 } while (size); 695 #endif 696 697 return obj; 698 699 fail: 700 i915_gem_object_put(obj); 701 return ERR_PTR(err); 702 } 703 704 static int init_shmem(struct intel_memory_region *mem) 705 { 706 int err; 707 708 err = i915_gemfs_init(mem->i915); 709 if (err) { 710 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", 711 err); 712 } 713 714 intel_memory_region_set_name(mem, "system"); 715 716 return 0; /* Don't error, we can simply fallback to the kernel mnt */ 717 } 718 719 static void release_shmem(struct intel_memory_region *mem) 720 { 721 i915_gemfs_fini(mem->i915); 722 } 723 724 static const struct intel_memory_region_ops shmem_region_ops = { 725 .init = init_shmem, 726 .release = release_shmem, 727 .create_object = create_shmem, 728 }; 729 730 struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) 731 { 732 return intel_memory_region_create(i915, 0, 733 totalram_pages() << PAGE_SHIFT, 734 PAGE_SIZE, 0, 735 &shmem_region_ops); 736 } 737