/* $NetBSD: huge_pages.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright 2017 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: huge_pages.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "i915_selftest.h"

#include "gem/i915_gem_region.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_pm.h"

#include "gt/intel_gt.h"

#include "igt_gem_utils.h"
#include "mock_context.h"

#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_region.h"
#include "selftests/i915_random.h"

static const unsigned int page_sizes[] = {
	I915_GTT_PAGE_SIZE_2M,
	I915_GTT_PAGE_SIZE_64K,
	I915_GTT_PAGE_SIZE_4K,
};

static unsigned int get_largest_page_size(struct drm_i915_private *i915,
					  u64 rem)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
		unsigned int page_size = page_sizes[i];

		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
			return page_size;
	}

	return 0;
}

static void huge_pages_free_pages(struct sg_table *st)
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
		if (sg_page(sg))
			__free_pages(sg_page(sg), get_order(sg->length));
	}

	sg_free_table(st);
	kfree(st);
}

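/*
 * Worked example of the greedy fill below: with page_mask = 2M | 64K | 4K and
 * an object of size 2M + 64K + 4K, the resulting sg table is one 2M chunk, one
 * 64K chunk and one 4K chunk, so every requested page size is used at least
 * once.
 */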
static int get_huge_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
	unsigned int page_mask = obj->mm.page_mask;
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;

	/*
	 * Our goal here is simple: we want to greedily fill the object from
	 * largest to smallest page-size, while ensuring that we use *every*
	 * page-size as per the given page-mask.
	 */
	do {
		unsigned int bit = ilog2(page_mask);
		unsigned int page_size = BIT(bit);
		int order = get_order(page_size);

		do {
			struct page *page;

			GEM_BUG_ON(order >= MAX_ORDER);
			page = alloc_pages(GFP | __GFP_ZERO, order);
			if (!page)
				goto err;

			sg_set_page(sg, page, page_size, 0);
			sg_page_sizes |= page_size;
			st->nents++;

			rem -= page_size;
			if (!rem) {
				sg_mark_end(sg);
				break;
			}

			sg = __sg_next(sg);
		} while ((rem - ((page_size-1) & page_mask)) >= page_size);

		page_mask &= (page_size-1);
	} while (page_mask);

	if (i915_gem_gtt_prepare_pages(obj, st))
		goto err;

	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err:
	sg_set_page(sg, NULL, 0, 0);
	sg_mark_end(sg);
	huge_pages_free_pages(st);

	return -ENOMEM;
}

static void put_huge_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	i915_gem_gtt_finish_pages(obj, pages);
	huge_pages_free_pages(pages);

	obj->mm.dirty = false;
}

static const struct drm_i915_gem_object_ops huge_page_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = get_huge_pages,
	.put_pages = put_huge_pages,
};

static struct drm_i915_gem_object *
huge_pages_object(struct drm_i915_private *i915,
		  u64 size,
		  unsigned int page_mask)
{
	static struct lock_class_key lock_class;
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));

	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &huge_page_ops, &lock_class);

	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	obj->mm.page_mask = page_mask;

	return obj;
}

static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	/* Use optimal page-sized chunks to fill in the sg table */
	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	do {
		unsigned int page_size = get_largest_page_size(i915, rem);
		unsigned int len = min(page_size * div_u64(rem, page_size),
				       max_len);

		GEM_BUG_ON(!page_size);

		sg->offset = 0;
		sg->length = len;
		sg_dma_len(sg) = len;
		sg_dma_address(sg) = page_size;

		sg_page_sizes |= len;

		st->nents++;

		rem -= len;
		if (!rem) {
			sg_mark_end(sg);
			break;
		}

		sg = sg_next(sg);
	} while (1);

	i915_sg_trim(st);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;
}

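/*
 * Variant of fake_get_huge_pages() that backs the whole object with a single
 * sg chunk using the largest supported page size.  As above, the dma address
 * is faked (set to the page size itself) since these mock objects never reach
 * real hardware.
 */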
static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int page_size;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	st->nents = 1;

	page_size = get_largest_page_size(i915, obj->base.size);
	GEM_BUG_ON(!page_size);

	sg->offset = 0;
	sg->length = obj->base.size;
	sg_dma_len(sg) = obj->base.size;
	sg_dma_address(sg) = page_size;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;
#undef GFP
}

static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages)
{
	sg_free_table(pages);
	kfree(pages);
}

static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
				struct sg_table *pages)
{
	fake_free_huge_pages(obj, pages);
	obj->mm.dirty = false;
}

static const struct drm_i915_gem_object_ops fake_ops = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages,
	.put_pages = fake_put_huge_pages,
};

static const struct drm_i915_gem_object_ops fake_ops_single = {
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages_single,
	.put_pages = fake_put_huge_pages,
};

static struct drm_i915_gem_object *
fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
{
	static struct lock_class_key lock_class;
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));

	if (size >> PAGE_SHIFT > UINT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);

	if (single)
		i915_gem_object_init(obj, &fake_ops_single, &lock_class);
	else
		i915_gem_object_init(obj, &fake_ops, &lock_class);

	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->cache_level = I915_CACHE_NONE;

	return obj;
}

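/*
 * Common consistency check used by all the tests below: the vma's sg and gtt
 * page sizes must be supported by the device, its phys and sg sizes must match
 * the backing object, and the object itself must never carry a gtt page size,
 * as that is a property of the vma binding only.
 */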
static int igt_check_page_sizes(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	unsigned int supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	/* We have to wait for the async bind to complete before our asserts */
	err = i915_vma_sync(vma);
	if (err)
		return err;

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
		       vma->page_sizes.sg & ~supported, supported);
		err = -EINVAL;
	}

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
		       vma->page_sizes.gtt & ~supported, supported);
		err = -EINVAL;
	}

	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
		err = -EINVAL;
	}

	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
		err = -EINVAL;
	}

	if (obj->mm.page_sizes.gtt) {
		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
		       obj->mm.page_sizes.gtt);
		err = -EINVAL;
	}

	return err;
}

static int igt_mock_exhaust_device_supported_pages(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int i, j, single;
	int err;

	/*
	 * Sanity check creating objects with every valid page support
	 * combination for our mock device.
	 */

	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
		unsigned int combination = 0;

		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
			if (i & BIT(j))
				combination |= page_sizes[j];
		}

		mkwrite_device_info(i915)->page_sizes = combination;

		for (single = 0; single <= 1; ++single) {
			obj = fake_huge_pages_object(i915, combination, !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			if (obj->base.size != combination) {
				pr_err("obj->base.size=%zu, expected=%u\n",
				       obj->base.size, combination);
				err = -EINVAL;
				goto out_put;
			}

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_close;

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.sg != combination) {
				pr_err("page_sizes.sg=%u, expected=%u\n",
				       vma->page_sizes.sg, combination);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			i915_gem_object_put(obj);

			if (err)
				goto out_device;
		}
	}

	goto out_device;

out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = saved_mask;

	return err;
}

static int igt_mock_memory_region_huge_pages(void *arg)
{
	const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	struct intel_memory_region *mem;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int bit;
	int err = 0;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
	if (IS_ERR(mem)) {
		pr_err("%s failed to create memory region\n", __func__);
		return PTR_ERR(mem);
	}

	for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		unsigned int page_size = BIT(bit);
		resource_size_t phys;
		int i;

		for (i = 0; i < ARRAY_SIZE(flags); ++i) {
			obj = i915_gem_object_create_region(mem, page_size,
							    flags[i]);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_region;
			}

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_close;

			err = igt_check_page_sizes(vma);
			if (err)
				goto out_unpin;

			phys = i915_gem_object_get_dma_address(obj, 0);
			if (!IS_ALIGNED(phys, page_size)) {
				pr_err("%s addr misaligned(%pa) page_size=%u\n",
				       __func__, &phys, page_size);
				err = -EINVAL;
				goto out_unpin;
			}

			if (vma->page_sizes.gtt != page_size) {
				pr_err("%s page_sizes.gtt=%u, expected=%u\n",
				       __func__, vma->page_sizes.gtt,
				       page_size);
				err = -EINVAL;
				goto out_unpin;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			__i915_gem_object_put_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	goto out_region;

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_region:
	intel_memory_region_put(mem);
	return err;
}

static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	int bit;
	int err;

	/*
	 * Sanity check dma misalignment for huge pages -- the dma addresses we
	 * insert into the paging structures need to always respect the page
	 * size alignment.
	 */

	bit = ilog2(I915_GTT_PAGE_SIZE_64K);

	for_each_set_bit_from(bit, &supported,
			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		IGT_TIMEOUT(end_time);
		unsigned int page_size = BIT(bit);
		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
		unsigned int offset;
		unsigned int size =
			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
		struct i915_vma *vma;

		obj = fake_huge_pages_object(i915, size, true);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zu, expected=%u\n",
			       obj->base.size, size);
			err = -EINVAL;
			goto out_put;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		/* Force the page size for this object */
		obj->mm.page_sizes.sg = page_size;

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_unpin;
		}

		err = i915_vma_pin(vma, 0, 0, flags);
		if (err) {
			i915_vma_close(vma);
			goto out_unpin;
		}

		err = igt_check_page_sizes(vma);

		if (vma->page_sizes.gtt != page_size) {
			pr_err("page_sizes.gtt=%u, expected %u\n",
			       vma->page_sizes.gtt, page_size);
			err = -EINVAL;
		}

		i915_vma_unpin(vma);

		if (err) {
			i915_vma_close(vma);
			goto out_unpin;
		}

		/*
		 * Try all the other valid offsets until the next
		 * boundary -- should always fall back to using 4K
		 * pages.
		 */
		for (offset = 4096; offset < page_size; offset += 4096) {
			err = i915_vma_unbind(vma);
			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			err = i915_vma_pin(vma, 0, 0, flags | offset);
			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
				pr_err("page_sizes.gtt=%u, expected %llu\n",
				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);

			if (err) {
				i915_vma_close(vma);
				goto out_unpin;
			}

			if (igt_timeout(end_time,
					"%s timed out at offset %x with page-size %x\n",
					__func__, offset, page_size))
				break;
		}

		i915_vma_close(vma);

		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

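/*
 * Unwind helper for igt_mock_ppgtt_huge_fill(): close any vma instantiated for
 * each object on the list, then unpin and discard its backing pages before
 * putting the object itself.
 */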
static void close_object_list(struct list_head *objects,
			      struct i915_ppgtt *ppgtt)
{
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		struct i915_vma *vma;

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (!IS_ERR(vma))
			i915_vma_close(vma);

		list_del(&obj->st_link);
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
		i915_gem_object_put(obj);
	}
}

static int igt_mock_ppgtt_huge_fill(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
	unsigned long page_num;
	bool single = false;
	LIST_HEAD(objects);
	IGT_TIMEOUT(end_time);
	int err = -ENODEV;

	for_each_prime_number_from(page_num, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		u64 size = page_num << PAGE_SHIFT;
		struct i915_vma *vma;
		unsigned int expected_gtt = 0;
		int i;

		obj = fake_huge_pages_object(i915, size, single);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zd, expected=%llu\n",
			       obj->base.size, size);
			i915_gem_object_put(obj);
			err = -EINVAL;
			break;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			break;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER);
		if (err)
			break;

		err = igt_check_page_sizes(vma);
		if (err) {
			i915_vma_unpin(vma);
			break;
		}

		/*
		 * Figure out the expected gtt page size knowing that we go from
		 * largest to smallest page size sg chunks, and that we align to
		 * the largest page size.
		 */
		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
			unsigned int page_size = page_sizes[i];

			if (HAS_PAGE_SIZES(i915, page_size) &&
			    size >= page_size) {
				expected_gtt |= page_size;
				size &= page_size-1;
			}
		}

		GEM_BUG_ON(!expected_gtt);
		GEM_BUG_ON(size);

		if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;

		i915_vma_unpin(vma);

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
			if (!IS_ALIGNED(vma->node.start,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.start(%llx) not aligned to 2M\n",
				       vma->node.start);
				err = -EINVAL;
				break;
			}

			if (!IS_ALIGNED(vma->node.size,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.size(%llx) not aligned to 2M\n",
				       vma->node.size);
				err = -EINVAL;
				break;
			}
		}

		if (vma->page_sizes.gtt != expected_gtt) {
			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
			       vma->page_sizes.gtt, expected_gtt,
			       obj->base.size, yesno(!!single));
			err = -EINVAL;
			break;
		}

		if (igt_timeout(end_time,
				"%s timed out at size %zd\n",
				__func__, obj->base.size))
			break;

		single = !single;
	}

	close_object_list(&objects, ppgtt);

	if (err == -ENOMEM || err == -ENOSPC)
		err = 0;

	return err;
}

static int igt_mock_ppgtt_64K(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct drm_i915_gem_object *obj;
	const struct object_info {
		unsigned int size;
		unsigned int gtt;
		unsigned int offset;
	} objects[] = {
		/* Cases with forced padding/alignment */
		{
			.size = SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_64K + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_64K - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		/* Try without any forced padding/alignment */
		{
			.size = SZ_64K,
			.offset = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
		{
			.size = SZ_128K,
			.offset = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
	};
	struct i915_vma *vma;
	int i, single;
	int err;

	/*
	 * Sanity check some of the trickiness with 64K pages -- either we can
	 * safely mark the whole page-table (2M block) as 64K, or we have to
	 * always fall back to 4K.
	 */

	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
		return 0;

	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
		unsigned int size = objects[i].size;
		unsigned int expected_gtt = objects[i].gtt;
		unsigned int offset = objects[i].offset;
		unsigned int flags = PIN_USER;

		for (single = 0; single <= 1; single++) {
			obj = fake_huge_pages_object(i915, size, !!single);
			if (IS_ERR(obj))
				return PTR_ERR(obj);

			err = i915_gem_object_pin_pages(obj);
			if (err)
				goto out_object_put;

			/*
			 * Disable 2M pages -- we only want to use 64K/4K pages
			 * for this test.
			 */
			obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_object_unpin;
			}

			if (offset)
				flags |= PIN_OFFSET_FIXED | offset;

			err = i915_vma_pin(vma, 0, 0, flags);
			if (err)
				goto out_vma_close;

			err = igt_check_page_sizes(vma);
			if (err)
				goto out_vma_unpin;

			if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				if (!IS_ALIGNED(vma->node.start,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.start(%llx) not aligned to 2M\n",
					       vma->node.start);
					err = -EINVAL;
					goto out_vma_unpin;
				}

				if (!IS_ALIGNED(vma->node.size,
						I915_GTT_PAGE_SIZE_2M)) {
					pr_err("node.size(%llx) not aligned to 2M\n",
					       vma->node.size);
					err = -EINVAL;
					goto out_vma_unpin;
				}
			}

			if (vma->page_sizes.gtt != expected_gtt) {
				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
				       vma->page_sizes.gtt, expected_gtt, i,
				       yesno(!!single));
				err = -EINVAL;
				goto out_vma_unpin;
			}

			i915_vma_unpin(vma);
			i915_vma_close(vma);

			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	return 0;

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	i915_vma_close(vma);
out_object_unpin:
	i915_gem_object_unpin_pages(obj);
out_object_put:
	i915_gem_object_put(obj);

	return err;
}

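/*
 * The write/readback helpers used by the live tests: gpu_write() fills the
 * given dword in every page of the vma from the GPU, while cpu_check() reads
 * the value back through whichever CPU mapping the object supports (kmap if it
 * has struct pages, a WC vmap otherwise).
 */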
static int gpu_write(struct intel_context *ce,
		     struct i915_vma *vma,
		     u32 dw,
		     u32 val)
{
	int err;

	i915_gem_object_lock(vma->obj);
	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
	i915_gem_object_unlock(vma->obj);
	if (err)
		return err;

	return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
			       vma->size >> PAGE_SHIFT, val);
}

static int
__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned int needs_flush;
	unsigned long n;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));

		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(ptr, PAGE_SIZE);

		if (ptr[dword] != val) {
			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
			       n, dword, ptr[dword], val);
			kunmap_atomic(ptr);
			err = -EINVAL;
			break;
		}

		kunmap_atomic(ptr);
	}

	i915_gem_object_finish_access(obj);

	return err;
}

static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned long n = obj->base.size >> PAGE_SHIFT;
	u32 *ptr;
	int err;

	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	ptr += dword;
	while (n--) {
		if (*ptr != val) {
			pr_err("base[%u]=%08x, val=%08x\n",
			       dword, *ptr, val);
			err = -EINVAL;
			break;
		}

		ptr += PAGE_SIZE / sizeof(*ptr);
	}

	i915_gem_object_unpin_map(obj);
	return err;
}

static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	if (i915_gem_object_has_struct_page(obj))
		return __cpu_check_shmem(obj, dword, val);
	else
		return __cpu_check_vmap(obj, dword, val);
}

static int __igt_write_huge(struct intel_context *ce,
			    struct drm_i915_gem_object *obj,
			    u64 size, u64 offset,
			    u32 dword, u32 val)
{
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_unbind(vma);
	if (err)
		goto out_vma_close;

	err = i915_vma_pin(vma, size, 0, flags | offset);
	if (err) {
		/*
		 * The ggtt may have some pages reserved, so refrain from
		 * erroring out.
		 */
		if (err == -ENOSPC && i915_is_ggtt(ce->vm))
			err = 0;

		goto out_vma_close;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_vma_unpin;

	err = gpu_write(ce, vma, dword, val);
	if (err) {
		pr_err("gpu-write failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

	err = cpu_check(obj, dword, val);
	if (err) {
		pr_err("cpu-check failed at offset=%llx\n", offset);
		goto out_vma_unpin;
	}

out_vma_unpin:
	i915_vma_unpin(vma);
out_vma_close:
	__i915_vma_put(vma);
	return err;
}

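/*
 * Core of the live tests: walk a prime-number series of offsets from both the
 * low and high ends of the address space, writing a dword from a randomly
 * ordered set of engines at each offset and then verifying from the CPU that
 * the write landed where we expect it.
 */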
static int igt_write_huge(struct i915_gem_context *ctx,
			  struct drm_i915_gem_object *obj)
{
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int max_page_size;
	unsigned int count;
	u64 max;
	u64 num;
	u64 size;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	size = obj->base.size;
	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
		size = round_up(size, I915_GTT_PAGE_SIZE_2M);

	n = 0;
	count = 0;
	max = U64_MAX;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		count++;
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		max = min(max, ce->vm->total);
		n++;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!n)
		return 0;

	/*
	 * To keep things interesting when alternating between engines in our
	 * randomized order, let's also make feeding to the same engine a few
	 * times in succession a possibility by enlarging the permutation array.
	 */
	order = i915_random_order(count * count, &prng);
	if (!order)
		return -ENOMEM;

	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
	max = div_u64(max - size, max_page_size);

	/*
	 * Try various offsets in an ascending/descending fashion until we
	 * time out -- we want to avoid issues hidden by effectively always
	 * using offset = 0.
	 */
	i = 0;
	engines = i915_gem_context_lock_engines(ctx);
	for_each_prime_number_from(num, 0, max) {
		u64 offset_low = num * max_page_size;
		u64 offset_high = (max - num) * max_page_size;
		u32 dword = offset_in_page(num) / 4;
		struct intel_context *ce;

		ce = engines->engines[order[i] % engines->num_engines];
		i = (i + 1) % (count * count);
		if (!ce || !intel_engine_can_store_dword(ce->engine))
			continue;

		/*
		 * In order to utilize 64K pages we need to both pad the vma
		 * size and ensure the vma offset is at the start of the pt
		 * boundary, however to improve coverage we opt for testing both
		 * aligned and unaligned offsets.
		 */
		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
			offset_low = round_down(offset_low,
						I915_GTT_PAGE_SIZE_2M);

		err = __igt_write_huge(ce, obj, size, offset_low,
				       dword, num + 1);
		if (err)
			break;

		err = __igt_write_huge(ce, obj, size, offset_high,
				       dword, num + 1);
		if (err)
			break;

		if (igt_timeout(end_time,
				"%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
				__func__, ce->engine->name, offset_low, offset_high,
				max_page_size))
			break;
	}
	i915_gem_context_unlock_engines(ctx);

	kfree(order);

	return err;
}

static int igt_ppgtt_exhaust_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	unsigned long supported = INTEL_INFO(i915)->page_sizes;
	static unsigned int pages[ARRAY_SIZE(page_sizes)];
	struct drm_i915_gem_object *obj;
	unsigned int size_mask;
	unsigned int page_mask;
	int n, i;
	int err = -ENODEV;

	if (supported == I915_GTT_PAGE_SIZE_4K)
		return 0;

	/*
	 * Sanity check creating objects with a varying mix of page sizes --
	 * ensuring that our writes land in the right place.
	 */

	n = 0;
	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
		pages[n++] = BIT(i);

	for (size_mask = 2; size_mask < BIT(n); size_mask++) {
		unsigned int size = 0;

		for (i = 0; i < n; i++) {
			if (size_mask & BIT(i))
				size |= pages[i];
		}

		/*
		 * For our page mask we want to enumerate all the page-size
		 * combinations which will fit into our chosen object size.
		 */
		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int page_sizes = 0;

			for (i = 0; i < n; i++) {
				if (page_mask & BIT(i))
					page_sizes |= pages[i];
			}

			/*
			 * Ensure that we can actually fill the given object
			 * with our chosen page mask.
			 */
			if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
				continue;

			obj = huge_pages_object(i915, size, page_sizes);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			err = i915_gem_object_pin_pages(obj);
			if (err) {
				i915_gem_object_put(obj);

				if (err == -ENOMEM) {
					pr_info("unable to get pages, size=%u, pages=%u\n",
						size, page_sizes);
					err = 0;
					break;
				}

				pr_err("pin_pages failed, size=%u, pages=%u\n",
				       size_mask, page_mask);

				goto out_device;
			}

			/* Force the page-size for the gtt insertion */
			obj->mm.page_sizes.sg = page_sizes;

			err = igt_write_huge(ctx, obj);
			if (err) {
				pr_err("exhaust write-huge failed with size=%u\n",
				       size);
				goto out_unpin;
			}

			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	goto out_device;

out_unpin:
	i915_gem_object_unpin_pages(obj);
	i915_gem_object_put(obj);
out_device:
	mkwrite_device_info(i915)->page_sizes = supported;

	return err;
}

typedef struct drm_i915_gem_object *
(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);

static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
{
	return i915->mm.gemfs && has_transparent_hugepage();
}

static struct drm_i915_gem_object *
igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
{
	if (!igt_can_allocate_thp(i915)) {
		pr_info("%s missing THP support, skipping\n", __func__);
		return ERR_PTR(-ENODEV);
	}

	return i915_gem_object_create_shmem(i915, size);
}

static struct drm_i915_gem_object *
igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
{
	return i915_gem_object_create_internal(i915, size);
}

static struct drm_i915_gem_object *
igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
{
	return huge_pages_object(i915, size, size);
}

static struct drm_i915_gem_object *
igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
{
	return i915_gem_object_create_lmem(i915, size, flags);
}

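/*
 * Pick a pseudo-random, page-aligned size: at least min_page_size and strictly
 * less than twice max_page_size (both assumed to be powers of two).
 */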
static u32 igt_random_size(struct rnd_state *prng,
			   u32 min_page_size,
			   u32 max_page_size)
{
	u64 mask;
	u32 size;

	GEM_BUG_ON(!is_power_of_2(min_page_size));
	GEM_BUG_ON(!is_power_of_2(max_page_size));
	GEM_BUG_ON(min_page_size < PAGE_SIZE);
	GEM_BUG_ON(min_page_size > max_page_size);

	mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
	size = prandom_u32_state(prng) & mask;
	if (size < min_page_size)
		size |= min_page_size;

	return size;
}

static int igt_ppgtt_smoke_huge(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	I915_RND_STATE(prng);
	struct {
		igt_create_fn fn;
		u32 min;
		u32 max;
	} backends[] = {
		{ igt_create_internal, SZ_64K, SZ_2M, },
		{ igt_create_shmem, SZ_64K, SZ_32M, },
		{ igt_create_local, SZ_64K, SZ_1G, },
	};
	int err;
	int i;

	/*
	 * Sanity check that the HW uses huge pages correctly through our
	 * various backends -- ensure that our writes land in the right place.
	 */

	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
		u32 min = backends[i].min;
		u32 max = backends[i].max;
		u32 size = max;
try_again:
		size = igt_random_size(&prng, min, rounddown_pow_of_two(size));

		obj = backends[i].fn(i915, size, 0);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			if (err == -E2BIG) {
				size >>= 1;
				goto try_again;
			} else if (err == -ENODEV) {
				err = 0;
				continue;
			}

			return err;
		}

		err = i915_gem_object_pin_pages(obj);
		if (err) {
			if (err == -ENXIO || err == -E2BIG) {
				i915_gem_object_put(obj);
				size >>= 1;
				goto try_again;
			}
			goto out_put;
		}

		if (obj->mm.page_sizes.phys < min) {
			pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
				__func__, size, i);
			err = -ENOMEM;
			goto out_unpin;
		}

		err = igt_write_huge(ctx, obj);
		if (err) {
			pr_err("%s write-huge failed with size=%u, i=%d\n",
			       __func__, size, i);
		}
out_unpin:
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
out_put:
		i915_gem_object_put(obj);

		if (err == -ENOMEM || err == -ENXIO)
			err = 0;

		if (err)
			break;

		cond_resched();
	}

	return err;
}

static int igt_ppgtt_sanity_check(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	unsigned int supported = INTEL_INFO(i915)->page_sizes;
	struct {
		igt_create_fn fn;
		unsigned int flags;
	} backends[] = {
		{ igt_create_system, 0, },
		{ igt_create_local, I915_BO_ALLOC_CONTIGUOUS, },
	};
	struct {
		u32 size;
		u32 pages;
	} combos[] = {
		{ SZ_64K, SZ_64K },
		{ SZ_2M, SZ_2M },
		{ SZ_2M, SZ_64K },
		{ SZ_2M - SZ_64K, SZ_64K },
		{ SZ_2M - SZ_4K, SZ_64K | SZ_4K },
		{ SZ_2M + SZ_4K, SZ_64K | SZ_4K },
		{ SZ_2M + SZ_4K, SZ_2M | SZ_4K },
		{ SZ_2M + SZ_64K, SZ_2M | SZ_64K },
	};
	int i, j;
	int err;

	if (supported == I915_GTT_PAGE_SIZE_4K)
		return 0;

	/*
	 * Sanity check that the HW behaves with a limited set of combinations.
	 * We already have a bunch of randomised testing, which should give us
	 * a decent amount of variation between runs; however, we should keep
	 * this to limit the chances of introducing a temporary regression by
	 * testing the most obvious cases that might make something blow up.
	 */

	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
		for (j = 0; j < ARRAY_SIZE(combos); ++j) {
			struct drm_i915_gem_object *obj;
			u32 size = combos[j].size;
			u32 pages = combos[j].pages;

			obj = backends[i].fn(i915, size, backends[i].flags);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				if (err == -ENODEV) {
					pr_info("Device lacks local memory, skipping\n");
					err = 0;
					break;
				}

				return err;
			}

			err = i915_gem_object_pin_pages(obj);
			if (err) {
				i915_gem_object_put(obj);
				goto out;
			}

			GEM_BUG_ON(pages > obj->base.size);
			pages = pages & supported;

			if (pages)
				obj->mm.page_sizes.sg = pages;

			err = igt_write_huge(ctx, obj);

			i915_gem_object_unpin_pages(obj);
			__i915_gem_object_put_pages(obj);
			i915_gem_object_put(obj);

			if (err) {
				pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
				       __func__, size, pages, i, j);
				goto out;
			}
		}

		cond_resched();
	}

out:
	if (err == -ENOMEM)
		err = 0;

	return err;
}

static int igt_ppgtt_pin_update(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *dev_priv = ctx->i915;
	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct intel_context *ce;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
	unsigned int n;
	int first, last;
	int err = 0;

	/*
	 * Make sure there's no funny business when doing a PIN_UPDATE -- in the
	 * past we had a subtle issue with being able to incorrectly do multiple
	 * alloc va ranges on the same object when doing a PIN_UPDATE, which
	 * resulted in some pretty nasty bugs, though only when using
	 * huge-gtt-pages.
	 */

	vm = i915_gem_context_get_vm_rcu(ctx);
	if (!i915_vm_is_4lvl(vm)) {
		pr_info("48b PPGTT not supported, skipping\n");
		goto out_vm;
	}

	first = ilog2(I915_GTT_PAGE_SIZE_64K);
	last = ilog2(I915_GTT_PAGE_SIZE_2M);

	for_each_set_bit_from(first, &supported, last + 1) {
		unsigned int page_size = BIT(first);

		obj = i915_gem_object_create_internal(dev_priv, page_size);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_put;
		}

		err = i915_vma_pin(vma, SZ_2M, 0, flags);
		if (err)
			goto out_close;

		if (vma->page_sizes.sg < page_size) {
			pr_info("Unable to allocate page-size %x, finishing test early\n",
				page_size);
			goto out_unpin;
		}

		err = igt_check_page_sizes(vma);
		if (err)
			goto out_unpin;

		if (vma->page_sizes.gtt != page_size) {
			dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);

			/*
			 * The only valid reason for this to ever fail would be
			 * if the dma-mapper screwed us over when we did the
			 * dma_map_sg(), since it has the final say over the dma
			 * address.
			 */
			if (IS_ALIGNED(addr, page_size)) {
				pr_err("page_sizes.gtt=%u, expected=%u\n",
				       vma->page_sizes.gtt, page_size);
				err = -EINVAL;
			} else {
				pr_info("dma address misaligned, finishing test early\n");
			}

			goto out_unpin;
		}

		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
		if (err)
			goto out_unpin;

		i915_vma_unpin(vma);
		i915_vma_close(vma);

		i915_gem_object_put(obj);
	}

	obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	/*
	 * Make sure we don't end up in a situation where the pde is still
	 * pointing to the 2M page, and the pt we just filled in is dangling --
	 * we can check this by writing to the first page, where it would then
	 * land in the now stale 2M page.
	 */

	n = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);
	if (err)
		goto out_unpin;

	while (n--) {
		err = cpu_check(obj, n, 0xdeadbeaf);
		if (err)
			goto out_unpin;
	}

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_vm:
	i915_vm_put(vm);

	return err;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct vfsmount *gemfs = i915->mm.gemfs;
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *vaddr;
	int err = 0;

	/*
	 * Make sure that we don't burst into a ball of flames upon falling back
	 * to tmpfs, which we rely on if, on the off chance, we encounter a
	 * failure when setting up gemfs.
	 */

	i915->mm.gemfs = NULL;

	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_restore;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}
	*vaddr = 0xdeadbeaf;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_close;

	err = igt_check_page_sizes(vma);

	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_restore:
	i915->mm.gemfs = gemfs;

	i915_vm_put(vm);
	return err;
}

static int igt_shrink_thp(void *arg)
{
	struct i915_gem_context *ctx = arg;
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx);
	struct drm_i915_gem_object *obj;
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	struct i915_vma *vma;
	unsigned int flags = PIN_USER;
	unsigned int n;
	int err = 0;

	/*
	 * Sanity check shrinking a huge-paged object -- make sure nothing blows
	 * up.
	 */

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		goto out_vm;
	}

	obj = i915_gem_object_create_shmem(i915, SZ_2M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_vm;
	}

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
		pr_info("failed to allocate THP, finishing test early\n");
		goto out_unpin;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_unpin;

	n = 0;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);
	i915_vma_unpin(vma);
	if (err)
		goto out_close;

	/*
	 * Now that the pages are *unpinned*, shrink-all should invoke
	 * shmem to truncate our pages.
	 */
	i915_gem_shrink_all(i915);
	if (i915_gem_object_has_pages(obj)) {
		pr_err("shrink-all didn't truncate the pages\n");
		err = -EINVAL;
		goto out_close;
	}

	if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
		pr_err("residual page-size bits left\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_close;

	while (n--) {
		err = cpu_check(obj, n, 0xdeadbeaf);
		if (err)
			break;
	}

out_unpin:
	i915_vma_unpin(vma);
out_close:
	i915_vma_close(vma);
out_put:
	i915_gem_object_put(obj);
out_vm:
	i915_vm_put(vm);

	return err;
}

int i915_gem_huge_page_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_exhaust_device_supported_pages),
		SUBTEST(igt_mock_memory_region_huge_pages),
		SUBTEST(igt_mock_ppgtt_misaligned_dma),
		SUBTEST(igt_mock_ppgtt_huge_fill),
		SUBTEST(igt_mock_ppgtt_64K),
	};
	struct drm_i915_private *dev_priv;
	struct i915_ppgtt *ppgtt;
	int err;

	dev_priv = mock_gem_device();
	if (!dev_priv)
		return -ENOMEM;

	/* Pretend to be a device which supports the 48b PPGTT */
	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
	mkwrite_device_info(dev_priv)->ppgtt_size = 48;

	ppgtt = i915_ppgtt_create(&dev_priv->gt);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_unlock;
	}

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		pr_err("failed to create 48b PPGTT\n");
		err = -EINVAL;
		goto out_close;
	}

	/* If we ever hit this then it's time to mock the 64K scratch */
	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
		pr_err("PPGTT missing 64K scratch page\n");
		err = -EINVAL;
		goto out_close;
	}

	err = i915_subtests(tests, ppgtt);

out_close:
	i915_vm_put(&ppgtt->vm);

out_unlock:
	drm_dev_put(&dev_priv->drm);
	return err;
}

int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_shrink_thp),
		SUBTEST(igt_ppgtt_pin_update),
		SUBTEST(igt_tmpfs_fallback),
		SUBTEST(igt_ppgtt_exhaust_huge),
		SUBTEST(igt_ppgtt_smoke_huge),
		SUBTEST(igt_ppgtt_sanity_check),
	};
	struct i915_gem_context *ctx;
	struct i915_address_space *vm;
	struct file *file;
	int err;

	if (!HAS_PPGTT(i915)) {
		pr_info("PPGTT not supported, skipping live-selftests\n");
		return 0;
	}

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	mutex_lock(&ctx->mutex);
	vm = i915_gem_context_vm(ctx);
	if (vm)
		WRITE_ONCE(vm->scrub_64K, true);
	mutex_unlock(&ctx->mutex);

	err = i915_subtests(tests, ctx);

out_file:
	fput(file);
	return err;
}