/*	$NetBSD: intel_gtt.c,v 1.9 2021/12/19 12:10:42 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_gtt.c,v 1.9 2021/12/19 12:10:42 riastradh Exp $");

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

#include <linux/nbsd-namespace.h>

#ifndef __NetBSD__
void stash_init(struct pagestash *stash)
{
	pagevec_init(&stash->pvec);
	spin_lock_init(&stash->lock);
}

static struct page *stash_pop_page(struct pagestash *stash)
{
	struct page *page = NULL;

	spin_lock(&stash->lock);
	if (likely(stash->pvec.nr))
		page = stash->pvec.pages[--stash->pvec.nr];
	spin_unlock(&stash->lock);

	return page;
}

static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
	unsigned int nr;

	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);

	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
	memcpy(stash->pvec.pages + stash->pvec.nr,
	       pvec->pages + pvec->nr - nr,
	       sizeof(pvec->pages[0]) * nr);
	stash->pvec.nr += nr;

	spin_unlock(&stash->lock);

	pvec->nr -= nr;
}

static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec stack;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&stack);
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		stack.pages[stack.nr++] = page;
	} while (pagevec_space(&stack));

	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
		page = stack.pages[--stack.nr];

		/* Merge spare WC pages to the global stash */
		if (stack.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);

		/* Push any surplus WC pages onto the local VM stash */
		if (stack.nr)
			stash_push_pagevec(&vm->free_pages, &stack);
	}

	/* Return unwanted leftovers */
	if (unlikely(stack.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
		__pagevec_release(&stack);
	}

	return page;
}

static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages.pvec;
	struct pagevec stack;

	lockdep_assert_held(&vm->free_pages.lock);
	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		/*
		 * When we use WC, first fill up the global stash and then
		 * only if full immediately free the overflow.
		 */
		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
			return;

		/*
		 * We have to drop the lock to allow ourselves to sleep,
		 * so take a copy of the pvec and clear the stash for
		 * others to use it as we sleep.
		 */
		stack = *pvec;
		pagevec_reinit(pvec);
		spin_unlock(&vm->free_pages.lock);

		pvec = &stack;
		set_pages_array_wb(pvec->pages, pvec->nr);

		spin_lock(&vm->free_pages.lock);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	spin_lock(&vm->free_pages.lock);
	while (!pagevec_space(&vm->free_pages.pvec))
		vm_free_pages_release(vm, false);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
	pagevec_add(&vm->free_pages.pvec, page);
	spin_unlock(&vm->free_pages.lock);
}
#endif

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	mutex_lock(&vm->mutex);
	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	mutex_unlock(&vm->mutex);
}

void i915_address_space_fini(struct i915_address_space *vm)
{
#ifndef __NetBSD__
	spin_lock(&vm->free_pages.lock);
	if (pagevec_count(&vm->free_pages.pvec))
		vm_free_pages_release(vm, true);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
	spin_unlock(&vm->free_pages.lock);
#endif

	drm_mm_takedown(&vm->mm);

	mutex_destroy(&vm->mutex);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

#ifdef __NetBSD__
	vm->dmat = vm->i915->drm.dmat;
#else
	stash_init(&vm->free_pages);
#endif

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
#ifdef __NetBSD__
	int busdmaflags = 0;
	int error;
	int nseg = 1;

	if (gfp & __GFP_WAIT)
		busdmaflags |= BUS_DMA_WAITOK;
	else
		busdmaflags |= BUS_DMA_NOWAIT;

	error = bus_dmamem_alloc(vm->dmat, PAGE_SIZE, PAGE_SIZE, 0, &p->seg,
	    nseg, &nseg, busdmaflags);
	if (error) {
fail0:		p->map = NULL;
		return -error;	/* XXX errno NetBSD->Linux */
	}
	KASSERT(nseg == 1);
	error = bus_dmamap_create(vm->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
	    busdmaflags, &p->map);
	if (error) {
fail1:		bus_dmamem_free(vm->dmat, &p->seg, 1);
		goto fail0;
	}
	error = bus_dmamap_load_raw(vm->dmat, p->map, &p->seg, 1, PAGE_SIZE,
	    busdmaflags);
	if (error) {
fail2: __unused
		bus_dmamap_destroy(vm->dmat, p->map);
		goto fail1;
	}

	p->page = container_of(PHYS_TO_VM_PAGE(p->seg.ds_addr), struct page,
	    p_vmp);

	if (gfp & __GFP_ZERO) {
		void *va = kmap_atomic(p->page);
		memset(va, 0, PAGE_SIZE);
		kunmap_atomic(va);
	}
#else
	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page_attrs(vm->dma,
				      p->page, 0, PAGE_SIZE,
				      PCI_DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC |
				      DMA_ATTR_NO_WARN);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}
#endif

	return 0;
}

int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}

void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
#ifdef __NetBSD__
	bus_dmamap_unload(vm->dmat, p->map);
	bus_dmamap_destroy(vm->dmat, p->map);
	bus_dmamem_free(vm->dmat, &p->seg, 1);
#else
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
#endif
}

void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}

int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		size = I915_GTT_PAGE_SIZE_64K;
		gfp |= __GFP_NOWARN;
	}
	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;

	do {
		unsigned int order = get_order(size);
#ifdef __NetBSD__
		struct vm_page *vm_page;
		void *kva;
		int nseg;
		int ret;

		/* Allocate a scratch page.  */
		/* XXX errno NetBSD->Linux */
		ret = -bus_dmamem_alloc(vm->dmat, size, size, 0,
		    &vm->scratch[0].base.seg, 1, &nseg, BUS_DMA_NOWAIT);
		if (ret)
			goto skip;
		KASSERT(nseg == 1);
		KASSERT(vm->scratch[0].base.seg.ds_len == size);

		/* Create a DMA map.  */
		ret = -bus_dmamap_create(vm->dmat, size, 1, size, 0,
		    BUS_DMA_NOWAIT, &vm->scratch[0].base.map);
		if (ret)
			goto free_dmamem;

		/* Load the segment into the DMA map.  */
		ret = -bus_dmamap_load_raw(vm->dmat, vm->scratch[0].base.map,
		    &vm->scratch[0].base.seg, 1, size, BUS_DMA_NOWAIT);
		if (ret)
			goto destroy_dmamap;
		KASSERT(vm->scratch[0].base.map->dm_nsegs == 1);
		KASSERT(vm->scratch[0].base.map->dm_segs[0].ds_len == size);

		/* Zero the page.  */
		ret = -bus_dmamem_map(vm->dmat, &vm->scratch[0].base.seg, 1,
		    size, &kva, BUS_DMA_NOWAIT|BUS_DMA_NOCACHE);
		if (ret)
			goto unload_dmamap;
		memset(kva, 0, size);
		bus_dmamap_sync(vm->dmat, vm->scratch[0].base.map, 0, size,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
		bus_dmamem_unmap(vm->dmat, kva, size);

		/* XXX Is this page guaranteed to work as a huge page?  */
		vm_page = PHYS_TO_VM_PAGE(vm->scratch[0].base.seg.ds_addr);
		vm->scratch[0].base.page = container_of(vm_page, struct page,
		    p_vmp);
#else
		struct page *page;
		dma_addr_t addr;

		page = alloc_pages(gfp, order);
		if (unlikely(!page))
			goto skip;

		addr = dma_map_page_attrs(vm->dma,
					  page, 0, size,
					  PCI_DMA_BIDIRECTIONAL,
					  DMA_ATTR_SKIP_CPU_SYNC |
					  DMA_ATTR_NO_WARN);
		if (unlikely(dma_mapping_error(vm->dma, addr)))
			goto free_page;

		if (unlikely(!IS_ALIGNED(addr, size)))
			goto unmap_page;

		vm->scratch[0].base.page = page;
		vm->scratch[0].base.daddr = addr;
#endif
		vm->scratch_order = order;
		return 0;

#ifdef __NetBSD__
unload_dmamap:	bus_dmamap_unload(vm->dmat, vm->scratch[0].base.map);
destroy_dmamap:	bus_dmamap_destroy(vm->dmat, vm->scratch[0].base.map);
		vm->scratch[0].base.map = NULL;	/* paranoia */
free_dmamem:	bus_dmamem_free(vm->dmat, &vm->scratch[0].base.seg, 1);
#else
unmap_page:
		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
		__free_pages(page, order);
#endif
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
		gfp &= ~__GFP_NOWARN;
	} while (1);
}

void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = px_base(&vm->scratch[0]);
#ifdef __NetBSD__
	bus_dmamap_unload(vm->dmat, p->map);
	bus_dmamap_destroy(vm->dmat, p->map);
	vm->scratch[0].base.map = NULL;	/* paranoia */
	bus_dmamem_free(vm->dmat, &p->seg, 1);
#else
	unsigned int order = vm->scratch_order;

	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, order);
#endif
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		WARN_ON_ONCE(can_use_gtt_cache &&
			     intel_uncore_read(uncore,
					       HSW_GTT_CACHE_EN) == 0);
	}
}

u64 gen8_pte_encode(dma_addr_t addr,
		    enum i915_cache_level level,
		    u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~_PAGE_RW;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif