/*	$NetBSD: gen6_ppgtt.c,v 1.8 2021/12/19 12:27:32 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gen6_ppgtt.c,v 1.8 2021/12/19 12:27:32 riastradh Exp $");

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include <linux/nbsd-namespace.h>

/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
#ifdef __NetBSD__
	CTASSERT(sizeof(gen6_pte_t) == 4);
	bus_space_write_4(ppgtt->pd_bst, ppgtt->pd_bsh, pde*sizeof(gen6_pte_t),
	    GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID);
#else
	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
#endif
}

void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

	for_each_engine(engine, gt, id) {
		/* GFX_MODE is per-ring on gen7+ */
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		memset32(vaddr + pte, scratch_pte, count);
		kunmap_atomic(vaddr);

		pte = 0;
	}
}
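/*
 * Fill the PTEs covering @vma's node with entries for its backing pages,
 * one page-table page at a time: on NetBSD by walking the bus_dma segment
 * list, on Linux by walking the scatterlist.
 */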
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);

	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
	do {
#ifdef __NetBSD__
		KASSERT(iter.seg < iter.map->dm_nsegs);
		KASSERT((iter.off & (PAGE_SIZE - 1)) == 0);
		const bus_dma_segment_t *seg = &iter.map->dm_segs[iter.seg];
		KASSERT((seg->ds_addr & (PAGE_SIZE - 1)) == 0);
		KASSERT((seg->ds_len & (PAGE_SIZE - 1)) == 0);
		KASSERT(iter.off <= seg->ds_len - PAGE_SIZE);
		vaddr[act_pte] = pte_encode |
		    GEN6_PTE_ADDR_ENCODE(seg->ds_addr + iter.off);
		iter.off += PAGE_SIZE;
		if (iter.off >= seg->ds_len) {
			GEM_BUG_ON(iter.off > seg->ds_len);
			iter.off = 0;
			if (++iter.seg >= iter.map->dm_nsegs) {
				GEM_BUG_ON(iter.seg > iter.map->dm_nsegs);
				break;
			}
		}
#else
		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}
#endif

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
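/*
 * Rewrite the PDEs for [start, end) through the GGTT mapping of the page
 * directory, then do a posting read of the last PDE written and invalidate
 * the GGTT so the hardware picks up the update.
 */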
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
#ifdef __NetBSD__
	(void)bus_space_read_4(ppgtt->pd_bst, ppgtt->pd_bsh, 4*(pde - 1));
#else
	ioread32(ppgtt->pd_addr + pde - 1);
#endif
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	u64 from = start;
	unsigned int pde;
	int ret = 0;

	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			spin_unlock(&pd->lock);

			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				goto unwind_out;
			}

			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
			} else {
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
		gen6_flush_pd(ppgtt, from, start);

	goto out;

unwind_out:
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}

static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	int ret;

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	vm->scratch[0].encode =
		vm->pte_encode(px_dma(&vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
		cleanup_scratch_page(vm);
		return -ENOMEM;
	}

	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
	memset_p(pd->entry, &vm->scratch[1], I915_PDES);

	return 0;
}

static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (px_base(pt) != scratch)
			free_px(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	__i915_vma_put(ppgtt->vma);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);
	mutex_destroy(&ppgtt->pin_mutex);
	spin_lock_destroy(&ppgtt->base.pd->lock);
	kfree(ppgtt->base.pd);
}

static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);
	return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}
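/*
 * Binding the page directory's pseudo-vma into the GGTT establishes the
 * window through which the PDEs are written: a bus_space subregion of the
 * GSM on NetBSD, a pointer into ggtt->gsm on Linux.  All PDEs are then
 * (re)written via gen6_flush_pd().
 */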
static int pd_vma_bind(struct i915_vma *vma,
		       enum i915_cache_level cache_level,
		       u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
#ifdef __NetBSD__
    {
	bus_size_t npgs = vma->size >> PAGE_SHIFT;
	bus_size_t gtt_nbytes = npgs * sizeof(gen6_pte_t);
	bus_size_t ggtt_offset_bytes =
	    (bus_size_t)ggtt_offset * sizeof(gen6_pte_t);
	int ret;

	KASSERTMSG(gtt_nbytes <= ggtt->gsmsz - ggtt_offset_bytes,
	    "oversize ppgtt size 0x%"PRIx64" bytes 0x%"PRIx64" pgs,"
	    " requiring 0x%"PRIx64" bytes of ptes at 0x%"PRIx64";"
	    " gsm has 0x%"PRIx64" bytes total"
	    " with only 0x%"PRIx64" for ptes",
	    (uint64_t)vma->size, (uint64_t)npgs,
	    (uint64_t)gtt_nbytes, (uint64_t)ggtt_offset_bytes,
	    (uint64_t)ggtt->gsmsz,
	    (uint64_t)(ggtt->gsmsz - ggtt_offset_bytes));
	ret = -bus_space_subregion(ggtt->gsmt, ggtt->gsmh, ggtt_offset_bytes,
	    gtt_nbytes, &ppgtt->pd_bsh);
	if (ret) {
		DRM_ERROR("Unable to subregion the GGTT: %d\n", ret);
		return ret;
	}
	ppgtt->pd_bst = ggtt->gsmt;
    }
#else
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
#endif

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
	return 0;
}

static void pd_vma_unbind(struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (px_base(pt) == scratch || atomic_read(&pt->used))
			continue;

		free_px(&ppgtt->base.vm, pt);
		pd->entry[pde] = scratch;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};

static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	i915_active_init(&vma->active, NULL, NULL);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(&ggtt->vm);
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	atomic_set(&vma->flags, I915_VMA_GGTT);
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	return vma;
}

int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
		return -EINTR;

	/*
	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = 0;
	if (!atomic_read(&ppgtt->pin_count))
		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
	if (!err)
		atomic_inc(&ppgtt->pin_count);
	mutex_unlock(&ppgtt->pin_mutex);

	return err;
}

void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}

void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	if (!atomic_read(&ppgtt->pin_count))
		return;

	i915_vma_unpin(ppgtt->vma);
	atomic_set(&ppgtt->pin_count, 0);
}
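/*
 * Create a gen6 ppgtt: initialize the base ppgtt and its vm ops, allocate
 * the page directory and scratch pages, and create the pseudo-vma that
 * reserves GGTT space for the page directory when pinned.
 */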
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
	struct i915_ggtt * const ggtt = gt->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	mutex_init(&ppgtt->flush);
	mutex_init(&ppgtt->pin_mutex);

	ppgtt_init(&ppgtt->base, gt);
	ppgtt->base.vm.top = 1;

	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
	if (!ppgtt->base.pd) {
		err = -ENOMEM;
		goto err_free;
	}

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_pd;

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_pd:
	spin_lock_destroy(&ppgtt->base.pd->lock);
	kfree(ppgtt->base.pd);
err_free:
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt);
	return ERR_PTR(err);
}