/*	$NetBSD: intel_gtt.c,v 1.9 2021/12/19 12:10:42 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_gtt.c,v 1.9 2021/12/19 12:10:42 riastradh Exp $");

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

#include <linux/nbsd-namespace.h>

#ifndef __NetBSD__
void stash_init(struct pagestash *stash)
{
	pagevec_init(&stash->pvec);
	spin_lock_init(&stash->lock);
}

static struct page *stash_pop_page(struct pagestash *stash)
{
	struct page *page = NULL;

	spin_lock(&stash->lock);
	if (likely(stash->pvec.nr))
		page = stash->pvec.pages[--stash->pvec.nr];
	spin_unlock(&stash->lock);

	return page;
}

static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
	unsigned int nr;

	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);

	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
	memcpy(stash->pvec.pages + stash->pvec.nr,
	       pvec->pages + pvec->nr - nr,
	       sizeof(pvec->pages[0]) * nr);
	stash->pvec.nr += nr;

	spin_unlock(&stash->lock);

	pvec->nr -= nr;
}

static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec stack;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&stack);
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		stack.pages[stack.nr++] = page;
	} while (pagevec_space(&stack));

	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
		page = stack.pages[--stack.nr];

		/* Merge spare WC pages to the global stash */
		if (stack.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);

		/* Push any surplus WC pages onto the local VM stash */
		if (stack.nr)
			stash_push_pagevec(&vm->free_pages, &stack);
	}

	/* Return unwanted leftovers */
	if (unlikely(stack.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
		__pagevec_release(&stack);
	}

	return page;
}

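/*
 * Drain vm->free_pages: spill the cached pages into the global WC stash
 * and, once that is full (or when tearing down the address space with
 * immediate=true), convert the overflow back to WB and release it.
 * Called with vm->free_pages.lock held; may drop and retake the lock so
 * that set_pages_array_wb() can sleep.
 */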
static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages.pvec;
	struct pagevec stack;

	lockdep_assert_held(&vm->free_pages.lock);
	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		/*
		 * When we use WC, first fill up the global stash and then
		 * only if full immediately free the overflow.
		 */
		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
			return;

		/*
		 * We have to drop the lock to allow ourselves to sleep,
		 * so take a copy of the pvec and clear the stash for
		 * others to use it as we sleep.
		 */
		stack = *pvec;
		pagevec_reinit(pvec);
		spin_unlock(&vm->free_pages.lock);

		pvec = &stack;
		set_pages_array_wb(pvec->pages, pvec->nr);

		spin_lock(&vm->free_pages.lock);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	spin_lock(&vm->free_pages.lock);
	while (!pagevec_space(&vm->free_pages.pvec))
		vm_free_pages_release(vm, false);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
	pagevec_add(&vm->free_pages.pvec, page);
	spin_unlock(&vm->free_pages.lock);
}
#endif

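/*
 * Unbind and drop every VMA still on this address space's bound_list.
 * Each backing object is kept alive with kref_get_unless_zero() while we
 * unbind and put the VMA, then released; the list must be empty on return.
 */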
void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	mutex_lock(&vm->mutex);
	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	mutex_unlock(&vm->mutex);
}

void i915_address_space_fini(struct i915_address_space *vm)
{
#ifndef __NetBSD__
	spin_lock(&vm->free_pages.lock);
	if (pagevec_count(&vm->free_pages.pvec))
		vm_free_pages_release(vm, true);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
	spin_unlock(&vm->free_pages.lock);
#endif

	drm_mm_takedown(&vm->mm);

	mutex_destroy(&vm->mutex);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

#ifdef __NetBSD__
	vm->dmat = vm->i915->drm.dmat;
#else
	stash_init(&vm->free_pages);
#endif

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

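/*
 * Allocate and DMA-map a single page-table page.  On NetBSD this goes
 * through bus_dma: allocate one PAGE_SIZE segment, create a map, load the
 * segment, and zero the page if __GFP_ZERO was requested.  On Linux it
 * takes a page from the VM's stash and maps it with dma_map_page_attrs().
 */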
static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
#ifdef __NetBSD__
	int busdmaflags = 0;
	int error;
	int nseg = 1;

	if (gfp & __GFP_WAIT)
		busdmaflags |= BUS_DMA_WAITOK;
	else
		busdmaflags |= BUS_DMA_NOWAIT;

	error = bus_dmamem_alloc(vm->dmat, PAGE_SIZE, PAGE_SIZE, 0, &p->seg,
	    nseg, &nseg, busdmaflags);
	if (error) {
fail0:		p->map = NULL;
		return -error;	/* XXX errno NetBSD->Linux */
	}
	KASSERT(nseg == 1);
	error = bus_dmamap_create(vm->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
	    busdmaflags, &p->map);
	if (error) {
fail1:		bus_dmamem_free(vm->dmat, &p->seg, 1);
		goto fail0;
	}
	error = bus_dmamap_load_raw(vm->dmat, p->map, &p->seg, 1, PAGE_SIZE,
	    busdmaflags);
	if (error) {
fail2: __unused
		bus_dmamap_destroy(vm->dmat, p->map);
		goto fail1;
	}

	p->page = container_of(PHYS_TO_VM_PAGE(p->seg.ds_addr), struct page,
	    p_vmp);

	if (gfp & __GFP_ZERO) {
		void *va = kmap_atomic(p->page);
		memset(va, 0, PAGE_SIZE);
		kunmap_atomic(va);
	}
#else
	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page_attrs(vm->dma,
				      p->page, 0, PAGE_SIZE,
				      PCI_DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC |
				      DMA_ATTR_NO_WARN);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}
#endif

	return 0;
}

int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}

void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
#ifdef __NetBSD__
	bus_dmamap_unload(vm->dmat, p->map);
	bus_dmamap_destroy(vm->dmat, p->map);
	bus_dmamem_free(vm->dmat, &p->seg, 1);
#else
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
#endif
}

void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}

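/*
 * Allocate the scratch page that empty PTEs point at.  Try a 64K page
 * first when the VM can use 64K GTT pages, and fall back to a single 4K
 * page if that allocation (or its alignment requirement) cannot be met.
 */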
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		size = I915_GTT_PAGE_SIZE_64K;
		gfp |= __GFP_NOWARN;
	}
	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;

	do {
		unsigned int order = get_order(size);
#ifdef __NetBSD__
		struct vm_page *vm_page;
		void *kva;
		int nseg;
		int ret;

		/* Allocate a scratch page.  */
		/* XXX errno NetBSD->Linux */
		ret = -bus_dmamem_alloc(vm->dmat, size, size, 0,
		    &vm->scratch[0].base.seg, 1, &nseg, BUS_DMA_NOWAIT);
		if (ret)
			goto skip;
		KASSERT(nseg == 1);
		KASSERT(vm->scratch[0].base.seg.ds_len == size);

		/* Create a DMA map.  */
		ret = -bus_dmamap_create(vm->dmat, size, 1, size, 0,
		    BUS_DMA_NOWAIT, &vm->scratch[0].base.map);
		if (ret)
			goto free_dmamem;

		/* Load the segment into the DMA map.  */
		ret = -bus_dmamap_load_raw(vm->dmat, vm->scratch[0].base.map,
		    &vm->scratch[0].base.seg, 1, size, BUS_DMA_NOWAIT);
		if (ret)
			goto destroy_dmamap;
		KASSERT(vm->scratch[0].base.map->dm_nsegs == 1);
		KASSERT(vm->scratch[0].base.map->dm_segs[0].ds_len == size);

		/* Zero the page.  */
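/*
 * Release the scratch page allocated by setup_scratch_page(): unload and
 * destroy the bus_dma map and free the segment on NetBSD, or unmap and
 * free the page order on Linux.
 */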
		ret = -bus_dmamem_map(vm->dmat, &vm->scratch[0].base.seg, 1,
		    size, &kva, BUS_DMA_NOWAIT|BUS_DMA_NOCACHE);
		if (ret)
			goto unload_dmamap;
		memset(kva, 0, size);
		bus_dmamap_sync(vm->dmat, vm->scratch[0].base.map, 0, size,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
		bus_dmamem_unmap(vm->dmat, kva, size);

		/* XXX Is this page guaranteed to work as a huge page?  */
		vm_page = PHYS_TO_VM_PAGE(vm->scratch[0].base.seg.ds_addr);
		vm->scratch[0].base.page = container_of(vm_page, struct page,
		    p_vmp);
#else
		struct page *page;
		dma_addr_t addr;

		page = alloc_pages(gfp, order);
		if (unlikely(!page))
			goto skip;

		addr = dma_map_page_attrs(vm->dma,
					  page, 0, size,
					  PCI_DMA_BIDIRECTIONAL,
					  DMA_ATTR_SKIP_CPU_SYNC |
					  DMA_ATTR_NO_WARN);
		if (unlikely(dma_mapping_error(vm->dma, addr)))
			goto free_page;

		if (unlikely(!IS_ALIGNED(addr, size)))
			goto unmap_page;

		vm->scratch[0].base.page = page;
		vm->scratch[0].base.daddr = addr;
#endif
		vm->scratch_order = order;
		return 0;

#ifdef __NetBSD__
unload_dmamap:	bus_dmamap_unload(vm->dmat, vm->scratch[0].base.map);
destroy_dmamap:	bus_dmamap_destroy(vm->dmat, vm->scratch[0].base.map);
		vm->scratch[0].base.map = NULL;	/* paranoia */
free_dmamem:	bus_dmamem_free(vm->dmat, &vm->scratch[0].base.seg, 1);
#else
unmap_page:
		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
		__free_pages(page, order);
#endif
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
		gfp &= ~__GFP_NOWARN;
	} while (1);
}

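/*
 * Encode a gen8+ PTE: the DMA address plus present/writable bits, with
 * the writable bit cleared for PTE_READ_ONLY and a PPAT selection chosen
 * from the cache level (uncached, write-through for display, or cached).
 */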
void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = px_base(&vm->scratch[0]);
#ifdef __NetBSD__
	bus_dmamap_unload(vm->dmat, p->map);
	bus_dmamap_destroy(vm->dmat, p->map);
	vm->scratch[0].base.map = NULL;	/* paranoia */
	bus_dmamem_free(vm->dmat, &p->seg, 1);
#else
	unsigned int order = vm->scratch_order;

	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, order);
#endif
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		WARN_ON_ONCE(can_use_gtt_cache &&
			     intel_uncore_read(uncore,
					       HSW_GTT_CACHE_EN) == 0);
	}
}

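/*
 * Gen10 programs each of the eight PAT entries through its own
 * GEN10_PAT_INDEX register, unlike the BDW/CHV routines below which pack
 * all eight entries into the single GEN8_PRIVATE_PAT_LO/HI pair.
 */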
u64 gen8_pte_encode(dma_addr_t addr,
		    enum i915_cache_level level,
		    u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~_PAGE_RW;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

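/*
 * Note on the dispatch in setup_private_pat() below: gen12+ uses the
 * TGL-style per-index registers, gen10/11 the CNL-style per-index
 * registers, Cherryview and gen9 LP the snoop-only CHV layout, and
 * everything else (gen8/9 big core) the packed BDW layout.
 */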
static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(INTEL_GEN(i915) < 8);

	if (INTEL_GEN(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (INTEL_GEN(i915) >= 10)
		cnl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif