i915_gem_gtt.c revision 1.4.18.1 1 /* $NetBSD: i915_gem_gtt.c,v 1.4.18.1 2019/06/10 22:08:05 christos Exp $ */
2
3 /*
4 * Copyright 2010 Daniel Vetter
5 * Copyright 2011-2014 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 *
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: i915_gem_gtt.c,v 1.4.18.1 2019/06/10 22:08:05 christos Exp $");
30
31 #include <linux/bitmap.h>
32 #include <linux/err.h>
33 #include <linux/seq_file.h>
34 #include <drm/drmP.h>
35 #include <drm/i915_drm.h>
36 #include "i915_drv.h"
37 #include "i915_vgpu.h"
38 #include "i915_trace.h"
39 #include "intel_drv.h"
40
41 #ifdef __NetBSD__
42 #include <drm/bus_dma_hacks.h>
43 #include <x86/machdep.h>
44 #include <x86/pte.h>
45 #define _PAGE_PRESENT PG_V /* 0x01 PTE is present / valid */
46 #define _PAGE_RW PG_RW /* 0x02 read/write */
47 #define _PAGE_PWT PG_WT /* 0x08 write-through */
48 #define _PAGE_PCD PG_N /* 0x10 page cache disabled / non-cacheable */
49 #define _PAGE_PAT PG_PAT /* 0x80 page attribute table on PTE */
50 #endif
51
52 /**
53 * DOC: Global GTT views
54 *
55 * Background and previous state
56 *
57 * Historically objects could exists (be bound) in global GTT space only as
58 * singular instances with a view representing all of the object's backing pages
59 * in a linear fashion. This view will be called a normal view.
60 *
61 * To support multiple views of the same object, where the number of mapped
62 * pages is not equal to the backing store, or where the layout of the pages
63 * is not linear, concept of a GGTT view was added.
64 *
65 * One example of an alternative view is a stereo display driven by a single
66 * image. In this case we would have a framebuffer looking like this
67 * (2x2 pages):
68 *
69 * 12
70 * 34
71 *
72 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
73 * rendering. In contrast, fed to the display engine would be an alternative
74 * view which could look something like this:
75 *
76 * 1212
77 * 3434
78 *
79 * In this example both the size and layout of pages in the alternative view is
80 * different from the normal view.
81 *
82 * Implementation and usage
83 *
84 * GGTT views are implemented using VMAs and are distinguished via enum
85 * i915_ggtt_view_type and struct i915_ggtt_view.
86 *
87 * A new flavour of core GEM functions which work with GGTT bound objects were
88 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
89 * renaming in large amounts of code. They take the struct i915_ggtt_view
90 * parameter encapsulating all metadata required to implement a view.
91 *
92 * As a helper for callers which are only interested in the normal view,
93 * globally const i915_ggtt_view_normal singleton instance exists. All old core
94 * GEM API functions, the ones not taking the view parameter, are operating on,
95 * or with the normal GGTT view.
96 *
97 * Code wanting to add or use a new GGTT view needs to:
98 *
99 * 1. Add a new enum with a suitable name.
100 * 2. Extend the metadata in the i915_ggtt_view structure if required.
101 * 3. Add support to i915_get_vma_pages().
102 *
103 * New views are required to build a scatter-gather table from within the
104 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
105 * exists for the lifetime of an VMA.
106 *
107 * Core API is designed to have copy semantics which means that passed in
108 * struct i915_ggtt_view does not need to be persistent (left around after
109 * calling the core API functions).
110 *
111 */
112
113 static int
114 i915_get_ggtt_vma_pages(struct i915_vma *vma);
115
116 const struct i915_ggtt_view i915_ggtt_view_normal;
117 const struct i915_ggtt_view i915_ggtt_view_rotated = {
118 .type = I915_GGTT_VIEW_ROTATED
119 };
120
121 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
122 {
123 bool has_aliasing_ppgtt;
124 bool has_full_ppgtt;
125
126 has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
127 has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
128
129 if (intel_vgpu_active(dev))
130 has_full_ppgtt = false; /* emulation is too hard */
131
132 /*
133 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
134 * execlists, the sole mechanism available to submit work.
135 */
136 if (INTEL_INFO(dev)->gen < 9 &&
137 (enable_ppgtt == 0 || !has_aliasing_ppgtt))
138 return 0;
139
140 if (enable_ppgtt == 1)
141 return 1;
142
143 if (enable_ppgtt == 2 && has_full_ppgtt)
144 return 2;
145
146 #ifdef CONFIG_INTEL_IOMMU
147 /* Disable ppgtt on SNB if VT-d is on. */
148 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
149 DRM_INFO("Disabling PPGTT because VT-d is on\n");
150 return 0;
151 }
152 #endif
153
154 /* Early VLV doesn't have this */
155 if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
156 dev->pdev->revision < 0xb) {
157 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
158 return 0;
159 }
160
161 if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
162 return 2;
163 else
164 return has_aliasing_ppgtt ? 1 : 0;
165 }
166
167 static int ppgtt_bind_vma(struct i915_vma *vma,
168 enum i915_cache_level cache_level,
169 u32 unused)
170 {
171 u32 pte_flags = 0;
172
173 /* Currently applicable only to VLV */
174 if (vma->obj->gt_ro)
175 pte_flags |= PTE_READ_ONLY;
176
177 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
178 cache_level, pte_flags);
179
180 return 0;
181 }
182
183 static void ppgtt_unbind_vma(struct i915_vma *vma)
184 {
185 vma->vm->clear_range(vma->vm,
186 vma->node.start,
187 vma->obj->base.size,
188 true);
189 }
190
191 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
192 enum i915_cache_level level,
193 bool valid)
194 {
195 gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
196 pte |= addr;
197
198 switch (level) {
199 case I915_CACHE_NONE:
200 pte |= PPAT_UNCACHED_INDEX;
201 break;
202 case I915_CACHE_WT:
203 pte |= PPAT_DISPLAY_ELLC_INDEX;
204 break;
205 default:
206 pte |= PPAT_CACHED_INDEX;
207 break;
208 }
209
210 return pte;
211 }
212
213 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
214 const enum i915_cache_level level)
215 {
216 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
217 pde |= addr;
218 if (level != I915_CACHE_NONE)
219 pde |= PPAT_CACHED_PDE_INDEX;
220 else
221 pde |= PPAT_UNCACHED_INDEX;
222 return pde;
223 }
224
225 #define gen8_pdpe_encode gen8_pde_encode
226 #define gen8_pml4e_encode gen8_pde_encode
227
228 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
229 enum i915_cache_level level,
230 bool valid, u32 unused)
231 {
232 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
233 pte |= GEN6_PTE_ADDR_ENCODE(addr);
234
235 switch (level) {
236 case I915_CACHE_L3_LLC:
237 case I915_CACHE_LLC:
238 pte |= GEN6_PTE_CACHE_LLC;
239 break;
240 case I915_CACHE_NONE:
241 pte |= GEN6_PTE_UNCACHED;
242 break;
243 default:
244 MISSING_CASE(level);
245 }
246
247 return pte;
248 }
249
250 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
251 enum i915_cache_level level,
252 bool valid, u32 unused)
253 {
254 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
255 pte |= GEN6_PTE_ADDR_ENCODE(addr);
256
257 switch (level) {
258 case I915_CACHE_L3_LLC:
259 pte |= GEN7_PTE_CACHE_L3_LLC;
260 break;
261 case I915_CACHE_LLC:
262 pte |= GEN6_PTE_CACHE_LLC;
263 break;
264 case I915_CACHE_NONE:
265 pte |= GEN6_PTE_UNCACHED;
266 break;
267 default:
268 MISSING_CASE(level);
269 }
270
271 return pte;
272 }
273
274 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
275 enum i915_cache_level level,
276 bool valid, u32 flags)
277 {
278 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
279 pte |= GEN6_PTE_ADDR_ENCODE(addr);
280
281 if (!(flags & PTE_READ_ONLY))
282 pte |= BYT_PTE_WRITEABLE;
283
284 if (level != I915_CACHE_NONE)
285 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
286
287 return pte;
288 }
289
290 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
291 enum i915_cache_level level,
292 bool valid, u32 unused)
293 {
294 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
295 pte |= HSW_PTE_ADDR_ENCODE(addr);
296
297 if (level != I915_CACHE_NONE)
298 pte |= HSW_WB_LLC_AGE3;
299
300 return pte;
301 }
302
303 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
304 enum i915_cache_level level,
305 bool valid, u32 unused)
306 {
307 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
308 pte |= HSW_PTE_ADDR_ENCODE(addr);
309
310 switch (level) {
311 case I915_CACHE_NONE:
312 break;
313 case I915_CACHE_WT:
314 pte |= HSW_WT_ELLC_LLC_AGE3;
315 break;
316 default:
317 pte |= HSW_WB_ELLC_LLC_AGE3;
318 break;
319 }
320
321 return pte;
322 }
323
324 static void *kmap_page_dma(struct i915_page_dma *);
325 static void kunmap_page_dma(struct drm_device *, void *);
326
327 static int __setup_page_dma(struct drm_device *dev,
328 struct i915_page_dma *p, gfp_t flags)
329 {
330 #ifdef __NetBSD__
331 int busdmaflags = 0;
332 int error;
333 int nseg = 1;
334
335 if (flags & __GFP_WAIT)
336 busdmaflags |= BUS_DMA_WAITOK;
337 else
338 busdmaflags |= BUS_DMA_NOWAIT;
339
340 error = bus_dmamem_alloc(dev->dmat, PAGE_SIZE, PAGE_SIZE, 0, &p->seg,
341 nseg, &nseg, busdmaflags);
342 if (error) {
343 fail0: p->map = NULL;
344 return -error; /* XXX errno NetBSD->Linux */
345 }
346 KASSERT(nseg == 1);
347 error = bus_dmamap_create(dev->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
348 busdmaflags, &p->map);
349 if (error) {
350 fail1: bus_dmamem_free(dev->dmat, &p->seg, 1);
351 goto fail0;
352 }
353 error = bus_dmamap_load_raw(dev->dmat, p->map, &p->seg, 1, PAGE_SIZE,
354 busdmaflags);
355 if (error) {
356 fail2: __unused
357 bus_dmamap_destroy(dev->dmat, p->map);
358 goto fail1;
359 }
360
361 if (flags & __GFP_ZERO) {
362 void *va = kmap_page_dma(p);
363 memset(va, 0, PAGE_SIZE);
364 kunmap_page_dma(dev, va);
365 }
366 #else
367 struct device *device = &dev->pdev->dev;
368
369 p->page = alloc_page(flags);
370 if (!p->page)
371 return -ENOMEM;
372
373 p->daddr = dma_map_page(device,
374 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
375
376 if (dma_mapping_error(device, p->daddr)) {
377 __free_page(p->page);
378 return -EINVAL;
379 }
380 #endif
381
382 return 0;
383 }
384
385 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
386 {
387 return __setup_page_dma(dev, p, GFP_KERNEL);
388 }
389
390 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
391 {
392 #ifdef __NetBSD__
393 if (WARN_ON(!p->map))
394 return;
395
396 bus_dmamap_unload(dev->dmat, p->map);
397 bus_dmamap_destroy(dev->dmat, p->map);
398 bus_dmamem_free(dev->dmat, &p->seg, 1);
399 p->map = NULL;
400 #else
401 if (WARN_ON(!p->page))
402 return;
403
404 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
405 __free_page(p->page);
406 memset(p, 0, sizeof(*p));
407 #endif
408 }
409
410 static void *kmap_page_dma(struct i915_page_dma *p)
411 {
412 #ifdef __NetBSD__
413 return kmap_atomic(container_of(PHYS_TO_VM_PAGE(p->seg.ds_addr),
414 struct page, p_vmp));
415 #else
416 return kmap_atomic(p->page);
417 #endif
418 }
419
420 /* We use the flushing unmap only with ppgtt structures:
421 * page directories, page tables and scratch pages.
422 */
423 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
424 {
425 /* There are only few exceptions for gen >=6. chv and bxt.
426 * And we are not sure about the latter so play safe for now.
427 */
428 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
429 drm_clflush_virt_range(vaddr, PAGE_SIZE);
430
431 kunmap_atomic(vaddr);
432 }
433
434 #define kmap_px(px) kmap_page_dma(px_base(px))
435 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
436
437 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
438 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
439 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
440 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
441
442 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
443 const uint64_t val)
444 {
445 int i;
446 uint64_t * const vaddr = kmap_page_dma(p);
447
448 for (i = 0; i < 512; i++)
449 vaddr[i] = val;
450
451 kunmap_page_dma(dev, vaddr);
452 }
453
454 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
455 const uint32_t val32)
456 {
457 uint64_t v = val32;
458
459 v = v << 32 | val32;
460
461 fill_page_dma(dev, p, v);
462 }
463
464 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
465 {
466 struct i915_page_scratch *sp;
467 int ret;
468
469 sp = kzalloc(sizeof(*sp), GFP_KERNEL);
470 if (sp == NULL)
471 return ERR_PTR(-ENOMEM);
472
473 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
474 if (ret) {
475 kfree(sp);
476 return ERR_PTR(ret);
477 }
478
479 #ifndef __NetBSD__ /* XXX ??? */
480 set_pages_uc(px_page(sp), 1);
481 #endif
482
483 return sp;
484 }
485
486 static void free_scratch_page(struct drm_device *dev,
487 struct i915_page_scratch *sp)
488 {
489 #ifndef __NetBSD__ /* XXX ??? */
490 set_pages_wb(px_page(sp), 1);
491 #endif
492
493 cleanup_px(dev, sp);
494 kfree(sp);
495 }
496
497 static struct i915_page_table *alloc_pt(struct drm_device *dev)
498 {
499 struct i915_page_table *pt;
500 const size_t count = INTEL_INFO(dev)->gen >= 8 ?
501 GEN8_PTES : GEN6_PTES;
502 int ret = -ENOMEM;
503
504 pt = kzalloc(sizeof(*pt), GFP_KERNEL);
505 if (!pt)
506 return ERR_PTR(-ENOMEM);
507
508 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
509 GFP_KERNEL);
510
511 if (!pt->used_ptes)
512 goto fail_bitmap;
513
514 ret = setup_px(dev, pt);
515 if (ret)
516 goto fail_page_m;
517
518 return pt;
519
520 fail_page_m:
521 kfree(pt->used_ptes);
522 fail_bitmap:
523 kfree(pt);
524
525 return ERR_PTR(ret);
526 }
527
528 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
529 {
530 cleanup_px(dev, pt);
531 kfree(pt->used_ptes);
532 kfree(pt);
533 }
534
535 static void gen8_initialize_pt(struct i915_address_space *vm,
536 struct i915_page_table *pt)
537 {
538 gen8_pte_t scratch_pte;
539
540 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
541 I915_CACHE_LLC, true);
542
543 fill_px(vm->dev, pt, scratch_pte);
544 }
545
546 static void gen6_initialize_pt(struct i915_address_space *vm,
547 struct i915_page_table *pt)
548 {
549 gen6_pte_t scratch_pte;
550
551 WARN_ON(px_dma(vm->scratch_page) == 0);
552
553 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
554 I915_CACHE_LLC, true, 0);
555
556 fill32_px(vm->dev, pt, scratch_pte);
557 }
558
559 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
560 {
561 struct i915_page_directory *pd;
562 int ret = -ENOMEM;
563
564 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
565 if (!pd)
566 return ERR_PTR(-ENOMEM);
567
568 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
569 sizeof(*pd->used_pdes), GFP_KERNEL);
570 if (!pd->used_pdes)
571 goto fail_bitmap;
572
573 ret = setup_px(dev, pd);
574 if (ret)
575 goto fail_page_m;
576
577 return pd;
578
579 fail_page_m:
580 kfree(pd->used_pdes);
581 fail_bitmap:
582 kfree(pd);
583
584 return ERR_PTR(ret);
585 }
586
587 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
588 {
589 if (px_page(pd)) {
590 cleanup_px(dev, pd);
591 kfree(pd->used_pdes);
592 kfree(pd);
593 }
594 }
595
596 static void gen8_initialize_pd(struct i915_address_space *vm,
597 struct i915_page_directory *pd)
598 {
599 gen8_pde_t scratch_pde;
600
601 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
602
603 fill_px(vm->dev, pd, scratch_pde);
604 }
605
606 static int __pdp_init(struct drm_device *dev,
607 struct i915_page_directory_pointer *pdp)
608 {
609 size_t pdpes = I915_PDPES_PER_PDP(dev);
610
611 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
612 sizeof(unsigned long),
613 GFP_KERNEL);
614 if (!pdp->used_pdpes)
615 return -ENOMEM;
616
617 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
618 GFP_KERNEL);
619 if (!pdp->page_directory) {
620 kfree(pdp->used_pdpes);
621 /* the PDP might be the statically allocated top level. Keep it
622 * as clean as possible */
623 pdp->used_pdpes = NULL;
624 return -ENOMEM;
625 }
626
627 return 0;
628 }
629
630 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
631 {
632 kfree(pdp->used_pdpes);
633 kfree(pdp->page_directory);
634 pdp->page_directory = NULL;
635 }
636
637 static struct
638 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
639 {
640 struct i915_page_directory_pointer *pdp;
641 int ret = -ENOMEM;
642
643 WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
644
645 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
646 if (!pdp)
647 return ERR_PTR(-ENOMEM);
648
649 ret = __pdp_init(dev, pdp);
650 if (ret)
651 goto fail_bitmap;
652
653 ret = setup_px(dev, pdp);
654 if (ret)
655 goto fail_page_m;
656
657 return pdp;
658
659 fail_page_m:
660 __pdp_fini(pdp);
661 fail_bitmap:
662 kfree(pdp);
663
664 return ERR_PTR(ret);
665 }
666
667 static void free_pdp(struct drm_device *dev,
668 struct i915_page_directory_pointer *pdp)
669 {
670 __pdp_fini(pdp);
671 if (USES_FULL_48BIT_PPGTT(dev)) {
672 cleanup_px(dev, pdp);
673 kfree(pdp);
674 }
675 }
676
677 static void gen8_initialize_pdp(struct i915_address_space *vm,
678 struct i915_page_directory_pointer *pdp)
679 {
680 gen8_ppgtt_pdpe_t scratch_pdpe;
681
682 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
683
684 fill_px(vm->dev, pdp, scratch_pdpe);
685 }
686
687 static void gen8_initialize_pml4(struct i915_address_space *vm,
688 struct i915_pml4 *pml4)
689 {
690 gen8_ppgtt_pml4e_t scratch_pml4e;
691
692 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
693 I915_CACHE_LLC);
694
695 fill_px(vm->dev, pml4, scratch_pml4e);
696 }
697
698 static void
699 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
700 struct i915_page_directory_pointer *pdp,
701 struct i915_page_directory *pd,
702 int index)
703 {
704 gen8_ppgtt_pdpe_t *page_directorypo;
705
706 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
707 return;
708
709 page_directorypo = kmap_px(pdp);
710 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
711 kunmap_px(ppgtt, page_directorypo);
712 }
713
714 static void
715 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
716 struct i915_pml4 *pml4,
717 struct i915_page_directory_pointer *pdp,
718 int index)
719 {
720 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
721
722 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
723 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
724 kunmap_px(ppgtt, pagemap);
725 }
726
727 /* Broadwell Page Directory Pointer Descriptors */
728 static int gen8_write_pdp(struct drm_i915_gem_request *req,
729 unsigned entry,
730 dma_addr_t addr)
731 {
732 struct intel_engine_cs *ring = req->ring;
733 int ret;
734
735 BUG_ON(entry >= 4);
736
737 ret = intel_ring_begin(req, 6);
738 if (ret)
739 return ret;
740
741 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
742 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
743 intel_ring_emit(ring, upper_32_bits(addr));
744 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
745 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
746 intel_ring_emit(ring, lower_32_bits(addr));
747 intel_ring_advance(ring);
748
749 return 0;
750 }
751
752 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
753 struct drm_i915_gem_request *req)
754 {
755 int i, ret;
756
757 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
758 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
759
760 ret = gen8_write_pdp(req, i, pd_daddr);
761 if (ret)
762 return ret;
763 }
764
765 return 0;
766 }
767
768 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
769 struct drm_i915_gem_request *req)
770 {
771 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
772 }
773
774 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
775 struct i915_page_directory_pointer *pdp,
776 uint64_t start,
777 uint64_t length,
778 gen8_pte_t scratch_pte)
779 {
780 struct i915_hw_ppgtt *ppgtt =
781 container_of(vm, struct i915_hw_ppgtt, base);
782 gen8_pte_t *pt_vaddr;
783 unsigned pdpe = gen8_pdpe_index(start);
784 unsigned pde = gen8_pde_index(start);
785 unsigned pte = gen8_pte_index(start);
786 unsigned num_entries = length >> PAGE_SHIFT;
787 unsigned last_pte, i;
788
789 if (WARN_ON(!pdp))
790 return;
791
792 while (num_entries) {
793 struct i915_page_directory *pd;
794 struct i915_page_table *pt;
795
796 if (WARN_ON(!pdp->page_directory[pdpe]))
797 break;
798
799 pd = pdp->page_directory[pdpe];
800
801 if (WARN_ON(!pd->page_table[pde]))
802 break;
803
804 pt = pd->page_table[pde];
805
806 if (WARN_ON(!px_page(pt)))
807 break;
808
809 last_pte = pte + num_entries;
810 if (last_pte > GEN8_PTES)
811 last_pte = GEN8_PTES;
812
813 pt_vaddr = kmap_px(pt);
814
815 for (i = pte; i < last_pte; i++) {
816 pt_vaddr[i] = scratch_pte;
817 num_entries--;
818 }
819
820 kunmap_px(ppgtt, pt_vaddr);
821
822 pte = 0;
823 if (++pde == I915_PDES) {
824 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
825 break;
826 pde = 0;
827 }
828 }
829 }
830
831 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
832 uint64_t start,
833 uint64_t length,
834 bool use_scratch)
835 {
836 struct i915_hw_ppgtt *ppgtt =
837 container_of(vm, struct i915_hw_ppgtt, base);
838 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
839 I915_CACHE_LLC, use_scratch);
840
841 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
842 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
843 scratch_pte);
844 } else {
845 uint64_t templ4, pml4e;
846 struct i915_page_directory_pointer *pdp;
847
848 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
849 gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
850 scratch_pte);
851 }
852 }
853 }
854
855 #ifdef __NetBSD__
856 static void
857 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
858 struct i915_page_directory_pointer *pdp, bus_dmamap_t dmamap,
859 unsigned *segp, uint64_t start, enum i915_cache_level cache_level)
860 {
861 struct i915_hw_ppgtt *ppgtt =
862 container_of(vm, struct i915_hw_ppgtt, base);
863 gen8_pte_t *pt_vaddr;
864 unsigned pdpe = gen8_pdpe_index(start);
865 unsigned pde = gen8_pde_index(start);
866 unsigned pte = gen8_pte_index(start);
867
868 pt_vaddr = NULL;
869 for (; *segp < dmamap->dm_nsegs; (*segp)++) {
870 KASSERT(dmamap->dm_segs[*segp].ds_len == PAGE_SIZE);
871 if (pt_vaddr == NULL) {
872 struct i915_page_directory *pd =
873 pdp->page_directory[pdpe];
874 struct i915_page_table *pt = pd->page_table[pde];
875 pt_vaddr = kmap_px(pt);
876 }
877 pt_vaddr[pte] = gen8_pte_encode(dmamap->dm_segs[*segp].ds_addr,
878 cache_level, true);
879 if (++pte == GEN8_PTES) {
880 kunmap_px(ppgtt, pt_vaddr);
881 pt_vaddr = NULL;
882 if (++pde == I915_PDES) {
883 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
884 break;
885 pde = 0;
886 }
887 pte = 0;
888 }
889 }
890 if (pt_vaddr)
891 kunmap_px(ppgtt, pt_vaddr);
892 }
893 #else
894 static void
895 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
896 struct i915_page_directory_pointer *pdp,
897 struct sg_page_iter *sg_iter,
898 uint64_t start,
899 enum i915_cache_level cache_level)
900 {
901 struct i915_hw_ppgtt *ppgtt =
902 container_of(vm, struct i915_hw_ppgtt, base);
903 gen8_pte_t *pt_vaddr;
904 unsigned pdpe = gen8_pdpe_index(start);
905 unsigned pde = gen8_pde_index(start);
906 unsigned pte = gen8_pte_index(start);
907
908 pt_vaddr = NULL;
909
910 while (__sg_page_iter_next(sg_iter)) {
911 if (pt_vaddr == NULL) {
912 struct i915_page_directory *pd = pdp->page_directory[pdpe];
913 struct i915_page_table *pt = pd->page_table[pde];
914 pt_vaddr = kmap_px(pt);
915 }
916
917 pt_vaddr[pte] =
918 gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
919 cache_level, true);
920 if (++pte == GEN8_PTES) {
921 kunmap_px(ppgtt, pt_vaddr);
922 pt_vaddr = NULL;
923 if (++pde == I915_PDES) {
924 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
925 break;
926 pde = 0;
927 }
928 pte = 0;
929 }
930 }
931
932 if (pt_vaddr)
933 kunmap_px(ppgtt, pt_vaddr);
934 }
935 #endif
936
937 #ifdef __NetBSD__
938 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
939 bus_dmamap_t dmamap, uint64_t start, enum i915_cache_level cache_level,
940 u32 unused)
941 {
942 struct i915_hw_ppgtt *ppgtt =
943 container_of(vm, struct i915_hw_ppgtt, base);
944 unsigned seg = 0;
945
946 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
947 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, dmamap, &seg,
948 start, cache_level);
949 } else {
950 struct i915_page_directory_pointer *pdp;
951 uint64_t templ4, pml4e;
952 uint64_t length = dmamap->dm_mapsize;
953
954 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4,
955 pml4e) {
956 gen8_ppgtt_insert_pte_entries(vm, pdp, dmamap, &seg,
957 start, cache_level);
958 }
959 }
960 }
961 #else
962 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
963 struct sg_table *pages,
964 uint64_t start,
965 enum i915_cache_level cache_level,
966 u32 unused)
967 {
968 struct i915_hw_ppgtt *ppgtt =
969 container_of(vm, struct i915_hw_ppgtt, base);
970 struct sg_page_iter sg_iter;
971
972 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
973
974 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
975 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
976 cache_level);
977 } else {
978 struct i915_page_directory_pointer *pdp;
979 uint64_t templ4, pml4e;
980 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
981
982 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
983 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
984 start, cache_level);
985 }
986 }
987 }
988 #endif
989
990 static void gen8_free_page_tables(struct drm_device *dev,
991 struct i915_page_directory *pd)
992 {
993 int i;
994
995 if (!px_page(pd))
996 return;
997
998 for_each_set_bit(i, pd->used_pdes, I915_PDES) {
999 if (WARN_ON(!pd->page_table[i]))
1000 continue;
1001
1002 free_pt(dev, pd->page_table[i]);
1003 pd->page_table[i] = NULL;
1004 }
1005 }
1006
1007 static int gen8_init_scratch(struct i915_address_space *vm)
1008 {
1009 struct drm_device *dev = vm->dev;
1010
1011 vm->scratch_page = alloc_scratch_page(dev);
1012 if (IS_ERR(vm->scratch_page))
1013 return PTR_ERR(vm->scratch_page);
1014
1015 vm->scratch_pt = alloc_pt(dev);
1016 if (IS_ERR(vm->scratch_pt)) {
1017 free_scratch_page(dev, vm->scratch_page);
1018 return PTR_ERR(vm->scratch_pt);
1019 }
1020
1021 vm->scratch_pd = alloc_pd(dev);
1022 if (IS_ERR(vm->scratch_pd)) {
1023 free_pt(dev, vm->scratch_pt);
1024 free_scratch_page(dev, vm->scratch_page);
1025 return PTR_ERR(vm->scratch_pd);
1026 }
1027
1028 if (USES_FULL_48BIT_PPGTT(dev)) {
1029 vm->scratch_pdp = alloc_pdp(dev);
1030 if (IS_ERR(vm->scratch_pdp)) {
1031 free_pd(dev, vm->scratch_pd);
1032 free_pt(dev, vm->scratch_pt);
1033 free_scratch_page(dev, vm->scratch_page);
1034 return PTR_ERR(vm->scratch_pdp);
1035 }
1036 }
1037
1038 gen8_initialize_pt(vm, vm->scratch_pt);
1039 gen8_initialize_pd(vm, vm->scratch_pd);
1040 if (USES_FULL_48BIT_PPGTT(dev))
1041 gen8_initialize_pdp(vm, vm->scratch_pdp);
1042
1043 return 0;
1044 }
1045
1046 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1047 {
1048 enum vgt_g2v_type msg;
1049 struct drm_device *dev = ppgtt->base.dev;
1050 struct drm_i915_private *dev_priv = dev->dev_private;
1051 unsigned int offset = vgtif_reg(pdp0_lo);
1052 int i;
1053
1054 if (USES_FULL_48BIT_PPGTT(dev)) {
1055 u64 daddr = px_dma(&ppgtt->pml4);
1056
1057 I915_WRITE(offset, lower_32_bits(daddr));
1058 I915_WRITE(offset + 4, upper_32_bits(daddr));
1059
1060 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1061 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1062 } else {
1063 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
1064 u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1065
1066 I915_WRITE(offset, lower_32_bits(daddr));
1067 I915_WRITE(offset + 4, upper_32_bits(daddr));
1068
1069 offset += 8;
1070 }
1071
1072 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1073 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1074 }
1075
1076 I915_WRITE(vgtif_reg(g2v_notify), msg);
1077
1078 return 0;
1079 }
1080
1081 static void gen8_free_scratch(struct i915_address_space *vm)
1082 {
1083 struct drm_device *dev = vm->dev;
1084
1085 if (USES_FULL_48BIT_PPGTT(dev))
1086 free_pdp(dev, vm->scratch_pdp);
1087 free_pd(dev, vm->scratch_pd);
1088 free_pt(dev, vm->scratch_pt);
1089 free_scratch_page(dev, vm->scratch_page);
1090 }
1091
1092 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
1093 struct i915_page_directory_pointer *pdp)
1094 {
1095 int i;
1096
1097 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
1098 if (WARN_ON(!pdp->page_directory[i]))
1099 continue;
1100
1101 gen8_free_page_tables(dev, pdp->page_directory[i]);
1102 free_pd(dev, pdp->page_directory[i]);
1103 }
1104
1105 free_pdp(dev, pdp);
1106 }
1107
1108 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1109 {
1110 int i;
1111
1112 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
1113 if (WARN_ON(!ppgtt->pml4.pdps[i]))
1114 continue;
1115
1116 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
1117 }
1118
1119 cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
1120 }
1121
1122 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1123 {
1124 struct i915_hw_ppgtt *ppgtt =
1125 container_of(vm, struct i915_hw_ppgtt, base);
1126
1127 if (intel_vgpu_active(vm->dev))
1128 gen8_ppgtt_notify_vgt(ppgtt, false);
1129
1130 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1131 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1132 else
1133 gen8_ppgtt_cleanup_4lvl(ppgtt);
1134
1135 gen8_free_scratch(vm);
1136 }
1137
1138 /**
1139 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1140 * @vm: Master vm structure.
1141 * @pd: Page directory for this address range.
1142 * @start: Starting virtual address to begin allocations.
1143 * @length: Size of the allocations.
1144 * @new_pts: Bitmap set by function with new allocations. Likely used by the
1145 * caller to free on error.
1146 *
1147 * Allocate the required number of page tables. Extremely similar to
1148 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1149 * the page directory boundary (instead of the page directory pointer). That
1150 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1151 * possible, and likely that the caller will need to use multiple calls of this
1152 * function to achieve the appropriate allocation.
1153 *
1154 * Return: 0 if success; negative error code otherwise.
1155 */
1156 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1157 struct i915_page_directory *pd,
1158 uint64_t start,
1159 uint64_t length,
1160 unsigned long *new_pts)
1161 {
1162 struct drm_device *dev = vm->dev;
1163 struct i915_page_table *pt;
1164 uint64_t temp;
1165 uint32_t pde;
1166
1167 gen8_for_each_pde(pt, pd, start, length, temp, pde) {
1168 /* Don't reallocate page tables */
1169 if (test_bit(pde, pd->used_pdes)) {
1170 /* Scratch is never allocated this way */
1171 WARN_ON(pt == vm->scratch_pt);
1172 continue;
1173 }
1174
1175 pt = alloc_pt(dev);
1176 if (IS_ERR(pt))
1177 goto unwind_out;
1178
1179 gen8_initialize_pt(vm, pt);
1180 pd->page_table[pde] = pt;
1181 __set_bit(pde, new_pts);
1182 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1183 }
1184
1185 return 0;
1186
1187 unwind_out:
1188 for_each_set_bit(pde, new_pts, I915_PDES)
1189 free_pt(dev, pd->page_table[pde]);
1190
1191 return -ENOMEM;
1192 }
1193
1194 /**
1195 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1196 * @vm: Master vm structure.
1197 * @pdp: Page directory pointer for this address range.
1198 * @start: Starting virtual address to begin allocations.
1199 * @length: Size of the allocations.
1200 * @new_pds: Bitmap set by function with new allocations. Likely used by the
1201 * caller to free on error.
1202 *
1203 * Allocate the required number of page directories starting at the pde index of
1204 * @start, and ending at the pde index @start + @length. This function will skip
1205 * over already allocated page directories within the range, and only allocate
1206 * new ones, setting the appropriate pointer within the pdp as well as the
1207 * correct position in the bitmap @new_pds.
1208 *
1209 * The function will only allocate the pages within the range for a give page
1210 * directory pointer. In other words, if @start + @length straddles a virtually
1211 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1212 * required by the caller, This is not currently possible, and the BUG in the
1213 * code will prevent it.
1214 *
1215 * Return: 0 if success; negative error code otherwise.
1216 */
1217 static int
1218 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1219 struct i915_page_directory_pointer *pdp,
1220 uint64_t start,
1221 uint64_t length,
1222 unsigned long *new_pds)
1223 {
1224 struct drm_device *dev = vm->dev;
1225 struct i915_page_directory *pd;
1226 uint64_t temp;
1227 uint32_t pdpe;
1228 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1229
1230 WARN_ON(!bitmap_empty(new_pds, pdpes));
1231
1232 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1233 if (test_bit(pdpe, pdp->used_pdpes))
1234 continue;
1235
1236 pd = alloc_pd(dev);
1237 if (IS_ERR(pd))
1238 goto unwind_out;
1239
1240 gen8_initialize_pd(vm, pd);
1241 pdp->page_directory[pdpe] = pd;
1242 __set_bit(pdpe, new_pds);
1243 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1244 }
1245
1246 return 0;
1247
1248 unwind_out:
1249 for_each_set_bit(pdpe, new_pds, pdpes)
1250 free_pd(dev, pdp->page_directory[pdpe]);
1251
1252 return -ENOMEM;
1253 }
1254
1255 /**
1256 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1257 * @vm: Master vm structure.
1258 * @pml4: Page map level 4 for this address range.
1259 * @start: Starting virtual address to begin allocations.
1260 * @length: Size of the allocations.
1261 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
1262 * caller to free on error.
1263 *
1264 * Allocate the required number of page directory pointers. Extremely similar to
1265 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1266 * The main difference is here we are limited by the pml4 boundary (instead of
1267 * the page directory pointer).
1268 *
1269 * Return: 0 if success; negative error code otherwise.
1270 */
1271 static int
1272 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1273 struct i915_pml4 *pml4,
1274 uint64_t start,
1275 uint64_t length,
1276 unsigned long *new_pdps)
1277 {
1278 struct drm_device *dev = vm->dev;
1279 struct i915_page_directory_pointer *pdp;
1280 uint64_t temp;
1281 uint32_t pml4e;
1282
1283 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1284
1285 gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1286 if (!test_bit(pml4e, pml4->used_pml4es)) {
1287 pdp = alloc_pdp(dev);
1288 if (IS_ERR(pdp))
1289 goto unwind_out;
1290
1291 gen8_initialize_pdp(vm, pdp);
1292 pml4->pdps[pml4e] = pdp;
1293 __set_bit(pml4e, new_pdps);
1294 trace_i915_page_directory_pointer_entry_alloc(vm,
1295 pml4e,
1296 start,
1297 GEN8_PML4E_SHIFT);
1298 }
1299 }
1300
1301 return 0;
1302
1303 unwind_out:
1304 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1305 free_pdp(dev, pml4->pdps[pml4e]);
1306
1307 return -ENOMEM;
1308 }
1309
1310 static void
1311 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1312 {
1313 kfree(new_pts);
1314 kfree(new_pds);
1315 }
1316
1317 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both
1318 * of these are based on the number of PDPEs in the system.
1319 */
1320 static
1321 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1322 unsigned long **new_pts,
1323 uint32_t pdpes)
1324 {
1325 unsigned long *pds;
1326 unsigned long *pts;
1327
1328 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1329 if (!pds)
1330 return -ENOMEM;
1331
1332 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1333 GFP_TEMPORARY);
1334 if (!pts)
1335 goto err_out;
1336
1337 *new_pds = pds;
1338 *new_pts = pts;
1339
1340 return 0;
1341
1342 err_out:
1343 free_gen8_temp_bitmaps(pds, pts);
1344 return -ENOMEM;
1345 }
1346
1347 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1348 * the page table structures, we mark them dirty so that
1349 * context switching/execlist queuing code takes extra steps
1350 * to ensure that tlbs are flushed.
1351 */
1352 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1353 {
1354 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1355 }
1356
1357 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1358 struct i915_page_directory_pointer *pdp,
1359 uint64_t start,
1360 uint64_t length)
1361 {
1362 struct i915_hw_ppgtt *ppgtt =
1363 container_of(vm, struct i915_hw_ppgtt, base);
1364 unsigned long *new_page_dirs, *new_page_tables;
1365 struct drm_device *dev = vm->dev;
1366 struct i915_page_directory *pd;
1367 const uint64_t orig_start = start;
1368 const uint64_t orig_length = length;
1369 uint64_t temp;
1370 uint32_t pdpe;
1371 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1372 int ret;
1373
1374 /* Wrap is never okay since we can only represent 48b, and we don't
1375 * actually use the other side of the canonical address space.
1376 */
1377 if (WARN_ON(start + length < start))
1378 return -ENODEV;
1379
1380 if (WARN_ON(start + length > vm->total))
1381 return -ENODEV;
1382
1383 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1384 if (ret)
1385 return ret;
1386
1387 /* Do the allocations first so we can easily bail out */
1388 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1389 new_page_dirs);
1390 if (ret) {
1391 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1392 return ret;
1393 }
1394
1395 /* For every page directory referenced, allocate page tables */
1396 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1397 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1398 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1399 if (ret)
1400 goto err_out;
1401 }
1402
1403 start = orig_start;
1404 length = orig_length;
1405
1406 /* Allocations have completed successfully, so set the bitmaps, and do
1407 * the mappings. */
1408 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1409 gen8_pde_t *const page_directory = kmap_px(pd);
1410 struct i915_page_table *pt;
1411 uint64_t pd_len = length;
1412 uint64_t pd_start = start;
1413 uint32_t pde;
1414
1415 /* Every pd should be allocated, we just did that above. */
1416 WARN_ON(!pd);
1417
1418 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1419 /* Same reasoning as pd */
1420 WARN_ON(!pt);
1421 WARN_ON(!pd_len);
1422 WARN_ON(!gen8_pte_count(pd_start, pd_len));
1423
1424 /* Set our used ptes within the page table */
1425 bitmap_set(pt->used_ptes,
1426 gen8_pte_index(pd_start),
1427 gen8_pte_count(pd_start, pd_len));
1428
1429 /* Our pde is now pointing to the pagetable, pt */
1430 __set_bit(pde, pd->used_pdes);
1431
1432 /* Map the PDE to the page table */
1433 page_directory[pde] = gen8_pde_encode(px_dma(pt),
1434 I915_CACHE_LLC);
1435 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1436 gen8_pte_index(start),
1437 gen8_pte_count(start, length),
1438 GEN8_PTES);
1439
1440 /* NB: We haven't yet mapped ptes to pages. At this
1441 * point we're still relying on insert_entries() */
1442 }
1443
1444 kunmap_px(ppgtt, page_directory);
1445 __set_bit(pdpe, pdp->used_pdpes);
1446 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1447 }
1448
1449 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1450 mark_tlbs_dirty(ppgtt);
1451 return 0;
1452
1453 err_out:
1454 while (pdpe--) {
1455 for_each_set_bit(temp, new_page_tables + pdpe *
1456 BITS_TO_LONGS(I915_PDES), I915_PDES)
1457 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1458 }
1459
1460 for_each_set_bit(pdpe, new_page_dirs, pdpes)
1461 free_pd(dev, pdp->page_directory[pdpe]);
1462
1463 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1464 mark_tlbs_dirty(ppgtt);
1465 return ret;
1466 }
1467
1468 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1469 struct i915_pml4 *pml4,
1470 uint64_t start,
1471 uint64_t length)
1472 {
1473 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1474 struct i915_hw_ppgtt *ppgtt =
1475 container_of(vm, struct i915_hw_ppgtt, base);
1476 struct i915_page_directory_pointer *pdp;
1477 uint64_t temp, pml4e;
1478 int ret = 0;
1479
1480 /* Do the pml4 allocations first, so we don't need to track the newly
1481 * allocated tables below the pdp */
1482 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1483
1484 /* The pagedirectory and pagetable allocations are done in the shared 3
1485 * and 4 level code. Just allocate the pdps.
1486 */
1487 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1488 new_pdps);
1489 if (ret)
1490 return ret;
1491
1492 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1493 "The allocation has spanned more than 512GB. "
1494 "It is highly likely this is incorrect.");
1495
1496 gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1497 WARN_ON(!pdp);
1498
1499 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1500 if (ret)
1501 goto err_out;
1502
1503 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1504 }
1505
1506 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1507 GEN8_PML4ES_PER_PML4);
1508
1509 return 0;
1510
1511 err_out:
1512 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1513 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1514
1515 return ret;
1516 }
1517
1518 static int gen8_alloc_va_range(struct i915_address_space *vm,
1519 uint64_t start, uint64_t length)
1520 {
1521 struct i915_hw_ppgtt *ppgtt =
1522 container_of(vm, struct i915_hw_ppgtt, base);
1523
1524 if (USES_FULL_48BIT_PPGTT(vm->dev))
1525 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1526 else
1527 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1528 }
1529
1530 #ifndef __NetBSD__ /* XXX debugfs */
1531 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1532 uint64_t start, uint64_t length,
1533 gen8_pte_t scratch_pte,
1534 struct seq_file *m)
1535 {
1536 struct i915_page_directory *pd;
1537 uint64_t temp;
1538 uint32_t pdpe;
1539
1540 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1541 struct i915_page_table *pt;
1542 uint64_t pd_len = length;
1543 uint64_t pd_start = start;
1544 uint32_t pde;
1545
1546 if (!test_bit(pdpe, pdp->used_pdpes))
1547 continue;
1548
1549 seq_printf(m, "\tPDPE #%d\n", pdpe);
1550 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1551 uint32_t pte;
1552 gen8_pte_t *pt_vaddr;
1553
1554 if (!test_bit(pde, pd->used_pdes))
1555 continue;
1556
1557 pt_vaddr = kmap_px(pt);
1558 for (pte = 0; pte < GEN8_PTES; pte += 4) {
1559 uint64_t va =
1560 (pdpe << GEN8_PDPE_SHIFT) |
1561 (pde << GEN8_PDE_SHIFT) |
1562 (pte << GEN8_PTE_SHIFT);
1563 int i;
1564 bool found = false;
1565
1566 for (i = 0; i < 4; i++)
1567 if (pt_vaddr[pte + i] != scratch_pte)
1568 found = true;
1569 if (!found)
1570 continue;
1571
1572 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1573 for (i = 0; i < 4; i++) {
1574 if (pt_vaddr[pte + i] != scratch_pte)
1575 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1576 else
1577 seq_puts(m, " SCRATCH ");
1578 }
1579 seq_puts(m, "\n");
1580 }
1581 /* don't use kunmap_px, it could trigger
1582 * an unnecessary flush.
1583 */
1584 kunmap_atomic(pt_vaddr);
1585 }
1586 }
1587 }
1588
1589 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1590 {
1591 struct i915_address_space *vm = &ppgtt->base;
1592 uint64_t start = ppgtt->base.start;
1593 uint64_t length = ppgtt->base.total;
1594 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1595 I915_CACHE_LLC, true);
1596
1597 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1598 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1599 } else {
1600 uint64_t templ4, pml4e;
1601 struct i915_pml4 *pml4 = &ppgtt->pml4;
1602 struct i915_page_directory_pointer *pdp;
1603
1604 gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
1605 if (!test_bit(pml4e, pml4->used_pml4es))
1606 continue;
1607
1608 seq_printf(m, " PML4E #%llu\n", pml4e);
1609 gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1610 }
1611 }
1612 }
1613 #endif
1614
1615 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1616 {
1617 unsigned long *new_page_dirs, *new_page_tables;
1618 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1619 int ret;
1620
1621 /* We allocate temp bitmap for page tables for no gain
1622 * but as this is for init only, lets keep the things simple
1623 */
1624 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1625 if (ret)
1626 return ret;
1627
1628 /* Allocate for all pdps regardless of how the ppgtt
1629 * was defined.
1630 */
1631 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1632 0, 1ULL << 32,
1633 new_page_dirs);
1634 if (!ret)
1635 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1636
1637 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1638
1639 return ret;
1640 }
1641
1642 /*
1643 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1644 * with a net effect resembling a 2-level page table in normal x86 terms. Each
1645 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1646 * space.
1647 *
1648 */
1649 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1650 {
1651 int ret;
1652
1653 ret = gen8_init_scratch(&ppgtt->base);
1654 if (ret)
1655 return ret;
1656
1657 ppgtt->base.start = 0;
1658 ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1659 ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1660 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1661 ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1662 ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1663 ppgtt->base.bind_vma = ppgtt_bind_vma;
1664 #ifndef __NetBSD__ /* XXX debugfs */
1665 ppgtt->debug_dump = gen8_dump_ppgtt;
1666 #endif
1667
1668 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1669 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1670 if (ret)
1671 goto free_scratch;
1672
1673 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1674
1675 ppgtt->base.total = 1ULL << 48;
1676 ppgtt->switch_mm = gen8_48b_mm_switch;
1677 } else {
1678 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1679 if (ret)
1680 goto free_scratch;
1681
1682 ppgtt->base.total = 1ULL << 32;
1683 ppgtt->switch_mm = gen8_legacy_mm_switch;
1684 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1685 0, 0,
1686 GEN8_PML4E_SHIFT);
1687
1688 if (intel_vgpu_active(ppgtt->base.dev)) {
1689 ret = gen8_preallocate_top_level_pdps(ppgtt);
1690 if (ret)
1691 goto free_scratch;
1692 }
1693 }
1694
1695 if (intel_vgpu_active(ppgtt->base.dev))
1696 gen8_ppgtt_notify_vgt(ppgtt, true);
1697
1698 return 0;
1699
1700 free_scratch:
1701 gen8_free_scratch(&ppgtt->base);
1702 return ret;
1703 }
1704
1705 #ifndef __NetBSD__ /* XXX debugfs */
1706 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1707 {
1708 struct i915_address_space *vm = &ppgtt->base;
1709 struct i915_page_table *unused;
1710 gen6_pte_t scratch_pte;
1711 uint32_t pd_entry;
1712 uint32_t pte, pde, temp;
1713 uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1714
1715 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1716 I915_CACHE_LLC, true, 0);
1717
1718 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1719 u32 expected;
1720 gen6_pte_t *pt_vaddr;
1721 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1722 pd_entry = readl(ppgtt->pd_addr + pde);
1723 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1724
1725 if (pd_entry != expected)
1726 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1727 pde,
1728 pd_entry,
1729 expected);
1730 seq_printf(m, "\tPDE: %x\n", pd_entry);
1731
1732 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1733
1734 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1735 unsigned long va =
1736 (pde * PAGE_SIZE * GEN6_PTES) +
1737 (pte * PAGE_SIZE);
1738 int i;
1739 bool found = false;
1740 for (i = 0; i < 4; i++)
1741 if (pt_vaddr[pte + i] != scratch_pte)
1742 found = true;
1743 if (!found)
1744 continue;
1745
1746 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1747 for (i = 0; i < 4; i++) {
1748 if (pt_vaddr[pte + i] != scratch_pte)
1749 seq_printf(m, " %08x", pt_vaddr[pte + i]);
1750 else
1751 seq_puts(m, " SCRATCH ");
1752 }
1753 seq_puts(m, "\n");
1754 }
1755 kunmap_px(ppgtt, pt_vaddr);
1756 }
1757 }
1758 #endif
1759
1760 /* Write pde (index) from the page directory @pd to the page table @pt */
1761 static void gen6_write_pde(struct i915_page_directory *pd,
1762 const int pde, struct i915_page_table *pt)
1763 {
1764 /* Caller needs to make sure the write completes if necessary */
1765 struct i915_hw_ppgtt *ppgtt =
1766 container_of(pd, struct i915_hw_ppgtt, pd);
1767 #ifdef __NetBSD__
1768 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1769 const bus_space_tag_t bst = dev_priv->gtt.bst;
1770 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
1771 const bus_addr_t pd_base = ppgtt->pd.base.ggtt_offset;
1772 #endif
1773 u32 pd_entry;
1774
1775 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1776 pd_entry |= GEN6_PDE_VALID;
1777
1778 #ifdef __NetBSD__
1779 CTASSERT(sizeof(gen6_pte_t) == 4);
1780 bus_space_write_4(bst, bsh, pd_base + 4*pde, pd_entry);
1781 #else
1782 writel(pd_entry, ppgtt->pd_addr + pde);
1783 #endif
1784 }
1785
1786 /* Write all the page tables found in the ppgtt structure to incrementing page
1787 * directories. */
1788 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1789 struct i915_page_directory *pd,
1790 uint32_t start, uint32_t length)
1791 {
1792 struct i915_page_table *pt;
1793 uint32_t pde, temp;
1794
1795 gen6_for_each_pde(pt, pd, start, length, temp, pde)
1796 gen6_write_pde(pd, pde, pt);
1797
1798 /* Make sure write is complete before other code can use this page
1799 * table. Also require for WC mapped PTEs */
1800 #ifdef __NetBSD__
1801 bus_space_read_4(dev_priv->gtt.bst, dev_priv->gtt.bsh, 0);
1802 #else
1803 readl(dev_priv->gtt.gsm);
1804 #endif
1805 }
1806
1807 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1808 {
1809 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1810
1811 return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1812 }
1813
1814 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1815 struct drm_i915_gem_request *req)
1816 {
1817 struct intel_engine_cs *ring = req->ring;
1818 int ret;
1819
1820 /* NB: TLBs must be flushed and invalidated before a switch */
1821 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1822 if (ret)
1823 return ret;
1824
1825 ret = intel_ring_begin(req, 6);
1826 if (ret)
1827 return ret;
1828
1829 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1830 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1831 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1832 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1833 intel_ring_emit(ring, get_pd_offset(ppgtt));
1834 intel_ring_emit(ring, MI_NOOP);
1835 intel_ring_advance(ring);
1836
1837 return 0;
1838 }
1839
1840 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1841 struct drm_i915_gem_request *req)
1842 {
1843 struct intel_engine_cs *ring = req->ring;
1844 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1845
1846 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1847 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1848 return 0;
1849 }
1850
1851 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1852 struct drm_i915_gem_request *req)
1853 {
1854 struct intel_engine_cs *ring = req->ring;
1855 int ret;
1856
1857 /* NB: TLBs must be flushed and invalidated before a switch */
1858 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1859 if (ret)
1860 return ret;
1861
1862 ret = intel_ring_begin(req, 6);
1863 if (ret)
1864 return ret;
1865
1866 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1867 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1868 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1869 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1870 intel_ring_emit(ring, get_pd_offset(ppgtt));
1871 intel_ring_emit(ring, MI_NOOP);
1872 intel_ring_advance(ring);
1873
1874 /* XXX: RCS is the only one to auto invalidate the TLBs? */
1875 if (ring->id != RCS) {
1876 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1877 if (ret)
1878 return ret;
1879 }
1880
1881 return 0;
1882 }
1883
1884 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1885 struct drm_i915_gem_request *req)
1886 {
1887 struct intel_engine_cs *ring = req->ring;
1888 struct drm_device *dev = ppgtt->base.dev;
1889 struct drm_i915_private *dev_priv = dev->dev_private;
1890
1891
1892 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1893 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1894
1895 POSTING_READ(RING_PP_DIR_DCLV(ring));
1896
1897 return 0;
1898 }
1899
1900 static void gen8_ppgtt_enable(struct drm_device *dev)
1901 {
1902 struct drm_i915_private *dev_priv = dev->dev_private;
1903 struct intel_engine_cs *ring;
1904 int j;
1905
1906 for_each_ring(ring, dev_priv, j) {
1907 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1908 I915_WRITE(RING_MODE_GEN7(ring),
1909 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1910 }
1911 }
1912
1913 static void gen7_ppgtt_enable(struct drm_device *dev)
1914 {
1915 struct drm_i915_private *dev_priv = dev->dev_private;
1916 struct intel_engine_cs *ring;
1917 uint32_t ecochk, ecobits;
1918 int i;
1919
1920 ecobits = I915_READ(GAC_ECO_BITS);
1921 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1922
1923 ecochk = I915_READ(GAM_ECOCHK);
1924 if (IS_HASWELL(dev)) {
1925 ecochk |= ECOCHK_PPGTT_WB_HSW;
1926 } else {
1927 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1928 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1929 }
1930 I915_WRITE(GAM_ECOCHK, ecochk);
1931
1932 for_each_ring(ring, dev_priv, i) {
1933 /* GFX_MODE is per-ring on gen7+ */
1934 I915_WRITE(RING_MODE_GEN7(ring),
1935 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1936 }
1937 }
1938
1939 static void gen6_ppgtt_enable(struct drm_device *dev)
1940 {
1941 struct drm_i915_private *dev_priv = dev->dev_private;
1942 uint32_t ecochk, gab_ctl, ecobits;
1943
1944 ecobits = I915_READ(GAC_ECO_BITS);
1945 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1946 ECOBITS_PPGTT_CACHE64B);
1947
1948 gab_ctl = I915_READ(GAB_CTL);
1949 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1950
1951 ecochk = I915_READ(GAM_ECOCHK);
1952 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1953
1954 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1955 }
1956
1957 /* PPGTT support for Sandybdrige/Gen6 and later */
1958 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1959 uint64_t start,
1960 uint64_t length,
1961 bool use_scratch)
1962 {
1963 struct i915_hw_ppgtt *ppgtt =
1964 container_of(vm, struct i915_hw_ppgtt, base);
1965 gen6_pte_t *pt_vaddr, scratch_pte;
1966 unsigned first_entry = start >> PAGE_SHIFT;
1967 unsigned num_entries = length >> PAGE_SHIFT;
1968 unsigned act_pt = first_entry / GEN6_PTES;
1969 unsigned first_pte = first_entry % GEN6_PTES;
1970 unsigned last_pte, i;
1971
1972 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1973 I915_CACHE_LLC, true, 0);
1974
1975 while (num_entries) {
1976 last_pte = first_pte + num_entries;
1977 if (last_pte > GEN6_PTES)
1978 last_pte = GEN6_PTES;
1979
1980 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1981
1982 for (i = first_pte; i < last_pte; i++)
1983 pt_vaddr[i] = scratch_pte;
1984
1985 kunmap_px(ppgtt, pt_vaddr);
1986
1987 num_entries -= last_pte - first_pte;
1988 first_pte = 0;
1989 act_pt++;
1990 }
1991 }
1992
1993 #ifdef __NetBSD__
1994 static void
1995 gen6_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
1996 uint64_t start, enum i915_cache_level cache_level, uint32_t flags)
1997 {
1998 struct i915_hw_ppgtt *ppgtt =
1999 container_of(vm, struct i915_hw_ppgtt, base);
2000 gen6_pte_t *pt_vaddr;
2001 unsigned first_entry = start >> PAGE_SHIFT;
2002 unsigned act_pt = first_entry / GEN6_PTES;
2003 unsigned act_pte = first_entry % GEN6_PTES;
2004 unsigned seg;
2005
2006 pt_vaddr = NULL;
2007 KASSERT(0 < dmamap->dm_nsegs);
2008 for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
2009 KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
2010 if (pt_vaddr == NULL)
2011 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
2012 pt_vaddr[act_pte] =
2013 vm->pte_encode(dmamap->dm_segs[seg].ds_addr, cache_level,
2014 true, flags);
2015 if (++act_pte == GEN6_PTES) {
2016 kunmap_px(ppgtt, pt_vaddr);
2017 pt_vaddr = NULL;
2018 act_pt++;
2019 act_pte = 0;
2020 }
2021 }
2022 if (pt_vaddr)
2023 kunmap_px(ppgtt, pt_vaddr);
2024 }
2025 #else
2026 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
2027 struct sg_table *pages,
2028 uint64_t start,
2029 enum i915_cache_level cache_level, u32 flags)
2030 {
2031 struct i915_hw_ppgtt *ppgtt =
2032 container_of(vm, struct i915_hw_ppgtt, base);
2033 gen6_pte_t *pt_vaddr;
2034 unsigned first_entry = start >> PAGE_SHIFT;
2035 unsigned act_pt = first_entry / GEN6_PTES;
2036 unsigned act_pte = first_entry % GEN6_PTES;
2037 struct sg_page_iter sg_iter;
2038
2039 pt_vaddr = NULL;
2040 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
2041 if (pt_vaddr == NULL)
2042 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
2043
2044 pt_vaddr[act_pte] =
2045 vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
2046 cache_level, true, flags);
2047
2048 if (++act_pte == GEN6_PTES) {
2049 kunmap_px(ppgtt, pt_vaddr);
2050 pt_vaddr = NULL;
2051 act_pt++;
2052 act_pte = 0;
2053 }
2054 }
2055 if (pt_vaddr)
2056 kunmap_px(ppgtt, pt_vaddr);
2057 }
2058 #endif
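
/*
 * Both variants above decompose the GTT offset the same way; a small
 * worked example (assuming GEN6_PTES == 1024, i.e. one 4 KiB page of
 * 4-byte gen6 PTEs):
 *
 *	first_entry = start >> PAGE_SHIFT;	page index in the VM
 *	act_pt  = first_entry / GEN6_PTES;	which page table
 *	act_pte = first_entry % GEN6_PTES;	slot within that table
 *
 * e.g. start = 8 MiB gives first_entry = 2048, act_pt = 2, act_pte = 0,
 * so the mapping begins at the first slot of the third page table and
 * rolls over to the next table every 4 MiB.
 */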
2059
2060 static int gen6_alloc_va_range(struct i915_address_space *vm,
2061 uint64_t start_in, uint64_t length_in)
2062 {
2063 DECLARE_BITMAP(new_page_tables, I915_PDES);
2064 struct drm_device *dev = vm->dev;
2065 struct drm_i915_private *dev_priv = dev->dev_private;
2066 struct i915_hw_ppgtt *ppgtt =
2067 container_of(vm, struct i915_hw_ppgtt, base);
2068 struct i915_page_table *pt;
2069 uint32_t start, length, start_save, length_save;
2070 uint32_t pde, temp;
2071 int ret;
2072
2073 if (WARN_ON(start_in + length_in > ppgtt->base.total))
2074 return -ENODEV;
2075
2076 start = start_save = start_in;
2077 length = length_save = length_in;
2078
2079 bitmap_zero(new_page_tables, I915_PDES);
2080
2081 /* The allocation is done in two stages so that we can bail out with
2082 * a minimal amount of pain. The first stage finds new page tables that
2083 * need allocation. The second stage marks the PTEs in use within those
2084 * page tables.
2085 */
2086 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
2087 if (pt != vm->scratch_pt) {
2088 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
2089 continue;
2090 }
2091
2092 /* We've already allocated a page table */
2093 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
2094
2095 pt = alloc_pt(dev);
2096 if (IS_ERR(pt)) {
2097 ret = PTR_ERR(pt);
2098 goto unwind_out;
2099 }
2100
2101 gen6_initialize_pt(vm, pt);
2102
2103 ppgtt->pd.page_table[pde] = pt;
2104 __set_bit(pde, new_page_tables);
2105 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
2106 }
2107
2108 start = start_save;
2109 length = length_save;
2110
2111 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
2112 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
2113
2114 bitmap_zero(tmp_bitmap, GEN6_PTES);
2115 bitmap_set(tmp_bitmap, gen6_pte_index(start),
2116 gen6_pte_count(start, length));
2117
2118 if (__test_and_clear_bit(pde, new_page_tables))
2119 gen6_write_pde(&ppgtt->pd, pde, pt);
2120
2121 trace_i915_page_table_entry_map(vm, pde, pt,
2122 gen6_pte_index(start),
2123 gen6_pte_count(start, length),
2124 GEN6_PTES);
2125 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
2126 GEN6_PTES);
2127 }
2128
2129 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
2130
2131 /* Make sure the write is complete before other code can use this page
2132 * table. Also required for WC mapped PTEs */
2133 #ifdef __NetBSD__
2134 bus_space_read_4(dev_priv->gtt.bst, dev_priv->gtt.bsh, 0);
2135 #else
2136 readl(dev_priv->gtt.gsm);
2137 #endif
2138
2139 mark_tlbs_dirty(ppgtt);
2140 return 0;
2141
2142 unwind_out:
2143 for_each_set_bit(pde, new_page_tables, I915_PDES) {
2144 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
2145
2146 ppgtt->pd.page_table[pde] = vm->scratch_pt;
2147 free_pt(vm->dev, pt);
2148 }
2149
2150 mark_tlbs_dirty(ppgtt);
2151 return ret;
2152 }
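
/*
 * In outline, the prepare/commit shape of gen6_alloc_va_range() above
 * (a sketch, not the exact code):
 *
 *	stage 1: for each PDE in [start, start + length):
 *	             if it still points at scratch_pt, allocate a real
 *	             page table and record the PDE in new_page_tables;
 *	stage 2: for each PDE in the same range:
 *	             mark the covered PTEs in pt->used_ptes and, for the
 *	             freshly allocated tables, write the PDE;
 *	unwind:  on a stage-1 failure, free only the tables recorded in
 *	             new_page_tables and point the PDEs back at scratch_pt.
 *
 * The bitmap is what keeps the unwind cheap: nothing that existed before
 * the call is touched.
 */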
2153
2154 static int gen6_init_scratch(struct i915_address_space *vm)
2155 {
2156 struct drm_device *dev = vm->dev;
2157
2158 vm->scratch_page = alloc_scratch_page(dev);
2159 if (IS_ERR(vm->scratch_page))
2160 return PTR_ERR(vm->scratch_page);
2161
2162 vm->scratch_pt = alloc_pt(dev);
2163 if (IS_ERR(vm->scratch_pt)) {
2164 free_scratch_page(dev, vm->scratch_page);
2165 return PTR_ERR(vm->scratch_pt);
2166 }
2167
2168 gen6_initialize_pt(vm, vm->scratch_pt);
2169
2170 return 0;
2171 }
2172
2173 static void gen6_free_scratch(struct i915_address_space *vm)
2174 {
2175 struct drm_device *dev = vm->dev;
2176
2177 free_pt(dev, vm->scratch_pt);
2178 free_scratch_page(dev, vm->scratch_page);
2179 }
2180
2181 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2182 {
2183 struct i915_hw_ppgtt *ppgtt =
2184 container_of(vm, struct i915_hw_ppgtt, base);
2185 struct i915_page_table *pt;
2186 uint32_t pde;
2187
2188 drm_mm_remove_node(&ppgtt->node);
2189
2190 gen6_for_all_pdes(pt, ppgtt, pde) {
2191 if (pt != vm->scratch_pt)
2192 free_pt(ppgtt->base.dev, pt);
2193 }
2194
2195 gen6_free_scratch(vm);
2196 }
2197
2198 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2199 {
2200 struct i915_address_space *vm = &ppgtt->base;
2201 struct drm_device *dev = ppgtt->base.dev;
2202 struct drm_i915_private *dev_priv = dev->dev_private;
2203 bool retried = false;
2204 int ret;
2205
2206 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2207 * allocator works in address space sizes, so it's multiplied by page
2208 * size. We allocate at the top of the GTT to avoid fragmentation.
2209 */
2210 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
2211
2212 ret = gen6_init_scratch(vm);
2213 if (ret)
2214 return ret;
2215
2216 alloc:
2217 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2218 &ppgtt->node, GEN6_PD_SIZE,
2219 GEN6_PD_ALIGN, 0,
2220 0, dev_priv->gtt.base.total,
2221 DRM_MM_TOPDOWN);
2222 if (ret == -ENOSPC && !retried) {
2223 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2224 GEN6_PD_SIZE, GEN6_PD_ALIGN,
2225 I915_CACHE_NONE,
2226 0, dev_priv->gtt.base.total,
2227 0);
2228 if (ret)
2229 goto err_out;
2230
2231 retried = true;
2232 goto alloc;
2233 }
2234
2235 if (ret)
2236 goto err_out;
2237
2238
2239 if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2240 DRM_DEBUG("Forced to use aperture for PDEs\n");
2241
2242 return 0;
2243
2244 err_out:
2245 gen6_free_scratch(vm);
2246 return ret;
2247 }
2248
2249 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2250 {
2251 return gen6_ppgtt_allocate_page_directories(ppgtt);
2252 }
2253
2254 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2255 uint64_t start, uint64_t length)
2256 {
2257 struct i915_page_table *unused __unused;
2258 uint32_t pde, temp;
2259
2260 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2261 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2262 }
2263
2264 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2265 {
2266 struct drm_device *dev = ppgtt->base.dev;
2267 struct drm_i915_private *dev_priv = dev->dev_private;
2268 int ret;
2269
2270 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2271 if (IS_GEN6(dev)) {
2272 ppgtt->switch_mm = gen6_mm_switch;
2273 } else if (IS_HASWELL(dev)) {
2274 ppgtt->switch_mm = hsw_mm_switch;
2275 } else if (IS_GEN7(dev)) {
2276 ppgtt->switch_mm = gen7_mm_switch;
2277 } else
2278 BUG();
2279
2280 if (intel_vgpu_active(dev))
2281 ppgtt->switch_mm = vgpu_mm_switch;
2282
2283 ret = gen6_ppgtt_alloc(ppgtt);
2284 if (ret)
2285 return ret;
2286
2287 ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2288 ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2289 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2290 ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2291 ppgtt->base.bind_vma = ppgtt_bind_vma;
2292 ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2293 ppgtt->base.start = 0;
2294 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2295 #ifndef __NetBSD__
2296 ppgtt->debug_dump = gen6_dump_ppgtt;
2297 #endif
2298
2299 ppgtt->pd.base.ggtt_offset =
2300 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2301
2302 #ifndef __NetBSD__
2303 ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2304 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2305 #endif
2306
2307 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2308
2309 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2310
2311 DRM_DEBUG_DRIVER("Allocated pde space (%"PRId64"M) at GTT entry: %"PRIx64"\n",
2312 ppgtt->node.size >> 20,
2313 ppgtt->node.start / PAGE_SIZE);
2314
2315 DRM_DEBUG("Adding PPGTT at offset %x\n",
2316 ppgtt->pd.base.ggtt_offset << 10);
2317
2318 return 0;
2319 }
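
/*
 * For scale: the total set above, I915_PDES * GEN6_PTES * PAGE_SIZE,
 * works out to 512 * 1024 * 4096 = 2 GiB (assuming GEN6_PTES == 1024),
 * which is why the switch_mm paths load PP_DIR_DCLV_2G into
 * RING_PP_DIR_DCLV.
 */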
2320
2321 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2322 {
2323 ppgtt->base.dev = dev;
2324
2325 if (INTEL_INFO(dev)->gen < 8)
2326 return gen6_ppgtt_init(ppgtt);
2327 else
2328 return gen8_ppgtt_init(ppgtt);
2329 }
2330
2331 static void i915_address_space_init(struct i915_address_space *vm,
2332 struct drm_i915_private *dev_priv)
2333 {
2334 drm_mm_init(&vm->mm, vm->start, vm->total);
2335 vm->dev = dev_priv->dev;
2336 INIT_LIST_HEAD(&vm->active_list);
2337 INIT_LIST_HEAD(&vm->inactive_list);
2338 list_add_tail(&vm->global_link, &dev_priv->vm_list);
2339 }
2340
2341 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2342 {
2343 struct drm_i915_private *dev_priv = dev->dev_private;
2344 int ret = 0;
2345
2346 ret = __hw_ppgtt_init(dev, ppgtt);
2347 if (ret == 0) {
2348 kref_init(&ppgtt->ref);
2349 i915_address_space_init(&ppgtt->base, dev_priv);
2350 }
2351
2352 return ret;
2353 }
2354
2355 int i915_ppgtt_init_hw(struct drm_device *dev)
2356 {
2357 /* In the case of execlists, PPGTT is enabled by the context descriptor
2358 * and the PDPs are contained within the context itself. We don't
2359 * need to do anything here. */
2360 if (i915.enable_execlists)
2361 return 0;
2362
2363 if (!USES_PPGTT(dev))
2364 return 0;
2365
2366 if (IS_GEN6(dev))
2367 gen6_ppgtt_enable(dev);
2368 else if (IS_GEN7(dev))
2369 gen7_ppgtt_enable(dev);
2370 else if (INTEL_INFO(dev)->gen >= 8)
2371 gen8_ppgtt_enable(dev);
2372 else
2373 MISSING_CASE(INTEL_INFO(dev)->gen);
2374
2375 return 0;
2376 }
2377
2378 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2379 {
2380 struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2381 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2382
2383 if (i915.enable_execlists)
2384 return 0;
2385
2386 if (!ppgtt)
2387 return 0;
2388
2389 return ppgtt->switch_mm(ppgtt, req);
2390 }
2391
2392 struct i915_hw_ppgtt *
2393 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2394 {
2395 struct i915_hw_ppgtt *ppgtt;
2396 int ret;
2397
2398 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2399 if (!ppgtt)
2400 return ERR_PTR(-ENOMEM);
2401
2402 ret = i915_ppgtt_init(dev, ppgtt);
2403 if (ret) {
2404 kfree(ppgtt);
2405 return ERR_PTR(ret);
2406 }
2407
2408 ppgtt->file_priv = fpriv;
2409
2410 trace_i915_ppgtt_create(&ppgtt->base);
2411
2412 return ppgtt;
2413 }
2414
2415 void i915_ppgtt_release(struct kref *kref)
2416 {
2417 struct i915_hw_ppgtt *ppgtt =
2418 container_of(kref, struct i915_hw_ppgtt, ref);
2419
2420 trace_i915_ppgtt_release(&ppgtt->base);
2421
2422 /* vmas should already be unbound */
2423 WARN_ON(!list_empty(&ppgtt->base.active_list));
2424 WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2425
2426 list_del(&ppgtt->base.global_link);
2427 drm_mm_takedown(&ppgtt->base.mm);
2428
2429 ppgtt->base.cleanup(&ppgtt->base);
2430 kfree(ppgtt);
2431 }
2432
2433 extern int intel_iommu_gfx_mapped;
2434 /* Certain Gen5 chipsets require idling the GPU before
2435 * unmapping anything from the GTT when VT-d is enabled.
2436 */
2437 static bool needs_idle_maps(struct drm_device *dev)
2438 {
2439 #ifdef CONFIG_INTEL_IOMMU
2440 /* Query intel_iommu to see if we need the workaround. Presumably that
2441 * was loaded first.
2442 */
2443 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2444 return true;
2445 #endif
2446 return false;
2447 }
2448
2449 static bool do_idling(struct drm_i915_private *dev_priv)
2450 {
2451 bool ret = dev_priv->mm.interruptible;
2452
2453 if (unlikely(dev_priv->gtt.do_idle_maps)) {
2454 dev_priv->mm.interruptible = false;
2455 if (i915_gpu_idle(dev_priv->dev)) {
2456 DRM_ERROR("Couldn't idle GPU\n");
2457 /* Wait a bit, in hopes it avoids the hang */
2458 udelay(10);
2459 }
2460 }
2461
2462 return ret;
2463 }
2464
2465 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2466 {
2467 if (unlikely(dev_priv->gtt.do_idle_maps))
2468 dev_priv->mm.interruptible = interruptible;
2469 }
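
/*
 * Callers bracket GTT unmaps with this pair, along the lines of
 * i915_gem_gtt_finish_object() below:
 *
 *	interruptible = do_idling(dev_priv);
 *	... unmap from the GTT ...
 *	undo_idling(dev_priv, interruptible);
 *
 * so the saved interruptible state is restored even when the idle-maps
 * workaround forced it off for the duration of the unmap.
 */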
2470
2471 void i915_check_and_clear_faults(struct drm_device *dev)
2472 {
2473 struct drm_i915_private *dev_priv = dev->dev_private;
2474 struct intel_engine_cs *ring;
2475 int i;
2476
2477 if (INTEL_INFO(dev)->gen < 6)
2478 return;
2479
2480 for_each_ring(ring, dev_priv, i) {
2481 u32 fault_reg;
2482 fault_reg = I915_READ(RING_FAULT_REG(ring));
2483 if (fault_reg & RING_FAULT_VALID) {
2484 DRM_DEBUG_DRIVER("Unexpected fault\n"
2485 "\tAddr: 0x%08"PRIx32"\n"
2486 "\tAddress space: %s\n"
2487 "\tSource ID: %d\n"
2488 "\tType: %d\n",
2489 fault_reg & PAGE_MASK,
2490 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2491 RING_FAULT_SRCID(fault_reg),
2492 RING_FAULT_FAULT_TYPE(fault_reg));
2493 I915_WRITE(RING_FAULT_REG(ring),
2494 fault_reg & ~RING_FAULT_VALID);
2495 }
2496 }
2497 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2498 }
2499
2500 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2501 {
2502 if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2503 intel_gtt_chipset_flush();
2504 } else {
2505 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2506 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2507 }
2508 }
2509
2510 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2511 {
2512 struct drm_i915_private *dev_priv = dev->dev_private;
2513
2514 /* Don't bother messing with faults pre GEN6 as we have little
2515 * documentation supporting that it's a good idea.
2516 */
2517 if (INTEL_INFO(dev)->gen < 6)
2518 return;
2519
2520 i915_check_and_clear_faults(dev);
2521
2522 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2523 dev_priv->gtt.base.start,
2524 dev_priv->gtt.base.total,
2525 true);
2526
2527 i915_ggtt_flush(dev_priv);
2528 }
2529
2530 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2531 {
2532 #ifdef __NetBSD__
2533 KASSERT(0 < obj->base.size);
2534 /* XXX errno NetBSD->Linux */
2535 return -bus_dmamap_load_pglist(obj->base.dev->dmat, obj->pages,
2536 &obj->pageq, obj->base.size, BUS_DMA_NOWAIT);
2537 #else
2538 if (!dma_map_sg(&obj->base.dev->pdev->dev,
2539 obj->pages->sgl, obj->pages->nents,
2540 PCI_DMA_BIDIRECTIONAL))
2541 return -ENOSPC;
2542
2543 return 0;
2544 #endif
2545 }
2546
2547 #ifdef __NetBSD__
2548 static gen8_pte_t
2549 gen8_get_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i)
2550 {
2551 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2552 CTASSERT(sizeof(gen8_pte_t) == 8);
2553 #ifdef _LP64 /* XXX How to detect bus_space_read_8? */
2554 return bus_space_read_8(bst, bsh, 8*i);
2555 #else
2556 /*
2557 * XXX I'm not sure this case can actually happen in practice:
2558 * 32-bit gen8 chipsets?
2559 */
2560 return bus_space_read_4(bst, bsh, 8*i) |
2561 ((uint64_t)bus_space_read_4(bst, bsh, 8*i + 4) << 32);
2562 #endif
2563 }
2564
2565 static inline void
2566 gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
2567 gen8_pte_t pte)
2568 {
2569 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2570 CTASSERT(sizeof(gen8_pte_t) == 8);
2571 #ifdef _LP64 /* XXX How to detect bus_space_write_8? */
2572 bus_space_write_8(bst, bsh, 8*i, pte);
2573 #else
2574 bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
2575 bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
2576 #endif
2577 }
2578 #else
2579 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2580 {
2581 #ifdef writeq
2582 writeq(pte, addr);
2583 #else
2584 iowrite32((u32)pte, addr);
2585 iowrite32(pte >> 32, addr + 4);
2586 #endif
2587 }
2588 #endif
2589
2590 #ifdef __NetBSD__
2591 static void
2592 gen8_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2593 uint64_t start, enum i915_cache_level level, uint32_t unused_flags)
2594 {
2595 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2596 unsigned first_entry = start >> PAGE_SHIFT;
2597 const bus_space_tag_t bst = dev_priv->gtt.bst;
2598 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2599 unsigned i;
2600
2601 KASSERT(0 < dmamap->dm_nsegs);
2602 for (i = 0; i < dmamap->dm_nsegs; i++) {
2603 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2604 gen8_set_pte(bst, bsh, first_entry + i,
2605 gen8_pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2606 }
2607 if (0 < i) {
2608 /* Posting read. */
2609 WARN_ON(gen8_get_pte(bst, bsh, (first_entry + i - 1))
2610 != gen8_pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2611 true));
2612 }
2613 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2614 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2615 }
2616 #else
2617 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2618 struct sg_table *st,
2619 uint64_t start,
2620 enum i915_cache_level level, u32 unused)
2621 {
2622 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2623 unsigned first_entry = start >> PAGE_SHIFT;
2624 gen8_pte_t __iomem *gtt_entries =
2625 (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2626 int i = 0;
2627 struct sg_page_iter sg_iter;
2628 dma_addr_t addr = 0; /* shut up gcc */
2629
2630 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2631 addr = sg_dma_address(sg_iter.sg) +
2632 (sg_iter.sg_pgoffset << PAGE_SHIFT);
2633 gen8_set_pte(&gtt_entries[i],
2634 gen8_pte_encode(addr, level, true));
2635 i++;
2636 }
2637
2638 /*
2639 * XXX: This serves as a posting read to make sure that the PTE has
2640 * actually been updated. There is some concern that, even though
2641 * registers and PTEs are within the same BAR, they are potentially
2642 * subject to NUMA access patterns. Therefore, even with the way we assume
2643 * hardware should work, we must keep this posting read for paranoia.
2644 */
2645 if (i != 0)
2646 WARN_ON(readq(&gtt_entries[i-1])
2647 != gen8_pte_encode(addr, level, true));
2648
2649 /* This next bit makes the above posting read even more important. We
2650 * want to flush the TLBs only after we're certain all the PTE updates
2651 * have finished.
2652 */
2653 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2654 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2655 }
2656 #endif
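
/*
 * Both versions above finish with the same ordering idiom; schematically:
 *
 *	write all the PTEs;
 *	read back the last PTE;			(posting read)
 *	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 *	POSTING_READ(GFX_FLSH_CNTL_GEN6);	(then flush the TLBs)
 *
 * The read-back doubles as a cheap sanity check that the last encoded PTE
 * actually reached the GSM before the TLB flush is requested.
 */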
2657
2658 /*
2659 * Binds an object into the global GTT with the specified cache level. The object
2660 * will be accessible to the GPU via commands whose operands reference offsets
2661 * within the global GTT as well as accessible by the GPU through the GMADR
2662 * mapped BAR (dev_priv->mm.gtt->gtt).
2663 */
2664 #ifdef __NetBSD__
2665 static void
2666 gen6_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2667 uint64_t start, enum i915_cache_level level, uint32_t flags)
2668 {
2669 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2670 unsigned first_entry = start >> PAGE_SHIFT;
2671 const bus_space_tag_t bst = dev_priv->gtt.bst;
2672 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2673 unsigned i;
2674
2675 KASSERT(0 < dmamap->dm_nsegs);
2676 for (i = 0; i < dmamap->dm_nsegs; i++) {
2677 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2678 CTASSERT(sizeof(gen6_pte_t) == 4);
2679 bus_space_write_4(bst, bsh, 4*(first_entry + i),
2680 vm->pte_encode(dmamap->dm_segs[i].ds_addr, level, true,
2681 flags));
2682 }
2683 if (0 < i) {
2684 /* Posting read. */
2685 WARN_ON(bus_space_read_4(bst, bsh, 4*(first_entry + i - 1))
2686 != vm->pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2687 true, flags));
2688 }
2689 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2690 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2691 }
2692 #else
2693 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2694 struct sg_table *st,
2695 uint64_t start,
2696 enum i915_cache_level level, u32 flags)
2697 {
2698 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2699 unsigned first_entry = start >> PAGE_SHIFT;
2700 gen6_pte_t __iomem *gtt_entries =
2701 (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2702 int i = 0;
2703 struct sg_page_iter sg_iter;
2704 dma_addr_t addr = 0;
2705
2706 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2707 addr = sg_page_iter_dma_address(&sg_iter);
2708 iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2709 i++;
2710 }
2711
2712 /* XXX: This serves as a posting read to make sure that the PTE has
2713 * actually been updated. There is some concern that, even though
2714 * registers and PTEs are within the same BAR, they are potentially
2715 * subject to NUMA access patterns. Therefore, even with the way we assume
2716 * hardware should work, we must keep this posting read for paranoia.
2717 */
2718 if (i != 0) {
2719 unsigned long gtt = readl(&gtt_entries[i-1]);
2720 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2721 }
2722
2723 /* This next bit makes the above posting read even more important. We
2724 * want to flush the TLBs only after we're certain all the PTE updates
2725 * have finished.
2726 */
2727 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2728 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2729 }
2730 #endif
2731
2732 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2733 uint64_t start,
2734 uint64_t length,
2735 bool use_scratch)
2736 {
2737 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2738 unsigned first_entry = start >> PAGE_SHIFT;
2739 unsigned num_entries = length >> PAGE_SHIFT;
2740 #ifdef __NetBSD__
2741 const bus_space_tag_t bst = dev_priv->gtt.bst;
2742 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2743 gen8_pte_t scratch_pte;
2744 #else
2745 gen8_pte_t scratch_pte, __iomem *gtt_base =
2746 (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2747 #endif
2748 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2749 int i;
2750
2751 if (WARN(num_entries > max_entries,
2752 "First entry = %d; Num entries = %d (max=%d)\n",
2753 first_entry, num_entries, max_entries))
2754 num_entries = max_entries;
2755
2756 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2757 I915_CACHE_LLC,
2758 use_scratch);
2759 #ifdef __NetBSD__
2760 for (i = 0; i < num_entries; i++)
2761 gen8_set_pte(bst, bsh, first_entry + i, scratch_pte);
2762 (void)gen8_get_pte(bst, bsh, first_entry);
2763 #else
2764 for (i = 0; i < num_entries; i++)
2765 gen8_set_pte(&gtt_base[i], scratch_pte);
2766 readl(gtt_base);
2767 #endif
2768 }
2769
2770 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2771 uint64_t start,
2772 uint64_t length,
2773 bool use_scratch)
2774 {
2775 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2776 unsigned first_entry = start >> PAGE_SHIFT;
2777 unsigned num_entries = length >> PAGE_SHIFT;
2778 #ifdef __NetBSD__
2779 const bus_space_tag_t bst = dev_priv->gtt.bst;
2780 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2781 gen6_pte_t scratch_pte;
2782 #else
2783 gen6_pte_t scratch_pte, __iomem *gtt_base =
2784 (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2785 #endif
2786 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2787 int i;
2788
2789 if (WARN(num_entries > max_entries,
2790 "First entry = %d; Num entries = %d (max=%d)\n",
2791 first_entry, num_entries, max_entries))
2792 num_entries = max_entries;
2793
2794 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2795 I915_CACHE_LLC, use_scratch, 0);
2796
2797 #ifdef __NetBSD__
2798 CTASSERT(sizeof(gen6_pte_t) == 4);
2799 for (i = 0; i < num_entries; i++)
2800 bus_space_write_4(bst, bsh, 4*(first_entry + i), scratch_pte);
2801 (void)bus_space_read_4(bst, bsh, 4*first_entry);
2802 #else
2803 for (i = 0; i < num_entries; i++)
2804 iowrite32(scratch_pte, &gtt_base[i]);
2805 readl(gtt_base);
2806 #endif
2807 }
2808
2809 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2810 #ifdef __NetBSD__
2811 bus_dmamap_t pages,
2812 #else
2813 struct sg_table *pages,
2814 #endif
2815 uint64_t start,
2816 enum i915_cache_level cache_level, u32 unused)
2817 {
2818 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2819 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2820
2821 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2822 }
2823
2824 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2825 uint64_t start,
2826 uint64_t length,
2827 bool unused)
2828 {
2829 unsigned first_entry = start >> PAGE_SHIFT;
2830 unsigned num_entries = length >> PAGE_SHIFT;
2831 intel_gtt_clear_range(first_entry, num_entries);
2832 }
2833
2834 static int ggtt_bind_vma(struct i915_vma *vma,
2835 enum i915_cache_level cache_level,
2836 u32 flags)
2837 {
2838 struct drm_i915_gem_object *obj = vma->obj;
2839 u32 pte_flags = 0;
2840 int ret;
2841
2842 ret = i915_get_ggtt_vma_pages(vma);
2843 if (ret)
2844 return ret;
2845
2846 /* Currently applicable only to VLV */
2847 if (obj->gt_ro)
2848 pte_flags |= PTE_READ_ONLY;
2849
2850 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2851 vma->node.start,
2852 cache_level, pte_flags);
2853
2854 /*
2855 * Without aliasing PPGTT there's no difference between
2856 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2857 * upgrade to both bound if we bind either to avoid double-binding.
2858 */
2859 vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2860
2861 return 0;
2862 }
2863
2864 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2865 enum i915_cache_level cache_level,
2866 u32 flags)
2867 {
2868 struct drm_device *dev = vma->vm->dev;
2869 struct drm_i915_private *dev_priv = dev->dev_private;
2870 struct drm_i915_gem_object *obj = vma->obj;
2871 #ifdef __NetBSD__
2872 bus_dmamap_t pages = obj->pages;
2873 #else
2874 struct sg_table *pages = obj->pages;
2875 #endif
2876 u32 pte_flags = 0;
2877 int ret;
2878
2879 ret = i915_get_ggtt_vma_pages(vma);
2880 if (ret)
2881 return ret;
2882 pages = vma->ggtt_view.pages;
2883
2884 /* Currently applicable only to VLV */
2885 if (obj->gt_ro)
2886 pte_flags |= PTE_READ_ONLY;
2887
2888
2889 if (flags & GLOBAL_BIND) {
2890 vma->vm->insert_entries(vma->vm, pages,
2891 vma->node.start,
2892 cache_level, pte_flags);
2893 }
2894
2895 if (flags & LOCAL_BIND) {
2896 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2897 appgtt->base.insert_entries(&appgtt->base, pages,
2898 vma->node.start,
2899 cache_level, pte_flags);
2900 }
2901
2902 return 0;
2903 }
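
/*
 * Seen side by side, the two bind paths above reduce to (a sketch,
 * eliding the vma-pages lookup and the VLV read-only flag):
 *
 *	ggtt_bind_vma:
 *		vma->vm->insert_entries(...);		   GGTT PTEs
 *		vma->bound |= GLOBAL_BIND | LOCAL_BIND;	   same PTEs either way
 *	aliasing_gtt_bind_vma:
 *		if (flags & GLOBAL_BIND)
 *			vma->vm->insert_entries(...);	   GGTT PTEs
 *		if (flags & LOCAL_BIND)
 *			appgtt->base.insert_entries(...);  aliasing PPGTT PTEs
 */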
2904
2905 static void ggtt_unbind_vma(struct i915_vma *vma)
2906 {
2907 struct drm_device *dev = vma->vm->dev;
2908 struct drm_i915_private *dev_priv = dev->dev_private;
2909 struct drm_i915_gem_object *obj = vma->obj;
2910 const uint64_t size = min_t(uint64_t,
2911 obj->base.size,
2912 vma->node.size);
2913
2914 if (vma->bound & GLOBAL_BIND) {
2915 vma->vm->clear_range(vma->vm,
2916 vma->node.start,
2917 size,
2918 true);
2919 }
2920
2921 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2922 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2923
2924 appgtt->base.clear_range(&appgtt->base,
2925 vma->node.start,
2926 size,
2927 true);
2928 }
2929 }
2930
2931 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2932 {
2933 struct drm_device *dev = obj->base.dev;
2934 struct drm_i915_private *dev_priv = dev->dev_private;
2935 bool interruptible;
2936
2937 interruptible = do_idling(dev_priv);
2938
2939 #ifdef __NetBSD__
2940 bus_dmamap_unload(dev->dmat, obj->pages);
2941 #else
2942 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2943 PCI_DMA_BIDIRECTIONAL);
2944 #endif
2945
2946 undo_idling(dev_priv, interruptible);
2947 }
2948
2949 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2950 unsigned long color,
2951 u64 *start,
2952 u64 *end)
2953 {
2954 if (node->color != color)
2955 *start += 4096;
2956
2957 if (!list_empty(&node->node_list)) {
2958 node = list_entry(node->node_list.next,
2959 struct drm_mm_node,
2960 node_list);
2961 if (node->allocated && node->color != color)
2962 *end -= 4096;
2963 }
2964 }
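
/*
 * Concretely, assuming the node color tracks the object's cache level
 * (which is how the !HAS_LLC() hook installed below uses it), placing an
 * uncached object into a hole between two LLC objects gives:
 *
 *	[ LLC node ][############ hole ############][ LLC node ]
 *	             ^ start += 4096    end -= 4096 ^
 *
 * so differently cached objects never abut without a one-page guard.
 */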
2965
2966 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2967 u64 start,
2968 u64 mappable_end,
2969 u64 end)
2970 {
2971 /* Let GEM manage all of the aperture.
2972 *
2973 * However, leave one page at the end still bound to the scratch page.
2974 * There are a number of places where the hardware apparently prefetches
2975 * past the end of the object, and we've seen multiple hangs with the
2976 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2977 * aperture. One page should be enough to keep any prefetching inside
2978 * of the aperture.
2979 */
2980 struct drm_i915_private *dev_priv = dev->dev_private;
2981 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2982 struct drm_mm_node *entry;
2983 struct drm_i915_gem_object *obj;
2984 unsigned long hole_start, hole_end;
2985 int ret;
2986
2987 BUG_ON(mappable_end > end);
2988
2989 ggtt_vm->start = start;
2990
2991 /* Subtract the guard page before address space initialization to
2992 * shrink the range used by drm_mm */
2993 ggtt_vm->total = end - start - PAGE_SIZE;
2994 i915_address_space_init(ggtt_vm, dev_priv);
2995 ggtt_vm->total += PAGE_SIZE;
2996
2997 if (intel_vgpu_active(dev)) {
2998 ret = intel_vgt_balloon(dev);
2999 if (ret)
3000 return ret;
3001 }
3002
3003 if (!HAS_LLC(dev))
3004 ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
3005
3006 /* Mark any preallocated objects as occupied */
3007 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3008 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
3009
3010 DRM_DEBUG_KMS("reserving preallocated space: %"PRIx64" + %zx\n",
3011 i915_gem_obj_ggtt_offset(obj), obj->base.size);
3012
3013 WARN_ON(i915_gem_obj_ggtt_bound(obj));
3014 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
3015 if (ret) {
3016 DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
3017 return ret;
3018 }
3019 vma->bound |= GLOBAL_BIND;
3020 __i915_vma_set_map_and_fenceable(vma);
3021 list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
3022 }
3023
3024 /* Clear any non-preallocated blocks */
3025 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
3026 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
3027 hole_start, hole_end);
3028 ggtt_vm->clear_range(ggtt_vm, hole_start,
3029 hole_end - hole_start, true);
3030 }
3031
3032 /* And finally clear the reserved guard page */
3033 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
3034
3035 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
3036 struct i915_hw_ppgtt *ppgtt;
3037
3038 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
3039 if (!ppgtt)
3040 return -ENOMEM;
3041
3042 ret = __hw_ppgtt_init(dev, ppgtt);
3043 if (ret) {
3044 ppgtt->base.cleanup(&ppgtt->base);
3045 kfree(ppgtt);
3046 return ret;
3047 }
3048
3049 if (ppgtt->base.allocate_va_range)
3050 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
3051 ppgtt->base.total);
3052 if (ret) {
3053 ppgtt->base.cleanup(&ppgtt->base);
3054 kfree(ppgtt);
3055 return ret;
3056 }
3057
3058 ppgtt->base.clear_range(&ppgtt->base,
3059 ppgtt->base.start,
3060 ppgtt->base.total,
3061 true);
3062
3063 dev_priv->mm.aliasing_ppgtt = ppgtt;
3064 WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
3065 dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
3066 }
3067
3068 return 0;
3069 }
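
/*
 * The guard-page bookkeeping above is easy to miss; in outline:
 *
 *	ggtt_vm->total = end - start - PAGE_SIZE;	hide the last page
 *	i915_address_space_init(ggtt_vm, dev_priv);	so drm_mm never hands it out
 *	ggtt_vm->total += PAGE_SIZE;			but keep accounting for it
 *	...
 *	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
 *
 * which leaves the final page permanently bound to scratch, absorbing any
 * hardware prefetch past the last object in the aperture.
 */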
3070
3071 void i915_gem_init_global_gtt(struct drm_device *dev)
3072 {
3073 struct drm_i915_private *dev_priv = dev->dev_private;
3074 u64 gtt_size, mappable_size;
3075
3076 gtt_size = dev_priv->gtt.base.total;
3077 mappable_size = dev_priv->gtt.mappable_end;
3078
3079 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
3080 }
3081
3082 void i915_global_gtt_cleanup(struct drm_device *dev)
3083 {
3084 struct drm_i915_private *dev_priv = dev->dev_private;
3085 struct i915_address_space *vm = &dev_priv->gtt.base;
3086
3087 if (dev_priv->mm.aliasing_ppgtt) {
3088 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
3089
3090 ppgtt->base.cleanup(&ppgtt->base);
3091 kfree(ppgtt);
3092 }
3093
3094 if (drm_mm_initialized(&vm->mm)) {
3095 if (intel_vgpu_active(dev))
3096 intel_vgt_deballoon();
3097
3098 drm_mm_takedown(&vm->mm);
3099 list_del(&vm->global_link);
3100 }
3101
3102 vm->cleanup(vm);
3103 }
3104
3105 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3106 {
3107 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
3108 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
3109 return snb_gmch_ctl << 20;
3110 }
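
/*
 * Worked example: a GGMS field of 2 decodes to 2 << 20 = 2 MiB of GTT,
 * i.e. 2 MiB / sizeof(gen6_pte_t) = 512Ki PTEs, which gen6_gmch_probe()
 * below turns into 512Ki << PAGE_SHIFT = 2 GiB of GGTT address space.
 */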
3111
3112 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
3113 {
3114 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
3115 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
3116 if (bdw_gmch_ctl)
3117 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
3118
3119 #ifdef CONFIG_X86_32
3120 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
3121 if (bdw_gmch_ctl > 4)
3122 bdw_gmch_ctl = 4;
3123 #endif
3124
3125 return bdw_gmch_ctl << 20;
3126 }
3127
3128 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
3129 {
3130 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
3131 gmch_ctrl &= SNB_GMCH_GGMS_MASK;
3132
3133 if (gmch_ctrl)
3134 return 1 << (20 + gmch_ctrl);
3135
3136 return 0;
3137 }
3138
3139 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
3140 {
3141 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
3142 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
3143 return snb_gmch_ctl << 25; /* 32 MB units */
3144 }
3145
3146 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
3147 {
3148 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
3149 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
3150 return bdw_gmch_ctl << 25; /* 32 MB units */
3151 }
3152
3153 static size_t chv_get_stolen_size(u16 gmch_ctrl)
3154 {
3155 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
3156 gmch_ctrl &= SNB_GMCH_GMS_MASK;
3157
3158 /*
3159 * 0x0 to 0x10: 32MB increments starting at 0MB
3160 * 0x11 to 0x16: 4MB increments starting at 8MB
3161 * 0x17 to 0x1d: 4MB increments starting at 36MB
3162 */
3163 if (gmch_ctrl < 0x11)
3164 return gmch_ctrl << 25;
3165 else if (gmch_ctrl < 0x17)
3166 return (gmch_ctrl - 0x11 + 2) << 22;
3167 else
3168 return (gmch_ctrl - 0x17 + 9) << 22;
3169 }
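
/*
 * Worked examples of the decode above: 0x10 -> 0x10 << 25 = 512 MiB,
 * 0x11 -> (0x11 - 0x11 + 2) << 22 = 8 MiB, and
 * 0x17 -> (0x17 - 0x17 + 9) << 22 = 36 MiB, matching the ranges listed
 * in the comment.
 */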
3170
3171 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
3172 {
3173 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
3174 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
3175
3176 if (gen9_gmch_ctl < 0xf0)
3177 return gen9_gmch_ctl << 25; /* 32 MB units */
3178 else
3179 /* 4MB increments for values >= 0xf0, starting at 4MB */
3180 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
3181 }
3182
3183 static int ggtt_probe_common(struct drm_device *dev,
3184 size_t gtt_size)
3185 {
3186 struct drm_i915_private *dev_priv = dev->dev_private;
3187 struct i915_page_scratch *scratch_page;
3188 phys_addr_t gtt_phys_addr;
3189
3190 /* For Modern GENs the PTEs and register space are split in the BAR */
3191 gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
3192 (pci_resource_len(dev->pdev, 0) / 2);
3193
3194 #ifdef __NetBSD__
3195 int ret;
3196 dev_priv->gtt.bst = dev->pdev->pd_pa.pa_memt;
3197 /* XXX errno NetBSD->Linux */
3198 ret = -bus_space_map(dev_priv->gtt.bst, gtt_phys_addr, gtt_size,
3199 IS_BROXTON(dev) ? 0 : BUS_SPACE_MAP_PREFETCHABLE,
3200 &dev_priv->gtt.bsh);
3201 if (ret) {
3202 DRM_ERROR("Failed to map the graphics translation table: %d\n",
3203 ret);
3204 return ret;
3205 }
3206 dev_priv->gtt.size = gtt_size;
3207 #else
3208 /*
3209 * On BXT, writes larger than 64 bits to the GTT pagetable range will be
3210 * dropped. For WC mappings in general we have 64 byte burst writes
3211 * when the WC buffer is flushed, so we can't use it, but have to
3212 * resort to an uncached mapping. The WC issue is easily caught by the
3213 * readback check when writing GTT PTE entries.
3214 */
3215 if (IS_BROXTON(dev))
3216 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
3217 else
3218 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
3219 if (!dev_priv->gtt.gsm) {
3220 DRM_ERROR("Failed to map the gtt page table\n");
3221 return -ENOMEM;
3222 }
3223 #endif
3224
3225 scratch_page = alloc_scratch_page(dev);
3226 if (IS_ERR(scratch_page)) {
3227 DRM_ERROR("Scratch setup failed\n");
3228 /* iounmap will also get called at remove, but meh */
3229 #ifdef __NetBSD__
3230 bus_space_unmap(dev_priv->gtt.bst, dev_priv->gtt.bsh,
3231 dev_priv->gtt.size);
3232 #else
3233 iounmap(dev_priv->gtt.gsm);
3234 #endif
3235 return PTR_ERR(scratch_page);
3236 }
3237
3238 dev_priv->gtt.base.scratch_page = scratch_page;
3239
3240 return 0;
3241 }
3242
3243 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3244 * bits. When using advanced contexts each context stores its own PAT, but
3245 * writing this data shouldn't be harmful even in those cases. */
3246 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3247 {
3248 uint64_t pat;
3249
3250 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
3251 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3252 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3253 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
3254 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3255 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3256 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3257 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3258
3259 if (!USES_PPGTT(dev_priv->dev))
3260 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3261 * so RTL will always use the value corresponding to
3262 * pat_sel = 000".
3263 * So let's disable cache for GGTT to avoid screen corruptions.
3264 * MOCS still can be used though.
3265 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3266 * before this patch, i.e. the same uncached + snooping access
3267 * like on gen6/7 seems to be in effect.
3268 * - So this just fixes blitter/render access. Again it looks
3269 * like it's not just uncached access, but uncached + snooping.
3270 * So we can still hold onto all our assumptions wrt cpu
3271 * clflushing on LLC machines.
3272 */
3273 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3274
3275 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3276 * write would work. */
3277 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3278 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3279 }
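
/*
 * A sketch of the packing, assuming GEN8_PPAT(i, x) places x in bits
 * [8*i + 7 : 8*i] of the 64-bit value (which is what the _LO/_HI split
 * above suggests): entry 0 lands in bits 7:0 of GEN8_PRIVATE_PAT_LO and
 * entry 7 in bits 31:24 of GEN8_PRIVATE_PAT_HI, so a PTE's 3-bit pat_sel
 * simply indexes one byte of this table.
 */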
3280
3281 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3282 {
3283 uint64_t pat;
3284
3285 /*
3286 * Map WB on BDW to snooped on CHV.
3287 *
3288 * Only the snoop bit has meaning for CHV, the rest is
3289 * ignored.
3290 *
3291 * The hardware will never snoop for certain types of accesses:
3292 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3293 * - PPGTT page tables
3294 * - some other special cycles
3295 *
3296 * As with BDW, we also need to consider the following for GT accesses:
3297 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3298 * so RTL will always use the value corresponding to
3299 * pat_sel = 000".
3300 * Which means we must set the snoop bit in PAT entry 0
3301 * in order to keep the global status page working.
3302 */
3303 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3304 GEN8_PPAT(1, 0) |
3305 GEN8_PPAT(2, 0) |
3306 GEN8_PPAT(3, 0) |
3307 GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3308 GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3309 GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3310 GEN8_PPAT(7, CHV_PPAT_SNOOP);
3311
3312 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3313 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3314 }
3315
3316 static int gen8_gmch_probe(struct drm_device *dev,
3317 u64 *gtt_total,
3318 size_t *stolen,
3319 phys_addr_t *mappable_base,
3320 u64 *mappable_end)
3321 {
3322 struct drm_i915_private *dev_priv = dev->dev_private;
3323 u64 gtt_size;
3324 u16 snb_gmch_ctl;
3325 int ret;
3326
3327 /* TODO: We're not aware of mappable constraints on gen8 yet */
3328 *mappable_base = pci_resource_start(dev->pdev, 2);
3329 *mappable_end = pci_resource_len(dev->pdev, 2);
3330
3331 #ifndef __NetBSD__
3332 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3333 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3334 #endif
3335
3336 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3337
3338 if (INTEL_INFO(dev)->gen >= 9) {
3339 *stolen = gen9_get_stolen_size(snb_gmch_ctl);
3340 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3341 } else if (IS_CHERRYVIEW(dev)) {
3342 *stolen = chv_get_stolen_size(snb_gmch_ctl);
3343 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3344 } else {
3345 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
3346 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3347 }
3348
3349 *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3350
3351 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3352 chv_setup_private_ppat(dev_priv);
3353 else
3354 bdw_setup_private_ppat(dev_priv);
3355
3356 ret = ggtt_probe_common(dev, gtt_size);
3357
3358 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3359 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
3360 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3361 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3362
3363 /* XXX 39-bit addresses? Really? See pci_set_dma_mask above... */
3364 dev_priv->gtt.max_paddr = __BITS(38, 0);
3365
3366 return ret;
3367 }
3368
3369 static int gen6_gmch_probe(struct drm_device *dev,
3370 u64 *gtt_total,
3371 size_t *stolen,
3372 phys_addr_t *mappable_base,
3373 u64 *mappable_end)
3374 {
3375 struct drm_i915_private *dev_priv = dev->dev_private;
3376 unsigned int gtt_size;
3377 u16 snb_gmch_ctl;
3378 int ret;
3379
3380 *mappable_base = pci_resource_start(dev->pdev, 2);
3381 *mappable_end = pci_resource_len(dev->pdev, 2);
3382
3383 /* 64/512MB is the current min/max we actually know of, but this is just
3384 * a coarse sanity check.
3385 */
3386 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
3387 DRM_ERROR("Unknown GMADR size (%"PRIx64")\n",
3388 dev_priv->gtt.mappable_end);
3389 return -ENXIO;
3390 }
3391
3392 #ifndef __NetBSD__
3393 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3394 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3395 #endif
3396 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3397
3398 *stolen = gen6_get_stolen_size(snb_gmch_ctl);
3399
3400 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
3401 *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3402
3403 ret = ggtt_probe_common(dev, gtt_size);
3404
3405 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3406 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
3407 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3408 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3409
3410 dev_priv->gtt.max_paddr = __BITS(39, 0);
3411
3412 return ret;
3413 }
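
/*
 * Note that the two limits recorded above agree: DMA_BIT_MASK(40) and
 * __BITS(39, 0) are both (1ULL << 40) - 1, and the NetBSD copy kept in
 * max_paddr is what the drm_limit_dma_space() call in i915_gem_gtt_init()
 * consumes later.
 */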
3414
3415 static void gen6_gmch_remove(struct i915_address_space *vm)
3416 {
3417 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
3418
3419 #ifdef __NetBSD__
3420 bus_space_unmap(gtt->bst, gtt->bsh, gtt->size);
3421 #else
3422 iounmap(gtt->gsm);
3423 #endif
3424 free_scratch_page(vm->dev, vm->scratch_page);
3425 }
3426
3427 static int i915_gmch_probe(struct drm_device *dev,
3428 u64 *gtt_total,
3429 size_t *stolen,
3430 phys_addr_t *mappable_base,
3431 u64 *mappable_end)
3432 {
3433 struct drm_i915_private *dev_priv = dev->dev_private;
3434 int ret;
3435
3436 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3437 if (!ret) {
3438 DRM_ERROR("failed to set up gmch\n");
3439 return -EIO;
3440 }
3441
3442 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3443
3444 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
3445 dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
3446 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
3447 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3448 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3449
3450 if (unlikely(dev_priv->gtt.do_idle_maps))
3451 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3452
3453 if (INTEL_INFO(dev)->gen <= 2)
3454 dev_priv->gtt.max_paddr = __BITS(29, 0);
3455 else if ((INTEL_INFO(dev)->gen <= 3) ||
3456 IS_BROADWATER(dev) || IS_CRESTLINE(dev))
3457 dev_priv->gtt.max_paddr = __BITS(31, 0);
3458 else if (INTEL_INFO(dev)->gen <= 5)
3459 dev_priv->gtt.max_paddr = __BITS(35, 0);
3460 else
3461 dev_priv->gtt.max_paddr = __BITS(39, 0);
3462
3463 return 0;
3464 }
3465
3466 static void i915_gmch_remove(struct i915_address_space *vm)
3467 {
3468 intel_gmch_remove();
3469 }
3470
3471 int i915_gem_gtt_init(struct drm_device *dev)
3472 {
3473 struct drm_i915_private *dev_priv = dev->dev_private;
3474 struct i915_gtt *gtt = &dev_priv->gtt;
3475 int ret;
3476
3477 if (INTEL_INFO(dev)->gen <= 5) {
3478 gtt->gtt_probe = i915_gmch_probe;
3479 gtt->base.cleanup = i915_gmch_remove;
3480 } else if (INTEL_INFO(dev)->gen < 8) {
3481 gtt->gtt_probe = gen6_gmch_probe;
3482 gtt->base.cleanup = gen6_gmch_remove;
3483 if (IS_HASWELL(dev) && dev_priv->ellc_size)
3484 gtt->base.pte_encode = iris_pte_encode;
3485 else if (IS_HASWELL(dev))
3486 gtt->base.pte_encode = hsw_pte_encode;
3487 else if (IS_VALLEYVIEW(dev))
3488 gtt->base.pte_encode = byt_pte_encode;
3489 else if (INTEL_INFO(dev)->gen >= 7)
3490 gtt->base.pte_encode = ivb_pte_encode;
3491 else
3492 gtt->base.pte_encode = snb_pte_encode;
3493 } else {
3494 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3495 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3496 }
3497
3498 gtt->base.dev = dev;
3499
3500 ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3501 &gtt->mappable_base, &gtt->mappable_end);
3502 if (ret)
3503 return ret;
3504
3505 #ifdef __NetBSD__
3506 dev_priv->gtt.pgfl = x86_select_freelist(dev_priv->gtt.max_paddr);
3507 ret = drm_limit_dma_space(dev, 0, dev_priv->gtt.max_paddr);
3508 if (ret) {
3509 DRM_ERROR("Unable to limit DMA paddr allocations: %d!\n", ret);
3510 gtt->base.cleanup(&gtt->base);
3511 return ret;
3512 }
3513 #endif
3514
3515 /* GMADR is the PCI mmio aperture into the global GTT. */
3516 DRM_INFO("Memory usable by graphics device = %"PRIu64"M\n",
3517 gtt->base.total >> 20);
3518 DRM_DEBUG_DRIVER("GMADR size = %"PRId64"M\n", gtt->mappable_end >> 20);
3519 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
3520 #ifdef CONFIG_INTEL_IOMMU
3521 if (intel_iommu_gfx_mapped)
3522 DRM_INFO("VT-d active for gfx access\n");
3523 #endif
3524 /*
3525 * i915.enable_ppgtt is read-only, so do an early pass to validate the
3526 * user's requested state against the hardware/driver capabilities. We
3527 * do this now so that we can print out any log messages once rather
3528 * than every time we check intel_enable_ppgtt().
3529 */
3530 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3531 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3532
3533 return 0;
3534 }
3535
3536 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3537 {
3538 struct drm_i915_private *dev_priv = dev->dev_private;
3539 struct drm_i915_gem_object *obj;
3540 struct i915_address_space *vm;
3541 struct i915_vma *vma;
3542 bool flush;
3543
3544 i915_check_and_clear_faults(dev);
3545
3546 /* First fill our portion of the GTT with scratch pages */
3547 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3548 dev_priv->gtt.base.start,
3549 dev_priv->gtt.base.total,
3550 true);
3551
3552 /* Cache flush objects bound into GGTT and rebind them. */
3553 vm = &dev_priv->gtt.base;
3554 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3555 flush = false;
3556 list_for_each_entry(vma, &obj->vma_list, vma_link) {
3557 if (vma->vm != vm)
3558 continue;
3559
3560 WARN_ON(i915_vma_bind(vma, obj->cache_level,
3561 PIN_UPDATE));
3562
3563 flush = true;
3564 }
3565
3566 if (flush)
3567 i915_gem_clflush_object(obj, obj->pin_display);
3568 }
3569
3570 if (INTEL_INFO(dev)->gen >= 8) {
3571 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3572 chv_setup_private_ppat(dev_priv);
3573 else
3574 bdw_setup_private_ppat(dev_priv);
3575
3576 return;
3577 }
3578
3579 if (USES_PPGTT(dev)) {
3580 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3581 /* TODO: Perhaps it shouldn't be gen6 specific */
3582
3583 struct i915_hw_ppgtt *ppgtt =
3584 container_of(vm, struct i915_hw_ppgtt,
3585 base);
3586
3587 if (i915_is_ggtt(vm))
3588 ppgtt = dev_priv->mm.aliasing_ppgtt;
3589
3590 gen6_write_page_range(dev_priv, &ppgtt->pd,
3591 0, ppgtt->base.total);
3592 }
3593 }
3594
3595 i915_ggtt_flush(dev_priv);
3596 }
3597
3598 static struct i915_vma *
3599 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3600 struct i915_address_space *vm,
3601 const struct i915_ggtt_view *ggtt_view)
3602 {
3603 struct i915_vma *vma;
3604
3605 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3606 return ERR_PTR(-EINVAL);
3607
3608 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3609 if (vma == NULL)
3610 return ERR_PTR(-ENOMEM);
3611
3612 INIT_LIST_HEAD(&vma->vma_link);
3613 INIT_LIST_HEAD(&vma->mm_list);
3614 INIT_LIST_HEAD(&vma->exec_list);
3615 vma->vm = vm;
3616 vma->obj = obj;
3617
3618 if (i915_is_ggtt(vm))
3619 vma->ggtt_view = *ggtt_view;
3620
3621 list_add_tail(&vma->vma_link, &obj->vma_list);
3622 if (!i915_is_ggtt(vm))
3623 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3624
3625 return vma;
3626 }
3627
3628 struct i915_vma *
3629 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3630 struct i915_address_space *vm)
3631 {
3632 struct i915_vma *vma;
3633
3634 vma = i915_gem_obj_to_vma(obj, vm);
3635 if (!vma)
3636 vma = __i915_gem_vma_create(obj, vm,
3637 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3638
3639 return vma;
3640 }
3641
3642 struct i915_vma *
3643 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3644 const struct i915_ggtt_view *view)
3645 {
3646 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3647 struct i915_vma *vma;
3648
3649 if (WARN_ON(!view))
3650 return ERR_PTR(-EINVAL);
3651
3652 vma = i915_gem_obj_to_ggtt_view(obj, view);
3653
3654 if (IS_ERR(vma))
3655 return vma;
3656
3657 if (!vma)
3658 vma = __i915_gem_vma_create(obj, ggtt, view);
3659
3660 return vma;
3661
3662 }
3663
3664 #ifndef __NetBSD__
3665 static struct scatterlist *
3666 rotate_pages(dma_addr_t *in, unsigned int offset,
3667 unsigned int width, unsigned int height,
3668 struct sg_table *st, struct scatterlist *sg)
3669 {
3670 unsigned int column, row;
3671 unsigned int src_idx;
3672
3673 if (!sg) {
3674 st->nents = 0;
3675 sg = st->sgl;
3676 }
3677
3678 for (column = 0; column < width; column++) {
3679 src_idx = width * (height - 1) + column;
3680 for (row = 0; row < height; row++) {
3681 st->nents++;
3682 /* We don't need the pages, but need to initialize
3683 * the entries so the sg list can be happily traversed.
3684 * All we need are the DMA addresses.
3685 */
3686 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3687 sg_dma_address(sg) = in[offset + src_idx];
3688 sg_dma_len(sg) = PAGE_SIZE;
3689 sg = sg_next(sg);
3690 src_idx -= width;
3691 }
3692 }
3693
3694 return sg;
3695 }
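
/*
 * Worked example for a 2x2 source (width = height = 2) laid out row-major
 * as pages {0, 1; 2, 3}: the loops above emit in[2], in[0], in[3], in[1],
 * i.e. each output column walks one source column bottom-to-top, which is
 * a 90 degree rotation of the original layout as seen by the sg list.
 */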
3696
3697 static struct sg_table *
3698 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
3699 struct drm_i915_gem_object *obj)
3700 {
3701 struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
3702 unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3703 unsigned int size_pages_uv;
3704 struct sg_page_iter sg_iter;
3705 unsigned long i;
3706 dma_addr_t *page_addr_list;
3707 struct sg_table *st;
3708 unsigned int uv_start_page;
3709 struct scatterlist *sg;
3710 int ret = -ENOMEM;
3711
3712 /* Allocate a temporary list of source pages for random access. */
3713 page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3714 sizeof(dma_addr_t));
3715 if (!page_addr_list)
3716 return ERR_PTR(ret);
3717
3718 /* Account for UV plane with NV12. */
3719 if (rot_info->pixel_format == DRM_FORMAT_NV12)
3720 size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3721 else
3722 size_pages_uv = 0;
3723
3724 /* Allocate target SG list. */
3725 st = kmalloc(sizeof(*st), GFP_KERNEL);
3726 if (!st)
3727 goto err_st_alloc;
3728
3729 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3730 if (ret)
3731 goto err_sg_alloc;
3732
3733 /* Populate source page list from the object. */
3734 i = 0;
3735 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3736 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3737 i++;
3738 }
3739
3740 /* Rotate the pages. */
3741 sg = rotate_pages(page_addr_list, 0,
3742 rot_info->width_pages, rot_info->height_pages,
3743 st, NULL);
3744
3745 /* Append the UV plane if NV12. */
3746 if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3747 uv_start_page = size_pages;
3748
3749 /* Check for tile-row un-alignment. */
3750 if (offset_in_page(rot_info->uv_offset))
3751 uv_start_page--;
3752
3753 rot_info->uv_start_page = uv_start_page;
3754
3755 rotate_pages(page_addr_list, uv_start_page,
3756 rot_info->width_pages_uv,
3757 rot_info->height_pages_uv,
3758 st, sg);
3759 }
3760
3761 DRM_DEBUG_KMS(
3762 "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3763 obj->base.size, rot_info->pitch, rot_info->height,
3764 rot_info->pixel_format, rot_info->width_pages,
3765 rot_info->height_pages, size_pages + size_pages_uv,
3766 size_pages);
3767
3768 drm_free_large(page_addr_list);
3769
3770 return st;
3771
3772 err_sg_alloc:
3773 kfree(st);
3774 err_st_alloc:
3775 drm_free_large(page_addr_list);
3776
3777 DRM_DEBUG_KMS(
3778 "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3779 obj->base.size, ret, rot_info->pitch, rot_info->height,
3780 rot_info->pixel_format, rot_info->width_pages,
3781 rot_info->height_pages, size_pages + size_pages_uv,
3782 size_pages);
3783 return ERR_PTR(ret);
3784 }
3785
3786 static struct sg_table *
3787 intel_partial_pages(const struct i915_ggtt_view *view,
3788 struct drm_i915_gem_object *obj)
3789 {
3790 struct sg_table *st;
3791 struct scatterlist *sg;
3792 struct sg_page_iter obj_sg_iter;
3793 int ret = -ENOMEM;
3794
3795 st = kmalloc(sizeof(*st), GFP_KERNEL);
3796 if (!st)
3797 goto err_st_alloc;
3798
3799 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3800 if (ret)
3801 goto err_sg_alloc;
3802
3803 sg = st->sgl;
3804 st->nents = 0;
3805 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3806 view->params.partial.offset)
3807 {
3808 if (st->nents >= view->params.partial.size)
3809 break;
3810
3811 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3812 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3813 sg_dma_len(sg) = PAGE_SIZE;
3814
3815 sg = sg_next(sg);
3816 st->nents++;
3817 }
3818
3819 return st;
3820
3821 err_sg_alloc:
3822 kfree(st);
3823 err_st_alloc:
3824 return ERR_PTR(ret);
3825 }
3826 #endif
3827
3828 static int
3829 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3830 {
3831 int ret = 0;
3832
3833 if (vma->ggtt_view.pages)
3834 return 0;
3835
3836 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3837 vma->ggtt_view.pages = vma->obj->pages;
3838 #ifndef __NetBSD__
3839 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3840 vma->ggtt_view.pages =
3841 intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
3842 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3843 vma->ggtt_view.pages =
3844 intel_partial_pages(&vma->ggtt_view, vma->obj);
3845 #endif
3846 else
3847 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3848 vma->ggtt_view.type);
3849
3850 if (!vma->ggtt_view.pages) {
3851 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3852 vma->ggtt_view.type);
3853 ret = -EINVAL;
3854 } else if (IS_ERR(vma->ggtt_view.pages)) {
3855 ret = PTR_ERR(vma->ggtt_view.pages);
3856 vma->ggtt_view.pages = NULL;
3857 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3858 vma->ggtt_view.type, ret);
3859 }
3860
3861 return ret;
3862 }
3863
3864 /**
3865 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3866 * @vma: VMA to map
3867 * @cache_level: mapping cache level
3868 * @flags: flags like global or local mapping
3869 *
3870 * DMA addresses are taken from the scatter-gather table of this object (or of
3871 * this VMA in case of non-default GGTT views) and PTE entries set up.
3872 * Note that DMA addresses are also the only part of the SG table we care about.
3873 */
3874 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3875 u32 flags)
3876 {
3877 int ret;
3878 u32 bind_flags;
3879
3880 if (WARN_ON(flags == 0))
3881 return -EINVAL;
3882
3883 bind_flags = 0;
3884 if (flags & PIN_GLOBAL)
3885 bind_flags |= GLOBAL_BIND;
3886 if (flags & PIN_USER)
3887 bind_flags |= LOCAL_BIND;
3888
3889 if (flags & PIN_UPDATE)
3890 bind_flags |= vma->bound;
3891 else
3892 bind_flags &= ~vma->bound;
3893
3894 if (bind_flags == 0)
3895 return 0;
3896
3897 if (vma->bound == 0 && vma->vm->allocate_va_range) {
3898 trace_i915_va_alloc(vma->vm,
3899 vma->node.start,
3900 vma->node.size,
3901 VM_TO_TRACE_NAME(vma->vm));
3902
3903 /* XXX: i915_vma_pin() will fix this +- hack */
3904 vma->pin_count++;
3905 ret = vma->vm->allocate_va_range(vma->vm,
3906 vma->node.start,
3907 vma->node.size);
3908 vma->pin_count--;
3909 if (ret)
3910 return ret;
3911 }
3912
3913 ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3914 if (ret)
3915 return ret;
3916
3917 vma->bound |= bind_flags;
3918
3919 return 0;
3920 }
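
/*
 * Example of the flag plumbing above: with vma->bound == GLOBAL_BIND and
 * flags == PIN_USER, bind_flags becomes LOCAL_BIND and survives the
 * "&= ~vma->bound" filter, so only the PPGTT side is bound; passing
 * PIN_UPDATE instead ORs the already-bound bits back in so they get
 * rewritten rather than skipped.
 */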
3921
3922 /**
3923 * i915_ggtt_view_size - Get the size of a GGTT view.
3924 * @obj: Object the view is of.
3925 * @view: The view in question.
3926 *
3927 * @return The size of the GGTT view in bytes.
3928 */
3929 size_t
3930 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3931 const struct i915_ggtt_view *view)
3932 {
3933 if (view->type == I915_GGTT_VIEW_NORMAL) {
3934 return obj->base.size;
3935 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3936 return view->rotation_info.size;
3937 } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3938 return view->params.partial.size << PAGE_SHIFT;
3939 } else {
3940 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3941 return obj->base.size;
3942 }
3943 }
3944