1 /* $NetBSD: i915_gem_gtt.c,v 1.13 2018/08/27 14:52:40 riastradh Exp $ */
2
3 /*
4 * Copyright 2010 Daniel Vetter
5 * Copyright 2011-2014 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 *
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: i915_gem_gtt.c,v 1.13 2018/08/27 14:52:40 riastradh Exp $");
30
31 #include <linux/bitmap.h>
32 #include <linux/err.h>
33 #include <linux/seq_file.h>
34 #include <drm/drmP.h>
35 #include <drm/i915_drm.h>
36 #include "i915_drv.h"
37 #include "i915_vgpu.h"
38 #include "i915_trace.h"
39 #include "intel_drv.h"
40
41 #ifdef __NetBSD__
42 #include <drm/bus_dma_hacks.h>
43 #include <x86/machdep.h>
44 #include <x86/pte.h>
45 #define _PAGE_PRESENT PG_V /* 0x01 PTE is present / valid */
46 #define _PAGE_RW PG_RW /* 0x02 read/write */
47 #define _PAGE_PWT PG_WT /* 0x08 write-through */
48 #define _PAGE_PCD PG_N /* 0x10 page cache disabled / non-cacheable */
49 #define _PAGE_PAT PG_PAT /* 0x80 page attribute table on PTE */
50 #endif
51
52 /**
53 * DOC: Global GTT views
54 *
55 * Background and previous state
56 *
57 * Historically objects could exist (be bound) in global GTT space only as
58 * singular instances with a view representing all of the object's backing pages
59 * in a linear fashion. This view will be called a normal view.
60 *
61 * To support multiple views of the same object, where the number of mapped
62 * pages is not equal to the backing store, or where the layout of the pages
63 * is not linear, the concept of a GGTT view was added.
64 *
65 * One example of an alternative view is a stereo display driven by a single
66 * image. In this case we would have a framebuffer looking like this
67 * (2x2 pages):
68 *
69 * 12
70 * 34
71 *
72 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
73 * rendering. In contrast, fed to the display engine would be an alternative
74 * view which could look something like this:
75 *
76 * 1212
77 * 3434
78 *
79 * In this example both the size and layout of pages in the alternative view
80 * are different from the normal view.
81 *
82 * Implementation and usage
83 *
84 * GGTT views are implemented using VMAs and are distinguished via enum
85 * i915_ggtt_view_type and struct i915_ggtt_view.
86 *
87 * A new flavour of core GEM functions which work with GGTT bound objects was
88 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
89 * renaming in large amounts of code. They take the struct i915_ggtt_view
90 * parameter encapsulating all metadata required to implement a view.
91 *
92 * As a helper for callers which are only interested in the normal view, a
93 * globally const i915_ggtt_view_normal singleton instance exists. All old core
94 * GEM API functions, the ones not taking the view parameter, operate on the
95 * normal GGTT view.
96 *
97 * Code wanting to add or use a new GGTT view needs to:
98 *
99 * 1. Add a new enum with a suitable name.
100 * 2. Extend the metadata in the i915_ggtt_view structure if required.
101 * 3. Add support to i915_get_ggtt_vma_pages().
102 *
103 * New views are required to build a scatter-gather table from within the
104 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
105 * and exists for the lifetime of a VMA.
106 *
107 * Core API is designed to have copy semantics which means that passed in
108 * struct i915_ggtt_view does not need to be persistent (left around after
109 * calling the core API functions).
110 *
111 */
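
/*
 * Illustrative sketch (not driver code): following the three steps above, a
 * hypothetical I915_GGTT_VIEW_EXAMPLE view would add an enum value, extend
 * the metadata, and teach i915_get_ggtt_vma_pages() to build its sg table:
 *
 *	enum i915_ggtt_view_type { ..., I915_GGTT_VIEW_EXAMPLE };
 *	struct i915_ggtt_view {
 *		enum i915_ggtt_view_type type;
 *		union { struct { unsigned int param; } example; ... } params;
 *		struct sg_table *pages;  (built by i915_get_ggtt_vma_pages())
 *	};
 *
 * The member names here are made up for illustration; the real definitions
 * live in i915_gem_gtt.h.
 */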
112
113 static int
114 i915_get_ggtt_vma_pages(struct i915_vma *vma);
115
116 const struct i915_ggtt_view i915_ggtt_view_normal;
117 const struct i915_ggtt_view i915_ggtt_view_rotated = {
118 .type = I915_GGTT_VIEW_ROTATED
119 };
120
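/*
 * Summary of the policy below: the sanitized value means 0 = PPGTT disabled,
 * 1 = aliasing PPGTT only, 2 = full PPGTT.  Gen9+ can never drop to 0 because
 * execlists (the only submission mechanism there) require PPGTT, and gen8+
 * with execlists enabled defaults to full PPGTT.
 */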
121 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
122 {
123 bool has_aliasing_ppgtt;
124 bool has_full_ppgtt;
125
126 has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
127 has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
128
129 if (intel_vgpu_active(dev))
130 has_full_ppgtt = false; /* emulation is too hard */
131
132 /*
133 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
134 * execlists, the sole mechanism available to submit work.
135 */
136 if (INTEL_INFO(dev)->gen < 9 &&
137 (enable_ppgtt == 0 || !has_aliasing_ppgtt))
138 return 0;
139
140 if (enable_ppgtt == 1)
141 return 1;
142
143 if (enable_ppgtt == 2 && has_full_ppgtt)
144 return 2;
145
146 #ifdef CONFIG_INTEL_IOMMU
147 /* Disable ppgtt on SNB if VT-d is on. */
148 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
149 DRM_INFO("Disabling PPGTT because VT-d is on\n");
150 return 0;
151 }
152 #endif
153
154 /* Early VLV doesn't have this */
155 if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
156 dev->pdev->revision < 0xb) {
157 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
158 return 0;
159 }
160
161 if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
162 return 2;
163 else
164 return has_aliasing_ppgtt ? 1 : 0;
165 }
166
167 static int ppgtt_bind_vma(struct i915_vma *vma,
168 enum i915_cache_level cache_level,
169 u32 unused)
170 {
171 u32 pte_flags = 0;
172
173 /* Currently applicable only to VLV */
174 if (vma->obj->gt_ro)
175 pte_flags |= PTE_READ_ONLY;
176
177 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
178 cache_level, pte_flags);
179
180 return 0;
181 }
182
183 static void ppgtt_unbind_vma(struct i915_vma *vma)
184 {
185 vma->vm->clear_range(vma->vm,
186 vma->node.start,
187 vma->obj->base.size,
188 true);
189 }
190
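/*
 * On gen8 the caching mode of a PTE/PDE is a 3-bit index into the private PAT
 * (PPAT), carried in the PWT/PCD/PAT bits defined above; the PPAT_*_INDEX
 * values used below (see i915_gem_gtt.h) are pre-baked combinations of those
 * bits.
 */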
191 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
192 enum i915_cache_level level,
193 bool valid)
194 {
195 gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
196 pte |= addr;
197
198 switch (level) {
199 case I915_CACHE_NONE:
200 pte |= PPAT_UNCACHED_INDEX;
201 break;
202 case I915_CACHE_WT:
203 pte |= PPAT_DISPLAY_ELLC_INDEX;
204 break;
205 default:
206 pte |= PPAT_CACHED_INDEX;
207 break;
208 }
209
210 return pte;
211 }
212
213 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
214 const enum i915_cache_level level)
215 {
216 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
217 pde |= addr;
218 if (level != I915_CACHE_NONE)
219 pde |= PPAT_CACHED_PDE_INDEX;
220 else
221 pde |= PPAT_UNCACHED_INDEX;
222 return pde;
223 }
224
225 #define gen8_pdpe_encode gen8_pde_encode
226 #define gen8_pml4e_encode gen8_pde_encode
227
228 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
229 enum i915_cache_level level,
230 bool valid, u32 unused)
231 {
232 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
233 pte |= GEN6_PTE_ADDR_ENCODE(addr);
234
235 switch (level) {
236 case I915_CACHE_L3_LLC:
237 case I915_CACHE_LLC:
238 pte |= GEN6_PTE_CACHE_LLC;
239 break;
240 case I915_CACHE_NONE:
241 pte |= GEN6_PTE_UNCACHED;
242 break;
243 default:
244 MISSING_CASE(level);
245 }
246
247 return pte;
248 }
249
250 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
251 enum i915_cache_level level,
252 bool valid, u32 unused)
253 {
254 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
255 pte |= GEN6_PTE_ADDR_ENCODE(addr);
256
257 switch (level) {
258 case I915_CACHE_L3_LLC:
259 pte |= GEN7_PTE_CACHE_L3_LLC;
260 break;
261 case I915_CACHE_LLC:
262 pte |= GEN6_PTE_CACHE_LLC;
263 break;
264 case I915_CACHE_NONE:
265 pte |= GEN6_PTE_UNCACHED;
266 break;
267 default:
268 MISSING_CASE(level);
269 }
270
271 return pte;
272 }
273
274 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
275 enum i915_cache_level level,
276 bool valid, u32 flags)
277 {
278 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
279 pte |= GEN6_PTE_ADDR_ENCODE(addr);
280
281 if (!(flags & PTE_READ_ONLY))
282 pte |= BYT_PTE_WRITEABLE;
283
284 if (level != I915_CACHE_NONE)
285 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
286
287 return pte;
288 }
289
290 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
291 enum i915_cache_level level,
292 bool valid, u32 unused)
293 {
294 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
295 pte |= HSW_PTE_ADDR_ENCODE(addr);
296
297 if (level != I915_CACHE_NONE)
298 pte |= HSW_WB_LLC_AGE3;
299
300 return pte;
301 }
302
303 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
304 enum i915_cache_level level,
305 bool valid, u32 unused)
306 {
307 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
308 pte |= HSW_PTE_ADDR_ENCODE(addr);
309
310 switch (level) {
311 case I915_CACHE_NONE:
312 break;
313 case I915_CACHE_WT:
314 pte |= HSW_WT_ELLC_LLC_AGE3;
315 break;
316 default:
317 pte |= HSW_WB_ELLC_LLC_AGE3;
318 break;
319 }
320
321 return pte;
322 }
323
324 static void *kmap_page_dma(struct i915_page_dma *);
325 static void kunmap_page_dma(struct drm_device *, void *);
326
327 static int __setup_page_dma(struct drm_device *dev,
328 struct i915_page_dma *p, gfp_t flags)
329 {
330 #ifdef __NetBSD__
331 int busdmaflags = 0;
332 int error;
333 int nseg = 1;
334
335 if (flags & __GFP_WAIT)
336 busdmaflags |= BUS_DMA_WAITOK;
337 else
338 busdmaflags |= BUS_DMA_NOWAIT;
339
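	/*
	 * Standard three-step bus_dma(9) setup: allocate one page-sized,
	 * page-aligned DMA segment, create a map for it, and load the raw
	 * segment into the map.  The failN labels unwind the steps in
	 * reverse via backward gotos on error.
	 */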
340 error = bus_dmamem_alloc(dev->dmat, PAGE_SIZE, PAGE_SIZE, 0, &p->seg,
341 nseg, &nseg, busdmaflags);
342 if (error) {
343 fail0: p->map = NULL;
344 return -error; /* XXX errno NetBSD->Linux */
345 }
346 KASSERT(nseg == 1);
347 error = bus_dmamap_create(dev->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
348 busdmaflags, &p->map);
349 if (error) {
350 fail1: bus_dmamem_free(dev->dmat, &p->seg, 1);
351 goto fail0;
352 }
353 error = bus_dmamap_load_raw(dev->dmat, p->map, &p->seg, 1, PAGE_SIZE,
354 busdmaflags);
355 if (error) {
356 fail2: __unused
357 bus_dmamap_destroy(dev->dmat, p->map);
358 goto fail1;
359 }
360
361 if (flags & __GFP_ZERO) {
362 void *va = kmap_page_dma(p);
363 memset(va, 0, PAGE_SIZE);
364 kunmap_page_dma(dev, va);
365 }
366 #else
367 struct device *device = &dev->pdev->dev;
368
369 p->page = alloc_page(flags);
370 if (!p->page)
371 return -ENOMEM;
372
373 p->daddr = dma_map_page(device,
374 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
375
376 if (dma_mapping_error(device, p->daddr)) {
377 __free_page(p->page);
378 return -EINVAL;
379 }
380 #endif
381
382 return 0;
383 }
384
385 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
386 {
387 return __setup_page_dma(dev, p, GFP_KERNEL);
388 }
389
390 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
391 {
392 #ifdef __NetBSD__
393 if (WARN_ON(!p->map))
394 return;
395
396 bus_dmamap_unload(dev->dmat, p->map);
397 bus_dmamap_destroy(dev->dmat, p->map);
398 bus_dmamem_free(dev->dmat, &p->seg, 1);
399 p->map = NULL;
400 #else
401 if (WARN_ON(!p->page))
402 return;
403
404 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
405 __free_page(p->page);
406 memset(p, 0, sizeof(*p));
407 #endif
408 }
409
410 static void *kmap_page_dma(struct i915_page_dma *p)
411 {
412 #ifdef __NetBSD__
413 return kmap_atomic(container_of(PHYS_TO_VM_PAGE(p->seg.ds_addr),
414 struct page, p_vmp));
415 #else
416 return kmap_atomic(p->page);
417 #endif
418 }
419
420 /* We use the flushing unmap only with ppgtt structures:
421 * page directories, page tables and scratch pages.
422 */
423 static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
424 {
425 /* There are only a few exceptions for gen >= 6: chv and bxt.
426 * And we are not sure about the latter, so play safe for now.
427 */
428 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
429 drm_clflush_virt_range(vaddr, PAGE_SIZE);
430
431 kunmap_atomic(vaddr);
432 }
433
434 #define kmap_px(px) kmap_page_dma(px_base(px))
435 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
436
437 #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
438 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
439 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
440 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
441
442 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
443 const uint64_t val)
444 {
445 int i;
446 uint64_t * const vaddr = kmap_page_dma(p);
447
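	/* A 4 KiB page holds 512 64-bit entries (PAGE_SIZE / sizeof(uint64_t)). */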
448 for (i = 0; i < 512; i++)
449 vaddr[i] = val;
450
451 kunmap_page_dma(dev, vaddr);
452 }
453
454 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
455 const uint32_t val32)
456 {
457 uint64_t v = val32;
458
459 v = v << 32 | val32;
460
461 fill_page_dma(dev, p, v);
462 }
463
464 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
465 {
466 struct i915_page_scratch *sp;
467 int ret;
468
469 sp = kzalloc(sizeof(*sp), GFP_KERNEL);
470 if (sp == NULL)
471 return ERR_PTR(-ENOMEM);
472
473 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
474 if (ret) {
475 kfree(sp);
476 return ERR_PTR(ret);
477 }
478
479 #ifndef __NetBSD__ /* XXX ??? */
480 set_pages_uc(px_page(sp), 1);
481 #endif
482
483 return sp;
484 }
485
486 static void free_scratch_page(struct drm_device *dev,
487 struct i915_page_scratch *sp)
488 {
489 #ifndef __NetBSD__ /* XXX ??? */
490 set_pages_wb(px_page(sp), 1);
491 #endif
492
493 cleanup_px(dev, sp);
494 kfree(sp);
495 }
496
497 static struct i915_page_table *alloc_pt(struct drm_device *dev)
498 {
499 struct i915_page_table *pt;
500 const size_t count = INTEL_INFO(dev)->gen >= 8 ?
501 GEN8_PTES : GEN6_PTES;
502 int ret = -ENOMEM;
503
504 pt = kzalloc(sizeof(*pt), GFP_KERNEL);
505 if (!pt)
506 return ERR_PTR(-ENOMEM);
507
508 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
509 GFP_KERNEL);
510
511 if (!pt->used_ptes)
512 goto fail_bitmap;
513
514 ret = setup_px(dev, pt);
515 if (ret)
516 goto fail_page_m;
517
518 return pt;
519
520 fail_page_m:
521 kfree(pt->used_ptes);
522 fail_bitmap:
523 kfree(pt);
524
525 return ERR_PTR(ret);
526 }
527
528 static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
529 {
530 cleanup_px(dev, pt);
531 kfree(pt->used_ptes);
532 kfree(pt);
533 }
534
535 static void gen8_initialize_pt(struct i915_address_space *vm,
536 struct i915_page_table *pt)
537 {
538 gen8_pte_t scratch_pte;
539
540 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
541 I915_CACHE_LLC, true);
542
543 fill_px(vm->dev, pt, scratch_pte);
544 }
545
546 static void gen6_initialize_pt(struct i915_address_space *vm,
547 struct i915_page_table *pt)
548 {
549 gen6_pte_t scratch_pte;
550
551 WARN_ON(px_dma(vm->scratch_page) == 0);
552
553 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
554 I915_CACHE_LLC, true, 0);
555
556 fill32_px(vm->dev, pt, scratch_pte);
557 }
558
559 static struct i915_page_directory *alloc_pd(struct drm_device *dev)
560 {
561 struct i915_page_directory *pd;
562 int ret = -ENOMEM;
563
564 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
565 if (!pd)
566 return ERR_PTR(-ENOMEM);
567
568 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
569 sizeof(*pd->used_pdes), GFP_KERNEL);
570 if (!pd->used_pdes)
571 goto fail_bitmap;
572
573 ret = setup_px(dev, pd);
574 if (ret)
575 goto fail_page_m;
576
577 return pd;
578
579 fail_page_m:
580 kfree(pd->used_pdes);
581 fail_bitmap:
582 kfree(pd);
583
584 return ERR_PTR(ret);
585 }
586
587 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
588 {
589 if (px_page(pd)) {
590 cleanup_px(dev, pd);
591 kfree(pd->used_pdes);
592 kfree(pd);
593 }
594 }
595
596 static void gen8_initialize_pd(struct i915_address_space *vm,
597 struct i915_page_directory *pd)
598 {
599 gen8_pde_t scratch_pde;
600
601 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
602
603 fill_px(vm->dev, pd, scratch_pde);
604 }
605
606 static int __pdp_init(struct drm_device *dev,
607 struct i915_page_directory_pointer *pdp)
608 {
609 size_t pdpes = I915_PDPES_PER_PDP(dev);
610
611 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
612 sizeof(unsigned long),
613 GFP_KERNEL);
614 if (!pdp->used_pdpes)
615 return -ENOMEM;
616
617 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
618 GFP_KERNEL);
619 if (!pdp->page_directory) {
620 kfree(pdp->used_pdpes);
621 /* the PDP might be the statically allocated top level. Keep it
622 * as clean as possible */
623 pdp->used_pdpes = NULL;
624 return -ENOMEM;
625 }
626
627 return 0;
628 }
629
630 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
631 {
632 kfree(pdp->used_pdpes);
633 kfree(pdp->page_directory);
634 pdp->page_directory = NULL;
635 }
636
637 static struct
638 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
639 {
640 struct i915_page_directory_pointer *pdp;
641 int ret = -ENOMEM;
642
643 WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
644
645 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
646 if (!pdp)
647 return ERR_PTR(-ENOMEM);
648
649 ret = __pdp_init(dev, pdp);
650 if (ret)
651 goto fail_bitmap;
652
653 ret = setup_px(dev, pdp);
654 if (ret)
655 goto fail_page_m;
656
657 return pdp;
658
659 fail_page_m:
660 __pdp_fini(pdp);
661 fail_bitmap:
662 kfree(pdp);
663
664 return ERR_PTR(ret);
665 }
666
667 static void free_pdp(struct drm_device *dev,
668 struct i915_page_directory_pointer *pdp)
669 {
670 __pdp_fini(pdp);
671 if (USES_FULL_48BIT_PPGTT(dev)) {
672 cleanup_px(dev, pdp);
673 kfree(pdp);
674 }
675 }
676
677 static void gen8_initialize_pdp(struct i915_address_space *vm,
678 struct i915_page_directory_pointer *pdp)
679 {
680 gen8_ppgtt_pdpe_t scratch_pdpe;
681
682 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
683
684 fill_px(vm->dev, pdp, scratch_pdpe);
685 }
686
687 static void gen8_initialize_pml4(struct i915_address_space *vm,
688 struct i915_pml4 *pml4)
689 {
690 gen8_ppgtt_pml4e_t scratch_pml4e;
691
692 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
693 I915_CACHE_LLC);
694
695 fill_px(vm->dev, pml4, scratch_pml4e);
696 }
697
698 static void
699 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
700 struct i915_page_directory_pointer *pdp,
701 struct i915_page_directory *pd,
702 int index)
703 {
704 gen8_ppgtt_pdpe_t *page_directorypo;
705
706 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
707 return;
708
709 page_directorypo = kmap_px(pdp);
710 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
711 kunmap_px(ppgtt, page_directorypo);
712 }
713
714 static void
715 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
716 struct i915_pml4 *pml4,
717 struct i915_page_directory_pointer *pdp,
718 int index)
719 {
720 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
721
722 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
723 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
724 kunmap_px(ppgtt, pagemap);
725 }
726
727 /* Broadwell Page Directory Pointer Descriptors */
728 static int gen8_write_pdp(struct drm_i915_gem_request *req,
729 unsigned entry,
730 dma_addr_t addr)
731 {
732 struct intel_engine_cs *ring = req->ring;
733 int ret;
734
735 BUG_ON(entry >= 4);
736
737 ret = intel_ring_begin(req, 6);
738 if (ret)
739 return ret;
740
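	/*
	 * Each PDP register is 64 bits wide, exposed as an _UDW/_LDW pair, so
	 * the address is loaded in two halves with separate
	 * MI_LOAD_REGISTER_IMM commands.
	 */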
741 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
742 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
743 intel_ring_emit(ring, upper_32_bits(addr));
744 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
745 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
746 intel_ring_emit(ring, lower_32_bits(addr));
747 intel_ring_advance(ring);
748
749 return 0;
750 }
751
752 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
753 struct drm_i915_gem_request *req)
754 {
755 int i, ret;
756
757 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
758 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
759
760 ret = gen8_write_pdp(req, i, pd_daddr);
761 if (ret)
762 return ret;
763 }
764
765 return 0;
766 }
767
768 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
769 struct drm_i915_gem_request *req)
770 {
771 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
772 }
773
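/*
 * Reference sketch (based on the GEN8_*_SHIFT definitions in i915_gem_gtt.h):
 * a gen8 48-bit GPU virtual address decomposes like an x86-64 page-table walk:
 *
 *	47:39  pml4e index
 *	38:30  pdpe index (only 4 PDPs are usable in legacy 32-bit mode)
 *	29:21  pde index
 *	20:12  pte index
 *	11:0   byte offset within the page
 *
 * e.g. GPU VA 0x1_2345_6000 -> pml4e 0, pdpe 4, pde 282, pte 86, offset 0.
 * This is how gen8_pdpe_index()/gen8_pde_index()/gen8_pte_index() below carve
 * up the start address.
 */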
774 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
775 struct i915_page_directory_pointer *pdp,
776 uint64_t start,
777 uint64_t length,
778 gen8_pte_t scratch_pte)
779 {
780 struct i915_hw_ppgtt *ppgtt =
781 container_of(vm, struct i915_hw_ppgtt, base);
782 gen8_pte_t *pt_vaddr;
783 unsigned pdpe = gen8_pdpe_index(start);
784 unsigned pde = gen8_pde_index(start);
785 unsigned pte = gen8_pte_index(start);
786 unsigned num_entries = length >> PAGE_SHIFT;
787 unsigned last_pte, i;
788
789 if (WARN_ON(!pdp))
790 return;
791
792 while (num_entries) {
793 struct i915_page_directory *pd;
794 struct i915_page_table *pt;
795
796 if (WARN_ON(!pdp->page_directory[pdpe]))
797 break;
798
799 pd = pdp->page_directory[pdpe];
800
801 if (WARN_ON(!pd->page_table[pde]))
802 break;
803
804 pt = pd->page_table[pde];
805
806 if (WARN_ON(!px_page(pt)))
807 break;
808
809 last_pte = pte + num_entries;
810 if (last_pte > GEN8_PTES)
811 last_pte = GEN8_PTES;
812
813 pt_vaddr = kmap_px(pt);
814
815 for (i = pte; i < last_pte; i++) {
816 pt_vaddr[i] = scratch_pte;
817 num_entries--;
818 }
819
820 kunmap_px(ppgtt, pt);
821
822 pte = 0;
823 if (++pde == I915_PDES) {
824 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
825 break;
826 pde = 0;
827 }
828 }
829 }
830
831 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
832 uint64_t start,
833 uint64_t length,
834 bool use_scratch)
835 {
836 struct i915_hw_ppgtt *ppgtt =
837 container_of(vm, struct i915_hw_ppgtt, base);
838 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
839 I915_CACHE_LLC, use_scratch);
840
841 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
842 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
843 scratch_pte);
844 } else {
845 uint64_t templ4, pml4e;
846 struct i915_page_directory_pointer *pdp;
847
848 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
849 gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
850 scratch_pte);
851 }
852 }
853 }
854
855 #ifdef __NetBSD__
856 static void
857 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
858 struct i915_page_directory_pointer *pdp, bus_dmamap_t dmamap,
859 unsigned *segp, uint64_t start, enum i915_cache_level cache_level)
860 {
861 struct i915_hw_ppgtt *ppgtt =
862 container_of(vm, struct i915_hw_ppgtt, base);
863 gen8_pte_t *pt_vaddr;
864 unsigned pdpe = gen8_pdpe_index(start);
865 unsigned pde = gen8_pde_index(start);
866 unsigned pte = gen8_pte_index(start);
867
868 pt_vaddr = NULL;
869 for (; *segp < dmamap->dm_nsegs; (*segp)++) {
870 KASSERT(dmamap->dm_segs[*segp].ds_len == PAGE_SIZE);
871 if (pt_vaddr == NULL) {
872 struct i915_page_directory *pd =
873 pdp->page_directory[pdpe];
874 struct i915_page_table *pt = pd->page_table[pde];
875 pt_vaddr = kmap_px(pt);
876 }
877 pt_vaddr[pte] = gen8_pte_encode(dmamap->dm_segs[*segp].ds_addr,
878 cache_level, true);
879 if (++pte == GEN8_PTES) {
880 kunmap_px(ppgtt, pt_vaddr);
881 pt_vaddr = NULL;
882 if (++pde == I915_PDES) {
883 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
884 break;
885 pde = 0;
886 }
887 pte = 0;
888 }
889 }
890 if (pt_vaddr)
891 kunmap_px(ppgtt, pt_vaddr);
892 }
893 #else
894 static void
895 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
896 struct i915_page_directory_pointer *pdp,
897 struct sg_page_iter *sg_iter,
898 uint64_t start,
899 enum i915_cache_level cache_level)
900 {
901 struct i915_hw_ppgtt *ppgtt =
902 container_of(vm, struct i915_hw_ppgtt, base);
903 gen8_pte_t *pt_vaddr;
904 unsigned pdpe = gen8_pdpe_index(start);
905 unsigned pde = gen8_pde_index(start);
906 unsigned pte = gen8_pte_index(start);
907
908 pt_vaddr = NULL;
909
910 while (__sg_page_iter_next(sg_iter)) {
911 if (pt_vaddr == NULL) {
912 struct i915_page_directory *pd = pdp->page_directory[pdpe];
913 struct i915_page_table *pt = pd->page_table[pde];
914 pt_vaddr = kmap_px(pt);
915 }
916
917 pt_vaddr[pte] =
918 gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
919 cache_level, true);
920 if (++pte == GEN8_PTES) {
921 kunmap_px(ppgtt, pt_vaddr);
922 pt_vaddr = NULL;
923 if (++pde == I915_PDES) {
924 if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
925 break;
926 pde = 0;
927 }
928 pte = 0;
929 }
930 }
931
932 if (pt_vaddr)
933 kunmap_px(ppgtt, pt_vaddr);
934 }
935 #endif
936
937 #ifdef __NetBSD__
938 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
939 bus_dmamap_t dmamap, uint64_t start, enum i915_cache_level cache_level,
940 u32 unused)
941 {
942 struct i915_hw_ppgtt *ppgtt =
943 container_of(vm, struct i915_hw_ppgtt, base);
944 unsigned seg = 0;
945
946 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
947 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, dmamap, &seg,
948 start, cache_level);
949 } else {
950 struct i915_page_directory_pointer *pdp;
951 uint64_t templ4, pml4e;
952 uint64_t length = dmamap->dm_mapsize;
953
954 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4,
955 pml4e) {
956 gen8_ppgtt_insert_pte_entries(vm, pdp, dmamap, &seg,
957 start, cache_level);
958 }
959 }
960 }
961 #else
962 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
963 struct sg_table *pages,
964 uint64_t start,
965 enum i915_cache_level cache_level,
966 u32 unused)
967 {
968 struct i915_hw_ppgtt *ppgtt =
969 container_of(vm, struct i915_hw_ppgtt, base);
970 struct sg_page_iter sg_iter;
971
972 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
973
974 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
975 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
976 cache_level);
977 } else {
978 struct i915_page_directory_pointer *pdp;
979 uint64_t templ4, pml4e;
980 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
981
982 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
983 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
984 start, cache_level);
985 }
986 }
987 }
988 #endif
989
990 static void gen8_free_page_tables(struct drm_device *dev,
991 struct i915_page_directory *pd)
992 {
993 int i;
994
995 if (!px_page(pd))
996 return;
997
998 for_each_set_bit(i, pd->used_pdes, I915_PDES) {
999 if (WARN_ON(!pd->page_table[i]))
1000 continue;
1001
1002 free_pt(dev, pd->page_table[i]);
1003 pd->page_table[i] = NULL;
1004 }
1005 }
1006
1007 static int gen8_init_scratch(struct i915_address_space *vm)
1008 {
1009 struct drm_device *dev = vm->dev;
1010
1011 vm->scratch_page = alloc_scratch_page(dev);
1012 if (IS_ERR(vm->scratch_page))
1013 return PTR_ERR(vm->scratch_page);
1014
1015 vm->scratch_pt = alloc_pt(dev);
1016 if (IS_ERR(vm->scratch_pt)) {
1017 free_scratch_page(dev, vm->scratch_page);
1018 return PTR_ERR(vm->scratch_pt);
1019 }
1020
1021 vm->scratch_pd = alloc_pd(dev);
1022 if (IS_ERR(vm->scratch_pd)) {
1023 free_pt(dev, vm->scratch_pt);
1024 free_scratch_page(dev, vm->scratch_page);
1025 return PTR_ERR(vm->scratch_pd);
1026 }
1027
1028 if (USES_FULL_48BIT_PPGTT(dev)) {
1029 vm->scratch_pdp = alloc_pdp(dev);
1030 if (IS_ERR(vm->scratch_pdp)) {
1031 free_pd(dev, vm->scratch_pd);
1032 free_pt(dev, vm->scratch_pt);
1033 free_scratch_page(dev, vm->scratch_page);
1034 return PTR_ERR(vm->scratch_pdp);
1035 }
1036 }
1037
1038 gen8_initialize_pt(vm, vm->scratch_pt);
1039 gen8_initialize_pd(vm, vm->scratch_pd);
1040 if (USES_FULL_48BIT_PPGTT(dev))
1041 gen8_initialize_pdp(vm, vm->scratch_pdp);
1042
1043 return 0;
1044 }
1045
1046 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
1047 {
1048 enum vgt_g2v_type msg;
1049 struct drm_device *dev = ppgtt->base.dev;
1050 struct drm_i915_private *dev_priv = dev->dev_private;
1051 unsigned int offset = vgtif_reg(pdp0_lo);
1052 int i;
1053
1054 if (USES_FULL_48BIT_PPGTT(dev)) {
1055 u64 daddr = px_dma(&ppgtt->pml4);
1056
1057 I915_WRITE(offset, lower_32_bits(daddr));
1058 I915_WRITE(offset + 4, upper_32_bits(daddr));
1059
1060 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1061 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1062 } else {
1063 for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
1064 u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1065
1066 I915_WRITE(offset, lower_32_bits(daddr));
1067 I915_WRITE(offset + 4, upper_32_bits(daddr));
1068
1069 offset += 8;
1070 }
1071
1072 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1073 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1074 }
1075
1076 I915_WRITE(vgtif_reg(g2v_notify), msg);
1077
1078 return 0;
1079 }
1080
1081 static void gen8_free_scratch(struct i915_address_space *vm)
1082 {
1083 struct drm_device *dev = vm->dev;
1084
1085 if (USES_FULL_48BIT_PPGTT(dev))
1086 free_pdp(dev, vm->scratch_pdp);
1087 free_pd(dev, vm->scratch_pd);
1088 free_pt(dev, vm->scratch_pt);
1089 free_scratch_page(dev, vm->scratch_page);
1090 }
1091
1092 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
1093 struct i915_page_directory_pointer *pdp)
1094 {
1095 int i;
1096
1097 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
1098 if (WARN_ON(!pdp->page_directory[i]))
1099 continue;
1100
1101 gen8_free_page_tables(dev, pdp->page_directory[i]);
1102 free_pd(dev, pdp->page_directory[i]);
1103 }
1104
1105 free_pdp(dev, pdp);
1106 }
1107
1108 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1109 {
1110 int i;
1111
1112 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
1113 if (WARN_ON(!ppgtt->pml4.pdps[i]))
1114 continue;
1115
1116 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
1117 }
1118
1119 cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
1120 }
1121
1122 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1123 {
1124 struct i915_hw_ppgtt *ppgtt =
1125 container_of(vm, struct i915_hw_ppgtt, base);
1126
1127 if (intel_vgpu_active(vm->dev))
1128 gen8_ppgtt_notify_vgt(ppgtt, false);
1129
1130 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
1131 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
1132 else
1133 gen8_ppgtt_cleanup_4lvl(ppgtt);
1134
1135 gen8_free_scratch(vm);
1136 }
1137
1138 /**
1139 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1140 * @vm: Master vm structure.
1141 * @pd: Page directory for this address range.
1142 * @start: Starting virtual address to begin allocations.
1143 * @length: Size of the allocations.
1144 * @new_pts: Bitmap set by function with new allocations. Likely used by the
1145 * caller to free on error.
1146 *
1147 * Allocate the required number of page tables. Extremely similar to
1148 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1149 * the page directory boundary (instead of the page directory pointer). That
1150 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1151 * possible, and likely that the caller will need to use multiple calls of this
1152 * function to achieve the appropriate allocation.
1153 *
1154 * Return: 0 if success; negative error code otherwise.
1155 */
1156 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1157 struct i915_page_directory *pd,
1158 uint64_t start,
1159 uint64_t length,
1160 unsigned long *new_pts)
1161 {
1162 struct drm_device *dev = vm->dev;
1163 struct i915_page_table *pt;
1164 uint64_t temp;
1165 uint32_t pde;
1166
1167 gen8_for_each_pde(pt, pd, start, length, temp, pde) {
1168 /* Don't reallocate page tables */
1169 if (test_bit(pde, pd->used_pdes)) {
1170 /* Scratch is never allocated this way */
1171 WARN_ON(pt == vm->scratch_pt);
1172 continue;
1173 }
1174
1175 pt = alloc_pt(dev);
1176 if (IS_ERR(pt))
1177 goto unwind_out;
1178
1179 gen8_initialize_pt(vm, pt);
1180 pd->page_table[pde] = pt;
1181 __set_bit(pde, new_pts);
1182 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1183 }
1184
1185 return 0;
1186
1187 unwind_out:
1188 for_each_set_bit(pde, new_pts, I915_PDES)
1189 free_pt(dev, pd->page_table[pde]);
1190
1191 return -ENOMEM;
1192 }
1193
1194 /**
1195 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1196 * @vm: Master vm structure.
1197 * @pdp: Page directory pointer for this address range.
1198 * @start: Starting virtual address to begin allocations.
1199 * @length: Size of the allocations.
1200 * @new_pds: Bitmap set by function with new allocations. Likely used by the
1201 * caller to free on error.
1202 *
1203 * Allocate the required number of page directories starting at the pdpe index
1204 * of @start, and ending at the pdpe index of @start + @length. This function will skip
1205 * over already allocated page directories within the range, and only allocate
1206 * new ones, setting the appropriate pointer within the pdp as well as the
1207 * correct position in the bitmap @new_pds.
1208 *
1209 * The function will only allocate the pages within the range for a given page
1210 * directory pointer. In other words, if @start + @length straddles a virtually
1211 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1212 * required by the caller. This is not currently possible, and the BUG in the
1213 * code will prevent it.
1214 *
1215 * Return: 0 if success; negative error code otherwise.
1216 */
1217 static int
1218 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1219 struct i915_page_directory_pointer *pdp,
1220 uint64_t start,
1221 uint64_t length,
1222 unsigned long *new_pds)
1223 {
1224 struct drm_device *dev = vm->dev;
1225 struct i915_page_directory *pd;
1226 uint64_t temp;
1227 uint32_t pdpe;
1228 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1229
1230 WARN_ON(!bitmap_empty(new_pds, pdpes));
1231
1232 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1233 if (test_bit(pdpe, pdp->used_pdpes))
1234 continue;
1235
1236 pd = alloc_pd(dev);
1237 if (IS_ERR(pd))
1238 goto unwind_out;
1239
1240 gen8_initialize_pd(vm, pd);
1241 pdp->page_directory[pdpe] = pd;
1242 __set_bit(pdpe, new_pds);
1243 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1244 }
1245
1246 return 0;
1247
1248 unwind_out:
1249 for_each_set_bit(pdpe, new_pds, pdpes)
1250 free_pd(dev, pdp->page_directory[pdpe]);
1251
1252 return -ENOMEM;
1253 }
1254
1255 /**
1256 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1257 * @vm: Master vm structure.
1258 * @pml4: Page map level 4 for this address range.
1259 * @start: Starting virtual address to begin allocations.
1260 * @length: Size of the allocations.
1261 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
1262 * caller to free on error.
1263 *
1264 * Allocate the required number of page directory pointers. Extremely similar to
1265 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1266 * The main difference is here we are limited by the pml4 boundary (instead of
1267 * the page directory pointer).
1268 *
1269 * Return: 0 if success; negative error code otherwise.
1270 */
1271 static int
1272 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1273 struct i915_pml4 *pml4,
1274 uint64_t start,
1275 uint64_t length,
1276 unsigned long *new_pdps)
1277 {
1278 struct drm_device *dev = vm->dev;
1279 struct i915_page_directory_pointer *pdp;
1280 uint64_t temp;
1281 uint32_t pml4e;
1282
1283 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1284
1285 gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1286 if (!test_bit(pml4e, pml4->used_pml4es)) {
1287 pdp = alloc_pdp(dev);
1288 if (IS_ERR(pdp))
1289 goto unwind_out;
1290
1291 gen8_initialize_pdp(vm, pdp);
1292 pml4->pdps[pml4e] = pdp;
1293 __set_bit(pml4e, new_pdps);
1294 trace_i915_page_directory_pointer_entry_alloc(vm,
1295 pml4e,
1296 start,
1297 GEN8_PML4E_SHIFT);
1298 }
1299 }
1300
1301 return 0;
1302
1303 unwind_out:
1304 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1305 free_pdp(dev, pml4->pdps[pml4e]);
1306
1307 return -ENOMEM;
1308 }
1309
1310 static void
1311 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1312 {
1313 kfree(new_pts);
1314 kfree(new_pds);
1315 }
1316
1317 /* Allocates the page directory bitmap and the array of page table bitmaps,
1318 * one I915_PDES-bit bitmap per possible PDPE, sized by the number of PDPEs.
1319 */
1320 static
1321 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1322 unsigned long **new_pts,
1323 uint32_t pdpes)
1324 {
1325 unsigned long *pds;
1326 unsigned long *pts;
1327
1328 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1329 if (!pds)
1330 return -ENOMEM;
1331
1332 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1333 GFP_TEMPORARY);
1334 if (!pts)
1335 goto err_out;
1336
1337 *new_pds = pds;
1338 *new_pts = pts;
1339
1340 return 0;
1341
1342 err_out:
1343 free_gen8_temp_bitmaps(pds, pts);
1344 return -ENOMEM;
1345 }
1346
1347 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1348 * the page table structures, we mark them dirty so that
1349 * context switching/execlist queuing code takes extra steps
1350 * to ensure that tlbs are flushed.
1351 */
1352 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1353 {
1354 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1355 }
1356
1357 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1358 struct i915_page_directory_pointer *pdp,
1359 uint64_t start,
1360 uint64_t length)
1361 {
1362 struct i915_hw_ppgtt *ppgtt =
1363 container_of(vm, struct i915_hw_ppgtt, base);
1364 unsigned long *new_page_dirs, *new_page_tables;
1365 struct drm_device *dev = vm->dev;
1366 struct i915_page_directory *pd;
1367 const uint64_t orig_start = start;
1368 const uint64_t orig_length = length;
1369 uint64_t temp;
1370 uint32_t pdpe;
1371 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1372 int ret;
1373
1374 /* Wrap is never okay since we can only represent 48b, and we don't
1375 * actually use the other side of the canonical address space.
1376 */
1377 if (WARN_ON(start + length < start))
1378 return -ENODEV;
1379
1380 if (WARN_ON(start + length > vm->total))
1381 return -ENODEV;
1382
1383 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1384 if (ret)
1385 return ret;
1386
1387 /* Do the allocations first so we can easily bail out */
1388 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1389 new_page_dirs);
1390 if (ret) {
1391 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1392 return ret;
1393 }
1394
1395 /* For every page directory referenced, allocate page tables */
1396 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1397 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1398 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1399 if (ret)
1400 goto err_out;
1401 }
1402
1403 start = orig_start;
1404 length = orig_length;
1405
1406 /* Allocations have completed successfully, so set the bitmaps, and do
1407 * the mappings. */
1408 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1409 gen8_pde_t *const page_directory = kmap_px(pd);
1410 struct i915_page_table *pt;
1411 uint64_t pd_len = length;
1412 uint64_t pd_start = start;
1413 uint32_t pde;
1414
1415 /* Every pd should be allocated; we just did that above. */
1416 WARN_ON(!pd);
1417
1418 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1419 /* Same reasoning as pd */
1420 WARN_ON(!pt);
1421 WARN_ON(!pd_len);
1422 WARN_ON(!gen8_pte_count(pd_start, pd_len));
1423
1424 /* Set our used ptes within the page table */
1425 bitmap_set(pt->used_ptes,
1426 gen8_pte_index(pd_start),
1427 gen8_pte_count(pd_start, pd_len));
1428
1429 /* Our pde is now pointing to the pagetable, pt */
1430 __set_bit(pde, pd->used_pdes);
1431
1432 /* Map the PDE to the page table */
1433 page_directory[pde] = gen8_pde_encode(px_dma(pt),
1434 I915_CACHE_LLC);
1435 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1436 gen8_pte_index(start),
1437 gen8_pte_count(start, length),
1438 GEN8_PTES);
1439
1440 /* NB: We haven't yet mapped ptes to pages. At this
1441 * point we're still relying on insert_entries() */
1442 }
1443
1444 kunmap_px(ppgtt, page_directory);
1445 __set_bit(pdpe, pdp->used_pdpes);
1446 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1447 }
1448
1449 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1450 mark_tlbs_dirty(ppgtt);
1451 return 0;
1452
1453 err_out:
1454 while (pdpe--) {
1455 for_each_set_bit(temp, new_page_tables + pdpe *
1456 BITS_TO_LONGS(I915_PDES), I915_PDES)
1457 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1458 }
1459
1460 for_each_set_bit(pdpe, new_page_dirs, pdpes)
1461 free_pd(dev, pdp->page_directory[pdpe]);
1462
1463 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1464 mark_tlbs_dirty(ppgtt);
1465 return ret;
1466 }
1467
1468 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1469 struct i915_pml4 *pml4,
1470 uint64_t start,
1471 uint64_t length)
1472 {
1473 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1474 struct i915_hw_ppgtt *ppgtt =
1475 container_of(vm, struct i915_hw_ppgtt, base);
1476 struct i915_page_directory_pointer *pdp;
1477 uint64_t temp, pml4e;
1478 int ret = 0;
1479
1480 /* Do the pml4 allocations first, so we don't need to track the newly
1481 * allocated tables below the pdp */
1482 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1483
1484 /* The page directory and page table allocations are done in the shared 3-
1485 * and 4-level code. Just allocate the pdps.
1486 */
1487 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1488 new_pdps);
1489 if (ret)
1490 return ret;
1491
1492 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1493 "The allocation has spanned more than 512GB. "
1494 "It is highly likely this is incorrect.");
1495
1496 gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1497 WARN_ON(!pdp);
1498
1499 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1500 if (ret)
1501 goto err_out;
1502
1503 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1504 }
1505
1506 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1507 GEN8_PML4ES_PER_PML4);
1508
1509 return 0;
1510
1511 err_out:
1512 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1513 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1514
1515 return ret;
1516 }
1517
1518 static int gen8_alloc_va_range(struct i915_address_space *vm,
1519 uint64_t start, uint64_t length)
1520 {
1521 struct i915_hw_ppgtt *ppgtt =
1522 container_of(vm, struct i915_hw_ppgtt, base);
1523
1524 if (USES_FULL_48BIT_PPGTT(vm->dev))
1525 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1526 else
1527 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1528 }
1529
1530 #ifndef __NetBSD__ /* XXX debugfs */
1531 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1532 uint64_t start, uint64_t length,
1533 gen8_pte_t scratch_pte,
1534 struct seq_file *m)
1535 {
1536 struct i915_page_directory *pd;
1537 uint64_t temp;
1538 uint32_t pdpe;
1539
1540 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1541 struct i915_page_table *pt;
1542 uint64_t pd_len = length;
1543 uint64_t pd_start = start;
1544 uint32_t pde;
1545
1546 if (!test_bit(pdpe, pdp->used_pdpes))
1547 continue;
1548
1549 seq_printf(m, "\tPDPE #%d\n", pdpe);
1550 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1551 uint32_t pte;
1552 gen8_pte_t *pt_vaddr;
1553
1554 if (!test_bit(pde, pd->used_pdes))
1555 continue;
1556
1557 pt_vaddr = kmap_px(pt);
1558 for (pte = 0; pte < GEN8_PTES; pte += 4) {
1559 uint64_t va =
1560 (pdpe << GEN8_PDPE_SHIFT) |
1561 (pde << GEN8_PDE_SHIFT) |
1562 (pte << GEN8_PTE_SHIFT);
1563 int i;
1564 bool found = false;
1565
1566 for (i = 0; i < 4; i++)
1567 if (pt_vaddr[pte + i] != scratch_pte)
1568 found = true;
1569 if (!found)
1570 continue;
1571
1572 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1573 for (i = 0; i < 4; i++) {
1574 if (pt_vaddr[pte + i] != scratch_pte)
1575 seq_printf(m, " %llx", pt_vaddr[pte + i]);
1576 else
1577 seq_puts(m, " SCRATCH ");
1578 }
1579 seq_puts(m, "\n");
1580 }
1581 /* don't use kunmap_px, it could trigger
1582 * an unnecessary flush.
1583 */
1584 kunmap_atomic(pt_vaddr);
1585 }
1586 }
1587 }
1588
1589 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1590 {
1591 struct i915_address_space *vm = &ppgtt->base;
1592 uint64_t start = ppgtt->base.start;
1593 uint64_t length = ppgtt->base.total;
1594 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1595 I915_CACHE_LLC, true);
1596
1597 if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1598 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1599 } else {
1600 uint64_t templ4, pml4e;
1601 struct i915_pml4 *pml4 = &ppgtt->pml4;
1602 struct i915_page_directory_pointer *pdp;
1603
1604 gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
1605 if (!test_bit(pml4e, pml4->used_pml4es))
1606 continue;
1607
1608 seq_printf(m, " PML4E #%llu\n", pml4e);
1609 gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1610 }
1611 }
1612 }
1613 #endif
1614
1615 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1616 {
1617 unsigned long *new_page_dirs, *new_page_tables;
1618 uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1619 int ret;
1620
1621 /* We allocate temp bitmaps for page tables for no gain,
1622 * but as this is for init only, let's keep things simple.
1623 */
1624 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1625 if (ret)
1626 return ret;
1627
1628 /* Allocate for all pdps regardless of how the ppgtt
1629 * was defined.
1630 */
1631 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1632 0, 1ULL << 32,
1633 new_page_dirs);
1634 if (!ret)
1635 *ppgtt->pdp.used_pdpes = *new_page_dirs;
1636
1637 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1638
1639 return ret;
1640 }
1641
1642 /*
1643 * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
1644 * registers, with a net effect resembling a 2-level page table in normal x86
1645 * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
1646 * legacy 32b address space.
1647 *
1648 */
1649 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1650 {
1651 int ret;
1652
1653 ret = gen8_init_scratch(&ppgtt->base);
1654 if (ret)
1655 return ret;
1656
1657 ppgtt->base.start = 0;
1658 ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1659 ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1660 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1661 ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1662 ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1663 ppgtt->base.bind_vma = ppgtt_bind_vma;
1664 #ifndef __NetBSD__ /* XXX debugfs */
1665 ppgtt->debug_dump = gen8_dump_ppgtt;
1666 #endif
1667
1668 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1669 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1670 if (ret)
1671 goto free_scratch;
1672
1673 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1674
1675 ppgtt->base.total = 1ULL << 48;
1676 ppgtt->switch_mm = gen8_48b_mm_switch;
1677 } else {
1678 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1679 if (ret)
1680 goto free_scratch;
1681
1682 ppgtt->base.total = 1ULL << 32;
1683 ppgtt->switch_mm = gen8_legacy_mm_switch;
1684 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1685 0, 0,
1686 GEN8_PML4E_SHIFT);
1687
1688 if (intel_vgpu_active(ppgtt->base.dev)) {
1689 ret = gen8_preallocate_top_level_pdps(ppgtt);
1690 if (ret)
1691 goto free_scratch;
1692 }
1693 }
1694
1695 if (intel_vgpu_active(ppgtt->base.dev))
1696 gen8_ppgtt_notify_vgt(ppgtt, true);
1697
1698 return 0;
1699
1700 free_scratch:
1701 gen8_free_scratch(&ppgtt->base);
1702 return ret;
1703 }
1704
1705 #ifndef __NetBSD__ /* XXX debugfs */
1706 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1707 {
1708 struct i915_address_space *vm = &ppgtt->base;
1709 struct i915_page_table *unused;
1710 gen6_pte_t scratch_pte;
1711 uint32_t pd_entry;
1712 uint32_t pte, pde, temp;
1713 uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1714
1715 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1716 I915_CACHE_LLC, true, 0);
1717
1718 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1719 u32 expected;
1720 gen6_pte_t *pt_vaddr;
1721 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1722 pd_entry = readl(ppgtt->pd_addr + pde);
1723 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1724
1725 if (pd_entry != expected)
1726 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1727 pde,
1728 pd_entry,
1729 expected);
1730 seq_printf(m, "\tPDE: %x\n", pd_entry);
1731
1732 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1733
1734 for (pte = 0; pte < GEN6_PTES; pte+=4) {
1735 unsigned long va =
1736 (pde * PAGE_SIZE * GEN6_PTES) +
1737 (pte * PAGE_SIZE);
1738 int i;
1739 bool found = false;
1740 for (i = 0; i < 4; i++)
1741 if (pt_vaddr[pte + i] != scratch_pte)
1742 found = true;
1743 if (!found)
1744 continue;
1745
1746 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1747 for (i = 0; i < 4; i++) {
1748 if (pt_vaddr[pte + i] != scratch_pte)
1749 seq_printf(m, " %08x", pt_vaddr[pte + i]);
1750 else
1751 seq_puts(m, " SCRATCH ");
1752 }
1753 seq_puts(m, "\n");
1754 }
1755 kunmap_px(ppgtt, pt_vaddr);
1756 }
1757 }
1758 #endif
1759
1760 /* Write pde (index) from the page directory @pd to the page table @pt */
1761 static void gen6_write_pde(struct i915_page_directory *pd,
1762 const int pde, struct i915_page_table *pt)
1763 {
1764 /* Caller needs to make sure the write completes if necessary */
1765 struct i915_hw_ppgtt *ppgtt =
1766 container_of(pd, struct i915_hw_ppgtt, pd);
1767 #ifdef __NetBSD__
1768 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1769 const bus_space_tag_t bst = dev_priv->gtt.bst;
1770 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
1771 const bus_addr_t pd_base = ppgtt->pd.base.ggtt_offset;
1772 #endif
1773 u32 pd_entry;
1774
1775 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1776 pd_entry |= GEN6_PDE_VALID;
1777
1778 #ifdef __NetBSD__
1779 bus_space_write_4(bst, bsh, pd_base + pde, pd_entry);
1780 #else
1781 writel(pd_entry, ppgtt->pd_addr + pde);
1782 #endif
1783 }
1784
1785 /* Write all the page tables found in the ppgtt structure to incrementing page
1786 * directory entries. */
1787 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1788 struct i915_page_directory *pd,
1789 uint32_t start, uint32_t length)
1790 {
1791 struct i915_page_table *pt;
1792 uint32_t pde, temp;
1793
1794 gen6_for_each_pde(pt, pd, start, length, temp, pde)
1795 gen6_write_pde(pd, pde, pt);
1796
1797 /* Make sure write is complete before other code can use this page
1798 * table. Also required for WC mapped PTEs. */
1799 #ifdef __NetBSD__
1800 bus_space_read_4(dev_priv->gtt.bst, dev_priv->gtt.bsh, 0);
1801 #else
1802 readl(dev_priv->gtt.gsm);
1803 #endif
1804 }
1805
1806 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1807 {
1808 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1809
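	/*
	 * ggtt_offset is in bytes and must be 64-byte aligned (checked above);
	 * PP_DIR_BASE takes the offset in 64-byte units placed in the upper
	 * 16 bits, hence the /64 and <<16.
	 */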
1810 return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1811 }
1812
1813 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1814 struct drm_i915_gem_request *req)
1815 {
1816 struct intel_engine_cs *ring = req->ring;
1817 int ret;
1818
1819 /* NB: TLBs must be flushed and invalidated before a switch */
1820 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1821 if (ret)
1822 return ret;
1823
1824 ret = intel_ring_begin(req, 6);
1825 if (ret)
1826 return ret;
1827
1828 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1829 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1830 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1831 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1832 intel_ring_emit(ring, get_pd_offset(ppgtt));
1833 intel_ring_emit(ring, MI_NOOP);
1834 intel_ring_advance(ring);
1835
1836 return 0;
1837 }
1838
1839 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1840 struct drm_i915_gem_request *req)
1841 {
1842 struct intel_engine_cs *ring = req->ring;
1843 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1844
1845 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1846 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1847 return 0;
1848 }
1849
1850 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1851 struct drm_i915_gem_request *req)
1852 {
1853 struct intel_engine_cs *ring = req->ring;
1854 int ret;
1855
1856 /* NB: TLBs must be flushed and invalidated before a switch */
1857 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1858 if (ret)
1859 return ret;
1860
1861 ret = intel_ring_begin(req, 6);
1862 if (ret)
1863 return ret;
1864
1865 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1866 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1867 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1868 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1869 intel_ring_emit(ring, get_pd_offset(ppgtt));
1870 intel_ring_emit(ring, MI_NOOP);
1871 intel_ring_advance(ring);
1872
1873 /* XXX: RCS is the only one to auto invalidate the TLBs? */
1874 if (ring->id != RCS) {
1875 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1876 if (ret)
1877 return ret;
1878 }
1879
1880 return 0;
1881 }
1882
1883 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1884 struct drm_i915_gem_request *req)
1885 {
1886 struct intel_engine_cs *ring = req->ring;
1887 struct drm_device *dev = ppgtt->base.dev;
1888 struct drm_i915_private *dev_priv = dev->dev_private;
1889
1890
1891 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1892 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1893
1894 POSTING_READ(RING_PP_DIR_DCLV(ring));
1895
1896 return 0;
1897 }
1898
1899 static void gen8_ppgtt_enable(struct drm_device *dev)
1900 {
1901 struct drm_i915_private *dev_priv = dev->dev_private;
1902 struct intel_engine_cs *ring;
1903 int j;
1904
1905 for_each_ring(ring, dev_priv, j) {
1906 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1907 I915_WRITE(RING_MODE_GEN7(ring),
1908 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1909 }
1910 }
1911
1912 static void gen7_ppgtt_enable(struct drm_device *dev)
1913 {
1914 struct drm_i915_private *dev_priv = dev->dev_private;
1915 struct intel_engine_cs *ring;
1916 uint32_t ecochk, ecobits;
1917 int i;
1918
1919 ecobits = I915_READ(GAC_ECO_BITS);
1920 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1921
1922 ecochk = I915_READ(GAM_ECOCHK);
1923 if (IS_HASWELL(dev)) {
1924 ecochk |= ECOCHK_PPGTT_WB_HSW;
1925 } else {
1926 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1927 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1928 }
1929 I915_WRITE(GAM_ECOCHK, ecochk);
1930
1931 for_each_ring(ring, dev_priv, i) {
1932 /* GFX_MODE is per-ring on gen7+ */
1933 I915_WRITE(RING_MODE_GEN7(ring),
1934 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1935 }
1936 }
1937
1938 static void gen6_ppgtt_enable(struct drm_device *dev)
1939 {
1940 struct drm_i915_private *dev_priv = dev->dev_private;
1941 uint32_t ecochk, gab_ctl, ecobits;
1942
1943 ecobits = I915_READ(GAC_ECO_BITS);
1944 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1945 ECOBITS_PPGTT_CACHE64B);
1946
1947 gab_ctl = I915_READ(GAB_CTL);
1948 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1949
1950 ecochk = I915_READ(GAM_ECOCHK);
1951 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1952
1953 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1954 }
1955
1956 /* PPGTT support for Sandybridge/Gen6 and later */
1957 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1958 uint64_t start,
1959 uint64_t length,
1960 bool use_scratch)
1961 {
1962 struct i915_hw_ppgtt *ppgtt =
1963 container_of(vm, struct i915_hw_ppgtt, base);
1964 gen6_pte_t *pt_vaddr, scratch_pte;
1965 unsigned first_entry = start >> PAGE_SHIFT;
1966 unsigned num_entries = length >> PAGE_SHIFT;
1967 unsigned act_pt = first_entry / GEN6_PTES;
1968 unsigned first_pte = first_entry % GEN6_PTES;
1969 unsigned last_pte, i;
1970
1971 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1972 I915_CACHE_LLC, true, 0);
1973
1974 while (num_entries) {
1975 last_pte = first_pte + num_entries;
1976 if (last_pte > GEN6_PTES)
1977 last_pte = GEN6_PTES;
1978
1979 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1980
1981 for (i = first_pte; i < last_pte; i++)
1982 pt_vaddr[i] = scratch_pte;
1983
1984 kunmap_px(ppgtt, pt_vaddr);
1985
1986 num_entries -= last_pte - first_pte;
1987 first_pte = 0;
1988 act_pt++;
1989 }
1990 }
1991
1992 #ifdef __NetBSD__
1993 static void
1994 gen6_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
1995 uint64_t start, enum i915_cache_level cache_level, uint32_t flags)
1996 {
1997 struct i915_hw_ppgtt *ppgtt =
1998 container_of(vm, struct i915_hw_ppgtt, base);
1999 gen6_pte_t *pt_vaddr;
2000 unsigned first_entry = start >> PAGE_SHIFT;
2001 unsigned act_pt = first_entry / GEN6_PTES;
2002 unsigned act_pte = first_entry % GEN6_PTES;
2003 unsigned seg;
2004
2005 pt_vaddr = NULL;
2006 KASSERT(0 < dmamap->dm_nsegs);
2007 for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
2008 KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
2009 if (pt_vaddr == NULL)
2010 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
2011 pt_vaddr[act_pte] =
2012 vm->pte_encode(dmamap->dm_segs[seg].ds_addr, cache_level,
2013 true, flags);
2014 if (++act_pte == GEN6_PTES) {
2015 kunmap_px(ppgtt, pt_vaddr);
2016 pt_vaddr = NULL;
2017 act_pt++;
2018 act_pte = 0;
2019 }
2020 }
2021 if (pt_vaddr)
2022 kunmap_px(ppgtt, pt_vaddr);
2023 }
2024 #else
2025 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
2026 struct sg_table *pages,
2027 uint64_t start,
2028 enum i915_cache_level cache_level, u32 flags)
2029 {
2030 struct i915_hw_ppgtt *ppgtt =
2031 container_of(vm, struct i915_hw_ppgtt, base);
2032 gen6_pte_t *pt_vaddr;
2033 unsigned first_entry = start >> PAGE_SHIFT;
2034 unsigned act_pt = first_entry / GEN6_PTES;
2035 unsigned act_pte = first_entry % GEN6_PTES;
2036 struct sg_page_iter sg_iter;
2037
2038 pt_vaddr = NULL;
2039 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
2040 if (pt_vaddr == NULL)
2041 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
2042
2043 pt_vaddr[act_pte] =
2044 vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
2045 cache_level, true, flags);
2046
2047 if (++act_pte == GEN6_PTES) {
2048 kunmap_px(ppgtt, pt_vaddr);
2049 pt_vaddr = NULL;
2050 act_pt++;
2051 act_pte = 0;
2052 }
2053 }
2054 if (pt_vaddr)
2055 kunmap_px(ppgtt, pt_vaddr);
2056 }
2057 #endif
2058
2059 static int gen6_alloc_va_range(struct i915_address_space *vm,
2060 uint64_t start_in, uint64_t length_in)
2061 {
2062 DECLARE_BITMAP(new_page_tables, I915_PDES);
2063 struct drm_device *dev = vm->dev;
2064 struct drm_i915_private *dev_priv = dev->dev_private;
2065 struct i915_hw_ppgtt *ppgtt =
2066 container_of(vm, struct i915_hw_ppgtt, base);
2067 struct i915_page_table *pt;
2068 uint32_t start, length, start_save, length_save;
2069 uint32_t pde, temp;
2070 int ret;
2071
2072 if (WARN_ON(start_in + length_in > ppgtt->base.total))
2073 return -ENODEV;
2074
2075 start = start_save = start_in;
2076 length = length_save = length_in;
2077
2078 bitmap_zero(new_page_tables, I915_PDES);
2079
2080 	/* The allocation is done in two stages so that we can bail out with a
2081 	 * minimal amount of pain. The first stage finds new page tables that
2082 	 * need allocation. The second stage marks the PTEs in use within
2083 	 * those page tables.
2084 */
2085 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
2086 if (pt != vm->scratch_pt) {
2087 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
2088 continue;
2089 }
2090
2091 /* We've already allocated a page table */
2092 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
2093
2094 pt = alloc_pt(dev);
2095 if (IS_ERR(pt)) {
2096 ret = PTR_ERR(pt);
2097 goto unwind_out;
2098 }
2099
2100 gen6_initialize_pt(vm, pt);
2101
2102 ppgtt->pd.page_table[pde] = pt;
2103 __set_bit(pde, new_page_tables);
2104 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
2105 }
2106
2107 start = start_save;
2108 length = length_save;
2109
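/*
 * Second stage: mark the PTEs now in use in each table and, for the
 * tables allocated above, write their PDEs into the page directory.
 */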
2110 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
2111 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
2112
2113 bitmap_zero(tmp_bitmap, GEN6_PTES);
2114 bitmap_set(tmp_bitmap, gen6_pte_index(start),
2115 gen6_pte_count(start, length));
2116
2117 if (__test_and_clear_bit(pde, new_page_tables))
2118 gen6_write_pde(&ppgtt->pd, pde, pt);
2119
2120 trace_i915_page_table_entry_map(vm, pde, pt,
2121 gen6_pte_index(start),
2122 gen6_pte_count(start, length),
2123 GEN6_PTES);
2124 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
2125 GEN6_PTES);
2126 }
2127
2128 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
2129
2130 /* Make sure write is complete before other code can use this page
2131 	 * table. Also required for WC mapped PTEs. */
2132 #ifdef __NetBSD__
2133 bus_space_read_4(dev_priv->gtt.bst, dev_priv->gtt.bsh, 0);
2134 #else
2135 readl(dev_priv->gtt.gsm);
2136 #endif
2137
2138 mark_tlbs_dirty(ppgtt);
2139 return 0;
2140
2141 unwind_out:
2142 for_each_set_bit(pde, new_page_tables, I915_PDES) {
2143 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
2144
2145 ppgtt->pd.page_table[pde] = vm->scratch_pt;
2146 free_pt(vm->dev, pt);
2147 }
2148
2149 mark_tlbs_dirty(ppgtt);
2150 return ret;
2151 }
2152
2153 static int gen6_init_scratch(struct i915_address_space *vm)
2154 {
2155 struct drm_device *dev = vm->dev;
2156
2157 vm->scratch_page = alloc_scratch_page(dev);
2158 if (IS_ERR(vm->scratch_page))
2159 return PTR_ERR(vm->scratch_page);
2160
2161 vm->scratch_pt = alloc_pt(dev);
2162 if (IS_ERR(vm->scratch_pt)) {
2163 free_scratch_page(dev, vm->scratch_page);
2164 return PTR_ERR(vm->scratch_pt);
2165 }
2166
2167 gen6_initialize_pt(vm, vm->scratch_pt);
2168
2169 return 0;
2170 }
2171
2172 static void gen6_free_scratch(struct i915_address_space *vm)
2173 {
2174 struct drm_device *dev = vm->dev;
2175
2176 free_pt(dev, vm->scratch_pt);
2177 free_scratch_page(dev, vm->scratch_page);
2178 }
2179
2180 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
2181 {
2182 struct i915_hw_ppgtt *ppgtt =
2183 container_of(vm, struct i915_hw_ppgtt, base);
2184 struct i915_page_table *pt;
2185 uint32_t pde;
2186
2187 drm_mm_remove_node(&ppgtt->node);
2188
2189 gen6_for_all_pdes(pt, ppgtt, pde) {
2190 if (pt != vm->scratch_pt)
2191 free_pt(ppgtt->base.dev, pt);
2192 }
2193
2194 gen6_free_scratch(vm);
2195 }
2196
2197 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
2198 {
2199 struct i915_address_space *vm = &ppgtt->base;
2200 struct drm_device *dev = ppgtt->base.dev;
2201 struct drm_i915_private *dev_priv = dev->dev_private;
2202 bool retried = false;
2203 int ret;
2204
2205 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2206 * allocator works in address space sizes, so it's multiplied by page
2207 * size. We allocate at the top of the GTT to avoid fragmentation.
2208 */
2209 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
2210
2211 ret = gen6_init_scratch(vm);
2212 if (ret)
2213 return ret;
2214
2215 alloc:
2216 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2217 &ppgtt->node, GEN6_PD_SIZE,
2218 GEN6_PD_ALIGN, 0,
2219 0, dev_priv->gtt.base.total,
2220 DRM_MM_TOPDOWN);
2221 if (ret == -ENOSPC && !retried) {
2222 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2223 GEN6_PD_SIZE, GEN6_PD_ALIGN,
2224 I915_CACHE_NONE,
2225 0, dev_priv->gtt.base.total,
2226 0);
2227 if (ret)
2228 goto err_out;
2229
2230 retried = true;
2231 goto alloc;
2232 }
2233
2234 if (ret)
2235 goto err_out;
2236
2237
2238 if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2239 DRM_DEBUG("Forced to use aperture for PDEs\n");
2240
2241 return 0;
2242
2243 err_out:
2244 gen6_free_scratch(vm);
2245 return ret;
2246 }
2247
2248 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2249 {
2250 return gen6_ppgtt_allocate_page_directories(ppgtt);
2251 }
2252
2253 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2254 uint64_t start, uint64_t length)
2255 {
2256 struct i915_page_table *unused __unused;
2257 uint32_t pde, temp;
2258
2259 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2260 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2261 }
2262
2263 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2264 {
2265 struct drm_device *dev = ppgtt->base.dev;
2266 struct drm_i915_private *dev_priv = dev->dev_private;
2267 int ret;
2268
2269 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2270 if (IS_GEN6(dev)) {
2271 ppgtt->switch_mm = gen6_mm_switch;
2272 } else if (IS_HASWELL(dev)) {
2273 ppgtt->switch_mm = hsw_mm_switch;
2274 } else if (IS_GEN7(dev)) {
2275 ppgtt->switch_mm = gen7_mm_switch;
2276 } else
2277 BUG();
2278
2279 if (intel_vgpu_active(dev))
2280 ppgtt->switch_mm = vgpu_mm_switch;
2281
2282 ret = gen6_ppgtt_alloc(ppgtt);
2283 if (ret)
2284 return ret;
2285
2286 ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2287 ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2288 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2289 ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2290 ppgtt->base.bind_vma = ppgtt_bind_vma;
2291 ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2292 ppgtt->base.start = 0;
2293 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2294 #ifndef __NetBSD__
2295 ppgtt->debug_dump = gen6_dump_ppgtt;
2296 #endif
2297
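/*
 * ppgtt->node.start is a GGTT address; its GTT entry index
 * (start / PAGE_SIZE) times the PTE size gives the byte offset within
 * the GTT at which this PPGTT's PDEs are written (cf. pd_addr below).
 */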
2298 ppgtt->pd.base.ggtt_offset =
2299 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2300
2301 #ifndef __NetBSD__
2302 ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2303 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2304 #endif
2305
2306 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2307
2308 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2309
2310 DRM_DEBUG_DRIVER("Allocated pde space (%"PRId64"M) at GTT entry: %"PRIx64"\n",
2311 ppgtt->node.size >> 20,
2312 ppgtt->node.start / PAGE_SIZE);
2313
2314 DRM_DEBUG("Adding PPGTT at offset %x\n",
2315 ppgtt->pd.base.ggtt_offset << 10);
2316
2317 return 0;
2318 }
2319
2320 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2321 {
2322 ppgtt->base.dev = dev;
2323
2324 if (INTEL_INFO(dev)->gen < 8)
2325 return gen6_ppgtt_init(ppgtt);
2326 else
2327 return gen8_ppgtt_init(ppgtt);
2328 }
2329
2330 static void i915_address_space_init(struct i915_address_space *vm,
2331 struct drm_i915_private *dev_priv)
2332 {
2333 drm_mm_init(&vm->mm, vm->start, vm->total);
2334 vm->dev = dev_priv->dev;
2335 INIT_LIST_HEAD(&vm->active_list);
2336 INIT_LIST_HEAD(&vm->inactive_list);
2337 list_add_tail(&vm->global_link, &dev_priv->vm_list);
2338 }
2339
2340 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2341 {
2342 struct drm_i915_private *dev_priv = dev->dev_private;
2343 int ret = 0;
2344
2345 ret = __hw_ppgtt_init(dev, ppgtt);
2346 if (ret == 0) {
2347 kref_init(&ppgtt->ref);
2348 i915_address_space_init(&ppgtt->base, dev_priv);
2349 }
2350
2351 return ret;
2352 }
2353
2354 int i915_ppgtt_init_hw(struct drm_device *dev)
2355 {
2356 /* In the case of execlists, PPGTT is enabled by the context descriptor
2357 * and the PDPs are contained within the context itself. We don't
2358 * need to do anything here. */
2359 if (i915.enable_execlists)
2360 return 0;
2361
2362 if (!USES_PPGTT(dev))
2363 return 0;
2364
2365 if (IS_GEN6(dev))
2366 gen6_ppgtt_enable(dev);
2367 else if (IS_GEN7(dev))
2368 gen7_ppgtt_enable(dev);
2369 else if (INTEL_INFO(dev)->gen >= 8)
2370 gen8_ppgtt_enable(dev);
2371 else
2372 MISSING_CASE(INTEL_INFO(dev)->gen);
2373
2374 return 0;
2375 }
2376
2377 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2378 {
2379 struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2380 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2381
2382 if (i915.enable_execlists)
2383 return 0;
2384
2385 if (!ppgtt)
2386 return 0;
2387
2388 return ppgtt->switch_mm(ppgtt, req);
2389 }
2390
2391 struct i915_hw_ppgtt *
2392 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2393 {
2394 struct i915_hw_ppgtt *ppgtt;
2395 int ret;
2396
2397 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2398 if (!ppgtt)
2399 return ERR_PTR(-ENOMEM);
2400
2401 ret = i915_ppgtt_init(dev, ppgtt);
2402 if (ret) {
2403 kfree(ppgtt);
2404 return ERR_PTR(ret);
2405 }
2406
2407 ppgtt->file_priv = fpriv;
2408
2409 trace_i915_ppgtt_create(&ppgtt->base);
2410
2411 return ppgtt;
2412 }
2413
2414 void i915_ppgtt_release(struct kref *kref)
2415 {
2416 struct i915_hw_ppgtt *ppgtt =
2417 container_of(kref, struct i915_hw_ppgtt, ref);
2418
2419 trace_i915_ppgtt_release(&ppgtt->base);
2420
2421 /* vmas should already be unbound */
2422 WARN_ON(!list_empty(&ppgtt->base.active_list));
2423 WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2424
2425 list_del(&ppgtt->base.global_link);
2426 drm_mm_takedown(&ppgtt->base.mm);
2427
2428 ppgtt->base.cleanup(&ppgtt->base);
2429 kfree(ppgtt);
2430 }
2431
2432 extern int intel_iommu_gfx_mapped;
2433 /* Certain Gen5 chipsets require idling the GPU before
2434 * unmapping anything from the GTT when VT-d is enabled.
2435 */
2436 static bool needs_idle_maps(struct drm_device *dev)
2437 {
2438 #ifdef CONFIG_INTEL_IOMMU
2439 /* Query intel_iommu to see if we need the workaround. Presumably that
2440 * was loaded first.
2441 */
2442 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2443 return true;
2444 #endif
2445 return false;
2446 }
2447
2448 static bool do_idling(struct drm_i915_private *dev_priv)
2449 {
2450 bool ret = dev_priv->mm.interruptible;
2451
2452 if (unlikely(dev_priv->gtt.do_idle_maps)) {
2453 dev_priv->mm.interruptible = false;
2454 if (i915_gpu_idle(dev_priv->dev)) {
2455 DRM_ERROR("Couldn't idle GPU\n");
2456 /* Wait a bit, in hopes it avoids the hang */
2457 udelay(10);
2458 }
2459 }
2460
2461 return ret;
2462 }
2463
2464 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2465 {
2466 if (unlikely(dev_priv->gtt.do_idle_maps))
2467 dev_priv->mm.interruptible = interruptible;
2468 }
2469
2470 void i915_check_and_clear_faults(struct drm_device *dev)
2471 {
2472 struct drm_i915_private *dev_priv = dev->dev_private;
2473 struct intel_engine_cs *ring;
2474 int i;
2475
2476 if (INTEL_INFO(dev)->gen < 6)
2477 return;
2478
2479 for_each_ring(ring, dev_priv, i) {
2480 u32 fault_reg;
2481 fault_reg = I915_READ(RING_FAULT_REG(ring));
2482 if (fault_reg & RING_FAULT_VALID) {
2483 DRM_DEBUG_DRIVER("Unexpected fault\n"
2484 "\tAddr: 0x%08"PRIx32"\n"
2485 "\tAddress space: %s\n"
2486 "\tSource ID: %d\n"
2487 "\tType: %d\n",
2488 fault_reg & PAGE_MASK,
2489 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2490 RING_FAULT_SRCID(fault_reg),
2491 RING_FAULT_FAULT_TYPE(fault_reg));
2492 I915_WRITE(RING_FAULT_REG(ring),
2493 fault_reg & ~RING_FAULT_VALID);
2494 }
2495 }
2496 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2497 }
2498
2499 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2500 {
2501 if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2502 intel_gtt_chipset_flush();
2503 } else {
2504 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2505 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2506 }
2507 }
2508
2509 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2510 {
2511 struct drm_i915_private *dev_priv = dev->dev_private;
2512
2513 /* Don't bother messing with faults pre GEN6 as we have little
2514 * documentation supporting that it's a good idea.
2515 */
2516 if (INTEL_INFO(dev)->gen < 6)
2517 return;
2518
2519 i915_check_and_clear_faults(dev);
2520
2521 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2522 dev_priv->gtt.base.start,
2523 dev_priv->gtt.base.total,
2524 true);
2525
2526 i915_ggtt_flush(dev_priv);
2527 }
2528
2529 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2530 {
2531 #ifdef __NetBSD__
2532 KASSERT(0 < obj->base.size);
2533 /* XXX errno NetBSD->Linux */
2534 return -bus_dmamap_load_pglist(obj->base.dev->dmat, obj->pages,
2535 &obj->pageq, obj->base.size, BUS_DMA_NOWAIT);
2536 #else
2537 if (!dma_map_sg(&obj->base.dev->pdev->dev,
2538 obj->pages->sgl, obj->pages->nents,
2539 PCI_DMA_BIDIRECTIONAL))
2540 return -ENOSPC;
2541
2542 return 0;
2543 #endif
2544 }
2545
2546 #ifdef __NetBSD__
2547 static gen8_pte_t
2548 gen8_get_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i)
2549 {
2550 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2551 CTASSERT(sizeof(gen8_pte_t) == 8);
2552 #ifdef _LP64 /* XXX How to detect bus_space_read_8? */
2553 return bus_space_read_8(bst, bsh, 8*i);
2554 #else
2555 /*
2556 * XXX I'm not sure this case can actually happen in practice:
2557 * 32-bit gen8 chipsets?
2558 */
2559 return bus_space_read_4(bst, bsh, 8*i) |
2560 ((uint64_t)bus_space_read_4(bst, bsh, 8*i + 4) << 32);
2561 #endif
2562 }
2563
2564 static inline void
2565 gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
2566 gen8_pte_t pte)
2567 {
2568 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2569 CTASSERT(sizeof(gen8_pte_t) == 8);
2570 #ifdef _LP64 /* XXX How to detect bus_space_write_8? */
2571 bus_space_write_8(bst, bsh, 8*i, pte);
2572 #else
2573 bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
2574 bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
2575 #endif
2576 }
2577 #else
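/*
 * Without writeq (32-bit kernels), the 64-bit PTE is emitted as two
 * 32-bit writes, so the GTT update is not a single atomic store.
 */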
2578 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2579 {
2580 #ifdef writeq
2581 writeq(pte, addr);
2582 #else
2583 iowrite32((u32)pte, addr);
2584 iowrite32(pte >> 32, addr + 4);
2585 #endif
2586 }
2587 #endif
2588
2589 #ifdef __NetBSD__
2590 static void
2591 gen8_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2592 uint64_t start, enum i915_cache_level level, uint32_t unused_flags)
2593 {
2594 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2595 unsigned first_entry = start >> PAGE_SHIFT;
2596 const bus_space_tag_t bst = dev_priv->gtt.bst;
2597 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2598 unsigned i;
2599
2600 KASSERT(0 < dmamap->dm_nsegs);
2601 for (i = 0; i < dmamap->dm_nsegs; i++) {
2602 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2603 gen8_set_pte(bst, bsh, first_entry + i,
2604 gen8_pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2605 }
2606 if (0 < i) {
2607 /* Posting read. */
2608 WARN_ON(gen8_get_pte(bst, bsh, (first_entry + i - 1))
2609 != gen8_pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2610 true));
2611 }
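/*
 * Flush the chipset TLBs only after the posting read above has
 * confirmed the PTE updates, as in the Linux sg_table variant below.
 */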
2612 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2613 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2614 }
2615 #else
2616 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2617 struct sg_table *st,
2618 uint64_t start,
2619 enum i915_cache_level level, u32 unused)
2620 {
2621 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2622 unsigned first_entry = start >> PAGE_SHIFT;
2623 gen8_pte_t __iomem *gtt_entries =
2624 (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2625 int i = 0;
2626 struct sg_page_iter sg_iter;
2627 dma_addr_t addr = 0; /* shut up gcc */
2628
2629 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2630 addr = sg_dma_address(sg_iter.sg) +
2631 (sg_iter.sg_pgoffset << PAGE_SHIFT);
2632 		gen8_set_pte(&gtt_entries[i],
2633 gen8_pte_encode(addr, level, true));
2634 i++;
2635 }
2636
2637 /*
2638 * XXX: This serves as a posting read to make sure that the PTE has
2639 * actually been updated. There is some concern that even though
2640 	 * registers and PTEs are within the same BAR, they may still be subject
2641 	 * to NUMA-like access patterns. Therefore, even with the way we assume
2642 * hardware should work, we must keep this posting read for paranoia.
2643 */
2644 if (i != 0)
2645 		WARN_ON(readq(&gtt_entries[i-1])
2646 != gen8_pte_encode(addr, level, true));
2647
2648 /* This next bit makes the above posting read even more important. We
2649 * want to flush the TLBs only after we're certain all the PTE updates
2650 * have finished.
2651 */
2652 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2653 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2654 }
2655 #endif
2656
2657 /*
2658 * Binds an object into the global gtt with the specified cache level. The object
2659 * will be accessible to the GPU via commands whose operands reference offsets
2660 * within the global GTT as well as accessible by the GPU through the GMADR
2661 * mapped BAR (dev_priv->mm.gtt->gtt).
2662 */
2663 #ifdef __NetBSD__
2664 static void
2665 gen6_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2666 uint64_t start, enum i915_cache_level level, uint32_t flags)
2667 {
2668 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2669 unsigned first_entry = start >> PAGE_SHIFT;
2670 const bus_space_tag_t bst = dev_priv->gtt.bst;
2671 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2672 unsigned i;
2673
2674 KASSERT(0 < dmamap->dm_nsegs);
2675 for (i = 0; i < dmamap->dm_nsegs; i++) {
2676 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2677 CTASSERT(sizeof(gen6_pte_t) == 4);
2678 bus_space_write_4(bst, bsh, 4*(first_entry + i),
2679 vm->pte_encode(dmamap->dm_segs[i].ds_addr, level, true,
2680 flags));
2681 }
2682 if (0 < i) {
2683 /* Posting read. */
2684 WARN_ON(bus_space_read_4(bst, bsh, 4*(first_entry + i - 1))
2685 != vm->pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2686 true, flags));
2687 }
2688 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2689 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2690 }
2691 #else
2692 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2693 struct sg_table *st,
2694 uint64_t start,
2695 enum i915_cache_level level, u32 flags)
2696 {
2697 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2698 unsigned first_entry = start >> PAGE_SHIFT;
2699 gen6_pte_t __iomem *gtt_entries =
2700 (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2701 int i = 0;
2702 struct sg_page_iter sg_iter;
2703 dma_addr_t addr = 0;
2704
2705 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2706 addr = sg_page_iter_dma_address(&sg_iter);
2707 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2708 i++;
2709 }
2710
2711 /* XXX: This serves as a posting read to make sure that the PTE has
2712 * actually been updated. There is some concern that even though
2713 	 * registers and PTEs are within the same BAR, they may still be subject
2714 	 * to NUMA-like access patterns. Therefore, even with the way we assume
2715 * hardware should work, we must keep this posting read for paranoia.
2716 */
2717 if (i != 0) {
2718 		unsigned long gtt = readl(&gtt_entries[i-1]);
2719 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2720 }
2721
2722 /* This next bit makes the above posting read even more important. We
2723 * want to flush the TLBs only after we're certain all the PTE updates
2724 * have finished.
2725 */
2726 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2727 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2728 }
2729 #endif
2730
2731 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2732 uint64_t start,
2733 uint64_t length,
2734 bool use_scratch)
2735 {
2736 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2737 unsigned first_entry = start >> PAGE_SHIFT;
2738 unsigned num_entries = length >> PAGE_SHIFT;
2739 #ifdef __NetBSD__
2740 const bus_space_tag_t bst = dev_priv->gtt.bst;
2741 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2742 gen8_pte_t scratch_pte;
2743 #else
2744 gen8_pte_t scratch_pte, __iomem *gtt_base =
2745 (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2746 #endif
2747 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2748 int i;
2749
2750 if (WARN(num_entries > max_entries,
2751 "First entry = %d; Num entries = %d (max=%d)\n",
2752 first_entry, num_entries, max_entries))
2753 num_entries = max_entries;
2754
2755 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2756 I915_CACHE_LLC,
2757 use_scratch);
2758 #ifdef __NetBSD__
2759 for (i = 0; i < num_entries; i++)
2760 gen8_set_pte(bst, bsh, first_entry + i, scratch_pte);
2761 (void)gen8_get_pte(bst, bsh, first_entry);
2762 #else
2763 for (i = 0; i < num_entries; i++)
2764 		gen8_set_pte(&gtt_base[i], scratch_pte);
2765 readl(gtt_base);
2766 #endif
2767 }
2768
2769 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2770 uint64_t start,
2771 uint64_t length,
2772 bool use_scratch)
2773 {
2774 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2775 unsigned first_entry = start >> PAGE_SHIFT;
2776 unsigned num_entries = length >> PAGE_SHIFT;
2777 #ifdef __NetBSD__
2778 const bus_space_tag_t bst = dev_priv->gtt.bst;
2779 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2780 	gen6_pte_t scratch_pte;
2781 #else
2782 gen6_pte_t scratch_pte, __iomem *gtt_base =
2783 (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2784 #endif
2785 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2786 int i;
2787
2788 if (WARN(num_entries > max_entries,
2789 "First entry = %d; Num entries = %d (max=%d)\n",
2790 first_entry, num_entries, max_entries))
2791 num_entries = max_entries;
2792
2793 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2794 I915_CACHE_LLC, use_scratch, 0);
2795
2796 #ifdef __NetBSD__
2797 CTASSERT(sizeof(gen6_pte_t) == 4);
2798 for (i = 0; i < num_entries; i++)
2799 bus_space_write_4(bst, bsh, 4*(first_entry + i), scratch_pte);
2800 (void)bus_space_read_4(bst, bsh, 4*first_entry);
2801 #else
2802 for (i = 0; i < num_entries; i++)
2803 		iowrite32(scratch_pte, &gtt_base[i]);
2804 readl(gtt_base);
2805 #endif
2806 }
2807
2808 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2809 #ifdef __NetBSD__
2810 bus_dmamap_t pages,
2811 #else
2812 struct sg_table *pages,
2813 #endif
2814 uint64_t start,
2815 enum i915_cache_level cache_level, u32 unused)
2816 {
2817 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2818 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2819
2820 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2821 }
2822
2823 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2824 uint64_t start,
2825 uint64_t length,
2826 bool unused)
2827 {
2828 unsigned first_entry = start >> PAGE_SHIFT;
2829 unsigned num_entries = length >> PAGE_SHIFT;
2830 intel_gtt_clear_range(first_entry, num_entries);
2831 }
2832
2833 static int ggtt_bind_vma(struct i915_vma *vma,
2834 enum i915_cache_level cache_level,
2835 u32 flags)
2836 {
2837 struct drm_i915_gem_object *obj = vma->obj;
2838 u32 pte_flags = 0;
2839 int ret;
2840
2841 ret = i915_get_ggtt_vma_pages(vma);
2842 if (ret)
2843 return ret;
2844
2845 /* Currently applicable only to VLV */
2846 if (obj->gt_ro)
2847 pte_flags |= PTE_READ_ONLY;
2848
2849 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2850 vma->node.start,
2851 cache_level, pte_flags);
2852
2853 /*
2854 * Without aliasing PPGTT there's no difference between
2855 	 * GLOBAL/LOCAL_BIND; it's all the same PTEs. Hence we unconditionally
2856 	 * mark both as bound if we bind either, to avoid double-binding.
2857 */
2858 vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2859
2860 return 0;
2861 }
2862
2863 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2864 enum i915_cache_level cache_level,
2865 u32 flags)
2866 {
2867 struct drm_device *dev = vma->vm->dev;
2868 struct drm_i915_private *dev_priv = dev->dev_private;
2869 struct drm_i915_gem_object *obj = vma->obj;
2870 #ifdef __NetBSD__
2871 bus_dmamap_t pages = obj->pages;
2872 #else
2873 struct sg_table *pages = obj->pages;
2874 #endif
2875 u32 pte_flags = 0;
2876 int ret;
2877
2878 ret = i915_get_ggtt_vma_pages(vma);
2879 if (ret)
2880 return ret;
2881 pages = vma->ggtt_view.pages;
2882
2883 /* Currently applicable only to VLV */
2884 if (obj->gt_ro)
2885 pte_flags |= PTE_READ_ONLY;
2886
2887
2888 if (flags & GLOBAL_BIND) {
2889 vma->vm->insert_entries(vma->vm, pages,
2890 vma->node.start,
2891 cache_level, pte_flags);
2892 }
2893
2894 if (flags & LOCAL_BIND) {
2895 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2896 appgtt->base.insert_entries(&appgtt->base, pages,
2897 vma->node.start,
2898 cache_level, pte_flags);
2899 }
2900
2901 return 0;
2902 }
2903
2904 static void ggtt_unbind_vma(struct i915_vma *vma)
2905 {
2906 struct drm_device *dev = vma->vm->dev;
2907 struct drm_i915_private *dev_priv = dev->dev_private;
2908 struct drm_i915_gem_object *obj = vma->obj;
2909 const uint64_t size = min_t(uint64_t,
2910 obj->base.size,
2911 vma->node.size);
2912
2913 if (vma->bound & GLOBAL_BIND) {
2914 vma->vm->clear_range(vma->vm,
2915 vma->node.start,
2916 size,
2917 true);
2918 }
2919
2920 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2921 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2922
2923 appgtt->base.clear_range(&appgtt->base,
2924 vma->node.start,
2925 size,
2926 true);
2927 }
2928 }
2929
2930 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2931 {
2932 struct drm_device *dev = obj->base.dev;
2933 struct drm_i915_private *dev_priv = dev->dev_private;
2934 bool interruptible;
2935
2936 interruptible = do_idling(dev_priv);
2937
2938 #ifdef __NetBSD__
2939 bus_dmamap_unload(dev->dmat, obj->pages);
2940 #else
2941 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2942 PCI_DMA_BIDIRECTIONAL);
2943 #endif
2944
2945 undo_idling(dev_priv, interruptible);
2946 }
2947
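/*
 * Keep a one-page guard between GTT nodes of different cache "colour"
 * (cache level): nudge the start or end of the hole inwards by 4096
 * bytes whenever a neighbouring node has a different colour.
 */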
2948 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2949 unsigned long color,
2950 u64 *start,
2951 u64 *end)
2952 {
2953 if (node->color != color)
2954 *start += 4096;
2955
2956 if (!list_empty(&node->node_list)) {
2957 node = list_entry(node->node_list.next,
2958 struct drm_mm_node,
2959 node_list);
2960 if (node->allocated && node->color != color)
2961 *end -= 4096;
2962 }
2963 }
2964
2965 static int i915_gem_setup_global_gtt(struct drm_device *dev,
2966 u64 start,
2967 u64 mappable_end,
2968 u64 end)
2969 {
2970 	/* Let GEM manage all of the aperture.
2971 *
2972 * However, leave one page at the end still bound to the scratch page.
2973 * There are a number of places where the hardware apparently prefetches
2974 * past the end of the object, and we've seen multiple hangs with the
2975 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2976 * aperture. One page should be enough to keep any prefetching inside
2977 * of the aperture.
2978 */
2979 struct drm_i915_private *dev_priv = dev->dev_private;
2980 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2981 struct drm_mm_node *entry;
2982 struct drm_i915_gem_object *obj;
2983 unsigned long hole_start, hole_end;
2984 int ret;
2985
2986 BUG_ON(mappable_end > end);
2987
2988 ggtt_vm->start = start;
2989
2990 /* Subtract the guard page before address space initialization to
2991 * shrink the range used by drm_mm */
2992 ggtt_vm->total = end - start - PAGE_SIZE;
2993 i915_address_space_init(ggtt_vm, dev_priv);
2994 ggtt_vm->total += PAGE_SIZE;
2995
2996 if (intel_vgpu_active(dev)) {
2997 ret = intel_vgt_balloon(dev);
2998 if (ret)
2999 return ret;
3000 }
3001
3002 if (!HAS_LLC(dev))
3003 ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
3004
3005 /* Mark any preallocated objects as occupied */
3006 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3007 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
3008
3009 DRM_DEBUG_KMS("reserving preallocated space: %"PRIx64" + %zx\n",
3010 i915_gem_obj_ggtt_offset(obj), obj->base.size);
3011
3012 WARN_ON(i915_gem_obj_ggtt_bound(obj));
3013 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
3014 if (ret) {
3015 DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
3016 return ret;
3017 }
3018 vma->bound |= GLOBAL_BIND;
3019 __i915_vma_set_map_and_fenceable(vma);
3020 list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
3021 }
3022
3023 /* Clear any non-preallocated blocks */
3024 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
3025 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
3026 hole_start, hole_end);
3027 ggtt_vm->clear_range(ggtt_vm, hole_start,
3028 hole_end - hole_start, true);
3029 }
3030
3031 /* And finally clear the reserved guard page */
3032 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
3033
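/*
 * With aliasing (but not full per-file) PPGTT, a single kernel-owned
 * PPGTT aliases the GGTT address range; swapping in
 * aliasing_gtt_bind_vma below lets VMAs be bound into it alongside
 * their global binding.
 */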
3034 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
3035 struct i915_hw_ppgtt *ppgtt;
3036
3037 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
3038 if (!ppgtt)
3039 return -ENOMEM;
3040
3041 ret = __hw_ppgtt_init(dev, ppgtt);
3042 if (ret) {
3043 ppgtt->base.cleanup(&ppgtt->base);
3044 kfree(ppgtt);
3045 return ret;
3046 }
3047
3048 if (ppgtt->base.allocate_va_range)
3049 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
3050 ppgtt->base.total);
3051 if (ret) {
3052 ppgtt->base.cleanup(&ppgtt->base);
3053 kfree(ppgtt);
3054 return ret;
3055 }
3056
3057 ppgtt->base.clear_range(&ppgtt->base,
3058 ppgtt->base.start,
3059 ppgtt->base.total,
3060 true);
3061
3062 dev_priv->mm.aliasing_ppgtt = ppgtt;
3063 WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
3064 dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
3065 }
3066
3067 return 0;
3068 }
3069
3070 void i915_gem_init_global_gtt(struct drm_device *dev)
3071 {
3072 struct drm_i915_private *dev_priv = dev->dev_private;
3073 u64 gtt_size, mappable_size;
3074
3075 gtt_size = dev_priv->gtt.base.total;
3076 mappable_size = dev_priv->gtt.mappable_end;
3077
3078 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
3079 }
3080
3081 void i915_global_gtt_cleanup(struct drm_device *dev)
3082 {
3083 struct drm_i915_private *dev_priv = dev->dev_private;
3084 struct i915_address_space *vm = &dev_priv->gtt.base;
3085
3086 if (dev_priv->mm.aliasing_ppgtt) {
3087 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
3088
3089 ppgtt->base.cleanup(&ppgtt->base);
3090 kfree(ppgtt);
3091 }
3092
3093 if (drm_mm_initialized(&vm->mm)) {
3094 if (intel_vgpu_active(dev))
3095 intel_vgt_deballoon();
3096
3097 drm_mm_takedown(&vm->mm);
3098 list_del(&vm->global_link);
3099 }
3100
3101 vm->cleanup(vm);
3102 }
3103
3104 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3105 {
3106 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
3107 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
3108 return snb_gmch_ctl << 20;
3109 }
3110
3111 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
3112 {
3113 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
3114 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
3115 if (bdw_gmch_ctl)
3116 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
3117
3118 #ifdef CONFIG_X86_32
3119 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
3120 if (bdw_gmch_ctl > 4)
3121 bdw_gmch_ctl = 4;
3122 #endif
3123
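/*
 * Example: a GGMS field of 2 decodes to (1 << 2) = 4MB of GTT, i.e.
 * 4MB / 8-byte PTEs = 512K entries, which gen8_gmch_probe converts to
 * a 2GB GGTT; the clamp above keeps 32-bit kernels at that 2GB limit.
 */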
3124 return bdw_gmch_ctl << 20;
3125 }
3126
3127 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
3128 {
3129 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
3130 gmch_ctrl &= SNB_GMCH_GGMS_MASK;
3131
3132 if (gmch_ctrl)
3133 return 1 << (20 + gmch_ctrl);
3134
3135 return 0;
3136 }
3137
3138 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
3139 {
3140 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
3141 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
3142 return snb_gmch_ctl << 25; /* 32 MB units */
3143 }
3144
3145 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
3146 {
3147 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
3148 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
3149 return bdw_gmch_ctl << 25; /* 32 MB units */
3150 }
3151
3152 static size_t chv_get_stolen_size(u16 gmch_ctrl)
3153 {
3154 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
3155 gmch_ctrl &= SNB_GMCH_GMS_MASK;
3156
3157 /*
3158 * 0x0 to 0x10: 32MB increments starting at 0MB
3159 * 0x11 to 0x16: 4MB increments starting at 8MB
3160 	 * 0x17 to 0x1d: 4MB increments starting at 36MB
3161 */
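/*
 * Worked example of the decode above: gmch_ctrl == 0x11 gives
 * (0x11 - 0x11 + 2) << 22 = 8MB, and gmch_ctrl == 0x17 gives
 * (0x17 - 0x17 + 9) << 22 = 36MB.
 */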
3162 if (gmch_ctrl < 0x11)
3163 return gmch_ctrl << 25;
3164 else if (gmch_ctrl < 0x17)
3165 return (gmch_ctrl - 0x11 + 2) << 22;
3166 else
3167 return (gmch_ctrl - 0x17 + 9) << 22;
3168 }
3169
3170 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
3171 {
3172 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
3173 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
3174
3175 if (gen9_gmch_ctl < 0xf0)
3176 return gen9_gmch_ctl << 25; /* 32 MB units */
3177 else
3178 		/* 4MB increments: 0xf0 decodes to 4MB, 0xf1 to 8MB, and so on */
3179 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
3180 }
3181
3182 static int ggtt_probe_common(struct drm_device *dev,
3183 size_t gtt_size)
3184 {
3185 struct drm_i915_private *dev_priv = dev->dev_private;
3186 struct i915_page_scratch *scratch_page;
3187 phys_addr_t gtt_phys_addr;
3188
3189 	/* For modern GENs the PTEs and register space are split in the BAR */
3190 gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
3191 (pci_resource_len(dev->pdev, 0) / 2);
3192
3193 #ifdef __NetBSD__
3194 int ret;
3195 dev_priv->gtt.bst = dev->pdev->pd_pa.pa_memt;
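	/*
	 * As with the ioremap_nocache path below, Broxton maps the GTT
	 * uncached (no BUS_SPACE_MAP_PREFETCHABLE) because WC writes to
	 * the PTE range can be dropped there.
	 */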
3196 /* XXX errno NetBSD->Linux */
3197 ret = -bus_space_map(dev_priv->gtt.bst, gtt_phys_addr, gtt_size,
3198 IS_BROXTON(dev) ? 0 : BUS_SPACE_MAP_PREFETCHABLE,
3199 &dev_priv->gtt.bsh);
3200 if (ret) {
3201 DRM_ERROR("Failed to map the graphics translation table: %d\n",
3202 ret);
3203 return ret;
3204 }
3205 dev_priv->gtt.size = gtt_size;
3206 #else
3207 /*
3208 * On BXT writes larger than 64 bit to the GTT pagetable range will be
3209 * dropped. For WC mappings in general we have 64 byte burst writes
3210 * when the WC buffer is flushed, so we can't use it, but have to
3211 * resort to an uncached mapping. The WC issue is easily caught by the
3212 * readback check when writing GTT PTE entries.
3213 */
3214 if (IS_BROXTON(dev))
3215 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
3216 else
3217 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
3218 if (!dev_priv->gtt.gsm) {
3219 DRM_ERROR("Failed to map the gtt page table\n");
3220 return -ENOMEM;
3221 }
3222 #endif
3223
3224 scratch_page = alloc_scratch_page(dev);
3225 if (IS_ERR(scratch_page)) {
3226 DRM_ERROR("Scratch setup failed\n");
3227 /* iounmap will also get called at remove, but meh */
3228 #ifdef __NetBSD__
3229 bus_space_unmap(dev_priv->gtt.bst, dev_priv->gtt.bsh,
3230 dev_priv->gtt.size);
3231 #else
3232 iounmap(dev_priv->gtt.gsm);
3233 #endif
3234 return PTR_ERR(scratch_page);
3235 }
3236
3237 dev_priv->gtt.base.scratch_page = scratch_page;
3238
3239 return 0;
3240 }
3241
3242 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3243 * bits. When using advanced contexts each context stores its own PAT, but
3244 * writing this data shouldn't be harmful even in those cases. */
3245 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
3246 {
3247 uint64_t pat;
3248
3249 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
3250 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
3251 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
3252 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
3253 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
3254 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
3255 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
3256 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3257
3258 if (!USES_PPGTT(dev_priv->dev))
3259 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3260 * so RTL will always use the value corresponding to
3261 * pat_sel = 000".
3262 * So let's disable cache for GGTT to avoid screen corruptions.
3263 * MOCS still can be used though.
3264 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3265 * before this patch, i.e. the same uncached + snooping access
3266 * like on gen6/7 seems to be in effect.
3267 * - So this just fixes blitter/render access. Again it looks
3268 * like it's not just uncached access, but uncached + snooping.
3269 * So we can still hold onto all our assumptions wrt cpu
3270 * clflushing on LLC machines.
3271 */
3272 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
3273
3274 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
3275 * write would work. */
3276 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3277 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3278 }
3279
3280 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3281 {
3282 uint64_t pat;
3283
3284 /*
3285 * Map WB on BDW to snooped on CHV.
3286 *
3287 * Only the snoop bit has meaning for CHV, the rest is
3288 * ignored.
3289 *
3290 * The hardware will never snoop for certain types of accesses:
3291 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3292 * - PPGTT page tables
3293 * - some other special cycles
3294 *
3295 * As with BDW, we also need to consider the following for GT accesses:
3296 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3297 * so RTL will always use the value corresponding to
3298 * pat_sel = 000".
3299 * Which means we must set the snoop bit in PAT entry 0
3300 * in order to keep the global status page working.
3301 */
3302 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3303 GEN8_PPAT(1, 0) |
3304 GEN8_PPAT(2, 0) |
3305 GEN8_PPAT(3, 0) |
3306 GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3307 GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3308 GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3309 GEN8_PPAT(7, CHV_PPAT_SNOOP);
3310
3311 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3312 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3313 }
3314
3315 static int gen8_gmch_probe(struct drm_device *dev,
3316 u64 *gtt_total,
3317 size_t *stolen,
3318 phys_addr_t *mappable_base,
3319 u64 *mappable_end)
3320 {
3321 struct drm_i915_private *dev_priv = dev->dev_private;
3322 u64 gtt_size;
3323 u16 snb_gmch_ctl;
3324 int ret;
3325
3326 /* TODO: We're not aware of mappable constraints on gen8 yet */
3327 *mappable_base = pci_resource_start(dev->pdev, 2);
3328 *mappable_end = pci_resource_len(dev->pdev, 2);
3329
3330 #ifndef __NetBSD__
3331 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3332 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3333 #endif
3334
3335 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3336
3337 if (INTEL_INFO(dev)->gen >= 9) {
3338 *stolen = gen9_get_stolen_size(snb_gmch_ctl);
3339 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3340 } else if (IS_CHERRYVIEW(dev)) {
3341 *stolen = chv_get_stolen_size(snb_gmch_ctl);
3342 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3343 } else {
3344 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
3345 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3346 }
3347
3348 *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3349
3350 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3351 chv_setup_private_ppat(dev_priv);
3352 else
3353 bdw_setup_private_ppat(dev_priv);
3354
3355 ret = ggtt_probe_common(dev, gtt_size);
3356
3357 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3358 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
3359 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3360 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3361
3362 /* XXX 39-bit addresses? Really? See pci_set_dma_mask above... */
3363 dev_priv->gtt.max_paddr = __BITS(38, 0);
3364
3365 return ret;
3366 }
3367
3368 static int gen6_gmch_probe(struct drm_device *dev,
3369 u64 *gtt_total,
3370 size_t *stolen,
3371 phys_addr_t *mappable_base,
3372 u64 *mappable_end)
3373 {
3374 struct drm_i915_private *dev_priv = dev->dev_private;
3375 unsigned int gtt_size;
3376 u16 snb_gmch_ctl;
3377 int ret;
3378
3379 *mappable_base = pci_resource_start(dev->pdev, 2);
3380 *mappable_end = pci_resource_len(dev->pdev, 2);
3381
3382 /* 64/512MB is the current min/max we actually know of, but this is just
3383 * a coarse sanity check.
3384 */
3385 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
3386 DRM_ERROR("Unknown GMADR size (%"PRIx64")\n",
3387 dev_priv->gtt.mappable_end);
3388 return -ENXIO;
3389 }
3390
3391 #ifndef __NetBSD__
3392 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3393 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3394 #endif
3395 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3396
3397 *stolen = gen6_get_stolen_size(snb_gmch_ctl);
3398
3399 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
3400 *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3401
3402 ret = ggtt_probe_common(dev, gtt_size);
3403
3404 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3405 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
3406 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3407 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3408
3409 dev_priv->gtt.max_paddr = __BITS(39, 0);
3410
3411 return ret;
3412 }
3413
3414 static void gen6_gmch_remove(struct i915_address_space *vm)
3415 {
3416 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
3417
3418 #ifdef __NetBSD__
3419 bus_space_unmap(gtt->bst, gtt->bsh, gtt->size);
3420 #else
3421 iounmap(gtt->gsm);
3422 #endif
3423 free_scratch_page(vm->dev, vm->scratch_page);
3424 }
3425
3426 static int i915_gmch_probe(struct drm_device *dev,
3427 u64 *gtt_total,
3428 size_t *stolen,
3429 phys_addr_t *mappable_base,
3430 u64 *mappable_end)
3431 {
3432 struct drm_i915_private *dev_priv = dev->dev_private;
3433 int ret;
3434
3435 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3436 if (!ret) {
3437 DRM_ERROR("failed to set up gmch\n");
3438 return -EIO;
3439 }
3440
3441 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3442
3443 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
3444 dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
3445 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
3446 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3447 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3448
3449 if (unlikely(dev_priv->gtt.do_idle_maps))
3450 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3451
3452 if (INTEL_INFO(dev)->gen <= 2)
3453 dev_priv->gtt.max_paddr = __BITS(29, 0);
3454 else if ((INTEL_INFO(dev)->gen <= 3) ||
3455 IS_BROADWATER(dev) || IS_CRESTLINE(dev))
3456 dev_priv->gtt.max_paddr = __BITS(31, 0);
3457 else if (INTEL_INFO(dev)->gen <= 5)
3458 dev_priv->gtt.max_paddr = __BITS(35, 0);
3459 else
3460 dev_priv->gtt.max_paddr = __BITS(39, 0);
3461
3462 return 0;
3463 }
3464
3465 static void i915_gmch_remove(struct i915_address_space *vm)
3466 {
3467 intel_gmch_remove();
3468 }
3469
3470 int i915_gem_gtt_init(struct drm_device *dev)
3471 {
3472 struct drm_i915_private *dev_priv = dev->dev_private;
3473 struct i915_gtt *gtt = &dev_priv->gtt;
3474 int ret;
3475
3476 if (INTEL_INFO(dev)->gen <= 5) {
3477 gtt->gtt_probe = i915_gmch_probe;
3478 gtt->base.cleanup = i915_gmch_remove;
3479 } else if (INTEL_INFO(dev)->gen < 8) {
3480 gtt->gtt_probe = gen6_gmch_probe;
3481 gtt->base.cleanup = gen6_gmch_remove;
3482 if (IS_HASWELL(dev) && dev_priv->ellc_size)
3483 gtt->base.pte_encode = iris_pte_encode;
3484 else if (IS_HASWELL(dev))
3485 gtt->base.pte_encode = hsw_pte_encode;
3486 else if (IS_VALLEYVIEW(dev))
3487 gtt->base.pte_encode = byt_pte_encode;
3488 else if (INTEL_INFO(dev)->gen >= 7)
3489 gtt->base.pte_encode = ivb_pte_encode;
3490 else
3491 gtt->base.pte_encode = snb_pte_encode;
3492 } else {
3493 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3494 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3495 }
3496
3497 gtt->base.dev = dev;
3498
3499 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3500 			     &gtt->mappable_base, &gtt->mappable_end);
3501 if (ret)
3502 return ret;
3503
3504 #ifdef __NetBSD__
3505 dev_priv->gtt.pgfl = x86_select_freelist(dev_priv->gtt.max_paddr);
3506 ret = drm_limit_dma_space(dev, 0, dev_priv->gtt.max_paddr);
3507 if (ret) {
3508 DRM_ERROR("Unable to limit DMA paddr allocations: %d!\n", ret);
3509 		gtt->base.cleanup(&gtt->base);
3510 return ret;
3511 }
3512 #endif
3513
3514 /* GMADR is the PCI mmio aperture into the global GTT. */
3515 DRM_INFO("Memory usable by graphics device = %"PRIu64"M\n",
3516 gtt->base.total >> 20);
3517 DRM_DEBUG_DRIVER("GMADR size = %"PRId64"M\n", gtt->mappable_end >> 20);
3518 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
3519 #ifdef CONFIG_INTEL_IOMMU
3520 if (intel_iommu_gfx_mapped)
3521 DRM_INFO("VT-d active for gfx access\n");
3522 #endif
3523 /*
3524 * i915.enable_ppgtt is read-only, so do an early pass to validate the
3525 * user's requested state against the hardware/driver capabilities. We
3526 * do this now so that we can print out any log messages once rather
3527 * than every time we check intel_enable_ppgtt().
3528 */
3529 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3530 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3531
3532 return 0;
3533 }
3534
3535 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3536 {
3537 struct drm_i915_private *dev_priv = dev->dev_private;
3538 struct drm_i915_gem_object *obj;
3539 struct i915_address_space *vm;
3540 struct i915_vma *vma;
3541 bool flush;
3542
3543 i915_check_and_clear_faults(dev);
3544
3545 /* First fill our portion of the GTT with scratch pages */
3546 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3547 dev_priv->gtt.base.start,
3548 dev_priv->gtt.base.total,
3549 true);
3550
3551 /* Cache flush objects bound into GGTT and rebind them. */
3552 vm = &dev_priv->gtt.base;
3553 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3554 flush = false;
3555 list_for_each_entry(vma, &obj->vma_list, vma_link) {
3556 if (vma->vm != vm)
3557 continue;
3558
3559 WARN_ON(i915_vma_bind(vma, obj->cache_level,
3560 PIN_UPDATE));
3561
3562 flush = true;
3563 }
3564
3565 if (flush)
3566 i915_gem_clflush_object(obj, obj->pin_display);
3567 }
3568
3569 if (INTEL_INFO(dev)->gen >= 8) {
3570 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3571 chv_setup_private_ppat(dev_priv);
3572 else
3573 bdw_setup_private_ppat(dev_priv);
3574
3575 return;
3576 }
3577
3578 if (USES_PPGTT(dev)) {
3579 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3580 /* TODO: Perhaps it shouldn't be gen6 specific */
3581
3582 struct i915_hw_ppgtt *ppgtt =
3583 container_of(vm, struct i915_hw_ppgtt,
3584 base);
3585
3586 if (i915_is_ggtt(vm))
3587 ppgtt = dev_priv->mm.aliasing_ppgtt;
3588
3589 gen6_write_page_range(dev_priv, &ppgtt->pd,
3590 0, ppgtt->base.total);
3591 }
3592 }
3593
3594 i915_ggtt_flush(dev_priv);
3595 }
3596
3597 static struct i915_vma *
3598 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
3599 struct i915_address_space *vm,
3600 const struct i915_ggtt_view *ggtt_view)
3601 {
3602 struct i915_vma *vma;
3603
3604 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3605 return ERR_PTR(-EINVAL);
3606
3607 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3608 if (vma == NULL)
3609 return ERR_PTR(-ENOMEM);
3610
3611 INIT_LIST_HEAD(&vma->vma_link);
3612 INIT_LIST_HEAD(&vma->mm_list);
3613 INIT_LIST_HEAD(&vma->exec_list);
3614 vma->vm = vm;
3615 vma->obj = obj;
3616
3617 if (i915_is_ggtt(vm))
3618 vma->ggtt_view = *ggtt_view;
3619
3620 list_add_tail(&vma->vma_link, &obj->vma_list);
3621 if (!i915_is_ggtt(vm))
3622 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3623
3624 return vma;
3625 }
3626
3627 struct i915_vma *
3628 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3629 struct i915_address_space *vm)
3630 {
3631 struct i915_vma *vma;
3632
3633 vma = i915_gem_obj_to_vma(obj, vm);
3634 if (!vma)
3635 vma = __i915_gem_vma_create(obj, vm,
3636 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3637
3638 return vma;
3639 }
3640
3641 struct i915_vma *
3642 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3643 const struct i915_ggtt_view *view)
3644 {
3645 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3646 struct i915_vma *vma;
3647
3648 if (WARN_ON(!view))
3649 return ERR_PTR(-EINVAL);
3650
3651 vma = i915_gem_obj_to_ggtt_view(obj, view);
3652
3653 if (IS_ERR(vma))
3654 return vma;
3655
3656 if (!vma)
3657 vma = __i915_gem_vma_create(obj, ggtt, view);
3658
3659 return vma;
3660
3661 }
3662
3663 #ifndef __NetBSD__
3664 static struct scatterlist *
3665 rotate_pages(dma_addr_t *in, unsigned int offset,
3666 unsigned int width, unsigned int height,
3667 struct sg_table *st, struct scatterlist *sg)
3668 {
3669 unsigned int column, row;
3670 unsigned int src_idx;
3671
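/*
 * Walk the source column by column; within each column start at the
 * bottom row (src_idx = width * (height - 1) + column) and step up one
 * row per entry (src_idx -= width), so the output sg list holds the
 * pages in 90-degree-rotated order.
 */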
3672 if (!sg) {
3673 st->nents = 0;
3674 sg = st->sgl;
3675 }
3676
3677 for (column = 0; column < width; column++) {
3678 src_idx = width * (height - 1) + column;
3679 for (row = 0; row < height; row++) {
3680 st->nents++;
3681 /* We don't need the pages, but need to initialize
3682 * the entries so the sg list can be happily traversed.
3683 			 * All we need are the DMA addresses.
3684 */
3685 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3686 sg_dma_address(sg) = in[offset + src_idx];
3687 sg_dma_len(sg) = PAGE_SIZE;
3688 sg = sg_next(sg);
3689 src_idx -= width;
3690 }
3691 }
3692
3693 return sg;
3694 }
3695
3696 static struct sg_table *
3697 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
3698 struct drm_i915_gem_object *obj)
3699 {
3700 struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
3701 unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3702 unsigned int size_pages_uv;
3703 struct sg_page_iter sg_iter;
3704 unsigned long i;
3705 dma_addr_t *page_addr_list;
3706 struct sg_table *st;
3707 unsigned int uv_start_page;
3708 struct scatterlist *sg;
3709 int ret = -ENOMEM;
3710
3711 /* Allocate a temporary list of source pages for random access. */
3712 page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3713 sizeof(dma_addr_t));
3714 if (!page_addr_list)
3715 return ERR_PTR(ret);
3716
3717 /* Account for UV plane with NV12. */
3718 if (rot_info->pixel_format == DRM_FORMAT_NV12)
3719 size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3720 else
3721 size_pages_uv = 0;
3722
3723 /* Allocate target SG list. */
3724 st = kmalloc(sizeof(*st), GFP_KERNEL);
3725 if (!st)
3726 goto err_st_alloc;
3727
3728 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3729 if (ret)
3730 goto err_sg_alloc;
3731
3732 /* Populate source page list from the object. */
3733 i = 0;
3734 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3735 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3736 i++;
3737 }
3738
3739 /* Rotate the pages. */
3740 sg = rotate_pages(page_addr_list, 0,
3741 rot_info->width_pages, rot_info->height_pages,
3742 st, NULL);
3743
3744 /* Append the UV plane if NV12. */
3745 if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3746 uv_start_page = size_pages;
3747
3748 /* Check for tile-row un-alignment. */
3749 if (offset_in_page(rot_info->uv_offset))
3750 uv_start_page--;
3751
3752 rot_info->uv_start_page = uv_start_page;
3753
3754 rotate_pages(page_addr_list, uv_start_page,
3755 rot_info->width_pages_uv,
3756 rot_info->height_pages_uv,
3757 st, sg);
3758 }
3759
3760 DRM_DEBUG_KMS(
3761 "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3762 obj->base.size, rot_info->pitch, rot_info->height,
3763 rot_info->pixel_format, rot_info->width_pages,
3764 rot_info->height_pages, size_pages + size_pages_uv,
3765 size_pages);
3766
3767 drm_free_large(page_addr_list);
3768
3769 return st;
3770
3771 err_sg_alloc:
3772 kfree(st);
3773 err_st_alloc:
3774 drm_free_large(page_addr_list);
3775
3776 DRM_DEBUG_KMS(
3777 "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3778 obj->base.size, ret, rot_info->pitch, rot_info->height,
3779 rot_info->pixel_format, rot_info->width_pages,
3780 rot_info->height_pages, size_pages + size_pages_uv,
3781 size_pages);
3782 return ERR_PTR(ret);
3783 }
3784
3785 static struct sg_table *
3786 intel_partial_pages(const struct i915_ggtt_view *view,
3787 struct drm_i915_gem_object *obj)
3788 {
3789 struct sg_table *st;
3790 struct scatterlist *sg;
3791 struct sg_page_iter obj_sg_iter;
3792 int ret = -ENOMEM;
3793
3794 st = kmalloc(sizeof(*st), GFP_KERNEL);
3795 if (!st)
3796 goto err_st_alloc;
3797
3798 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3799 if (ret)
3800 goto err_sg_alloc;
3801
3802 sg = st->sgl;
3803 st->nents = 0;
3804 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3805 view->params.partial.offset)
3806 {
3807 if (st->nents >= view->params.partial.size)
3808 break;
3809
3810 sg_set_page(sg, NULL, PAGE_SIZE, 0);
3811 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3812 sg_dma_len(sg) = PAGE_SIZE;
3813
3814 sg = sg_next(sg);
3815 st->nents++;
3816 }
3817
3818 return st;
3819
3820 err_sg_alloc:
3821 kfree(st);
3822 err_st_alloc:
3823 return ERR_PTR(ret);
3824 }
3825 #endif
3826
3827 static int
3828 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3829 {
3830 int ret = 0;
3831
3832 if (vma->ggtt_view.pages)
3833 return 0;
3834
3835 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3836 vma->ggtt_view.pages = vma->obj->pages;
3837 #ifndef __NetBSD__
3838 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3839 vma->ggtt_view.pages =
3840 intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
3841 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3842 vma->ggtt_view.pages =
3843 intel_partial_pages(&vma->ggtt_view, vma->obj);
3844 #endif
3845 else
3846 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3847 vma->ggtt_view.type);
3848
3849 if (!vma->ggtt_view.pages) {
3850 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3851 vma->ggtt_view.type);
3852 ret = -EINVAL;
3853 } else if (IS_ERR(vma->ggtt_view.pages)) {
3854 ret = PTR_ERR(vma->ggtt_view.pages);
3855 vma->ggtt_view.pages = NULL;
3856 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3857 vma->ggtt_view.type, ret);
3858 }
3859
3860 return ret;
3861 }
3862
3863 /**
3864  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3865 * @vma: VMA to map
3866 * @cache_level: mapping cache level
3867 * @flags: flags like global or local mapping
3868 *
3869 * DMA addresses are taken from the scatter-gather table of this object (or of
3870  * this VMA in the case of non-default GGTT views) and the PTE entries are set up.
3871 * Note that DMA addresses are also the only part of the SG table we care about.
3872 */
3873 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3874 u32 flags)
3875 {
3876 int ret;
3877 u32 bind_flags;
3878
3879 if (WARN_ON(flags == 0))
3880 return -EINVAL;
3881
3882 bind_flags = 0;
3883 if (flags & PIN_GLOBAL)
3884 bind_flags |= GLOBAL_BIND;
3885 if (flags & PIN_USER)
3886 bind_flags |= LOCAL_BIND;
3887
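/*
 * PIN_UPDATE rewrites mappings that are already bound; otherwise only
 * bind the flavours that are not yet bound.
 */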
3888 if (flags & PIN_UPDATE)
3889 bind_flags |= vma->bound;
3890 else
3891 bind_flags &= ~vma->bound;
3892
3893 if (bind_flags == 0)
3894 return 0;
3895
3896 if (vma->bound == 0 && vma->vm->allocate_va_range) {
3897 trace_i915_va_alloc(vma->vm,
3898 vma->node.start,
3899 vma->node.size,
3900 VM_TO_TRACE_NAME(vma->vm));
3901
3902 /* XXX: i915_vma_pin() will fix this +- hack */
3903 vma->pin_count++;
3904 ret = vma->vm->allocate_va_range(vma->vm,
3905 vma->node.start,
3906 vma->node.size);
3907 vma->pin_count--;
3908 if (ret)
3909 return ret;
3910 }
3911
3912 ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3913 if (ret)
3914 return ret;
3915
3916 vma->bound |= bind_flags;
3917
3918 return 0;
3919 }
3920
3921 /**
3922 * i915_ggtt_view_size - Get the size of a GGTT view.
3923 * @obj: Object the view is of.
3924 * @view: The view in question.
3925 *
3926 * @return The size of the GGTT view in bytes.
3927 */
3928 size_t
3929 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3930 const struct i915_ggtt_view *view)
3931 {
3932 if (view->type == I915_GGTT_VIEW_NORMAL) {
3933 return obj->base.size;
3934 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3935 return view->rotation_info.size;
3936 } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3937 return view->params.partial.size << PAGE_SHIFT;
3938 } else {
3939 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3940 return obj->base.size;
3941 }
3942 }
3943