i915_gem_gtt.c revision 1.3.6.1
      1 /*
      2  * Copyright © 2010 Daniel Vetter
      3  * Copyright © 2011-2014 Intel Corporation
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  *
     24  */
     25 
     26 #include <linux/err.h>
     27 #include <linux/seq_file.h>
     28 #include <drm/drmP.h>
     29 #include <drm/i915_drm.h>
     30 #include "i915_drv.h"
     31 #include "i915_trace.h"
     32 #include "intel_drv.h"
     33 
     34 #ifdef __NetBSD__
     35 #include <x86/machdep.h>
     36 #include <x86/pte.h>
     37 #define	_PAGE_PRESENT	PG_V	/* 0x01 PTE is present / valid */
     38 #define	_PAGE_RW	PG_RW	/* 0x02 read/write */
     39 #define	_PAGE_PWT	PG_WT	/* 0x08 write-through */
     40 #define	_PAGE_PCD	PG_N	/* 0x10 page cache disabled / non-cacheable */
     41 #define	_PAGE_PAT	PG_PAT	/* 0x80 page attribute table on PTE */
     42 #endif
     43 
     44 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
     45 
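         /*
          * Semantics of i915.enable_ppgtt as interpreted here: 0 disables
          * PPGTT, 1 permits only the aliasing PPGTT, and 2 additionally
          * permits full PPGTT where the hardware supports it.
          */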
     46 bool intel_enable_ppgtt(struct drm_device *dev, bool full)
     47 {
     48 	if (i915.enable_ppgtt == 0)
     49 		return false;
     50 
     51 	if (i915.enable_ppgtt == 1 && full)
     52 		return false;
     53 
     54 	return true;
     55 }
     56 
     57 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
     58 {
     59 	if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
     60 		return 0;
     61 
     62 	if (enable_ppgtt == 1)
     63 		return 1;
     64 
     65 	if (enable_ppgtt == 2 && HAS_PPGTT(dev))
     66 		return 2;
     67 
     68 #ifdef CONFIG_INTEL_IOMMU
     69 	/* Disable ppgtt on SNB if VT-d is on. */
     70 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
     71 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
     72 		return 0;
     73 	}
     74 #endif
     75 
     76 	return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
     77 }
     78 
     79 #define GEN6_PPGTT_PD_ENTRIES 512
     80 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
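         /*
          * With 4-byte gen6 PTEs this works out to 512 PDEs x 1024 PTEs x 4 KB
          * = 2 GB of PPGTT address space, matching the PP_DIR_DCLV_2G value
          * programmed by the mm_switch functions below.
          */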
     81 typedef uint64_t gen8_gtt_pte_t;
     82 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
     83 
     84 /* PPGTT stuff */
     85 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
     86 #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
     87 
     88 #define GEN6_PDE_VALID			(1 << 0)
      89 /* gen6+ has PTE bits 11:4 for physical addr bits 39:32 */
     90 #define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
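         /*
          * Worked example (address chosen for illustration only): for addr ==
          * 0x123456000, ((addr >> 28) & 0xff0) == 0x010, i.e. physical address
          * bit 32 is folded into PTE bit 4, while bits 31:12 of the address
          * remain in place as the page frame of the 32-bit PTE.
          */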
     91 
     92 #define GEN6_PTE_VALID			(1 << 0)
     93 #define GEN6_PTE_UNCACHED		(1 << 1)
     94 #define HSW_PTE_UNCACHED		(0)
     95 #define GEN6_PTE_CACHE_LLC		(2 << 1)
     96 #define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
     97 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
     98 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
     99 
    100 /* Cacheability Control is a 4-bit value. The low three bits are stored in *
    101  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
    102  */
    103 #define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
    104 					 (((bits) & 0x8) << (11 - 3)))
    105 #define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
    106 #define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
    107 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
    108 #define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
    109 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
    110 #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
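         /*
          * Example expansion, purely from the macro above:
          * HSW_CACHEABILITY_CONTROL(0xb) == ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8)
          *                               == 0x006 | 0x800 == 0x806,
          * so the low three bits land in PTE bits 3:1 and the fourth in bit 11.
          */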
    111 
    112 #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
    113 #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
    114 
     115 /* GEN8 legacy style address is defined as a 3 level page table:
    116  * 31:30 | 29:21 | 20:12 |  11:0
    117  * PDPE  |  PDE  |  PTE  | offset
    118  * The difference as compared to normal x86 3 level page table is the PDPEs are
    119  * programmed via register.
    120  */
    121 #define GEN8_PDPE_SHIFT			30
    122 #define GEN8_PDPE_MASK			0x3
    123 #define GEN8_PDE_SHIFT			21
    124 #define GEN8_PDE_MASK			0x1ff
    125 #define GEN8_PTE_SHIFT			12
    126 #define GEN8_PTE_MASK			0x1ff
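         /*
          * Worked example (address chosen for illustration only): GPU virtual
          * address 0x45678000 splits into pdpe 1, pde 0x2b, pte 0x78 and page
          * offset 0, which is exactly the decomposition performed by
          * gen8_ppgtt_clear_range() and gen8_ppgtt_insert_entries() below.
          */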
    127 
    128 #define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
    129 #define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
     130 #define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLC/eLLC */
    131 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
    132 
    133 static void ppgtt_bind_vma(struct i915_vma *vma,
    134 			   enum i915_cache_level cache_level,
    135 			   u32 flags);
    136 static void ppgtt_unbind_vma(struct i915_vma *vma);
    137 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
    138 
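         /*
          * Gen8 PTE/PDE encoding: a valid entry is marked present and
          * writable, and the PPAT index selects write-back caching for
          * anything other than I915_CACHE_NONE (uncached otherwise).
          */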
    139 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
    140 					     enum i915_cache_level level,
    141 					     bool valid)
    142 {
    143 	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
    144 	pte |= addr;
    145 	if (level != I915_CACHE_NONE)
    146 		pte |= PPAT_CACHED_INDEX;
    147 	else
    148 		pte |= PPAT_UNCACHED_INDEX;
    149 	return pte;
    150 }
    151 
    152 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
    153 					     dma_addr_t addr,
    154 					     enum i915_cache_level level)
    155 {
    156 	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
    157 	pde |= addr;
    158 	if (level != I915_CACHE_NONE)
    159 		pde |= PPAT_CACHED_PDE_INDEX;
    160 	else
    161 		pde |= PPAT_UNCACHED_INDEX;
    162 	return pde;
    163 }
    164 
    165 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
    166 				     enum i915_cache_level level,
    167 				     bool valid)
    168 {
    169 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
    170 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
    171 
    172 	switch (level) {
    173 	case I915_CACHE_L3_LLC:
    174 	case I915_CACHE_LLC:
    175 		pte |= GEN6_PTE_CACHE_LLC;
    176 		break;
    177 	case I915_CACHE_NONE:
    178 		pte |= GEN6_PTE_UNCACHED;
    179 		break;
    180 	default:
    181 		WARN_ON(1);
    182 	}
    183 
    184 	return pte;
    185 }
    186 
    187 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
    188 				     enum i915_cache_level level,
    189 				     bool valid)
    190 {
    191 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
    192 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
    193 
    194 	switch (level) {
    195 	case I915_CACHE_L3_LLC:
    196 		pte |= GEN7_PTE_CACHE_L3_LLC;
    197 		break;
    198 	case I915_CACHE_LLC:
    199 		pte |= GEN6_PTE_CACHE_LLC;
    200 		break;
    201 	case I915_CACHE_NONE:
    202 		pte |= GEN6_PTE_UNCACHED;
    203 		break;
    204 	default:
    205 		WARN_ON(1);
    206 	}
    207 
    208 	return pte;
    209 }
    210 
    211 #define BYT_PTE_WRITEABLE		(1 << 1)
    212 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
    213 
    214 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
    215 				     enum i915_cache_level level,
    216 				     bool valid)
    217 {
    218 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
    219 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
    220 
    221 	/* Mark the page as writeable.  Other platforms don't have a
    222 	 * setting for read-only/writable, so this matches that behavior.
    223 	 */
    224 	pte |= BYT_PTE_WRITEABLE;
    225 
    226 	if (level != I915_CACHE_NONE)
    227 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
    228 
    229 	return pte;
    230 }
    231 
    232 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
    233 				     enum i915_cache_level level,
    234 				     bool valid)
    235 {
    236 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
    237 	pte |= HSW_PTE_ADDR_ENCODE(addr);
    238 
    239 	if (level != I915_CACHE_NONE)
    240 		pte |= HSW_WB_LLC_AGE3;
    241 
    242 	return pte;
    243 }
    244 
    245 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
    246 				      enum i915_cache_level level,
    247 				      bool valid)
    248 {
    249 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
    250 	pte |= HSW_PTE_ADDR_ENCODE(addr);
    251 
    252 	switch (level) {
    253 	case I915_CACHE_NONE:
    254 		break;
    255 	case I915_CACHE_WT:
    256 		pte |= HSW_WT_ELLC_LLC_AGE3;
    257 		break;
    258 	default:
    259 		pte |= HSW_WB_ELLC_LLC_AGE3;
    260 		break;
    261 	}
    262 
    263 	return pte;
    264 }
    265 
    266 /* Broadwell Page Directory Pointer Descriptors */
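         /*
          * Load one of the ring's four PDP register pairs, either with direct
          * MMIO writes (the synchronous path) or by emitting
          * MI_LOAD_REGISTER_IMM commands on the ring itself.
          */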
    267 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
    268 			   uint64_t val, bool synchronous)
    269 {
    270 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
    271 	int ret;
    272 
    273 	BUG_ON(entry >= 4);
    274 
    275 	if (synchronous) {
    276 		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
    277 		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
    278 		return 0;
    279 	}
    280 
    281 	ret = intel_ring_begin(ring, 6);
    282 	if (ret)
    283 		return ret;
    284 
    285 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
    286 	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
    287 	intel_ring_emit(ring, (u32)(val >> 32));
    288 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
    289 	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
    290 	intel_ring_emit(ring, (u32)(val));
    291 	intel_ring_advance(ring);
    292 
    293 	return 0;
    294 }
    295 
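         /*
          * Point the ring's PDP registers at this PPGTT's page-directory
          * pages, highest-numbered directory first.
          */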
    296 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
    297 			  struct intel_ring_buffer *ring,
    298 			  bool synchronous)
    299 {
    300 	int i, ret;
    301 
    302 	/* bit of a hack to find the actual last used pd */
    303 	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
    304 
    305 	for (i = used_pd - 1; i >= 0; i--) {
    306 #ifdef __NetBSD__
    307 		const bus_addr_t addr =
    308 		    ppgtt->u.gen8->pd_map->dm_segs[i].ds_addr;
    309 #else
    310 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
    311 #endif
    312 		ret = gen8_write_pdp(ring, i, addr, synchronous);
    313 		if (ret)
    314 			return ret;
    315 	}
    316 
    317 	return 0;
    318 }
    319 
    320 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
    321 				   uint64_t start,
    322 				   uint64_t length,
    323 				   bool use_scratch)
    324 {
    325 	struct i915_hw_ppgtt *ppgtt =
    326 		container_of(vm, struct i915_hw_ppgtt, base);
    327 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
    328 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
    329 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
    330 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
    331 	unsigned num_entries = length >> PAGE_SHIFT;
    332 	unsigned last_pte, i;
    333 #ifdef __NetBSD__
    334 	void *kva;
    335 	int ret;
    336 #endif
    337 
    338 	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
    339 				      I915_CACHE_LLC, use_scratch);
    340 
    341 	while (num_entries) {
    342 #ifndef __NetBSD__
    343 		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
    344 #endif
    345 
    346 		last_pte = pte + num_entries;
    347 		if (last_pte > GEN8_PTES_PER_PAGE)
    348 			last_pte = GEN8_PTES_PER_PAGE;
    349 
    350 #ifdef __NetBSD__
    351 		/* XXX errno NetBSD->Linux */
    352 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
    353 		    &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1, PAGE_SIZE, &kva,
    354 		    BUS_DMA_NOWAIT);
    355 		if (ret) {
    356 			/*
    357 			 * XXX Should guarantee mapping earlier with
    358 			 * uvm_emap(9) or something.
    359 			 */
    360 			device_printf(ppgtt->base.dev->dev,
    361 			    "failed to map page table: %d\n", -ret);
    362 			goto skip;
    363 		}
    364 		pt_vaddr = kva;
    365 #else
    366 		pt_vaddr = kmap_atomic(page_table);
    367 #endif
    368 
    369 		for (i = pte; i < last_pte; i++) {
    370 			pt_vaddr[i] = scratch_pte;
    371 			num_entries--;
    372 		}
    373 
    374 #ifdef __NetBSD__
    375 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
    376 skip:;
    377 #else
    378 		kunmap_atomic(pt_vaddr);
    379 #endif
    380 
    381 		pte = 0;
    382 		if (++pde == GEN8_PDES_PER_PAGE) {
    383 			pdpe++;
    384 			pde = 0;
    385 		}
    386 	}
    387 }
    388 
    389 #ifdef __NetBSD__
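         /*
          * NetBSD note: instead of walking an sg_table, this variant walks the
          * object's DMA map, whose segments are all page-sized here, mapping
          * one page-table page at a time with bus_dmamem_map(9) and writing
          * one PTE per segment.
          */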
    390 static void
    391 gen8_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
    392     uint64_t start, enum i915_cache_level cache_level)
    393 {
    394 	struct i915_hw_ppgtt *ppgtt =
    395 		container_of(vm, struct i915_hw_ppgtt, base);
    396 	gen8_gtt_pte_t *pt_vaddr;
    397 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
    398 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
    399 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
    400 	unsigned seg;
    401 	void *kva;
    402 	int ret;
    403 
    404 	pt_vaddr = NULL;
    405 	KASSERT(0 < dmamap->dm_nsegs);
    406 	for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
    407 		KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
    408 		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
    409 			break;
    410 		if (pt_vaddr == NULL) {
    411 			/* XXX errno NetBSD->Linux */
    412 			ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
    413 			    &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1,
    414 			    PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
    415 			if (ret) {
    416 				/*
    417 				 * XXX Should guarantee mapping earlier
    418 				 * with uvm_emap(9) or something.
    419 				 */
    420 				device_printf(ppgtt->base.dev->dev,
    421 				    "failed to map page table: %d\n", -ret);
    422 				goto skip;
    423 			}
    424 			pt_vaddr = kva;
    425 		}
    426 		pt_vaddr[pte] = gen8_pte_encode(dmamap->dm_segs[seg].ds_addr,
    427 		    cache_level, true);
    428 skip:		if (++pte == GEN8_PTES_PER_PAGE) {
    429 			bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
    430 			    PAGE_SIZE);
    431 			pt_vaddr = NULL;
    432 			if (++pde == GEN8_PDES_PER_PAGE) {
    433 				pdpe++;
    434 				pde = 0;
    435 			}
    436 			pte = 0;
    437 		}
    438 	}
    439 	if (pt_vaddr)
    440 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
    441 }
    442 
    443 static void	gen8_ppgtt_cleanup(struct i915_address_space *);
    444 static int	gen8_ppgtt_alloc(struct i915_hw_ppgtt *, unsigned);
    445 static void	gen8_ppgtt_free(struct i915_hw_ppgtt *);
    446 static int	gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *,
    447 		    unsigned);
    448 static void	gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
    449 static int	gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *,
    450 		    unsigned);
    451 static void	gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
    452 
    453 static int
    454 gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
    455 {
    456 	const unsigned npdp = DIV_ROUND_UP(size, (1U << 30));
    457 	const unsigned min_pt_pages = (GEN8_PDES_PER_PAGE * npdp);
    458 	unsigned i, j;
    459 	int ret;
    460 
    461 	/* Allocate the PPGTT structures.  */
    462 	ret = gen8_ppgtt_alloc(ppgtt, npdp);
    463 	if (ret)
    464 		goto fail0;
    465 
    466 	/* Fill the page directory entries.  */
    467 	for (i = 0; i < npdp; i++) {
    468 		void *kva;
    469 
    470 		/* XXX errno NetBSD->Linux */
    471 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
    472 		    &ppgtt->u.gen8->pd_segs[i], 1, PAGE_SIZE, &kva,
    473 		    BUS_DMA_WAITOK);
    474 		if (ret)
    475 			goto fail1;
    476 
    477 		gen8_ppgtt_pde_t *const pd = kva;
    478 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
    479 			const bus_dma_segment_t *const seg =
    480 			    &ppgtt->u.gen8->pd[i].pt_segs[j];
    481 			KASSERT(seg->ds_len == PAGE_SIZE);
    482 			pd[j] = gen8_pde_encode(ppgtt->base.dev, seg->ds_addr,
    483 			    I915_CACHE_LLC);
    484 		}
    485 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
    486 	}
    487 
    488 	ppgtt->enable = gen8_ppgtt_enable;
    489 	ppgtt->switch_mm = gen8_mm_switch;
    490 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
    491 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
    492 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
    493 	ppgtt->base.start = 0;
    494 	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
    495 
    496 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
    497 
    498 	DRM_DEBUG_DRIVER("Allocated %u pages for page directories\n", npdp);
    499 	DRM_DEBUG_DRIVER("Allocated %u pages for page tables"
    500 	    " (%"PRIxMAX" wasted)\n",
    501 	    ppgtt->num_pd_entries,
    502 	    ((uintmax_t)(ppgtt->num_pd_entries - min_pt_pages) +
    503 		(size % (1<<30))));
    504 
    505 	/* Success!  */
    506 	return 0;
    507 
    508 fail1:	gen8_ppgtt_free(ppgtt);
    509 fail0:	KASSERT(ret);
    510 	return ret;
    511 }
    512 
    513 static void
    514 gen8_ppgtt_cleanup(struct i915_address_space *vm)
    515 {
    516 	struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt,
    517 	    base);
    518 
    519 	list_del(&vm->global_link);
    520 	drm_mm_takedown(&vm->mm);
    521 
    522 	gen8_ppgtt_free(ppgtt);
    523 }
    524 
    525 static int
    526 gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
    527 {
    528 	int ret;
    529 
    530 	/*
    531 	 * XXX This is a very large (48 MB) allocation!  However, it
    532 	 * can't really be made smaller than 8 MB, since we need a
    533 	 * contiguous array of DMA segments for the page tables.  I
    534 	 * expect this to be used mainly on machines with lots of
    535 	 * memory, so...
    536 	 */
    537 	ppgtt->u.gen8 = kmem_alloc(sizeof(*ppgtt->u.gen8), KM_SLEEP);
    538 
    539 	ret = gen8_ppgtt_allocate_page_directories(ppgtt, npdp);
    540 	if (ret)
    541 		goto fail0;
    542 	ppgtt->num_pd_entries = (npdp * GEN8_PDES_PER_PAGE);
    543 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, npdp);
    544 	if (ret)
    545 		goto fail1;
    546 
    547 	/* Success!  */
    548 	return 0;
    549 
    550 fail2: __unused
    551 	gen8_ppgtt_free_page_tables(ppgtt);
    552 fail1:	gen8_ppgtt_free_page_directories(ppgtt);
    553 fail0:	KASSERT(ret);
    554 	kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
    555 	return ret;
    556 }
    557 
    558 static void
    559 gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
    560 {
    561 
    562 	gen8_ppgtt_free_page_tables(ppgtt);
    563 	gen8_ppgtt_free_page_directories(ppgtt);
    564 	kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
    565 }
    566 
    567 static int
    568 gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
    569     unsigned npdp)
    570 {
    571 	const bus_size_t pd_bytes = (npdp << PAGE_SHIFT);
    572 	const int nsegs = npdp;
    573 	int rsegs;
    574 	int ret;
    575 
    576 	ppgtt->u.gen8->npdp = npdp;
    577 
    578 	KASSERT(nsegs <= GEN8_LEGACY_PDPS);
    579 	CTASSERT(GEN8_LEGACY_PDPS == __arraycount(ppgtt->u.gen8->pd_segs));
    580 
    581 	/* XXX errno NetBSD->Linux */
    582 	ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat, pd_bytes, PAGE_SIZE,
    583 	    PAGE_SIZE, ppgtt->u.gen8->pd_segs, nsegs, &rsegs, BUS_DMA_WAITOK);
    584 	if (ret)
    585 		goto fail0;
    586 	KASSERT(rsegs == nsegs);
    587 
    588 	/* XXX errno NetBSD->Linux */
    589 	ret = -bus_dmamap_create(ppgtt->base.dev->dmat, pd_bytes, nsegs,
    590 	    PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen8->pd_map);
    591 	if (ret)
    592 		goto fail1;
    593 
    594 	/* XXX errno NetBSD->Linux */
    595 	ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
    596 	    ppgtt->u.gen8->pd_map, ppgtt->u.gen8->pd_segs, nsegs, pd_bytes,
    597 	    BUS_DMA_WAITOK);
    598 	if (ret)
    599 		goto fail2;
    600 
    601 	/* Success!  */
    602 	return 0;
    603 
    604 fail3: __unused
    605 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
    606 fail2:	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
    607 fail1:	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
    608 	    ppgtt->u.gen8->npdp);
    609 fail0:	KASSERT(ret);
    610 	return ret;
    611 }
    612 
    613 static void
    614 gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
    615 {
    616 
    617 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
    618 	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
    619 	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
    620 	    ppgtt->u.gen8->npdp);
    621 }
    622 
    623 static int
    624 gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
    625 {
    626 	unsigned i, j;
    627 	int rsegs;
    628 	int ret;
    629 
    630 	for (i = 0; i < npdp; i++) {
    631 		CTASSERT(__arraycount(ppgtt->u.gen8->pd[i].pt_segs) ==
    632 		    GEN8_PDES_PER_PAGE);
    633 		/* XXX errno NetBSD->Linux */
    634 		ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
    635 		    (PAGE_SIZE * GEN8_PDES_PER_PAGE), PAGE_SIZE, PAGE_SIZE,
    636 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE, &rsegs,
    637 		    BUS_DMA_WAITOK);
    638 		if (ret)
    639 			goto fail0;
    640 		KASSERT(rsegs == GEN8_PDES_PER_PAGE);
    641 		/* XXX errno NetBSD->Linux */
    642 		ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
    643 		    (PAGE_SIZE * GEN8_PDES_PER_PAGE), GEN8_PDES_PER_PAGE,
    644 		    PAGE_SIZE, 0, BUS_DMA_WAITOK,
    645 		    &ppgtt->u.gen8->pd[i].pt_map);
    646 		if (ret)
    647 			goto fail1;
    648 		/* XXX errno NetBSD->Linux */
    649 		ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
    650 		    ppgtt->u.gen8->pd[i].pt_map, ppgtt->u.gen8->pd[i].pt_segs,
    651 		    GEN8_PDES_PER_PAGE, PAGE_SIZE, BUS_DMA_WAITOK);
    652 		if (ret)
    653 			goto fail2;
    654 		continue;
    655 
    656 fail3: __unused
    657 		bus_dmamap_unload(ppgtt->base.dev->dmat,
    658 		    ppgtt->u.gen8->pd[i].pt_map);
    659 fail2:		bus_dmamap_destroy(ppgtt->base.dev->dmat,
    660 		    ppgtt->u.gen8->pd[i].pt_map);
    661 fail1:		bus_dmamem_free(ppgtt->base.dev->dmat,
    662 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
    663 fail0:		goto fail;
    664 	}
    665 
    666 	/* Success!  */
    667 	return 0;
    668 
    669 fail:	KASSERT(ret);
    670 	for (j = 0; j < i; j++) {
    671 		bus_dmamap_unload(ppgtt->base.dev->dmat,
    672 		    ppgtt->u.gen8->pd[j].pt_map);
    673 		bus_dmamap_destroy(ppgtt->base.dev->dmat,
    674 		    ppgtt->u.gen8->pd[j].pt_map);
    675 		bus_dmamem_free(ppgtt->base.dev->dmat,
    676 		    ppgtt->u.gen8->pd[j].pt_segs, GEN8_PDES_PER_PAGE);
    677 	}
    678 	return ret;
    679 }
    680 
    681 static void
    682 gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
    683 {
    684 	unsigned i;
    685 
    686 	for (i = 0; i < ppgtt->u.gen8->npdp; i++) {
    687 		bus_dmamap_unload(ppgtt->base.dev->dmat,
    688 		    ppgtt->u.gen8->pd[i].pt_map);
    689 		bus_dmamap_destroy(ppgtt->base.dev->dmat,
    690 		    ppgtt->u.gen8->pd[i].pt_map);
    691 		bus_dmamem_free(ppgtt->base.dev->dmat,
    692 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
    693 	}
    694 }
    695 #else
    696 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
    697 				      struct sg_table *pages,
    698 				      uint64_t start,
    699 				      enum i915_cache_level cache_level)
    700 {
    701 	struct i915_hw_ppgtt *ppgtt =
    702 		container_of(vm, struct i915_hw_ppgtt, base);
    703 	gen8_gtt_pte_t *pt_vaddr;
    704 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
    705 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
    706 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
    707 	struct sg_page_iter sg_iter;
    708 
    709 	pt_vaddr = NULL;
    710 
    711 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
    712 		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
    713 			break;
    714 
    715 		if (pt_vaddr == NULL)
    716 			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
    717 
    718 		pt_vaddr[pte] =
    719 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
    720 					cache_level, true);
    721 		if (++pte == GEN8_PTES_PER_PAGE) {
    722 			kunmap_atomic(pt_vaddr);
    723 			pt_vaddr = NULL;
    724 			if (++pde == GEN8_PDES_PER_PAGE) {
    725 				pdpe++;
    726 				pde = 0;
    727 			}
    728 			pte = 0;
    729 		}
    730 	}
    731 	if (pt_vaddr)
    732 		kunmap_atomic(pt_vaddr);
    733 }
    734 
    735 static void gen8_free_page_tables(struct page **pt_pages)
    736 {
    737 	int i;
    738 
    739 	if (pt_pages == NULL)
    740 		return;
    741 
    742 	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
    743 		if (pt_pages[i])
    744 			__free_pages(pt_pages[i], 0);
    745 }
    746 
    747 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
    748 {
    749 	int i;
    750 
    751 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
    752 		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
    753 		kfree(ppgtt->gen8_pt_pages[i]);
    754 		kfree(ppgtt->gen8_pt_dma_addr[i]);
    755 	}
    756 
    757 	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
    758 }
    759 
    760 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
    761 {
    762 	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
    763 	int i, j;
    764 
    765 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
    766 		/* TODO: In the future we'll support sparse mappings, so this
    767 		 * will have to change. */
    768 		if (!ppgtt->pd_dma_addr[i])
    769 			continue;
    770 
    771 		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
    772 			       PCI_DMA_BIDIRECTIONAL);
    773 
    774 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
    775 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
    776 			if (addr)
    777 				pci_unmap_page(hwdev, addr, PAGE_SIZE,
    778 					       PCI_DMA_BIDIRECTIONAL);
    779 		}
    780 	}
    781 }
    782 
    783 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
    784 {
    785 	struct i915_hw_ppgtt *ppgtt =
    786 		container_of(vm, struct i915_hw_ppgtt, base);
    787 
    788 	list_del(&vm->global_link);
    789 	drm_mm_takedown(&vm->mm);
    790 
    791 	gen8_ppgtt_unmap_pages(ppgtt);
    792 	gen8_ppgtt_free(ppgtt);
    793 }
    794 
    795 static struct page **__gen8_alloc_page_tables(void)
    796 {
    797 	struct page **pt_pages;
    798 	int i;
    799 
    800 	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
    801 	if (!pt_pages)
    802 		return ERR_PTR(-ENOMEM);
    803 
    804 	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
    805 		pt_pages[i] = alloc_page(GFP_KERNEL);
    806 		if (!pt_pages[i])
    807 			goto bail;
    808 	}
    809 
    810 	return pt_pages;
    811 
    812 bail:
    813 	gen8_free_page_tables(pt_pages);
    814 	kfree(pt_pages);
    815 	return ERR_PTR(-ENOMEM);
    816 }
    817 
    818 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
    819 					   const int max_pdp)
    820 {
    821 	struct page **pt_pages[GEN8_LEGACY_PDPS];
    822 	int i, ret;
    823 
    824 	for (i = 0; i < max_pdp; i++) {
    825 		pt_pages[i] = __gen8_alloc_page_tables();
    826 		if (IS_ERR(pt_pages[i])) {
    827 			ret = PTR_ERR(pt_pages[i]);
    828 			goto unwind_out;
    829 		}
    830 	}
    831 
     832 	/* NB: Avoid touching gen8_pt_pages until the end so that the allocation
     833 	 * remains "atomic" for cleanup purposes.
    834 	 */
    835 	for (i = 0; i < max_pdp; i++)
    836 		ppgtt->gen8_pt_pages[i] = pt_pages[i];
    837 
    838 	return 0;
    839 
    840 unwind_out:
    841 	while (i--) {
    842 		gen8_free_page_tables(pt_pages[i]);
    843 		kfree(pt_pages[i]);
    844 	}
    845 
    846 	return ret;
    847 }
    848 
    849 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
    850 {
    851 	int i;
    852 
    853 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
    854 		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
    855 						     sizeof(dma_addr_t),
    856 						     GFP_KERNEL);
    857 		if (!ppgtt->gen8_pt_dma_addr[i])
    858 			return -ENOMEM;
    859 	}
    860 
    861 	return 0;
    862 }
    863 
    864 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
    865 						const int max_pdp)
    866 {
    867 	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
    868 	if (!ppgtt->pd_pages)
    869 		return -ENOMEM;
    870 
    871 	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
    872 	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
    873 
    874 	return 0;
    875 }
    876 
    877 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
    878 			    const int max_pdp)
    879 {
    880 	int ret;
    881 
    882 	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
    883 	if (ret)
    884 		return ret;
    885 
    886 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
    887 	if (ret) {
    888 		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
    889 		return ret;
    890 	}
    891 
    892 	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
    893 
    894 	ret = gen8_ppgtt_allocate_dma(ppgtt);
    895 	if (ret)
    896 		gen8_ppgtt_free(ppgtt);
    897 
    898 	return ret;
    899 }
    900 
    901 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
    902 					     const int pd)
    903 {
    904 	dma_addr_t pd_addr;
    905 	int ret;
    906 
    907 	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
    908 			       &ppgtt->pd_pages[pd], 0,
    909 			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
    910 
    911 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
    912 	if (ret)
    913 		return ret;
    914 
    915 	ppgtt->pd_dma_addr[pd] = pd_addr;
    916 
    917 	return 0;
    918 }
    919 
    920 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
    921 					const int pd,
    922 					const int pt)
    923 {
    924 	dma_addr_t pt_addr;
    925 	struct page *p;
    926 	int ret;
    927 
    928 	p = ppgtt->gen8_pt_pages[pd][pt];
    929 	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
    930 			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
    931 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
    932 	if (ret)
    933 		return ret;
    934 
    935 	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
    936 
    937 	return 0;
    938 }
    939 
    940 /**
     941  * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP registers,
    942  * with a net effect resembling a 2-level page table in normal x86 terms. Each
     943  * PDP covers 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of legacy 32b address
    944  * space.
    945  *
    946  * FIXME: split allocation into smaller pieces. For now we only ever do this
    947  * once, but with full PPGTT, the multiple contiguous allocations will be bad.
    948  * TODO: Do something with the size parameter
    949  */
    950 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
    951 {
    952 	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
    953 	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
    954 	int i, j, ret;
    955 
    956 	if (size % (1<<30))
    957 		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
    958 
    959 	/* 1. Do all our allocations for page directories and page tables. */
    960 	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
    961 	if (ret)
    962 		return ret;
    963 
    964 	/*
    965 	 * 2. Create DMA mappings for the page directories and page tables.
    966 	 */
    967 	for (i = 0; i < max_pdp; i++) {
    968 		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
    969 		if (ret)
    970 			goto bail;
    971 
    972 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
    973 			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
    974 			if (ret)
    975 				goto bail;
    976 		}
    977 	}
    978 
    979 	/*
     980 	 * 3. Map all the page directory entries to point to the page tables
    981 	 * we've allocated.
    982 	 *
    983 	 * For now, the PPGTT helper functions all require that the PDEs are
    984 	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
    985 	 * will never need to touch the PDEs again.
    986 	 */
    987 	for (i = 0; i < max_pdp; i++) {
    988 		gen8_ppgtt_pde_t *pd_vaddr;
    989 		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
    990 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
    991 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
    992 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
    993 						      I915_CACHE_LLC);
    994 		}
    995 		kunmap_atomic(pd_vaddr);
    996 	}
    997 
    998 	ppgtt->enable = gen8_ppgtt_enable;
    999 	ppgtt->switch_mm = gen8_mm_switch;
   1000 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
   1001 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
   1002 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
   1003 	ppgtt->base.start = 0;
   1004 	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
   1005 
   1006 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
   1007 
   1008 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
   1009 			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
   1010 	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
   1011 			 ppgtt->num_pd_entries,
   1012 			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
   1013 	return 0;
   1014 
   1015 bail:
   1016 	gen8_ppgtt_unmap_pages(ppgtt);
   1017 	gen8_ppgtt_free(ppgtt);
   1018 	return ret;
   1019 }
   1020 #endif
   1021 
   1022 #ifndef __NetBSD__
   1023 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
   1024 {
   1025 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
   1026 	struct i915_address_space *vm = &ppgtt->base;
   1027 	gen6_gtt_pte_t __iomem *pd_addr;
   1028 	gen6_gtt_pte_t scratch_pte;
   1029 	uint32_t pd_entry;
   1030 	int pte, pde;
   1031 
   1032 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
   1033 
   1034 	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
   1035 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
   1036 
   1037 	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
   1038 		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
   1039 	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
   1040 		u32 expected;
   1041 		gen6_gtt_pte_t *pt_vaddr;
   1042 		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
   1043 		pd_entry = readl(pd_addr + pde);
   1044 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
   1045 
   1046 		if (pd_entry != expected)
   1047 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
   1048 				   pde,
   1049 				   pd_entry,
   1050 				   expected);
   1051 		seq_printf(m, "\tPDE: %x\n", pd_entry);
   1052 
   1053 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
   1054 		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
   1055 			unsigned long va =
   1056 				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
   1057 				(pte * PAGE_SIZE);
   1058 			int i;
   1059 			bool found = false;
   1060 			for (i = 0; i < 4; i++)
   1061 				if (pt_vaddr[pte + i] != scratch_pte)
   1062 					found = true;
   1063 			if (!found)
   1064 				continue;
   1065 
   1066 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
   1067 			for (i = 0; i < 4; i++) {
   1068 				if (pt_vaddr[pte + i] != scratch_pte)
   1069 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
   1070 				else
   1071 					seq_puts(m, "  SCRATCH ");
   1072 			}
   1073 			seq_puts(m, "\n");
   1074 		}
   1075 		kunmap_atomic(pt_vaddr);
   1076 	}
   1077 }
   1078 #endif
   1079 
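         /*
          * Write the PPGTT's page-directory entries into the GGTT-resident
          * page directory: each PDE is the bus address of one page table,
          * encoded with GEN6_PDE_ADDR_ENCODE() and marked valid; the trailing
          * read is a posting read to flush the writes.
          */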
   1080 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
   1081 {
   1082 #ifdef __NetBSD__
   1083 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
   1084 	const bus_space_tag_t bst = dev_priv->gtt.bst;
   1085 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
   1086 	const bus_size_t pd_base = ppgtt->u.gen6->pd_base;
   1087 	unsigned i;
   1088 
   1089 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
   1090 		const bus_addr_t pt_addr = ppgtt->u.gen6->pt_segs[i].ds_addr;
   1091 		uint32_t pd_entry;
   1092 
   1093 		KASSERT(ppgtt->u.gen6->pt_segs[i].ds_len == PAGE_SIZE);
   1094 
   1095 		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
   1096 		pd_entry |= GEN6_PDE_VALID;
   1097 
   1098 		bus_space_write_4(bst, bsh, pd_base + (4*i), pd_entry);
   1099 	}
   1100 	(void)bus_space_read_4(bst, bsh, pd_base);
   1101 #else
   1102 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
   1103 	gen6_gtt_pte_t __iomem *pd_addr;
   1104 	uint32_t pd_entry;
   1105 	int i;
   1106 
   1107 	WARN_ON(ppgtt->pd_offset & 0x3f);
   1108 	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
   1109 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
   1110 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
   1111 		dma_addr_t pt_addr;
   1112 
   1113 		pt_addr = ppgtt->pt_dma_addr[i];
   1114 		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
   1115 		pd_entry |= GEN6_PDE_VALID;
   1116 
   1117 		writel(pd_entry, pd_addr + i);
   1118 	}
   1119 	readl(pd_addr);
   1120 #endif
   1121 }
   1122 
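         /*
          * Pack the page directory's offset within the GGTT into the format
          * expected by RING_PP_DIR_BASE in the mm_switch functions below.
          */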
   1123 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
   1124 {
   1125 #ifdef __NetBSD__
   1126 	KASSERT(!ISSET(ppgtt->u.gen6->pd_base, 0x3f));
   1127 
   1128 	/* XXX 64? 16?  */
   1129 	return (ppgtt->u.gen6->pd_base / 64) << 16;
   1130 #else
   1131 	BUG_ON(ppgtt->pd_offset & 0x3f);
   1132 
   1133 	return (ppgtt->pd_offset / 64) << 16;
   1134 #endif
   1135 }
   1136 
   1137 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
   1138 			 struct intel_ring_buffer *ring,
   1139 			 bool synchronous)
   1140 {
   1141 	struct drm_device *dev = ppgtt->base.dev;
   1142 	struct drm_i915_private *dev_priv = dev->dev_private;
   1143 	int ret;
   1144 
   1145 	/* If we're in reset, we can assume the GPU is sufficiently idle to
   1146 	 * manually frob these bits. Ideally we could use the ring functions,
   1147 	 * except our error handling makes it quite difficult (can't use
   1148 	 * intel_ring_begin, ring->flush, or intel_ring_advance)
   1149 	 *
   1150 	 * FIXME: We should try not to special case reset
   1151 	 */
   1152 	if (synchronous ||
   1153 	    i915_reset_in_progress(&dev_priv->gpu_error)) {
   1154 		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
   1155 		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
   1156 		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
   1157 		POSTING_READ(RING_PP_DIR_BASE(ring));
   1158 		return 0;
   1159 	}
   1160 
   1161 	/* NB: TLBs must be flushed and invalidated before a switch */
   1162 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
   1163 	if (ret)
   1164 		return ret;
   1165 
   1166 	ret = intel_ring_begin(ring, 6);
   1167 	if (ret)
   1168 		return ret;
   1169 
   1170 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
   1171 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
   1172 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
   1173 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
   1174 	intel_ring_emit(ring, get_pd_offset(ppgtt));
   1175 	intel_ring_emit(ring, MI_NOOP);
   1176 	intel_ring_advance(ring);
   1177 
   1178 	return 0;
   1179 }
   1180 
   1181 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
   1182 			  struct intel_ring_buffer *ring,
   1183 			  bool synchronous)
   1184 {
   1185 	struct drm_device *dev = ppgtt->base.dev;
   1186 	struct drm_i915_private *dev_priv = dev->dev_private;
   1187 	int ret;
   1188 
   1189 	/* If we're in reset, we can assume the GPU is sufficiently idle to
   1190 	 * manually frob these bits. Ideally we could use the ring functions,
   1191 	 * except our error handling makes it quite difficult (can't use
   1192 	 * intel_ring_begin, ring->flush, or intel_ring_advance)
   1193 	 *
   1194 	 * FIXME: We should try not to special case reset
   1195 	 */
   1196 	if (synchronous ||
   1197 	    i915_reset_in_progress(&dev_priv->gpu_error)) {
   1198 		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
   1199 		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
   1200 		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
   1201 		POSTING_READ(RING_PP_DIR_BASE(ring));
   1202 		return 0;
   1203 	}
   1204 
   1205 	/* NB: TLBs must be flushed and invalidated before a switch */
   1206 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
   1207 	if (ret)
   1208 		return ret;
   1209 
   1210 	ret = intel_ring_begin(ring, 6);
   1211 	if (ret)
   1212 		return ret;
   1213 
   1214 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
   1215 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
   1216 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
   1217 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
   1218 	intel_ring_emit(ring, get_pd_offset(ppgtt));
   1219 	intel_ring_emit(ring, MI_NOOP);
   1220 	intel_ring_advance(ring);
   1221 
   1222 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
   1223 	if (ring->id != RCS) {
   1224 		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
   1225 		if (ret)
   1226 			return ret;
   1227 	}
   1228 
   1229 	return 0;
   1230 }
   1231 
   1232 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
   1233 			  struct intel_ring_buffer *ring,
   1234 			  bool synchronous)
   1235 {
   1236 	struct drm_device *dev = ppgtt->base.dev;
   1237 	struct drm_i915_private *dev_priv = dev->dev_private;
   1238 
   1239 	if (!synchronous)
   1240 		return 0;
   1241 
   1242 	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
   1243 	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
   1244 
   1245 	POSTING_READ(RING_PP_DIR_DCLV(ring));
   1246 
   1247 	return 0;
   1248 }
   1249 
   1250 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
   1251 {
   1252 	struct drm_device *dev = ppgtt->base.dev;
   1253 	struct drm_i915_private *dev_priv = dev->dev_private;
   1254 	struct intel_ring_buffer *ring;
   1255 	int j, ret;
   1256 
   1257 	for_each_ring(ring, dev_priv, j) {
   1258 		I915_WRITE(RING_MODE_GEN7(ring),
   1259 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
   1260 
   1261 		/* We promise to do a switch later with FULL PPGTT. If this is
   1262 		 * aliasing, this is the one and only switch we'll do */
   1263 		if (USES_FULL_PPGTT(dev))
   1264 			continue;
   1265 
   1266 		ret = ppgtt->switch_mm(ppgtt, ring, true);
   1267 		if (ret)
   1268 			goto err_out;
   1269 	}
   1270 
   1271 	return 0;
   1272 
   1273 err_out:
   1274 	for_each_ring(ring, dev_priv, j)
   1275 		I915_WRITE(RING_MODE_GEN7(ring),
   1276 			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
   1277 	return ret;
   1278 }
   1279 
   1280 static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
   1281 {
   1282 	struct drm_device *dev = ppgtt->base.dev;
   1283 	struct drm_i915_private *dev_priv = dev->dev_private;
   1284 	struct intel_ring_buffer *ring;
   1285 	uint32_t ecochk, ecobits;
   1286 	int i;
   1287 
   1288 	ecobits = I915_READ(GAC_ECO_BITS);
   1289 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
   1290 
   1291 	ecochk = I915_READ(GAM_ECOCHK);
   1292 	if (IS_HASWELL(dev)) {
   1293 		ecochk |= ECOCHK_PPGTT_WB_HSW;
   1294 	} else {
   1295 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
   1296 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
   1297 	}
   1298 	I915_WRITE(GAM_ECOCHK, ecochk);
   1299 
   1300 	for_each_ring(ring, dev_priv, i) {
   1301 		int ret;
   1302 		/* GFX_MODE is per-ring on gen7+ */
   1303 		I915_WRITE(RING_MODE_GEN7(ring),
   1304 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
   1305 
   1306 		/* We promise to do a switch later with FULL PPGTT. If this is
   1307 		 * aliasing, this is the one and only switch we'll do */
   1308 		if (USES_FULL_PPGTT(dev))
   1309 			continue;
   1310 
   1311 		ret = ppgtt->switch_mm(ppgtt, ring, true);
   1312 		if (ret)
   1313 			return ret;
   1314 	}
   1315 
   1316 	return 0;
   1317 }
   1318 
   1319 static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
   1320 {
   1321 	struct drm_device *dev = ppgtt->base.dev;
   1322 	struct drm_i915_private *dev_priv = dev->dev_private;
   1323 	struct intel_ring_buffer *ring;
   1324 	uint32_t ecochk, gab_ctl, ecobits;
   1325 	int i;
   1326 
   1327 	ecobits = I915_READ(GAC_ECO_BITS);
   1328 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
   1329 		   ECOBITS_PPGTT_CACHE64B);
   1330 
   1331 	gab_ctl = I915_READ(GAB_CTL);
   1332 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
   1333 
   1334 	ecochk = I915_READ(GAM_ECOCHK);
   1335 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
   1336 
   1337 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
   1338 
   1339 	for_each_ring(ring, dev_priv, i) {
   1340 		int ret = ppgtt->switch_mm(ppgtt, ring, true);
   1341 		if (ret)
   1342 			return ret;
   1343 	}
   1344 
   1345 	return 0;
   1346 }
   1347 
    1348 /* PPGTT support for Sandybridge/Gen6 and later */
   1349 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
   1350 				   uint64_t start,
   1351 				   uint64_t length,
   1352 				   bool use_scratch)
   1353 {
   1354 	struct i915_hw_ppgtt *ppgtt =
   1355 		container_of(vm, struct i915_hw_ppgtt, base);
   1356 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
   1357 	unsigned first_entry = start >> PAGE_SHIFT;
   1358 	unsigned num_entries = length >> PAGE_SHIFT;
   1359 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
   1360 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
   1361 	unsigned last_pte, i;
   1362 #ifdef __NetBSD__
   1363 	void *kva;
   1364 	int ret;
   1365 #endif
   1366 
   1367 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
   1368 
   1369 	while (num_entries) {
   1370 		last_pte = first_pte + num_entries;
   1371 		if (last_pte > I915_PPGTT_PT_ENTRIES)
   1372 			last_pte = I915_PPGTT_PT_ENTRIES;
   1373 
   1374 #ifdef __NetBSD__
   1375 		/* XXX errno NetBSD->Linux */
   1376 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
   1377 		    &ppgtt->u.gen6->pt_segs[act_pt], 1, PAGE_SIZE, &kva,
   1378 		    BUS_DMA_NOWAIT);
   1379 		if (ret) {
   1380 			/*
   1381 			 * XXX Should guarantee mapping earlier with
   1382 			 * uvm_emap(9) or something.
   1383 			 */
   1384 			device_printf(ppgtt->base.dev->dev,
   1385 			    "failed to map page table: %d\n", -ret);
   1386 			goto skip;
   1387 		}
   1388 		pt_vaddr = kva;
   1389 #else
   1390 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
   1391 #endif
   1392 
   1393 		for (i = first_pte; i < last_pte; i++)
   1394 			pt_vaddr[i] = scratch_pte;
   1395 
   1396 #ifdef __NetBSD__
   1397 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
   1398 skip:
   1399 #else
   1400 		kunmap_atomic(pt_vaddr);
   1401 #endif
   1402 
   1403 		num_entries -= last_pte - first_pte;
   1404 		first_pte = 0;
   1405 		act_pt++;
   1406 	}
   1407 }
   1408 
   1409 #ifdef __NetBSD__
   1410 static void
   1411 gen6_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
   1412     uint64_t start, enum i915_cache_level cache_level)
   1413 {
   1414 	struct i915_hw_ppgtt *ppgtt =
   1415 		container_of(vm, struct i915_hw_ppgtt, base);
   1416 	gen6_gtt_pte_t *pt_vaddr;
   1417 	unsigned first_entry = start >> PAGE_SHIFT;
   1418 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
   1419 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
   1420 	unsigned seg;
   1421 	void *kva;
   1422 	int ret;
   1423 
   1424 	pt_vaddr = NULL;
   1425 	KASSERT(0 < dmamap->dm_nsegs);
   1426 	for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
   1427 		KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
   1428 		if (pt_vaddr == NULL) {
   1429 			/* XXX errno NetBSD->Linux */
   1430 			ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
   1431 			    &ppgtt->u.gen6->pt_segs[act_pt], 1,
   1432 			    PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
   1433 			if (ret) {
   1434 				/*
   1435 				 * XXX Should guarantee mapping earlier
   1436 				 * with uvm_emap(9) or something.
   1437 				 */
   1438 				device_printf(ppgtt->base.dev->dev,
   1439 				    "failed to map page table: %d\n", -ret);
   1440 				goto skip;
   1441 			}
   1442 			pt_vaddr = kva;
   1443 		}
   1444 		pt_vaddr[act_pte] =
   1445 		    vm->pte_encode(dmamap->dm_segs[seg].ds_addr, cache_level,
   1446 			true);
   1447 skip:
   1448 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
   1449 			bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
   1450 			    PAGE_SIZE);
   1451 			pt_vaddr = NULL;
   1452 			act_pt++;
   1453 			act_pte = 0;
   1454 		}
   1455 	}
   1456 	if (pt_vaddr)
   1457 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
   1458 }
   1459 #else
   1460 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
   1461 				      struct sg_table *pages,
   1462 				      uint64_t start,
   1463 				      enum i915_cache_level cache_level)
   1464 {
   1465 	struct i915_hw_ppgtt *ppgtt =
   1466 		container_of(vm, struct i915_hw_ppgtt, base);
   1467 	gen6_gtt_pte_t *pt_vaddr;
   1468 	unsigned first_entry = start >> PAGE_SHIFT;
   1469 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
   1470 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
   1471 	struct sg_page_iter sg_iter;
   1472 
   1473 	pt_vaddr = NULL;
   1474 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
   1475 		if (pt_vaddr == NULL)
   1476 			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
   1477 
   1478 		pt_vaddr[act_pte] =
   1479 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
   1480 				       cache_level, true);
   1481 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
   1482 			kunmap_atomic(pt_vaddr);
   1483 			pt_vaddr = NULL;
   1484 			act_pt++;
   1485 			act_pte = 0;
   1486 		}
   1487 	}
   1488 	if (pt_vaddr)
   1489 		kunmap_atomic(pt_vaddr);
   1490 }
   1491 #endif
   1492 
   1493 #ifndef __NetBSD__
   1494 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
   1495 {
   1496 	int i;
   1497 
   1498 	if (ppgtt->pt_dma_addr) {
   1499 		for (i = 0; i < ppgtt->num_pd_entries; i++)
   1500 			pci_unmap_page(ppgtt->base.dev->pdev,
   1501 				       ppgtt->pt_dma_addr[i],
   1502 				       4096, PCI_DMA_BIDIRECTIONAL);
   1503 	}
   1504 }
   1505 
   1506 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
   1507 {
   1508 	int i;
   1509 
   1510 	kfree(ppgtt->pt_dma_addr);
   1511 	for (i = 0; i < ppgtt->num_pd_entries; i++)
   1512 		__free_page(ppgtt->pt_pages[i]);
   1513 	kfree(ppgtt->pt_pages);
   1514 }
   1515 
   1516 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
   1517 {
   1518 	struct i915_hw_ppgtt *ppgtt =
   1519 		container_of(vm, struct i915_hw_ppgtt, base);
   1520 
   1521 	list_del(&vm->global_link);
   1522 	drm_mm_takedown(&ppgtt->base.mm);
   1523 	drm_mm_remove_node(&ppgtt->node);
   1524 
   1525 	gen6_ppgtt_unmap_pages(ppgtt);
   1526 	gen6_ppgtt_free(ppgtt);
   1527 }
   1528 #endif
   1529 
   1530 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
   1531 {
   1532 #define GEN6_PD_ALIGN (PAGE_SIZE * 16)
   1533 #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
   1534 	struct drm_device *dev = ppgtt->base.dev;
   1535 	struct drm_i915_private *dev_priv = dev->dev_private;
   1536 	bool retried = false;
   1537 	int ret;
   1538 
    1539 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
   1540 	 * allocator works in address space sizes, so it's multiplied by page
   1541 	 * size. We allocate at the top of the GTT to avoid fragmentation.
   1542 	 */
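         	/*
         	 * Concretely, GEN6_PD_SIZE is 512 * 4 KB = 2 MB of GTT address
         	 * space, aligned to GEN6_PD_ALIGN = 64 KB.
         	 */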
   1543 	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
   1544 alloc:
   1545 	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
   1546 						  &ppgtt->node, GEN6_PD_SIZE,
   1547 						  GEN6_PD_ALIGN, 0,
   1548 						  0, dev_priv->gtt.base.total,
   1549 						  DRM_MM_SEARCH_DEFAULT,
   1550 						  DRM_MM_CREATE_DEFAULT);
   1551 	if (ret == -ENOSPC && !retried) {
   1552 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
   1553 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
   1554 					       I915_CACHE_NONE,
   1555 					       0, dev_priv->gtt.base.total,
   1556 					       0);
   1557 		if (ret)
   1558 			return ret;
   1559 
   1560 		retried = true;
   1561 		goto alloc;
   1562 	}
   1563 
   1564 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
   1565 		DRM_DEBUG("Forced to use aperture for PDEs\n");
   1566 
   1567 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
   1568 	return ret;
   1569 }
   1570 
   1571 #ifdef __NetBSD__
   1572 static void	gen6_ppgtt_cleanup(struct i915_address_space *);
   1573 static int	gen6_ppgtt_alloc(struct i915_hw_ppgtt *);
   1574 static void	gen6_ppgtt_free(struct i915_hw_ppgtt *);
   1575 static int	gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *);
   1576 static void	gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
   1577 static int	gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *);
   1578 static void	gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
   1579 
   1580 static void
   1581 gen6_ppgtt_cleanup(struct i915_address_space *vm)
   1582 {
   1583 	struct i915_hw_ppgtt *ppgtt =
   1584 		container_of(vm, struct i915_hw_ppgtt, base);
   1585 
   1586 	list_del(&vm->global_link);
   1587 	drm_mm_takedown(&ppgtt->base.mm);
   1588 
   1589 	gen6_ppgtt_free(ppgtt);
   1590 }
   1591 
   1592 static int
   1593 gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
   1594 {
   1595 	int ret;
   1596 
   1597 	ppgtt->u.gen6 = kmem_alloc(sizeof(*ppgtt->u.gen6), KM_SLEEP);
   1598 
   1599 	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
   1600 	if (ret)
   1601 		goto fail0;
   1602 	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
   1603 	if (ret)
   1604 		goto fail1;
   1605 
   1606 	/* Success!  */
   1607 	return 0;
   1608 
   1609 fail2: __unused
   1610 	gen6_ppgtt_free_page_tables(ppgtt);
   1611 fail1:	gen6_ppgtt_free_page_directories(ppgtt);
   1612 fail0:	KASSERT(ret);
   1613 	kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
   1614 	return ret;
   1615 }
   1616 
   1617 static void
   1618 gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
   1619 {
   1620 
   1621 	gen6_ppgtt_free_page_tables(ppgtt);
   1622 	gen6_ppgtt_free_page_directories(ppgtt);
   1623 	kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
   1624 }
   1625 
   1626 static void
   1627 gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
   1628 {
   1629 
   1630 	drm_mm_remove_node(&ppgtt->node);
   1631 }
   1632 
   1633 static int
   1634 gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
   1635 {
   1636 	int rsegs;
   1637 	int ret;
   1638 
   1639 	KASSERT(ppgtt->num_pd_entries <= INT_MAX);
   1640 #if UINT_MAX == SIZE_MAX	/* XXX ugh */
   1641 	KASSERT(ppgtt->num_pd_entries < (SIZE_MAX /
   1642 		sizeof(ppgtt->u.gen6->pt_segs[0])));
   1643 	KASSERT(ppgtt->num_pd_entries < (__type_max(bus_size_t) / PAGE_SIZE));
   1644 #endif
   1645 
   1646 	ppgtt->u.gen6->pt_segs = kmem_alloc((ppgtt->num_pd_entries *
   1647 		sizeof(ppgtt->u.gen6->pt_segs[0])), KM_SLEEP);
   1648 
   1649 	/* XXX errno NetBSD->Linux */
   1650 	ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
   1651 	    (PAGE_SIZE * ppgtt->num_pd_entries), PAGE_SIZE, PAGE_SIZE,
   1652 	    ppgtt->u.gen6->pt_segs, ppgtt->num_pd_entries, &rsegs,
   1653 	    BUS_DMA_WAITOK);
   1654 	if (ret)
   1655 		goto fail0;
   1656 	KASSERT(rsegs == ppgtt->num_pd_entries);
   1657 
   1658 	/* XXX errno NetBSD->Linux */
   1659 	ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
   1660 	    (PAGE_SIZE * ppgtt->num_pd_entries), ppgtt->num_pd_entries,
   1661 	    PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen6->pt_map);
   1662 	if (ret)
   1663 		goto fail1;
   1664 
   1665 	/* XXX errno NetBSD->Linux */
   1666 	ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
   1667 	    ppgtt->u.gen6->pt_map, ppgtt->u.gen6->pt_segs,
   1668 	    ppgtt->num_pd_entries, (PAGE_SIZE * ppgtt->num_pd_entries),
   1669 	    BUS_DMA_WAITOK);
   1670 	if (ret)
   1671 		goto fail2;
   1672 
   1673 	/* Success!  */
   1674 	return 0;
   1675 
   1676 fail3: __unused
   1677 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
   1678 fail2:	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
   1679 fail1:	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
   1680 	    ppgtt->num_pd_entries);
fail0:	KASSERT(ret);
	/* Also free the segment array allocated above, or it would leak.  */
	kmem_free(ppgtt->u.gen6->pt_segs, (ppgtt->num_pd_entries *
		sizeof(ppgtt->u.gen6->pt_segs[0])));
	return ret;
   1683 }
   1684 
   1685 static void
   1686 gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
   1687 {
   1688 
   1689 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
   1690 	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
	    ppgtt->num_pd_entries);
	/* Free the segment array itself as well; it was kmem_alloc'd in
	 * gen6_ppgtt_allocate_page_tables.  */
	kmem_free(ppgtt->u.gen6->pt_segs, (ppgtt->num_pd_entries *
		sizeof(ppgtt->u.gen6->pt_segs[0])));
}
   1694 #else
   1695 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
   1696 {
   1697 	int i;
   1698 
   1699 	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
   1700 				  GFP_KERNEL);
   1701 
   1702 	if (!ppgtt->pt_pages)
   1703 		return -ENOMEM;
   1704 
   1705 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
   1706 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
   1707 		if (!ppgtt->pt_pages[i]) {
   1708 			gen6_ppgtt_free(ppgtt);
   1709 			return -ENOMEM;
   1710 		}
   1711 	}
   1712 
   1713 	return 0;
   1714 }
   1715 
   1716 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
   1717 {
   1718 	int ret;
   1719 
   1720 	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
   1721 	if (ret)
   1722 		return ret;
   1723 
   1724 	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
   1725 	if (ret) {
   1726 		drm_mm_remove_node(&ppgtt->node);
   1727 		return ret;
   1728 	}
   1729 
   1730 	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
   1731 				     GFP_KERNEL);
   1732 	if (!ppgtt->pt_dma_addr) {
   1733 		drm_mm_remove_node(&ppgtt->node);
   1734 		gen6_ppgtt_free(ppgtt);
   1735 		return -ENOMEM;
   1736 	}
   1737 
   1738 	return 0;
   1739 }
   1740 
   1741 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
   1742 {
   1743 	struct drm_device *dev = ppgtt->base.dev;
   1744 	int i;
   1745 
   1746 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
   1747 		dma_addr_t pt_addr;
   1748 
   1749 		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
   1750 				       PCI_DMA_BIDIRECTIONAL);
   1751 
   1752 		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
   1753 			gen6_ppgtt_unmap_pages(ppgtt);
   1754 			return -EIO;
   1755 		}
   1756 
   1757 		ppgtt->pt_dma_addr[i] = pt_addr;
   1758 	}
   1759 
   1760 	return 0;
   1761 }
   1762 #endif
   1763 
   1764 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
   1765 {
   1766 	struct drm_device *dev = ppgtt->base.dev;
   1767 	struct drm_i915_private *dev_priv = dev->dev_private;
   1768 	int ret;
   1769 
   1770 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
   1771 	if (IS_GEN6(dev)) {
   1772 		ppgtt->enable = gen6_ppgtt_enable;
   1773 		ppgtt->switch_mm = gen6_mm_switch;
   1774 	} else if (IS_HASWELL(dev)) {
   1775 		ppgtt->enable = gen7_ppgtt_enable;
   1776 		ppgtt->switch_mm = hsw_mm_switch;
   1777 	} else if (IS_GEN7(dev)) {
   1778 		ppgtt->enable = gen7_ppgtt_enable;
   1779 		ppgtt->switch_mm = gen7_mm_switch;
   1780 	} else
   1781 		BUG();
   1782 
   1783 	ret = gen6_ppgtt_alloc(ppgtt);
   1784 	if (ret)
   1785 		return ret;
   1786 
   1787 #ifndef __NetBSD__
   1788 	ret = gen6_ppgtt_setup_page_tables(ppgtt);
   1789 	if (ret) {
   1790 		gen6_ppgtt_free(ppgtt);
   1791 		return ret;
   1792 	}
   1793 #endif
   1794 
   1795 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
   1796 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
   1797 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
   1798 	ppgtt->base.start = 0;
   1799 	ppgtt->base.total =  ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
   1800 #ifndef __NetBSD__
   1801 	ppgtt->debug_dump = gen6_dump_ppgtt;
   1802 #endif
   1803 
   1804 #ifdef __NetBSD__
   1805 	CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
   1806 	ppgtt->u.gen6->pd_base = 4*(ppgtt->node.start / PAGE_SIZE);
   1807 #else
   1808 	ppgtt->pd_offset =
   1809 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
   1810 #endif
   1811 
   1812 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
   1813 
   1814 	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
   1815 			 ppgtt->node.size >> 20,
   1816 			 ppgtt->node.start / PAGE_SIZE);
   1817 
   1818 	return 0;
   1819 }
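
/*
 * Worked example for the pd_offset/pd_base computation in gen6_ppgtt_init()
 * above (illustrative numbers): each gen6 GTT PTE is 4 bytes, so if the
 * page-directory node starts at GGTT address 0x00800000 (8 MiB), its first
 * PDE occupies GGTT PTE index 0x00800000 / 4096 = 2048, i.e. byte offset
 * 2048 * 4 = 0x2000 from the start of the PTE array.  (The debug message in
 * i915_gem_init_ppgtt() below prints this value shifted left by 10.)
 */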
   1820 
   1821 int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
   1822 {
   1823 	struct drm_i915_private *dev_priv = dev->dev_private;
   1824 	int ret = 0;
   1825 
   1826 	ppgtt->base.dev = dev;
   1827 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
   1828 
   1829 	if (INTEL_INFO(dev)->gen < 8)
   1830 		ret = gen6_ppgtt_init(ppgtt);
   1831 	else if (IS_GEN8(dev))
   1832 		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
   1833 	else
   1834 		BUG();
   1835 
   1836 	if (!ret) {
   1837 		kref_init(&ppgtt->ref);
   1838 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
   1839 			    ppgtt->base.total);
   1840 		i915_init_vm(dev_priv, &ppgtt->base);
   1841 		if (INTEL_INFO(dev)->gen < 8) {
   1842 			gen6_write_pdes(ppgtt);
   1843 #ifdef __NetBSD__
   1844 			DRM_DEBUG("Adding PPGTT at offset %"PRIxMAX"\n",
   1845 			    (uintmax_t)ppgtt->u.gen6->pd_base << 10);
   1846 #else
   1847 			DRM_DEBUG("Adding PPGTT at offset %x\n",
   1848 				  ppgtt->pd_offset << 10);
   1849 #endif
   1850 		}
   1851 	}
   1852 
   1853 	return ret;
   1854 }
   1855 
   1856 static void
   1857 ppgtt_bind_vma(struct i915_vma *vma,
   1858 	       enum i915_cache_level cache_level,
   1859 	       u32 flags)
   1860 {
   1861 #ifdef __NetBSD__
   1862 	vma->vm->insert_entries(vma->vm, vma->obj->igo_dmamap, vma->node.start,
   1863 				cache_level);
   1864 #else
   1865 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
   1866 				cache_level);
   1867 #endif
   1868 }
   1869 
   1870 static void ppgtt_unbind_vma(struct i915_vma *vma)
   1871 {
   1872 	vma->vm->clear_range(vma->vm,
   1873 			     vma->node.start,
   1874 			     vma->obj->base.size,
   1875 			     true);
   1876 }
   1877 
   1878 extern int intel_iommu_gfx_mapped;
   1879 /* Certain Gen5 chipsets require require idling the GPU before
/* Certain Gen5 chipsets require idling the GPU before
   1881  */
   1882 static inline bool needs_idle_maps(struct drm_device *dev)
   1883 {
   1884 #ifdef CONFIG_INTEL_IOMMU
   1885 	/* Query intel_iommu to see if we need the workaround. Presumably that
   1886 	 * was loaded first.
   1887 	 */
   1888 	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
   1889 		return true;
   1890 #endif
   1891 	return false;
   1892 }
   1893 
   1894 static bool do_idling(struct drm_i915_private *dev_priv)
   1895 {
   1896 	bool ret = dev_priv->mm.interruptible;
   1897 
   1898 	if (unlikely(dev_priv->gtt.do_idle_maps)) {
   1899 		dev_priv->mm.interruptible = false;
   1900 		if (i915_gpu_idle(dev_priv->dev)) {
   1901 			DRM_ERROR("Couldn't idle GPU\n");
   1902 			/* Wait a bit, in hopes it avoids the hang */
   1903 			udelay(10);
   1904 		}
   1905 	}
   1906 
   1907 	return ret;
   1908 }
   1909 
   1910 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
   1911 {
   1912 	if (unlikely(dev_priv->gtt.do_idle_maps))
   1913 		dev_priv->mm.interruptible = interruptible;
   1914 }
   1915 
   1916 void i915_check_and_clear_faults(struct drm_device *dev)
   1917 {
   1918 	struct drm_i915_private *dev_priv = dev->dev_private;
   1919 	struct intel_ring_buffer *ring;
   1920 	int i;
   1921 
   1922 	if (INTEL_INFO(dev)->gen < 6)
   1923 		return;
   1924 
   1925 	for_each_ring(ring, dev_priv, i) {
   1926 		u32 fault_reg;
   1927 		fault_reg = I915_READ(RING_FAULT_REG(ring));
   1928 		if (fault_reg & RING_FAULT_VALID) {
   1929 			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08"PRIx32"\n"
   1931 					 "\tAddress space: %s\n"
   1932 					 "\tSource ID: %d\n"
   1933 					 "\tType: %d\n",
   1934 					 fault_reg & PAGE_MASK,
   1935 					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
   1936 					 RING_FAULT_SRCID(fault_reg),
   1937 					 RING_FAULT_FAULT_TYPE(fault_reg));
   1938 			I915_WRITE(RING_FAULT_REG(ring),
   1939 				   fault_reg & ~RING_FAULT_VALID);
   1940 		}
   1941 	}
   1942 	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
   1943 }
   1944 
   1945 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
   1946 {
   1947 	struct drm_i915_private *dev_priv = dev->dev_private;
   1948 
   1949 	/* Don't bother messing with faults pre GEN6 as we have little
   1950 	 * documentation supporting that it's a good idea.
   1951 	 */
   1952 	if (INTEL_INFO(dev)->gen < 6)
   1953 		return;
   1954 
   1955 	i915_check_and_clear_faults(dev);
   1956 
   1957 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
   1958 				       dev_priv->gtt.base.start,
   1959 				       dev_priv->gtt.base.total,
   1960 				       true);
   1961 }
   1962 
   1963 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
   1964 {
   1965 	struct drm_i915_private *dev_priv = dev->dev_private;
   1966 	struct drm_i915_gem_object *obj;
   1967 	struct i915_address_space *vm;
   1968 
   1969 	i915_check_and_clear_faults(dev);
   1970 
   1971 	/* First fill our portion of the GTT with scratch pages */
   1972 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
   1973 				       dev_priv->gtt.base.start,
   1974 				       dev_priv->gtt.base.total,
   1975 				       true);
   1976 
   1977 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
   1978 		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
   1979 							   &dev_priv->gtt.base);
   1980 		if (!vma)
   1981 			continue;
   1982 
   1983 		i915_gem_clflush_object(obj, obj->pin_display);
   1984 		/* The bind_vma code tries to be smart about tracking mappings.
   1985 		 * Unfortunately above, we've just wiped out the mappings
   1986 		 * without telling our object about it. So we need to fake it.
   1987 		 */
   1988 		obj->has_global_gtt_mapping = 0;
   1989 		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
   1990 	}
   1991 
   1993 	if (INTEL_INFO(dev)->gen >= 8) {
   1994 		gen8_setup_private_ppat(dev_priv);
   1995 		return;
   1996 	}
   1997 
   1998 	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
   1999 		/* TODO: Perhaps it shouldn't be gen6 specific */
   2000 		if (i915_is_ggtt(vm)) {
   2001 			if (dev_priv->mm.aliasing_ppgtt)
   2002 				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
   2003 			continue;
   2004 		}
   2005 
   2006 		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
   2007 	}
   2008 
   2009 	i915_gem_chipset_flush(dev);
   2010 }
   2011 
   2012 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
   2013 {
   2014 	if (obj->has_dma_mapping)
   2015 		return 0;
   2016 
   2017 #ifdef __NetBSD__
   2018 	KASSERT(0 < obj->base.size);
   2019 	/* XXX errno NetBSD->Linux */
   2020 	return -bus_dmamap_load_raw(obj->base.dev->dmat, obj->igo_dmamap,
   2021 	    obj->pages, obj->igo_nsegs, obj->base.size, BUS_DMA_NOWAIT);
   2022 #else
   2023 	if (!dma_map_sg(&obj->base.dev->pdev->dev,
   2024 			obj->pages->sgl, obj->pages->nents,
   2025 			PCI_DMA_BIDIRECTIONAL))
   2026 		return -ENOSPC;
   2027 
   2028 	return 0;
   2029 #endif
   2030 }
   2031 
   2032 #ifdef __NetBSD__
   2033 static inline uint64_t
   2034 gen8_get_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i)
   2035 {
   2036 	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
   2037 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
   2038 #ifdef _LP64			/* XXX How to detect bus_space_read_8?  */
   2039 	return bus_space_read_8(bst, bsh, 8*i);
   2040 #else
   2041 	/*
   2042 	 * XXX I'm not sure this case can actually happen in practice:
   2043 	 * 32-bit gen8 chipsets?
   2044 	 */
   2045 	return bus_space_read_4(bst, bsh, 8*i) |
   2046 	    ((uint64_t)bus_space_read_4(bst, bsh, 8*i + 4) << 32);
   2047 #endif
   2048 }
   2049 
   2050 static inline void
   2051 gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
   2052     gen8_gtt_pte_t pte)
   2053 {
   2054 	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
   2055 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
   2056 #ifdef _LP64			/* XXX How to detect bus_space_write_8?  */
   2057 	bus_space_write_8(bst, bsh, 8*i, pte);
   2058 #else
   2059 	bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
   2060 	bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
   2061 #endif
   2062 }
   2063 #else
   2064 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
   2065 {
   2066 #ifdef writeq
   2067 	writeq(pte, addr);
   2068 #else
   2069 	iowrite32((u32)pte, addr);
   2070 	iowrite32(pte >> 32, addr + 4);
   2071 #endif
   2072 }
   2073 #endif
   2074 
   2075 #ifdef __NetBSD__
   2076 static void
   2077 gen8_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
   2078     uint64_t start, enum i915_cache_level level)
   2079 {
   2080 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2081 	unsigned first_entry = start >> PAGE_SHIFT;
   2082 	const bus_space_tag_t bst = dev_priv->gtt.bst;
   2083 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
   2084 	unsigned i;
   2085 
   2086 	KASSERT(0 < dmamap->dm_nsegs);
   2087 	for (i = 0; i < dmamap->dm_nsegs; i++) {
   2088 		KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
   2089 		gen8_set_pte(bst, bsh, first_entry + i,
   2090 		    gen8_pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
   2091 	}
   2092 	if (0 < i) {
   2093 		/* Posting read.  */
   2094 		WARN_ON(gen8_get_pte(bst, bsh, (first_entry + i - 1))
   2095 		    != gen8_pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
   2096 			true));
   2097 	}
   2098 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
   2099 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
   2100 }
   2101 #else
   2102 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
   2103 				     struct sg_table *st,
   2104 				     uint64_t start,
   2105 				     enum i915_cache_level level)
   2106 {
   2107 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2108 	unsigned first_entry = start >> PAGE_SHIFT;
   2109 	gen8_gtt_pte_t __iomem *gtt_entries =
   2110 		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
   2111 	int i = 0;
   2112 	struct sg_page_iter sg_iter;
   2113 	dma_addr_t addr;
   2114 
   2115 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
   2116 		addr = sg_dma_address(sg_iter.sg) +
   2117 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
   2118 		gen8_set_pte(&gtt_entries[i],
   2119 			     gen8_pte_encode(addr, level, true));
   2120 		i++;
   2121 	}
   2122 
	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated.  There is some concern that, even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different (NUMA-like) access behaviour.  Therefore, even with the
	 * way we assume the hardware should work, we keep this posting read
	 * out of paranoia.
	 */
   2130 	if (i != 0)
   2131 		WARN_ON(readq(&gtt_entries[i-1])
   2132 			!= gen8_pte_encode(addr, level, true));
   2133 
   2134 	/* This next bit makes the above posting read even more important. We
   2135 	 * want to flush the TLBs only after we're certain all the PTE updates
   2136 	 * have finished.
   2137 	 */
   2138 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
   2139 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
   2140 }
   2141 #endif
   2142 
   2143 /*
   2144  * Binds an object into the global gtt with the specified cache level. The object
   2145  * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the CPU through the GMADR
   2147  * mapped BAR (dev_priv->mm.gtt->gtt).
   2148  */
   2149 #ifdef __NetBSD__
   2150 static void
   2151 gen6_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
   2152     uint64_t start, enum i915_cache_level level)
   2153 {
   2154 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2155 	unsigned first_entry = start >> PAGE_SHIFT;
   2156 	const bus_space_tag_t bst = dev_priv->gtt.bst;
   2157 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
   2158 	unsigned i;
   2159 
   2160 	KASSERT(0 < dmamap->dm_nsegs);
   2161 	for (i = 0; i < dmamap->dm_nsegs; i++) {
   2162 		KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
   2163 		CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
   2164 		bus_space_write_4(bst, bsh, 4*(first_entry + i),
   2165 		    vm->pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
   2166 	}
   2167 	if (0 < i) {
   2168 		/* Posting read.  */
   2169 		WARN_ON(bus_space_read_4(bst, bsh, 4*(first_entry + i - 1))
   2170 		    != vm->pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
   2171 			true));
   2172 	}
   2173 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
   2174 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
   2175 }
   2176 #else
   2177 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
   2178 				     struct sg_table *st,
   2179 				     uint64_t start,
   2180 				     enum i915_cache_level level)
   2181 {
   2182 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2183 	unsigned first_entry = start >> PAGE_SHIFT;
   2184 	gen6_gtt_pte_t __iomem *gtt_entries =
   2185 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
   2186 	int i = 0;
   2187 	struct sg_page_iter sg_iter;
   2188 	dma_addr_t addr;
   2189 
   2190 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
   2191 		addr = sg_page_iter_dma_address(&sg_iter);
   2192 		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
   2193 		i++;
   2194 	}
   2195 
	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated.  There is some concern that, even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different (NUMA-like) access behaviour.  Therefore, even with the
	 * way we assume the hardware should work, we keep this posting read
	 * out of paranoia.
	 */
   2202 	if (i != 0)
   2203 		WARN_ON(readl(&gtt_entries[i-1]) !=
   2204 			vm->pte_encode(addr, level, true));
   2205 
   2206 	/* This next bit makes the above posting read even more important. We
   2207 	 * want to flush the TLBs only after we're certain all the PTE updates
   2208 	 * have finished.
   2209 	 */
   2210 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
   2211 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
   2212 }
   2213 #endif
   2214 
   2215 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
   2216 				  uint64_t start,
   2217 				  uint64_t length,
   2218 				  bool use_scratch)
   2219 {
   2220 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2221 	unsigned first_entry = start >> PAGE_SHIFT;
   2222 	unsigned num_entries = length >> PAGE_SHIFT;
   2223 #ifdef __NetBSD__
   2224 	const bus_space_tag_t bst = dev_priv->gtt.bst;
   2225 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
   2226 	gen8_gtt_pte_t scratch_pte;
   2227 #else
   2228 	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
   2229 		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
   2230 #endif
   2231 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
   2232 	int i;
   2233 
   2234 	if (WARN(num_entries > max_entries,
   2235 		 "First entry = %d; Num entries = %d (max=%d)\n",
   2236 		 first_entry, num_entries, max_entries))
   2237 		num_entries = max_entries;
   2238 
   2239 	scratch_pte = gen8_pte_encode(vm->scratch.addr,
   2240 				      I915_CACHE_LLC,
   2241 				      use_scratch);
   2242 #ifdef __NetBSD__
   2243 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
   2244 	for (i = 0; i < num_entries; i++)
   2245 		gen8_set_pte(bst, bsh, first_entry + i, scratch_pte);
   2246 	(void)gen8_get_pte(bst, bsh, first_entry);
   2247 #else
   2248 	for (i = 0; i < num_entries; i++)
   2249 		gen8_set_pte(&gtt_base[i], scratch_pte);
   2250 	readl(gtt_base);
   2251 #endif
   2252 }
   2253 
   2254 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
   2255 				  uint64_t start,
   2256 				  uint64_t length,
   2257 				  bool use_scratch)
   2258 {
   2259 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
   2260 	unsigned first_entry = start >> PAGE_SHIFT;
   2261 	unsigned num_entries = length >> PAGE_SHIFT;
   2262 #ifdef __NetBSD__
   2263 	const bus_space_tag_t bst = dev_priv->gtt.bst;
   2264 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
	gen6_gtt_pte_t scratch_pte;
   2266 #else
   2267 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
   2268 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
   2269 #endif
   2270 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
   2271 	int i;
   2272 
   2273 	if (WARN(num_entries > max_entries,
   2274 		 "First entry = %d; Num entries = %d (max=%d)\n",
   2275 		 first_entry, num_entries, max_entries))
   2276 		num_entries = max_entries;
   2277 
   2278 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
   2279 
   2280 #ifdef __NetBSD__
   2281 	CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
   2282 	for (i = 0; i < num_entries; i++)
   2283 		bus_space_write_4(bst, bsh, 4*(first_entry + i), scratch_pte);
   2284 	(void)bus_space_read_4(bst, bsh, 4*first_entry);
   2285 #else
   2286 	for (i = 0; i < num_entries; i++)
   2287 		iowrite32(scratch_pte, &gtt_base[i]);
   2288 	readl(gtt_base);
   2289 #endif
   2290 }
   2291 
   2292 
   2293 static void i915_ggtt_bind_vma(struct i915_vma *vma,
   2294 			       enum i915_cache_level cache_level,
   2295 			       u32 unused)
   2296 {
   2297 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
   2298 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
   2299 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
   2300 
   2301 	BUG_ON(!i915_is_ggtt(vma->vm));
   2302 #ifdef __NetBSD__
   2303 	intel_gtt_insert_entries(vma->obj->igo_dmamap, entry, flags);
   2304 #else
   2305 	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
   2306 #endif
   2307 	vma->obj->has_global_gtt_mapping = 1;
   2308 }
   2309 
   2310 static void i915_ggtt_clear_range(struct i915_address_space *vm,
   2311 				  uint64_t start,
   2312 				  uint64_t length,
   2313 				  bool unused)
   2314 {
   2315 	unsigned first_entry = start >> PAGE_SHIFT;
   2316 	unsigned num_entries = length >> PAGE_SHIFT;
   2317 	intel_gtt_clear_range(first_entry, num_entries);
   2318 }
   2319 
   2320 static void i915_ggtt_unbind_vma(struct i915_vma *vma)
   2321 {
   2322 	const unsigned int first = vma->node.start >> PAGE_SHIFT;
   2323 	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
   2324 
   2325 	BUG_ON(!i915_is_ggtt(vma->vm));
   2326 	vma->obj->has_global_gtt_mapping = 0;
   2327 	intel_gtt_clear_range(first, size);
   2328 }
   2329 
   2330 static void ggtt_bind_vma(struct i915_vma *vma,
   2331 			  enum i915_cache_level cache_level,
   2332 			  u32 flags)
   2333 {
   2334 	struct drm_device *dev = vma->vm->dev;
   2335 	struct drm_i915_private *dev_priv = dev->dev_private;
   2336 	struct drm_i915_gem_object *obj = vma->obj;
   2337 
   2338 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
   2339 	 * or we have a global mapping already but the cacheability flags have
   2340 	 * changed, set the global PTEs.
   2341 	 *
   2342 	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
   2343 	 * instead if none of the above hold true.
   2344 	 *
   2345 	 * NB: A global mapping should only be needed for special regions like
   2346 	 * "gtt mappable", SNB errata, or if specified via special execbuf
   2347 	 * flags. At all other times, the GPU will use the aliasing PPGTT.
   2348 	 */
   2349 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
   2350 		if (!obj->has_global_gtt_mapping ||
   2351 		    (cache_level != obj->cache_level)) {
   2352 			vma->vm->insert_entries(vma->vm,
   2353 #ifdef __NetBSD__
   2354 						obj->igo_dmamap,
   2355 #else
   2356 						obj->pages,
   2357 #endif
   2358 						vma->node.start,
   2359 						cache_level);
   2360 			obj->has_global_gtt_mapping = 1;
   2361 		}
   2362 	}
   2363 
   2364 	if (dev_priv->mm.aliasing_ppgtt &&
   2365 	    (!obj->has_aliasing_ppgtt_mapping ||
   2366 	     (cache_level != obj->cache_level))) {
   2367 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
   2368 		appgtt->base.insert_entries(&appgtt->base,
   2369 #ifdef __NetBSD__
   2370 					    vma->obj->igo_dmamap,
   2371 #else
   2372 					    vma->obj->pages,
   2373 #endif
   2374 					    vma->node.start,
   2375 					    cache_level);
   2376 		vma->obj->has_aliasing_ppgtt_mapping = 1;
   2377 	}
   2378 }
   2379 
   2380 static void ggtt_unbind_vma(struct i915_vma *vma)
   2381 {
   2382 	struct drm_device *dev = vma->vm->dev;
   2383 	struct drm_i915_private *dev_priv = dev->dev_private;
   2384 	struct drm_i915_gem_object *obj = vma->obj;
   2385 
   2386 	if (obj->has_global_gtt_mapping) {
   2387 		vma->vm->clear_range(vma->vm,
   2388 				     vma->node.start,
   2389 				     obj->base.size,
   2390 				     true);
   2391 		obj->has_global_gtt_mapping = 0;
   2392 	}
   2393 
   2394 	if (obj->has_aliasing_ppgtt_mapping) {
   2395 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
   2396 		appgtt->base.clear_range(&appgtt->base,
   2397 					 vma->node.start,
   2398 					 obj->base.size,
   2399 					 true);
   2400 		obj->has_aliasing_ppgtt_mapping = 0;
   2401 	}
   2402 }
   2403 
   2404 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
   2405 {
   2406 	struct drm_device *dev = obj->base.dev;
   2407 	struct drm_i915_private *dev_priv = dev->dev_private;
   2408 	bool interruptible;
   2409 
   2410 	interruptible = do_idling(dev_priv);
   2411 
   2412 #ifdef __NetBSD__
   2413 	bus_dmamap_unload(dev->dmat, obj->igo_dmamap);
   2414 #else
   2415 	if (!obj->has_dma_mapping)
   2416 		dma_unmap_sg(&dev->pdev->dev,
   2417 			     obj->pages->sgl, obj->pages->nents,
   2418 			     PCI_DMA_BIDIRECTIONAL);
   2419 #endif
   2420 
   2421 	undo_idling(dev_priv, interruptible);
   2422 }
   2423 
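/*
 * Cache-coloring hook for the GGTT drm_mm, installed only on non-LLC
 * platforms (see i915_gem_setup_global_gtt() below): when a neighbouring
 * node has a different "color" (cache level) from the one being placed,
 * keep a one-page (4096-byte) gap between them by nudging the proposed
 * start forward and, if the following node also differs in color, pulling
 * the proposed end back.
 */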
   2424 static void i915_gtt_color_adjust(struct drm_mm_node *node,
   2425 				  unsigned long color,
   2426 				  unsigned long *start,
   2427 				  unsigned long *end)
   2428 {
   2429 	if (node->color != color)
   2430 		*start += 4096;
   2431 
   2432 	if (!list_empty(&node->node_list)) {
   2433 		node = list_entry(node->node_list.next,
   2434 				  struct drm_mm_node,
   2435 				  node_list);
   2436 		if (node->allocated && node->color != color)
   2437 			*end -= 4096;
   2438 	}
   2439 }
   2440 
   2441 void i915_gem_setup_global_gtt(struct drm_device *dev,
   2442 			       unsigned long start,
   2443 			       unsigned long mappable_end,
   2444 			       unsigned long end)
   2445 {
   2446 	/* Let GEM Manage all of the aperture.
   2447 	 *
   2448 	 * However, leave one page at the end still bound to the scratch page.
   2449 	 * There are a number of places where the hardware apparently prefetches
   2450 	 * past the end of the object, and we've seen multiple hangs with the
   2451 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
   2452 	 * aperture.  One page should be enough to keep any prefetching inside
   2453 	 * of the aperture.
   2454 	 */
   2455 	struct drm_i915_private *dev_priv = dev->dev_private;
   2456 	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
   2457 	struct drm_mm_node *entry;
   2458 	struct drm_i915_gem_object *obj;
   2459 	unsigned long hole_start, hole_end;
   2460 
   2461 	BUG_ON(mappable_end > end);
   2462 
   2463 	/* Subtract the guard page ... */
   2464 	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
   2465 	if (!HAS_LLC(dev))
   2466 		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
   2467 
   2468 	/* Mark any preallocated objects as occupied */
   2469 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
   2470 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
   2471 		int ret;
   2472 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
   2473 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
   2474 
   2475 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
   2476 		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
   2477 		if (ret)
   2478 			DRM_DEBUG_KMS("Reservation failed\n");
   2479 		obj->has_global_gtt_mapping = 1;
   2480 	}
   2481 
   2482 	dev_priv->gtt.base.start = start;
   2483 	dev_priv->gtt.base.total = end - start;
   2484 
   2485 	/* Clear any non-preallocated blocks */
   2486 	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
   2487 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
   2488 			      hole_start, hole_end);
   2489 		ggtt_vm->clear_range(ggtt_vm, hole_start,
   2490 				     hole_end - hole_start, true);
   2491 	}
   2492 
   2493 	/* And finally clear the reserved guard page */
   2494 	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
   2495 }
   2496 
   2497 void i915_gem_init_global_gtt(struct drm_device *dev)
   2498 {
   2499 	struct drm_i915_private *dev_priv = dev->dev_private;
   2500 	unsigned long gtt_size, mappable_size;
   2501 
   2502 	gtt_size = dev_priv->gtt.base.total;
   2503 	mappable_size = dev_priv->gtt.mappable_end;
   2504 
   2505 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
   2506 }
   2507 
   2508 static int setup_scratch_page(struct drm_device *dev)
   2509 {
   2510 	struct drm_i915_private *dev_priv = dev->dev_private;
   2511 #ifdef __NetBSD__
   2512 	int nsegs;
   2513 	int error;
   2514 
   2515 	error = bus_dmamem_alloc(dev->dmat, PAGE_SIZE, PAGE_SIZE, 0,
   2516 	    &dev_priv->gtt.base.scratch.seg, 1, &nsegs, BUS_DMA_WAITOK);
   2517 	if (error)
   2518 		goto fail0;
   2519 	KASSERT(nsegs == 1);
   2520 
   2521 	error = bus_dmamap_create(dev->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
   2522 	    BUS_DMA_WAITOK, &dev_priv->gtt.base.scratch.map);
   2523 	if (error)
   2524 		goto fail1;
   2525 
   2526 	error = bus_dmamap_load_raw(dev->dmat, dev_priv->gtt.base.scratch.map,
   2527 	    &dev_priv->gtt.base.scratch.seg, 1, PAGE_SIZE, BUS_DMA_WAITOK);
   2528 	if (error)
   2529 		goto fail2;
   2530 
   2531 	/* Success!  */
   2532 	dev_priv->gtt.base.scratch.addr =
   2533 	    dev_priv->gtt.base.scratch.map->dm_segs[0].ds_addr;
   2534 	return 0;
   2535 
   2536 fail3: __unused
   2537 	dev_priv->gtt.base.scratch.addr = 0;
   2538 	bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
   2539 fail2:	bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
   2540 fail1:	bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
   2541 fail0:	KASSERT(error);
   2542 	/* XXX errno NetBSD->Linux */
   2543 	return -error;
   2544 #else
   2545 	struct page *page;
   2546 	dma_addr_t dma_addr;
   2547 
   2548 	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
   2549 	if (page == NULL)
   2550 		return -ENOMEM;
   2551 	get_page(page);
   2552 	set_pages_uc(page, 1);
   2553 
   2554 #ifdef CONFIG_INTEL_IOMMU
   2555 	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
   2556 				PCI_DMA_BIDIRECTIONAL);
   2557 	if (pci_dma_mapping_error(dev->pdev, dma_addr))
   2558 		return -EINVAL;
   2559 #else
   2560 	dma_addr = page_to_phys(page);
   2561 #endif
   2562 	dev_priv->gtt.base.scratch.page = page;
   2563 	dev_priv->gtt.base.scratch.addr = dma_addr;
   2564 
   2565 	return 0;
   2566 #endif
   2567 }
   2568 
   2569 static void teardown_scratch_page(struct drm_device *dev)
   2570 {
   2571 	struct drm_i915_private *dev_priv = dev->dev_private;
   2572 #ifdef __NetBSD__
   2573 
   2574 	dev_priv->gtt.base.scratch.addr = 0;
   2575 	bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
   2576 	bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
   2577 	bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
   2578 #else
   2579 	struct page *page = dev_priv->gtt.base.scratch.page;
   2580 
   2581 	set_pages_wb(page, 1);
   2582 	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
   2583 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
   2584 	put_page(page);
   2585 	__free_page(page);
   2586 #endif
   2587 }
   2588 
   2589 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
   2590 {
   2591 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
   2592 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
   2593 	return snb_gmch_ctl << 20;
   2594 }
   2595 
   2596 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
   2597 {
   2598 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
   2599 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
   2600 	if (bdw_gmch_ctl)
   2601 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
   2602 	return bdw_gmch_ctl << 20;
   2603 }
   2604 
   2605 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
   2606 {
   2607 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
   2608 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
   2609 	return snb_gmch_ctl << 25; /* 32 MB units */
   2610 }
   2611 
   2612 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
   2613 {
   2614 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
   2615 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
   2616 	return bdw_gmch_ctl << 25; /* 32 MB units */
   2617 }
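
/*
 * Worked example for the GMCH control decoding above (illustrative values):
 * on gen6/gen7 a GGMS field of 2 yields 2 << 20 = 2 MiB of GTT; at 4 bytes
 * per PTE that is 512 Ki entries, which gen6_gmch_probe() below turns into
 * 512 Ki * 4 KiB = 2 GiB of GTT address space.  On gen8 the field is a
 * power-of-two exponent, so 2 means 1 << 2 = 4 MiB of GTT, with 8-byte
 * PTEs.  The GMS stolen-memory field is in 32 MiB units on both, hence the
 * shift by 25.
 */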
   2618 
   2619 static int ggtt_probe_common(struct drm_device *dev,
   2620 			     size_t gtt_size)
   2621 {
   2622 	struct drm_i915_private *dev_priv = dev->dev_private;
   2623 	phys_addr_t gtt_phys_addr;
   2624 	int ret;
   2625 
   2626 	/* For Modern GENs the PTEs and register space are split in the BAR */
   2627 	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
   2628 		(pci_resource_len(dev->pdev, 0) / 2);
   2629 
   2630 #ifdef __NetBSD__
   2631 	dev_priv->gtt.bst = dev->pdev->pd_pa.pa_memt;
   2632 	/* XXX errno NetBSD->Linux */
   2633 	ret = -bus_space_map(dev_priv->gtt.bst, gtt_phys_addr, gtt_size,
   2634 	    BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->gtt.bsh);
   2635 	if (ret) {
   2636 		DRM_ERROR("Failed to map the graphics translation table: %d\n",
   2637 		    ret);
   2638 		return ret;
   2639 	}
   2640 	dev_priv->gtt.size = gtt_size;
   2641 #else
   2642 	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
   2643 	if (!dev_priv->gtt.gsm) {
   2644 		DRM_ERROR("Failed to map the gtt page table\n");
   2645 		return -ENOMEM;
   2646 	}
   2647 #endif
   2648 
   2649 	ret = setup_scratch_page(dev);
   2650 	if (ret) {
   2651 		DRM_ERROR("Scratch setup failed\n");
   2652 		/* iounmap will also get called at remove, but meh */
   2653 #ifdef __NetBSD__
   2654 		bus_space_unmap(dev_priv->gtt.bst, dev_priv->gtt.bsh,
   2655 		    dev_priv->gtt.size);
   2656 #else
   2657 		iounmap(dev_priv->gtt.gsm);
   2658 #endif
   2659 	}
   2660 
   2661 	return ret;
   2662 }
   2663 
   2664 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
   2665  * bits. When using advanced contexts each context stores its own PAT, but
   2666  * writing this data shouldn't be harmful even in those cases. */
   2667 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
   2668 {
   2669 #define GEN8_PPAT_UC		(0<<0)
   2670 #define GEN8_PPAT_WC		(1<<0)
   2671 #define GEN8_PPAT_WT		(2<<0)
   2672 #define GEN8_PPAT_WB		(3<<0)
   2673 #define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
   2674 /* FIXME(BDW): Bspec is completely confused about cache control bits. */
   2675 #define GEN8_PPAT_LLC		(1<<2)
   2676 #define GEN8_PPAT_LLCELLC	(2<<2)
   2677 #define GEN8_PPAT_LLCeLLC	(3<<2)
   2678 #define GEN8_PPAT_AGE(x)	(x<<4)
   2679 #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
   2680 	uint64_t pat;
   2681 
   2682 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
   2683 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
   2684 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
   2685 	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
   2686 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
   2687 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
   2688 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
   2689 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
   2690 
   2691 	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
   2692 	 * write would work. */
   2693 	I915_WRITE(GEN8_PRIVATE_PAT, pat);
   2694 	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
   2695 }
   2696 
   2697 static int gen8_gmch_probe(struct drm_device *dev,
   2698 			   size_t *gtt_total,
   2699 			   size_t *stolen,
   2700 			   phys_addr_t *mappable_base,
   2701 			   unsigned long *mappable_end)
   2702 {
   2703 	struct drm_i915_private *dev_priv = dev->dev_private;
   2704 	unsigned int gtt_size;
   2705 	u16 snb_gmch_ctl;
   2706 	int ret;
   2707 
   2708 	/* TODO: We're not aware of mappable constraints on gen8 yet */
   2709 	*mappable_base = pci_resource_start(dev->pdev, 2);
   2710 	*mappable_end = pci_resource_len(dev->pdev, 2);
   2711 
   2712 #ifndef __NetBSD__
   2713 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
   2714 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
   2715 #endif
   2716 
   2717 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
   2718 
   2719 	*stolen = gen8_get_stolen_size(snb_gmch_ctl);
   2720 
   2721 	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
   2722 	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
   2723 
   2724 	gen8_setup_private_ppat(dev_priv);
   2725 
   2726 	ret = ggtt_probe_common(dev, gtt_size);
   2727 
   2728 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
   2729 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
   2730 
   2731 	/* XXX 39-bit addresses?  Really?  See pci_set_dma_mask above...  */
   2732 	dev_priv->gtt.max_paddr = __BITS(38, 0);
   2733 
   2734 	return ret;
   2735 }
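
/*
 * Regarding the XXX in gen8_gmch_probe() above: __BITS(38, 0) is a mask
 * covering bits 0..38, i.e. a 39-bit physical-address limit, which matches
 * the DMA_BIT_MASK(39) requested on the Linux path of the same function, so
 * the two paths agree on the gen8 addressing limit.
 */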
   2736 
   2737 static int gen6_gmch_probe(struct drm_device *dev,
   2738 			   size_t *gtt_total,
   2739 			   size_t *stolen,
   2740 			   phys_addr_t *mappable_base,
   2741 			   unsigned long *mappable_end)
   2742 {
   2743 	struct drm_i915_private *dev_priv = dev->dev_private;
   2744 	unsigned int gtt_size;
   2745 	u16 snb_gmch_ctl;
   2746 	int ret;
   2747 
   2748 	*mappable_base = pci_resource_start(dev->pdev, 2);
   2749 	*mappable_end = pci_resource_len(dev->pdev, 2);
   2750 
   2751 	/* 64/512MB is the current min/max we actually know of, but this is just
   2752 	 * a coarse sanity check.
   2753 	 */
   2754 	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
   2755 		DRM_ERROR("Unknown GMADR size (%lx)\n",
   2756 			  dev_priv->gtt.mappable_end);
   2757 		return -ENXIO;
   2758 	}
   2759 
   2760 #ifndef __NetBSD__
   2761 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
   2762 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
   2763 #endif
   2764 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
   2765 
   2766 	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
   2767 
   2768 	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
   2769 	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
   2770 
   2771 	ret = ggtt_probe_common(dev, gtt_size);
   2772 
   2773 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
   2774 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
   2775 
   2776 	dev_priv->gtt.max_paddr = __BITS(39, 0);
   2777 
   2778 	return ret;
   2779 }
   2780 
   2781 static void gen6_gmch_remove(struct i915_address_space *vm)
   2782 {
   2784 	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
   2785 
   2786 	drm_mm_takedown(&vm->mm);
   2787 #ifdef __NetBSD__
   2788 	bus_space_unmap(gtt->bst, gtt->bsh, gtt->size);
   2789 #else
   2790 	iounmap(gtt->gsm);
   2791 #endif
   2792 	teardown_scratch_page(vm->dev);
   2793 }
   2794 
   2795 static int i915_gmch_probe(struct drm_device *dev,
   2796 			   size_t *gtt_total,
   2797 			   size_t *stolen,
   2798 			   phys_addr_t *mappable_base,
   2799 			   unsigned long *mappable_end)
   2800 {
   2801 	struct drm_i915_private *dev_priv = dev->dev_private;
   2802 	int ret;
   2803 
   2804 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
   2805 	if (!ret) {
   2806 		DRM_ERROR("failed to set up gmch\n");
   2807 		return -EIO;
   2808 	}
   2809 
   2810 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
   2811 
   2812 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
   2813 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
   2814 
   2815 	if (unlikely(dev_priv->gtt.do_idle_maps))
   2816 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
   2817 
   2818 	if (INTEL_INFO(dev)->gen <= 2)
   2819 		dev_priv->gtt.max_paddr = __BITS(29, 0);
   2820 	else if ((INTEL_INFO(dev)->gen <= 3) ||
   2821 	    IS_BROADWATER(dev) || IS_CRESTLINE(dev))
   2822 		dev_priv->gtt.max_paddr = __BITS(31, 0);
   2823 	else if (INTEL_INFO(dev)->gen <= 5)
   2824 		dev_priv->gtt.max_paddr = __BITS(35, 0);
   2825 	else
   2826 		dev_priv->gtt.max_paddr = __BITS(39, 0);
   2827 
   2828 	return 0;
   2829 }
   2830 
   2831 static void i915_gmch_remove(struct i915_address_space *vm)
   2832 {
   2833 	intel_gmch_remove();
   2834 }
   2835 
   2836 int i915_gem_gtt_init(struct drm_device *dev)
   2837 {
   2838 	struct drm_i915_private *dev_priv = dev->dev_private;
   2839 	struct i915_gtt *gtt = &dev_priv->gtt;
   2840 	int ret;
   2841 
   2842 	if (INTEL_INFO(dev)->gen <= 5) {
   2843 		gtt->gtt_probe = i915_gmch_probe;
   2844 		gtt->base.cleanup = i915_gmch_remove;
   2845 	} else if (INTEL_INFO(dev)->gen < 8) {
   2846 		gtt->gtt_probe = gen6_gmch_probe;
   2847 		gtt->base.cleanup = gen6_gmch_remove;
   2848 		if (IS_HASWELL(dev) && dev_priv->ellc_size)
   2849 			gtt->base.pte_encode = iris_pte_encode;
   2850 		else if (IS_HASWELL(dev))
   2851 			gtt->base.pte_encode = hsw_pte_encode;
   2852 		else if (IS_VALLEYVIEW(dev))
   2853 			gtt->base.pte_encode = byt_pte_encode;
   2854 		else if (INTEL_INFO(dev)->gen >= 7)
   2855 			gtt->base.pte_encode = ivb_pte_encode;
   2856 		else
   2857 			gtt->base.pte_encode = snb_pte_encode;
   2858 	} else {
   2859 		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
   2860 		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
   2861 	}
   2862 
   2863 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
   2864 			     &gtt->mappable_base, &gtt->mappable_end);
   2865 	if (ret)
   2866 		return ret;
   2867 
   2868 #ifdef __NetBSD__
   2869 	dev_priv->gtt.pgfl = x86_select_freelist(dev_priv->gtt.max_paddr);
   2870 	ret = drm_limit_dma_space(dev, 0, dev_priv->gtt.max_paddr);
   2871 	if (ret) {
   2872 		DRM_ERROR("Unable to limit DMA paddr allocations: %d!\n", ret);
   2873 		gtt->base.cleanup(&gtt->base);
   2874 		return ret;
   2875 	}
   2876 #endif
   2877 
   2878 	gtt->base.dev = dev;
   2879 
   2880 	/* GMADR is the PCI mmio aperture into the global GTT. */
   2881 	DRM_INFO("Memory usable by graphics device = %zdM\n",
   2882 		 gtt->base.total >> 20);
   2883 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
   2884 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
   2885 	/*
   2886 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
   2887 	 * user's requested state against the hardware/driver capabilities.  We
   2888 	 * do this now so that we can print out any log messages once rather
   2889 	 * than every time we check intel_enable_ppgtt().
   2890 	 */
   2891 	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
   2892 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
   2893 
   2894 	return 0;
   2895 }
   2896 
   2897 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
   2898 					      struct i915_address_space *vm)
   2899 {
   2900 	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
   2901 	if (vma == NULL)
   2902 		return ERR_PTR(-ENOMEM);
   2903 
   2904 	INIT_LIST_HEAD(&vma->vma_link);
   2905 	INIT_LIST_HEAD(&vma->mm_list);
   2906 	INIT_LIST_HEAD(&vma->exec_list);
   2907 	vma->vm = vm;
   2908 	vma->obj = obj;
   2909 
   2910 	switch (INTEL_INFO(vm->dev)->gen) {
   2911 	case 8:
   2912 	case 7:
   2913 	case 6:
   2914 		if (i915_is_ggtt(vm)) {
   2915 			vma->unbind_vma = ggtt_unbind_vma;
   2916 			vma->bind_vma = ggtt_bind_vma;
   2917 		} else {
   2918 			vma->unbind_vma = ppgtt_unbind_vma;
   2919 			vma->bind_vma = ppgtt_bind_vma;
   2920 		}
   2921 		break;
   2922 	case 5:
   2923 	case 4:
   2924 	case 3:
   2925 	case 2:
   2926 		BUG_ON(!i915_is_ggtt(vm));
   2927 		vma->unbind_vma = i915_ggtt_unbind_vma;
   2928 		vma->bind_vma = i915_ggtt_bind_vma;
   2929 		break;
   2930 	default:
   2931 		BUG();
   2932 	}
   2933 
   2934 	/* Keep GGTT vmas first to make debug easier */
   2935 	if (i915_is_ggtt(vm))
   2936 		list_add(&vma->vma_link, &obj->vma_list);
   2937 	else
   2938 		list_add_tail(&vma->vma_link, &obj->vma_list);
   2939 
   2940 	return vma;
   2941 }
   2942 
   2943 struct i915_vma *
   2944 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
   2945 				  struct i915_address_space *vm)
   2946 {
   2947 	struct i915_vma *vma;
   2948 
   2949 	vma = i915_gem_obj_to_vma(obj, vm);
   2950 	if (!vma)
   2951 		vma = __i915_gem_vma_create(obj, vm);
   2952 
   2953 	return vma;
   2954 }
   2955