i915_gem_gtt.c revision 1.1.1.1.8.2
/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has PTE bits 11:4 for physical addr bits 39:32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

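/*
 * Build a gen6+ PTE for a single page: encode the DMA address (address bits
 * 39:32 land in PTE bits 11:4) and translate the requested cache level into
 * the platform's cacheability bits. Haswell uses different encodings for the
 * uncached and LLC+MLC cases, which is handled below.
 */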
static inline gtt_pte_t pte_encode(struct drm_device *dev,
				   dma_addr_t addr,
				   enum i915_cache_level level)
{
	gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		/* Haswell doesn't set L3 this way */
		if (IS_HASWELL(dev))
			pte |= GEN6_PTE_CACHE_LLC;
		else
			pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		if (IS_HASWELL(dev))
			pte |= HSW_PTE_UNCACHED;
		else
			pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}

/* PPGTT support for Sandybridge/Gen6 and later */
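/*
 * Point PPGTT entries [first_entry, first_entry + num_entries) at the scratch
 * page, walking one page table (I915_PPGTT_PT_ENTRIES) at a time.
 */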
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gtt_pte_t *pt_vaddr;
	gtt_pte_t scratch_pte;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
				 I915_CACHE_LLC);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

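/*
 * Set up the aliasing PPGTT: allocate one page per page-directory entry for
 * the page tables, DMA-map them when the hardware needs DMAR, and initialise
 * every PTE to point at the scratch page. The page-directory entries
 * themselves live in a region stolen from the top of the global GTT.
 */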
int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now. */
	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ret;

	ppgtt->dev = dev;
	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		goto err_ppgtt;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	if (dev_priv->mm.gtt->needs_dmar) {
		ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t)
						*ppgtt->num_pd_entries,
					     GFP_KERNEL);
		if (!ppgtt->pt_dma_addr)
			goto err_pt_alloc;

		for (i = 0; i < ppgtt->num_pd_entries; i++) {
			dma_addr_t pt_addr;

			pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i],
					       0, 4096,
					       PCI_DMA_BIDIRECTIONAL);

			if (pci_dma_mapping_error(dev->pdev,
						  pt_addr)) {
				ret = -EIO;
				goto err_pd_pin;
			}
			ppgtt->pt_dma_addr[i] = pt_addr;
		}
	}

	ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

	i915_ppgtt_clear_range(ppgtt, 0,
			       ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);

	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);

	dev_priv->mm.aliasing_ppgtt = ppgtt;

	return 0;

err_pd_pin:
	if (ppgtt->pt_dma_addr) {
		for (i--; i >= 0; i--)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);
err_ppgtt:
	kfree(ppgtt);

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i;

	if (!ppgtt)
		return;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

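/*
 * Write a PTE for every backing page of the object, starting at @first_entry.
 * The scatterlist is walked one DMA segment at a time, crossing page-table
 * boundaries whenever a table fills up.
 */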
static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
					 const struct sg_table *pages,
					 unsigned first_entry,
					 enum i915_cache_level cache_level)
{
	gtt_pte_t *pt_vaddr;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned i, j, m, segment_len;
	dma_addr_t page_addr;
	struct scatterlist *sg;

	/* init sg walking */
	sg = pages->sgl;
	i = 0;
	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
	m = 0;

	while (i < pages->nents) {
		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);

		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
						 cache_level);

			/* grab the next page */
			if (++m == segment_len) {
				if (++i == pages->nents)
					break;

				sg = sg_next(sg);
				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
				m = 0;
			}
		}

		kunmap_atomic(pt_vaddr);

		first_pte = 0;
		act_pd++;
	}
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	i915_ppgtt_insert_sg_entries(ppgtt,
				     obj->pages,
				     obj->gtt_space->start >> PAGE_SHIFT,
				     cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	i915_ppgtt_clear_range(ppgtt,
			       obj->gtt_space->start >> PAGE_SHIFT,
			       obj->base.size >> PAGE_SHIFT);
}

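/*
 * Enable the aliasing PPGTT: write its page-directory entries into the slot
 * reserved for them in the global GTT, apply the gen6/gen7 workaround bits,
 * and turn on PPGTT mode for every ring (GFX_MODE is per-ring on gen7+).
 */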
void i915_gem_init_ppgtt(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	uint32_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	if (!dev_priv->mm.aliasing_ppgtt)
		return;

	pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		if (dev_priv->mm.gtt->needs_dmar)
			pt_addr = ppgtt->pt_dma_addr[i];
		else
			pt_addr = page_to_phys(ppgtt->pt_pages[i]);

		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
}

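/*
 * Some configurations (mm.gtt->do_idle_maps) require the GPU to be idle while
 * GTT DMA mappings are torn down. do_idling() forces that and returns the old
 * mm.interruptible value so undo_idling() can restore it afterwards.
 */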
static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->mm.gtt->do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

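/*
 * Point global GTT entries [first_entry, first_entry + num_entries) at the
 * scratch page. Pre-gen6 hardware goes through the intel-gtt layer instead.
 */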
static void i915_ggtt_clear_range(struct drm_device *dev,
				 unsigned first_entry,
				 unsigned num_entries)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gtt_pte_t scratch_pte;
	gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	int i;

	if (INTEL_INFO(dev)->gen < 6) {
		intel_gtt_clear_range(first_entry, num_entries);
		return;
	}

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

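/*
 * Re-establish the GTT mappings of every bound object, typically after the
 * GTT contents have been lost (e.g. across suspend/resume). Our range is
 * first scrubbed with scratch pages, then each object is rebound.
 */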
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	/* First fill our portion of the GTT with scratch pages */
	i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE,
			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
				  enum i915_cache_level level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct sg_table *st = obj->pages;
	struct scatterlist *sg = st->sgl;
	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
	int unused, i = 0;
	unsigned int len, m = 0;
	dma_addr_t addr;

	for_each_sg(st->sgl, sg, st->nents, unused) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		for (m = 0; m < len; m++) {
			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
			i++;
		}
	}

	BUG_ON(i > max_entries);
	BUG_ON(i != obj->base.size / PAGE_SIZE);

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different (e.g. NUMA-like) access patterns. Therefore, even with the
	 * way we assume the hardware should work, we must keep this posting
	 * read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

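/*
 * Map the object into the global GTT at its allocated offset. Pre-gen6
 * platforms go through the intel-gtt (AGP) layer; gen6+ writes the PTEs
 * directly via gen6_ggtt_bind_object().
 */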
void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	if (INTEL_INFO(dev)->gen < 6) {
		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
			AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
		intel_gtt_insert_sg_entries(obj->pages,
					    obj->gtt_space->start >> PAGE_SHIFT,
					    flags);
	} else {
		gen6_ggtt_bind_object(obj, cache_level);
	}

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	i915_ggtt_clear_range(obj->base.dev,
			      obj->gtt_space->start >> PAGE_SHIFT,
			      obj->base.size >> PAGE_SHIFT);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

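/*
 * drm_mm colour-adjust callback, installed only on platforms without an LLC
 * (see i915_gem_init_global_gtt): keep one page of padding between
 * neighbouring GTT nodes whose cache colouring (cache level) differs.
 */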
static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_init_global_gtt(struct drm_device *dev,
			      unsigned long start,
			      unsigned long mappable_end,
			      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	/* Subtract the guard page ... */
	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* ... but ensure that we clear the entire range. */
	i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE);
}

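/*
 * Allocate the single zeroed, uncached page that unused GTT and PPGTT entries
 * are pointed at, and DMA-map it when an IOMMU is in use.
 */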
static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->mm.gtt->scratch_page = page;
	dev_priv->mm.gtt->scratch_page_dma = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	set_pages_wb(dev_priv->mm.gtt->scratch_page, 1);
	pci_unmap_page(dev->pdev, dev_priv->mm.gtt->scratch_page_dma,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(dev_priv->mm.gtt->scratch_page);
	__free_page(dev_priv->mm.gtt->scratch_page);
}

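/*
 * Decode the GTT size and the stolen-memory size from the GMCH control word
 * (SNB_GMCH_CTRL); the stolen-size encoding changed between gen6 and gen7.
 */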
static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
{
	static const int stolen_decoder[] = {
		0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352};
	snb_gmch_ctl >>= IVB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= IVB_GMCH_GMS_MASK;
	return stolen_decoder[snb_gmch_ctl] << 20;
}

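/*
 * Probe and map the global GTT. Pre-gen6 hardware defers to the intel-gtt
 * (GMCH/AGP) layer; gen6+ reads the GMCH control word directly, sets up the
 * scratch page and ioremaps the PTE array that lives at BAR 0 + 2MB.
 */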
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_bus_addr;
	u16 snb_gmch_ctl;
	int ret;

	/* On modern platforms we need not worry ourselves with the legacy
	 * hostbridge query stuff. Skip it entirely.
	 */
	if (INTEL_INFO(dev)->gen < 6) {
		ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
		if (!ret) {
			DRM_ERROR("failed to set up gmch\n");
			return -EIO;
		}

		dev_priv->mm.gtt = intel_gtt_get();
		if (!dev_priv->mm.gtt) {
			DRM_ERROR("Failed to initialize GTT\n");
			intel_gmch_remove();
			return -ENODEV;
		}
		return 0;
	}

	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
	if (!dev_priv->mm.gtt)
		return -ENOMEM;

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));

#ifdef CONFIG_INTEL_IOMMU
	dev_priv->mm.gtt->needs_dmar = 1;
#endif

	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
	dev_priv->mm.gtt->gma_bus_addr = pci_resource_start(dev->pdev, 2);

	/* i9xx_setup */
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	dev_priv->mm.gtt->gtt_total_entries =
		gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
	if (INTEL_INFO(dev)->gen < 7)
		dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
	else
		dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);

	dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT;
	/* 64/512MB is the current min/max we actually know of, but this is just a
	 * coarse sanity check.
	 */
	if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 ||
	    dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) {
		DRM_ERROR("Unknown GMADR entries (%d)\n",
			  dev_priv->mm.gtt->gtt_mappable_entries);
		ret = -ENXIO;
		goto err_out;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		goto err_out;
	}

	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
	if (!dev_priv->mm.gtt->gtt) {
		DRM_ERROR("Failed to map the gtt page table\n");
		teardown_scratch_page(dev);
		ret = -ENOMEM;
		goto err_out;
	}

	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);

	return 0;

err_out:
	kfree(dev_priv->mm.gtt);
	if (INTEL_INFO(dev)->gen < 6)
		intel_gmch_remove();
	return ret;
}

void i915_gem_gtt_fini(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	iounmap(dev_priv->mm.gtt->gtt);
	teardown_scratch_page(dev);
	if (INTEL_INFO(dev)->gen < 6)
		intel_gmch_remove();
	kfree(dev_priv->mm.gtt);
}