/**************************************************************************

Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
Copyright © 2002 by David Dawes.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sub license, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 *   David Dawes <dawes@xfree86.org>
 *
 * Updated for Dual Head capabilities:
 *   Alan Hourihane <alanh@tungstengraphics.com>
 */

/**
 * @file intel_memory.c
 *
 * This is the video memory allocator.  Our memory allocation differs from
 * that of other graphics chips, where you have a fixed amount of graphics
 * memory available that you want to put to the best use.  Instead, we have
 * almost no memory pre-allocated, and we have to choose an appropriate
 * amount of system memory to use.
 *
 * The allocations we might do:
 *
 * - Ring buffer
 * - HW cursor block (either one block or four)
 * - Overlay registers
 * - Front buffer (screen 1)
 * - Front buffer (screen 2, only in zaphod mode)
 * - Back/depth buffer (3D only)
 * - Compatibility texture pool (optional, more is always better)
 * - New texture pool (optional, more is always better; aperture allocation
 *     only)
 *
 * The user may request a specific amount of memory to be used
 * (intel->pEnt->videoRam != 0), in which case allocations have to fit within
 * that much aperture.  If not, the individual allocations will be
 * automatically sized and made to fit within the maximum aperture size.
 * Only the memory actually used (not alignment padding) will have AGP
 * memory allocated for it.
 *
 * Given that the allocations listed are generally a page or larger, our
 * allocator only returns page-aligned offsets, simplifying the memory
 * binding process.  For smaller allocations, the acceleration architecture's
 * linear allocator is preferred.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>

#include "xorg-server.h"
#include "xf86.h"
#include "xf86_OSproc.h"

#include "intel.h"
#include "i915_drm.h"

/**
 * Returns the fence size for a tiled area of the given size.
 */
unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size)
{
	unsigned long i;
	unsigned long start;

	if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) {
		/* The 965 can have fences at any page boundary. */
		return ALIGN(size, 4096);
	} else {
		/* Round the size up to a power of two, starting from the
		 * minimum fence size for this generation.
		 */
		if (IS_GEN3(intel))
			start = MB(1);
		else
			start = KB(512);

		for (i = start; i < size; i <<= 1) ;

		return i;
	}
}
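
/*
 * Worked example (illustrative only, not used by the driver): a 300 KiB
 * tiled surface is already page aligned, so on gen4+ (or with relaxed
 * fencing) intel_get_fence_size() returns it unchanged, while on gen3 it
 * is rounded up to the 1 MiB minimum fence and on gen2 to 512 KiB.
 */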

/**
 * On some chips the pitch of a tiled surface has to be a power of two (at
 * least one tile wide), so calculate the required pitch here.
 */
unsigned long
intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch,
		     uint32_t tiling_mode)
{
	unsigned long i;
	unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512;

	if (tiling_mode == I915_TILING_NONE)
		return pitch;

	/* 965+ is flexible */
	if (INTEL_INFO(intel)->gen >= 040)
		return ALIGN(pitch, tile_width);

	/* Pre-965 needs a power-of-two pitch */
	for (i = tile_width; i < pitch; i <<= 1) ;

	return i;
}
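
/*
 * Worked example (illustrative only): an X-tiled surface with a 7000 byte
 * pitch is padded to 7168 bytes (the next 512 byte tile boundary) on gen4+,
 * but on pre-965 hardware it must grow to the next power of two, 8192 bytes.
 */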

Bool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int limit;

	/* The 8xx spec specifies an 8K limit, but tests show a larger limit
	   in non-tiling mode, which makes large monitors work. */
	if (tiling) {
		if (IS_GEN2(intel))
			limit = KB(8);
		else if (IS_GEN3(intel))
			limit = KB(8);
		else if (IS_GEN4(intel))
			limit = KB(16);
		else
			limit = KB(32);
	} else
		limit = KB(32);

	return stride <= limit;
}
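
/*
 * For example (illustrative only): a 2560 pixel wide, 32 bpp scanout has a
 * 10240 byte stride, which exceeds the 8 KiB tiled limit on gen2/3 but fits
 * the 32 KiB untiled limit, so it can still be displayed untiled.
 */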

static size_t
agp_aperture_size(struct pci_device *dev, int gen)
{
	return dev->regions[gen < 030 ? 0 : 2].size;
}
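
/*
 * Note (to the best of my knowledge): the graphics aperture (GMADR) lives in
 * PCI BAR 0 on gen2 and moved to BAR 2 on gen3 and later, which is why the
 * region index above depends on the generation.
 */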

void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	size_t agp_size = agp_aperture_size(xf86GetPciInfoForEntity(intel->pEnt->index),
					    INTEL_INFO(intel)->gen);

	/* The chances of being able to mmap an object larger than
	 * agp_size/2 are slim.  Moreover, we may be forced to fall back
	 * to GTT mappings for the source, a mask and the destination all
	 * at once, and every one of them needs to fit into the aperture.
	 */
	intel->max_gtt_map_size = agp_size / 4;

	/* Let objects be tiled up to the size where only 4 would fit in
	 * the aperture, presuming best case alignment.  Also if we
	 * cannot mmap it using the GTT we will be stuck. */
	intel->max_tiling_size = intel->max_gtt_map_size;

	/* Large BOs will tend to hit SW fallbacks frequently, and also will
	 * tend to fail to successfully map when doing SW fallbacks because we
	 * overcommit address space for BO access, or worse cause aperture
	 * thrashing.
	 */
	intel->max_bo_size = intel->max_gtt_map_size;
}
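
/*
 * For example (illustrative only): with a 256 MiB aperture, max_gtt_map_size,
 * max_tiling_size and max_bo_size are all capped at 64 MiB.
 */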

unsigned int
intel_compute_size(struct intel_screen_private *intel,
                   int w, int h, int bpp, unsigned usage,
                   uint32_t *tiling, int *stride)
{
	int pitch, size;

	if (*tiling != I915_TILING_NONE) {
		/* First check whether tiling is necessary. */
		pitch = (w * bpp + 7) / 8;
		pitch = ALIGN(pitch, 64);
		size = pitch * ALIGN(h, 2);
		if (INTEL_INFO(intel)->gen < 040) {
			/* Gen 2/3 has a maximum stride for tiling of
			 * 8192 bytes.
			 */
			if (pitch > KB(8))
				*tiling = I915_TILING_NONE;

			/* Narrower than half a tile? */
			if (pitch < 256)
				*tiling = I915_TILING_NONE;

			/* Older hardware requires fences to be power-of-two
			 * sized, with a minimum of 1 MiB, which causes
			 * massive overallocation for small textures.
			 */
			if (size < 1024*1024/2 && !intel->has_relaxed_fencing)
				*tiling = I915_TILING_NONE;
		} else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) {
			/* Disable tiling beneath a page size; we will not see
			 * any benefit from reducing TLB misses and instead
			 * just incur extra cost when we require a fence.
			 */
			*tiling = I915_TILING_NONE;
		}
	}

	pitch = (w * bpp + 7) / 8;
	if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256)
		*tiling = I915_TILING_NONE;

	if (*tiling != I915_TILING_NONE) {
		int aligned_h, tile_height;

		if (IS_GEN2(intel))
			tile_height = 16;
		else if (*tiling == I915_TILING_X)
			tile_height = 8;
		else
			tile_height = 32;
		aligned_h = ALIGN(h, tile_height);

		*stride = intel_get_fence_pitch(intel,
						ALIGN(pitch, 512),
						*tiling);

		/* Round the object up to the size of the fence it will live in
		 * if necessary.  We could potentially make the kernel allocate
		 * a larger aperture space and just bind the subset of pages in,
		 * but this is easier and also keeps us out of trouble (as much)
		 * with drm_intel_bufmgr_check_aperture().
		 */
		size = intel_get_fence_size(intel, *stride * aligned_h);

		if (size > intel->max_tiling_size)
			*tiling = I915_TILING_NONE;
	}

	if (*tiling == I915_TILING_NONE) {
		/* We only require a 64 byte alignment for scanouts, but
		 * a 256 byte alignment for sharing with PRIME.
		 */
		*stride = ALIGN(pitch, 256);
		/* Round the height up so that the GPU's access to a 2x2 aligned
		 * subspan doesn't address an invalid page offset beyond the
		 * end of the GTT.
		 */
		size = *stride * ALIGN(h, 2);
	}

	return size;
}
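
/*
 * Worked example (illustrative only): a 1920x1080 pixmap at 32 bpp wanting
 * X tiling on gen4+ ends up with a 7680 byte stride (already a multiple of
 * the 512 byte tile width) and a size of 7680 * 1080 = 8294400 bytes, which
 * is page aligned, so no further fence rounding is needed.
 */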

drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn,
					 int width, int height, int cpp,
					 int *out_stride,
					 uint32_t *out_tiling)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	uint32_t tiling;
	int stride, size;
	drm_intel_bo *bo;

	intel_set_gem_max_sizes(scrn);

	if (intel->tiling & INTEL_TILING_FB)
		tiling = I915_TILING_X;
	else
		tiling = I915_TILING_NONE;

retry:
	size = intel_compute_size(intel,
                                  width, height,
                                  intel->cpp*8, 0,
                                  &tiling, &stride);
	if (!intel_check_display_stride(scrn, stride, tiling)) {
		if (tiling != I915_TILING_NONE) {
			tiling = I915_TILING_NONE;
			goto retry;
		}

		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
			   "Front buffer stride %d kB "
			   "exceeds display limit\n", stride / 1024);
		return NULL;
	}

	bo = drm_intel_bo_alloc(intel->bufmgr, "front buffer", size, 0);
	if (bo == NULL)
		return NULL;

	if (tiling != I915_TILING_NONE)
		drm_intel_bo_set_tiling(bo, &tiling, stride);

	xf86DrvMsg(scrn->scrnIndex, X_INFO,
		   "Allocated new frame buffer %dx%d stride %d, %s\n",
		   width, height, stride,
		   tiling == I915_TILING_NONE ? "untiled" : "tiled");

	drm_intel_bo_disable_reuse(bo);

	*out_stride = stride;
	*out_tiling = tiling;
	return bo;
}
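
/*
 * A minimal usage sketch (hypothetical caller, kept as a comment so it does
 * not affect the build):
 *
 *	int stride;
 *	uint32_t tiling;
 *	drm_intel_bo *front = intel_allocate_framebuffer(scrn,
 *							 scrn->virtualX,
 *							 scrn->virtualY,
 *							 intel->cpp,
 *							 &stride, &tiling);
 *	if (front == NULL)
 *		return FALSE;
 */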