1/**************************************************************************
2
3Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
4Copyright © 2002 by David Dawes.
5
6All Rights Reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a
9copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sub license, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice (including the
17next paragraph) shall be included in all copies or substantial portions
18of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
24ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28**************************************************************************/
29
30/*
31 * Authors:
32 *   Keith Whitwell <keith@tungstengraphics.com>
33 *   David Dawes <dawes@xfree86.org>
34 *
35 * Updated for Dual Head capabilities:
36 *   Alan Hourihane <alanh@tungstengraphics.com>
37 */
38
39/**
40 * @file intel_memory.c
41 *
42 * This is the video memory allocator.  Our memory allocation is different from
43 * other graphics chips, where you have a fixed amount of graphics memory
44 * available that you want to put to the best use.  Instead, we have almost no
45 * memory pre-allocated, and we have to choose an appropriate amount of system
46 * memory to use.
47 *
48 * The allocations we might do:
49 *
50 * - Ring buffer
51 * - HW cursor block (either one block or four)
52 * - Overlay registers
53 * - Front buffer (screen 1)
54 * - Front buffer (screen 2, only in zaphod mode)
55 * - Back/depth buffer (3D only)
56 * - Compatibility texture pool (optional, more is always better)
57 * - New texture pool (optional, more is always better.  aperture allocation
58 *     only)
59 *
60 * The user may request a specific amount of memory to be used
61 * (intel->pEnt->videoRam != 0), in which case allocations have to fit within
62 * that much aperture.  If not, the individual allocations will be
63 * automatically sized, and will be fit within the maximum aperture size.
64 * Only the actual memory used (not alignment padding) will get actual AGP
65 * memory allocated.
66 *
67 * Given that the allocations listed are generally a page or more than a page,
68 * our allocator will only return page-aligned offsets, simplifying the memory
69 * binding process.  For smaller allocations, the acceleration architecture's
70 * linear allocator is preferred.
71 */
72
73#ifdef HAVE_CONFIG_H
74#include "config.h"
75#endif
76
77#include <assert.h>
78#include <inttypes.h>
79#include <string.h>
80#include <sys/types.h>
81#include <sys/ioctl.h>
82
83#include "xorg-server.h"
84#include "xf86.h"
85#include "xf86_OSproc.h"
86
87#include "intel.h"
88#include "i915_drm.h"
89
90/**
91 * Returns the fence size for a tiled area of the given size.
92 */
93unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size)
94{
95	unsigned long i;
96	unsigned long start;
97
98	if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) {
99		/* The 965 can have fences at any page boundary. */
100		return ALIGN(size, 4096);
101	} else {
102		/* Align the size to a power of two greater than the smallest fence
103		 * size.
104		 */
105		if (IS_GEN3(intel))
106			start = MB(1);
107		else
108			start = KB(512);
109
110		for (i = start; i < size; i <<= 1) ;
111
112		return i;
113	}
114}
115
116/**
117 * On some chips, pitch width has to be a power of two tile width, so
118 * calculate that here.
119 */
120unsigned long
121intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch,
122		     uint32_t tiling_mode)
123{
124	unsigned long i;
125	unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512;
126
127	if (tiling_mode == I915_TILING_NONE)
128		return pitch;
129
130	/* 965+ is flexible */
131	if (INTEL_INFO(intel)->gen >= 040)
132		return ALIGN(pitch, tile_width);
133
134	/* Pre-965 needs power of two tile width */
135	for (i = tile_width; i < pitch; i <<= 1) ;
136
137	return i;
138}
139
140Bool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling)
141{
142	intel_screen_private *intel = intel_get_screen_private(scrn);
143	int limit;
144
145	/* 8xx spec has always 8K limit, but tests show larger limit in
146	   non-tiling mode, which makes large monitor work. */
147	if (tiling) {
148		if (IS_GEN2(intel))
149			limit = KB(8);
150		else if (IS_GEN3(intel))
151			limit = KB(8);
152		else if (IS_GEN4(intel))
153			limit = KB(16);
154		else
155			limit = KB(32);
156	} else
157		limit = KB(32);
158
159	return stride <= limit;
160}
161
162static size_t
163agp_aperture_size(struct pci_device *dev, int gen)
164{
165	return dev->regions[gen < 030 ? 0 : 2].size;
166}
167
168void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
169{
170	intel_screen_private *intel = intel_get_screen_private(scrn);
171	size_t agp_size = agp_aperture_size(xf86GetPciInfoForEntity(intel->pEnt->index),
172					    INTEL_INFO(intel)->gen);
173
174	/* The chances of being able to mmap an object larger than
175	 * agp_size/2 are slim. Moreover, we may be forced to fallback
176	 * using a gtt mapping as both the source and a mask, as well
177	 * as a destination and all need to fit into the aperture.
178	 */
179	intel->max_gtt_map_size = agp_size / 4;
180
181	/* Let objects be tiled up to the size where only 4 would fit in
182	 * the aperture, presuming best case alignment. Also if we
183	 * cannot mmap it using the GTT we will be stuck. */
184	intel->max_tiling_size = intel->max_gtt_map_size;
185
186	/* Large BOs will tend to hit SW fallbacks frequently, and also will
187	 * tend to fail to successfully map when doing SW fallbacks because we
188	 * overcommit address space for BO access, or worse cause aperture
189	 * thrashing.
190	 */
191	intel->max_bo_size = intel->max_gtt_map_size;
192}
193
194unsigned int
195intel_compute_size(struct intel_screen_private *intel,
196                   int w, int h, int bpp, unsigned usage,
197                   uint32_t *tiling, int *stride)
198{
199	int pitch, size;
200
201	if (*tiling != I915_TILING_NONE) {
202		/* First check whether tiling is necessary. */
203		pitch = (w * bpp  + 7) / 8;
204		pitch = ALIGN(pitch, 64);
205		size = pitch * ALIGN (h, 2);
206		if (INTEL_INFO(intel)->gen < 040) {
207			/* Gen 2/3 has a maximum stride for tiling of
208			 * 8192 bytes.
209			 */
210			if (pitch > KB(8))
211				*tiling = I915_TILING_NONE;
212
213			/* Narrower than half a tile? */
214			if (pitch < 256)
215				*tiling = I915_TILING_NONE;
216
217			/* Older hardware requires fences to be pot size
218			 * aligned with a minimum of 1 MiB, so causes
219			 * massive overallocation for small textures.
220			 */
221			if (size < 1024*1024/2 && !intel->has_relaxed_fencing)
222				*tiling = I915_TILING_NONE;
223		} else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) {
224			/* Disable tiling beneath a page size, we will not see
225			 * any benefit from reducing TLB misses and instead
226			 * just incur extra cost when we require a fence.
227			 */
228			*tiling = I915_TILING_NONE;
229		}
230	}
231
232	pitch = (w * bpp + 7) / 8;
233	if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256)
234		*tiling = I915_TILING_NONE;
235
236	if (*tiling != I915_TILING_NONE) {
237		int aligned_h, tile_height;
238
239		if (IS_GEN2(intel))
240			tile_height = 16;
241		else if (*tiling == I915_TILING_X)
242			tile_height = 8;
243		else
244			tile_height = 32;
245		aligned_h = ALIGN(h, tile_height);
246
247		*stride = intel_get_fence_pitch(intel,
248						ALIGN(pitch, 512),
249						*tiling);
250
251		/* Round the object up to the size of the fence it will live in
252		 * if necessary.  We could potentially make the kernel allocate
253		 * a larger aperture space and just bind the subset of pages in,
254		 * but this is easier and also keeps us out of trouble (as much)
255		 * with drm_intel_bufmgr_check_aperture().
256		 */
257		size = intel_get_fence_size(intel, *stride * aligned_h);
258
259		if (size > intel->max_tiling_size)
260			*tiling = I915_TILING_NONE;
261	}
262
263	if (*tiling == I915_TILING_NONE) {
264		/* We only require a 64 byte alignment for scanouts, but
265		 * a 256 byte alignment for sharing with PRIME.
266		 */
267		*stride = ALIGN(pitch, 256);
268		/* Round the height up so that the GPU's access to a 2x2 aligned
269		 * subspan doesn't address an invalid page offset beyond the
270		 * end of the GTT.
271		 */
272		size = *stride * ALIGN(h, 2);
273	}
274
275	return size;
276}
277
278drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn,
279					 int width, int height, int cpp,
280					 int *out_stride,
281					 uint32_t *out_tiling)
282{
283	intel_screen_private *intel = intel_get_screen_private(scrn);
284	uint32_t tiling;
285	int stride, size;
286	drm_intel_bo *bo;
287
288	intel_set_gem_max_sizes(scrn);
289
290	if (intel->tiling & INTEL_TILING_FB)
291		tiling = I915_TILING_X;
292	else
293		tiling = I915_TILING_NONE;
294
295retry:
296	size = intel_compute_size(intel,
297                                  width, height,
298                                  intel->cpp*8, 0,
299                                  &tiling, &stride);
300	if (!intel_check_display_stride(scrn, stride, tiling)) {
301		if (tiling != I915_TILING_NONE) {
302			tiling = I915_TILING_NONE;
303			goto retry;
304		}
305
306		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
307			   "Front buffer stride %d kB "
308			   "exceeds display limit\n", stride / 1024);
309		return NULL;
310	}
311
312	bo = drm_intel_bo_alloc(intel->bufmgr, "front buffer", size, 0);
313	if (bo == NULL)
314		return FALSE;
315
316	if (tiling != I915_TILING_NONE)
317		drm_intel_bo_set_tiling(bo, &tiling, stride);
318
319	xf86DrvMsg(scrn->scrnIndex, X_INFO,
320		   "Allocated new frame buffer %dx%d stride %d, %s\n",
321		   width, height, stride,
322		   tiling == I915_TILING_NONE ? "untiled" : "tiled");
323
324	drm_intel_bo_disable_reuse(bo);
325
326	*out_stride = stride;
327	*out_tiling = tiling;
328	return bo;
329}
330
331