1/************************************************************************** 2 3Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. 4Copyright © 2002 by David Dawes. 5 6All Rights Reserved. 7 8Permission is hereby granted, free of charge, to any person obtaining a 9copy of this software and associated documentation files (the 10"Software"), to deal in the Software without restriction, including 11without limitation the rights to use, copy, modify, merge, publish, 12distribute, sub license, and/or sell copies of the Software, and to 13permit persons to whom the Software is furnished to do so, subject to 14the following conditions: 15 16The above copyright notice and this permission notice (including the 17next paragraph) shall be included in all copies or substantial portions 18of the Software. 19 20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 21OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 23IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR 24ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 28**************************************************************************/ 29 30/* 31 * Authors: 32 * Keith Whitwell <keith@tungstengraphics.com> 33 * David Dawes <dawes@xfree86.org> 34 * 35 * Updated for Dual Head capabilities: 36 * Alan Hourihane <alanh@tungstengraphics.com> 37 */ 38 39/** 40 * @file intel_memory.c 41 * 42 * This is the video memory allocator. Our memory allocation is different from 43 * other graphics chips, where you have a fixed amount of graphics memory 44 * available that you want to put to the best use. Instead, we have almost no 45 * memory pre-allocated, and we have to choose an appropriate amount of sytem 46 * memory to use. 47 * 48 * The allocations we might do: 49 * 50 * - Ring buffer 51 * - HW cursor block (either one block or four) 52 * - Overlay registers 53 * - Front buffer (screen 1) 54 * - Front buffer (screen 2, only in zaphod mode) 55 * - Back/depth buffer (3D only) 56 * - Compatibility texture pool (optional, more is always better) 57 * - New texture pool (optional, more is always better. aperture allocation 58 * only) 59 * 60 * The user may request a specific amount of memory to be used 61 * (intel->pEnt->videoRam != 0), in which case allocations have to fit within 62 * that much aperture. If not, the individual allocations will be 63 * automatically sized, and will be fit within the maximum aperture size. 64 * Only the actual memory used (not alignment padding) will get actual AGP 65 * memory allocated. 66 * 67 * Given that the allocations listed are generally a page or more than a page, 68 * our allocator will only return page-aligned offsets, simplifying the memory 69 * binding process. For smaller allocations, the acceleration architecture's 70 * linear allocator is preferred. 71 */ 72 73#ifdef HAVE_CONFIG_H 74#include "config.h" 75#endif 76 77#include <assert.h> 78#include <inttypes.h> 79#include <string.h> 80#include <sys/types.h> 81#include <sys/ioctl.h> 82 83#include "xorg-server.h" 84#include "xf86.h" 85#include "xf86_OSproc.h" 86 87#include "intel.h" 88#include "i915_drm.h" 89 90/** 91 * Returns the fence size for a tiled area of the given size. 92 */ 93unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size) 94{ 95 unsigned long i; 96 unsigned long start; 97 98 if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) { 99 /* The 965 can have fences at any page boundary. */ 100 return ALIGN(size, 4096); 101 } else { 102 /* Align the size to a power of two greater than the smallest fence 103 * size. 104 */ 105 if (IS_GEN3(intel)) 106 start = MB(1); 107 else 108 start = KB(512); 109 110 for (i = start; i < size; i <<= 1) ; 111 112 return i; 113 } 114} 115 116/** 117 * On some chips, pitch width has to be a power of two tile width, so 118 * calculate that here. 119 */ 120unsigned long 121intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch, 122 uint32_t tiling_mode) 123{ 124 unsigned long i; 125 unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512; 126 127 if (tiling_mode == I915_TILING_NONE) 128 return pitch; 129 130 /* 965+ is flexible */ 131 if (INTEL_INFO(intel)->gen >= 040) 132 return ALIGN(pitch, tile_width); 133 134 /* Pre-965 needs power of two tile width */ 135 for (i = tile_width; i < pitch; i <<= 1) ; 136 137 return i; 138} 139 140Bool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling) 141{ 142 intel_screen_private *intel = intel_get_screen_private(scrn); 143 int limit; 144 145 /* 8xx spec has always 8K limit, but tests show larger limit in 146 non-tiling mode, which makes large monitor work. */ 147 if (tiling) { 148 if (IS_GEN2(intel)) 149 limit = KB(8); 150 else if (IS_GEN3(intel)) 151 limit = KB(8); 152 else if (IS_GEN4(intel)) 153 limit = KB(16); 154 else 155 limit = KB(32); 156 } else 157 limit = KB(32); 158 159 return stride <= limit; 160} 161 162static size_t 163agp_aperture_size(struct pci_device *dev, int gen) 164{ 165 return dev->regions[gen < 030 ? 0 : 2].size; 166} 167 168void intel_set_gem_max_sizes(ScrnInfoPtr scrn) 169{ 170 intel_screen_private *intel = intel_get_screen_private(scrn); 171 size_t agp_size = agp_aperture_size(xf86GetPciInfoForEntity(intel->pEnt->index), 172 INTEL_INFO(intel)->gen); 173 174 /* The chances of being able to mmap an object larger than 175 * agp_size/2 are slim. Moreover, we may be forced to fallback 176 * using a gtt mapping as both the source and a mask, as well 177 * as a destination and all need to fit into the aperture. 178 */ 179 intel->max_gtt_map_size = agp_size / 4; 180 181 /* Let objects be tiled up to the size where only 4 would fit in 182 * the aperture, presuming best case alignment. Also if we 183 * cannot mmap it using the GTT we will be stuck. */ 184 intel->max_tiling_size = intel->max_gtt_map_size; 185 186 /* Large BOs will tend to hit SW fallbacks frequently, and also will 187 * tend to fail to successfully map when doing SW fallbacks because we 188 * overcommit address space for BO access, or worse cause aperture 189 * thrashing. 190 */ 191 intel->max_bo_size = intel->max_gtt_map_size; 192} 193 194unsigned int 195intel_compute_size(struct intel_screen_private *intel, 196 int w, int h, int bpp, unsigned usage, 197 uint32_t *tiling, int *stride) 198{ 199 int pitch, size; 200 201 if (*tiling != I915_TILING_NONE) { 202 /* First check whether tiling is necessary. */ 203 pitch = (w * bpp + 7) / 8; 204 pitch = ALIGN(pitch, 64); 205 size = pitch * ALIGN (h, 2); 206 if (INTEL_INFO(intel)->gen < 040) { 207 /* Gen 2/3 has a maximum stride for tiling of 208 * 8192 bytes. 209 */ 210 if (pitch > KB(8)) 211 *tiling = I915_TILING_NONE; 212 213 /* Narrower than half a tile? */ 214 if (pitch < 256) 215 *tiling = I915_TILING_NONE; 216 217 /* Older hardware requires fences to be pot size 218 * aligned with a minimum of 1 MiB, so causes 219 * massive overallocation for small textures. 220 */ 221 if (size < 1024*1024/2 && !intel->has_relaxed_fencing) 222 *tiling = I915_TILING_NONE; 223 } else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) { 224 /* Disable tiling beneath a page size, we will not see 225 * any benefit from reducing TLB misses and instead 226 * just incur extra cost when we require a fence. 227 */ 228 *tiling = I915_TILING_NONE; 229 } 230 } 231 232 pitch = (w * bpp + 7) / 8; 233 if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256) 234 *tiling = I915_TILING_NONE; 235 236 if (*tiling != I915_TILING_NONE) { 237 int aligned_h, tile_height; 238 239 if (IS_GEN2(intel)) 240 tile_height = 16; 241 else if (*tiling == I915_TILING_X) 242 tile_height = 8; 243 else 244 tile_height = 32; 245 aligned_h = ALIGN(h, tile_height); 246 247 *stride = intel_get_fence_pitch(intel, 248 ALIGN(pitch, 512), 249 *tiling); 250 251 /* Round the object up to the size of the fence it will live in 252 * if necessary. We could potentially make the kernel allocate 253 * a larger aperture space and just bind the subset of pages in, 254 * but this is easier and also keeps us out of trouble (as much) 255 * with drm_intel_bufmgr_check_aperture(). 256 */ 257 size = intel_get_fence_size(intel, *stride * aligned_h); 258 259 if (size > intel->max_tiling_size) 260 *tiling = I915_TILING_NONE; 261 } 262 263 if (*tiling == I915_TILING_NONE) { 264 /* We only require a 64 byte alignment for scanouts, but 265 * a 256 byte alignment for sharing with PRIME. 266 */ 267 *stride = ALIGN(pitch, 256); 268 /* Round the height up so that the GPU's access to a 2x2 aligned 269 * subspan doesn't address an invalid page offset beyond the 270 * end of the GTT. 271 */ 272 size = *stride * ALIGN(h, 2); 273 } 274 275 return size; 276} 277 278drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn, 279 int width, int height, int cpp, 280 int *out_stride, 281 uint32_t *out_tiling) 282{ 283 intel_screen_private *intel = intel_get_screen_private(scrn); 284 uint32_t tiling; 285 int stride, size; 286 drm_intel_bo *bo; 287 288 intel_set_gem_max_sizes(scrn); 289 290 if (intel->tiling & INTEL_TILING_FB) 291 tiling = I915_TILING_X; 292 else 293 tiling = I915_TILING_NONE; 294 295retry: 296 size = intel_compute_size(intel, 297 width, height, 298 intel->cpp*8, 0, 299 &tiling, &stride); 300 if (!intel_check_display_stride(scrn, stride, tiling)) { 301 if (tiling != I915_TILING_NONE) { 302 tiling = I915_TILING_NONE; 303 goto retry; 304 } 305 306 xf86DrvMsg(scrn->scrnIndex, X_ERROR, 307 "Front buffer stride %d kB " 308 "exceeds display limit\n", stride / 1024); 309 return NULL; 310 } 311 312 bo = drm_intel_bo_alloc(intel->bufmgr, "front buffer", size, 0); 313 if (bo == NULL) 314 return FALSE; 315 316 if (tiling != I915_TILING_NONE) 317 drm_intel_bo_set_tiling(bo, &tiling, stride); 318 319 xf86DrvMsg(scrn->scrnIndex, X_INFO, 320 "Allocated new frame buffer %dx%d stride %d, %s\n", 321 width, height, stride, 322 tiling == I915_TILING_NONE ? "untiled" : "tiled"); 323 324 drm_intel_bo_disable_reuse(bo); 325 326 *out_stride = stride; 327 *out_tiling = tiling; 328 return bo; 329} 330 331