103b705cfSriastradh/************************************************************************** 203b705cfSriastradh 303b705cfSriastradhCopyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. 403b705cfSriastradhCopyright © 2002 by David Dawes. 503b705cfSriastradh 603b705cfSriastradhAll Rights Reserved. 703b705cfSriastradh 803b705cfSriastradhPermission is hereby granted, free of charge, to any person obtaining a 903b705cfSriastradhcopy of this software and associated documentation files (the 1003b705cfSriastradh"Software"), to deal in the Software without restriction, including 1103b705cfSriastradhwithout limitation the rights to use, copy, modify, merge, publish, 1203b705cfSriastradhdistribute, sub license, and/or sell copies of the Software, and to 1303b705cfSriastradhpermit persons to whom the Software is furnished to do so, subject to 1403b705cfSriastradhthe following conditions: 1503b705cfSriastradh 1603b705cfSriastradhThe above copyright notice and this permission notice (including the 1703b705cfSriastradhnext paragraph) shall be included in all copies or substantial portions 1803b705cfSriastradhof the Software. 1903b705cfSriastradh 2003b705cfSriastradhTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 2103b705cfSriastradhOR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2203b705cfSriastradhMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 2303b705cfSriastradhIN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR 2403b705cfSriastradhANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 2503b705cfSriastradhTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 2603b705cfSriastradhSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 2703b705cfSriastradh 2803b705cfSriastradh**************************************************************************/ 2903b705cfSriastradh 3003b705cfSriastradh/* 3103b705cfSriastradh * Authors: 3203b705cfSriastradh * Keith Whitwell <keith@tungstengraphics.com> 3303b705cfSriastradh * David Dawes <dawes@xfree86.org> 3403b705cfSriastradh * 3503b705cfSriastradh * Updated for Dual Head capabilities: 3603b705cfSriastradh * Alan Hourihane <alanh@tungstengraphics.com> 3703b705cfSriastradh */ 3803b705cfSriastradh 3903b705cfSriastradh/** 4003b705cfSriastradh * @file intel_memory.c 4103b705cfSriastradh * 4203b705cfSriastradh * This is the video memory allocator. Our memory allocation is different from 4303b705cfSriastradh * other graphics chips, where you have a fixed amount of graphics memory 4403b705cfSriastradh * available that you want to put to the best use. Instead, we have almost no 45fe8aea9eSmrg * memory pre-allocated, and we have to choose an appropriate amount of system 4603b705cfSriastradh * memory to use. 4703b705cfSriastradh * 4803b705cfSriastradh * The allocations we might do: 4903b705cfSriastradh * 5003b705cfSriastradh * - Ring buffer 5103b705cfSriastradh * - HW cursor block (either one block or four) 5203b705cfSriastradh * - Overlay registers 5303b705cfSriastradh * - Front buffer (screen 1) 5403b705cfSriastradh * - Front buffer (screen 2, only in zaphod mode) 5503b705cfSriastradh * - Back/depth buffer (3D only) 5603b705cfSriastradh * - Compatibility texture pool (optional, more is always better) 5703b705cfSriastradh * - New texture pool (optional, more is always better. aperture allocation 5803b705cfSriastradh * only) 5903b705cfSriastradh * 6003b705cfSriastradh * The user may request a specific amount of memory to be used 6103b705cfSriastradh * (intel->pEnt->videoRam != 0), in which case allocations have to fit within 6203b705cfSriastradh * that much aperture. If not, the individual allocations will be 6303b705cfSriastradh * automatically sized, and will be fit within the maximum aperture size. 6403b705cfSriastradh * Only the actual memory used (not alignment padding) will get actual AGP 6503b705cfSriastradh * memory allocated. 6603b705cfSriastradh * 6703b705cfSriastradh * Given that the allocations listed are generally a page or more than a page, 6803b705cfSriastradh * our allocator will only return page-aligned offsets, simplifying the memory 6903b705cfSriastradh * binding process. For smaller allocations, the acceleration architecture's 7003b705cfSriastradh * linear allocator is preferred. 7103b705cfSriastradh */ 7203b705cfSriastradh 7303b705cfSriastradh#ifdef HAVE_CONFIG_H 7403b705cfSriastradh#include "config.h" 7503b705cfSriastradh#endif 7603b705cfSriastradh 7703b705cfSriastradh#include <assert.h> 7803b705cfSriastradh#include <inttypes.h> 7903b705cfSriastradh#include <string.h> 8003b705cfSriastradh#include <sys/types.h> 8103b705cfSriastradh#include <sys/ioctl.h> 8203b705cfSriastradh 8342542f5fSchristos#include "xorg-server.h" 8403b705cfSriastradh#include "xf86.h" 8503b705cfSriastradh#include "xf86_OSproc.h" 8603b705cfSriastradh 8703b705cfSriastradh#include "intel.h" 8803b705cfSriastradh#include "i915_drm.h" 8903b705cfSriastradh 9003b705cfSriastradh/** 9103b705cfSriastradh * Returns the fence size for a tiled area of the given size. 9203b705cfSriastradh */ 9303b705cfSriastradhunsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size) 9403b705cfSriastradh{ 9503b705cfSriastradh unsigned long i; 9603b705cfSriastradh unsigned long start; 9703b705cfSriastradh 9803b705cfSriastradh if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) { 9903b705cfSriastradh /* The 965 can have fences at any page boundary. */ 10003b705cfSriastradh return ALIGN(size, 4096); 10103b705cfSriastradh } else { 10203b705cfSriastradh /* Align the size to a power of two greater than the smallest fence 10303b705cfSriastradh * size. 10403b705cfSriastradh */ 10503b705cfSriastradh if (IS_GEN3(intel)) 10603b705cfSriastradh start = MB(1); 10703b705cfSriastradh else 10803b705cfSriastradh start = KB(512); 10903b705cfSriastradh 11003b705cfSriastradh for (i = start; i < size; i <<= 1) ; 11103b705cfSriastradh 11203b705cfSriastradh return i; 11303b705cfSriastradh } 11403b705cfSriastradh} 11503b705cfSriastradh 11603b705cfSriastradh/** 11703b705cfSriastradh * On some chips, pitch width has to be a power of two tile width, so 11803b705cfSriastradh * calculate that here. 11903b705cfSriastradh */ 12003b705cfSriastradhunsigned long 12103b705cfSriastradhintel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch, 12203b705cfSriastradh uint32_t tiling_mode) 12303b705cfSriastradh{ 12403b705cfSriastradh unsigned long i; 12503b705cfSriastradh unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512; 12603b705cfSriastradh 12703b705cfSriastradh if (tiling_mode == I915_TILING_NONE) 12803b705cfSriastradh return pitch; 12903b705cfSriastradh 13003b705cfSriastradh /* 965+ is flexible */ 13103b705cfSriastradh if (INTEL_INFO(intel)->gen >= 040) 13203b705cfSriastradh return ALIGN(pitch, tile_width); 13303b705cfSriastradh 13403b705cfSriastradh /* Pre-965 needs power of two tile width */ 13503b705cfSriastradh for (i = tile_width; i < pitch; i <<= 1) ; 13603b705cfSriastradh 13703b705cfSriastradh return i; 13803b705cfSriastradh} 13903b705cfSriastradh 14042542f5fSchristosBool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling) 14103b705cfSriastradh{ 14203b705cfSriastradh intel_screen_private *intel = intel_get_screen_private(scrn); 14342542f5fSchristos int limit; 14403b705cfSriastradh 14503b705cfSriastradh /* 8xx spec has always 8K limit, but tests show larger limit in 14603b705cfSriastradh non-tiling mode, which makes large monitor work. */ 14703b705cfSriastradh if (tiling) { 14803b705cfSriastradh if (IS_GEN2(intel)) 14903b705cfSriastradh limit = KB(8); 15003b705cfSriastradh else if (IS_GEN3(intel)) 15103b705cfSriastradh limit = KB(8); 15203b705cfSriastradh else if (IS_GEN4(intel)) 15303b705cfSriastradh limit = KB(16); 15403b705cfSriastradh else 15503b705cfSriastradh limit = KB(32); 15642542f5fSchristos } else 15742542f5fSchristos limit = KB(32); 15803b705cfSriastradh 15942542f5fSchristos return stride <= limit; 16003b705cfSriastradh} 16103b705cfSriastradh 16203b705cfSriastradhstatic size_t 16303b705cfSriastradhagp_aperture_size(struct pci_device *dev, int gen) 16403b705cfSriastradh{ 16503b705cfSriastradh return dev->regions[gen < 030 ? 0 : 2].size; 16603b705cfSriastradh} 16703b705cfSriastradh 16842542f5fSchristosvoid intel_set_gem_max_sizes(ScrnInfoPtr scrn) 16903b705cfSriastradh{ 17003b705cfSriastradh intel_screen_private *intel = intel_get_screen_private(scrn); 17113496ba1Ssnj size_t agp_size = agp_aperture_size(xf86GetPciInfoForEntity(intel->pEnt->index), 17203b705cfSriastradh INTEL_INFO(intel)->gen); 17303b705cfSriastradh 17403b705cfSriastradh /* The chances of being able to mmap an object larger than 17503b705cfSriastradh * agp_size/2 are slim. Moreover, we may be forced to fallback 17603b705cfSriastradh * using a gtt mapping as both the source and a mask, as well 17703b705cfSriastradh * as a destination and all need to fit into the aperture. 17803b705cfSriastradh */ 17903b705cfSriastradh intel->max_gtt_map_size = agp_size / 4; 18003b705cfSriastradh 18103b705cfSriastradh /* Let objects be tiled up to the size where only 4 would fit in 18203b705cfSriastradh * the aperture, presuming best case alignment. Also if we 18303b705cfSriastradh * cannot mmap it using the GTT we will be stuck. */ 18403b705cfSriastradh intel->max_tiling_size = intel->max_gtt_map_size; 18503b705cfSriastradh 18603b705cfSriastradh /* Large BOs will tend to hit SW fallbacks frequently, and also will 18703b705cfSriastradh * tend to fail to successfully map when doing SW fallbacks because we 18803b705cfSriastradh * overcommit address space for BO access, or worse cause aperture 18903b705cfSriastradh * thrashing. 19003b705cfSriastradh */ 19103b705cfSriastradh intel->max_bo_size = intel->max_gtt_map_size; 19203b705cfSriastradh} 19313496ba1Ssnj 19413496ba1Ssnjunsigned int 19513496ba1Ssnjintel_compute_size(struct intel_screen_private *intel, 19613496ba1Ssnj int w, int h, int bpp, unsigned usage, 19713496ba1Ssnj uint32_t *tiling, int *stride) 19813496ba1Ssnj{ 19913496ba1Ssnj int pitch, size; 20013496ba1Ssnj 20113496ba1Ssnj if (*tiling != I915_TILING_NONE) { 20213496ba1Ssnj /* First check whether tiling is necessary. */ 20313496ba1Ssnj pitch = (w * bpp + 7) / 8; 20413496ba1Ssnj pitch = ALIGN(pitch, 64); 20513496ba1Ssnj size = pitch * ALIGN (h, 2); 20613496ba1Ssnj if (INTEL_INFO(intel)->gen < 040) { 20713496ba1Ssnj /* Gen 2/3 has a maximum stride for tiling of 20813496ba1Ssnj * 8192 bytes. 20913496ba1Ssnj */ 21013496ba1Ssnj if (pitch > KB(8)) 21113496ba1Ssnj *tiling = I915_TILING_NONE; 21213496ba1Ssnj 21313496ba1Ssnj /* Narrower than half a tile? */ 21413496ba1Ssnj if (pitch < 256) 21513496ba1Ssnj *tiling = I915_TILING_NONE; 21613496ba1Ssnj 21713496ba1Ssnj /* Older hardware requires fences to be pot size 21813496ba1Ssnj * aligned with a minimum of 1 MiB, so causes 21913496ba1Ssnj * massive overallocation for small textures. 22013496ba1Ssnj */ 22113496ba1Ssnj if (size < 1024*1024/2 && !intel->has_relaxed_fencing) 22213496ba1Ssnj *tiling = I915_TILING_NONE; 22313496ba1Ssnj } else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) { 22413496ba1Ssnj /* Disable tiling beneath a page size, we will not see 22513496ba1Ssnj * any benefit from reducing TLB misses and instead 22613496ba1Ssnj * just incur extra cost when we require a fence. 22713496ba1Ssnj */ 22813496ba1Ssnj *tiling = I915_TILING_NONE; 22913496ba1Ssnj } 23013496ba1Ssnj } 23113496ba1Ssnj 23213496ba1Ssnj pitch = (w * bpp + 7) / 8; 23313496ba1Ssnj if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256) 23413496ba1Ssnj *tiling = I915_TILING_NONE; 23513496ba1Ssnj 23613496ba1Ssnj if (*tiling != I915_TILING_NONE) { 23713496ba1Ssnj int aligned_h, tile_height; 23813496ba1Ssnj 23913496ba1Ssnj if (IS_GEN2(intel)) 24013496ba1Ssnj tile_height = 16; 24113496ba1Ssnj else if (*tiling == I915_TILING_X) 24213496ba1Ssnj tile_height = 8; 24313496ba1Ssnj else 24413496ba1Ssnj tile_height = 32; 24513496ba1Ssnj aligned_h = ALIGN(h, tile_height); 24613496ba1Ssnj 24713496ba1Ssnj *stride = intel_get_fence_pitch(intel, 24813496ba1Ssnj ALIGN(pitch, 512), 24913496ba1Ssnj *tiling); 25013496ba1Ssnj 25113496ba1Ssnj /* Round the object up to the size of the fence it will live in 25213496ba1Ssnj * if necessary. We could potentially make the kernel allocate 25313496ba1Ssnj * a larger aperture space and just bind the subset of pages in, 25413496ba1Ssnj * but this is easier and also keeps us out of trouble (as much) 25513496ba1Ssnj * with drm_intel_bufmgr_check_aperture(). 25613496ba1Ssnj */ 25713496ba1Ssnj size = intel_get_fence_size(intel, *stride * aligned_h); 25813496ba1Ssnj 25913496ba1Ssnj if (size > intel->max_tiling_size) 26013496ba1Ssnj *tiling = I915_TILING_NONE; 26113496ba1Ssnj } 26213496ba1Ssnj 26313496ba1Ssnj if (*tiling == I915_TILING_NONE) { 26413496ba1Ssnj /* We only require a 64 byte alignment for scanouts, but 26513496ba1Ssnj * a 256 byte alignment for sharing with PRIME. 26613496ba1Ssnj */ 26713496ba1Ssnj *stride = ALIGN(pitch, 256); 26813496ba1Ssnj /* Round the height up so that the GPU's access to a 2x2 aligned 26913496ba1Ssnj * subspan doesn't address an invalid page offset beyond the 27013496ba1Ssnj * end of the GTT. 27113496ba1Ssnj */ 27213496ba1Ssnj size = *stride * ALIGN(h, 2); 27313496ba1Ssnj } 27413496ba1Ssnj 27513496ba1Ssnj return size; 27613496ba1Ssnj} 27713496ba1Ssnj 27813496ba1Ssnjdrm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn, 27913496ba1Ssnj int width, int height, int cpp, 28013496ba1Ssnj int *out_stride, 28113496ba1Ssnj uint32_t *out_tiling) 28213496ba1Ssnj{ 28313496ba1Ssnj intel_screen_private *intel = intel_get_screen_private(scrn); 28413496ba1Ssnj uint32_t tiling; 28513496ba1Ssnj int stride, size; 28613496ba1Ssnj drm_intel_bo *bo; 28713496ba1Ssnj 28813496ba1Ssnj intel_set_gem_max_sizes(scrn); 28913496ba1Ssnj 29013496ba1Ssnj if (intel->tiling & INTEL_TILING_FB) 29113496ba1Ssnj tiling = I915_TILING_X; 29213496ba1Ssnj else 29313496ba1Ssnj tiling = I915_TILING_NONE; 29413496ba1Ssnj 29513496ba1Ssnjretry: 29613496ba1Ssnj size = intel_compute_size(intel, 29713496ba1Ssnj width, height, 29813496ba1Ssnj intel->cpp*8, 0, 29913496ba1Ssnj &tiling, &stride); 30013496ba1Ssnj if (!intel_check_display_stride(scrn, stride, tiling)) { 30113496ba1Ssnj if (tiling != I915_TILING_NONE) { 30213496ba1Ssnj tiling = I915_TILING_NONE; 30313496ba1Ssnj goto retry; 30413496ba1Ssnj } 30513496ba1Ssnj 30613496ba1Ssnj xf86DrvMsg(scrn->scrnIndex, X_ERROR, 30713496ba1Ssnj "Front buffer stride %d kB " 30813496ba1Ssnj "exceeds display limit\n", stride / 1024); 30913496ba1Ssnj return NULL; 31013496ba1Ssnj } 31113496ba1Ssnj 31213496ba1Ssnj bo = drm_intel_bo_alloc(intel->bufmgr, "front buffer", size, 0); 31313496ba1Ssnj if (bo == NULL) 31413496ba1Ssnj return FALSE; 31513496ba1Ssnj 31613496ba1Ssnj if (tiling != I915_TILING_NONE) 31713496ba1Ssnj drm_intel_bo_set_tiling(bo, &tiling, stride); 31813496ba1Ssnj 31913496ba1Ssnj xf86DrvMsg(scrn->scrnIndex, X_INFO, 32013496ba1Ssnj "Allocated new frame buffer %dx%d stride %d, %s\n", 32113496ba1Ssnj width, height, stride, 32213496ba1Ssnj tiling == I915_TILING_NONE ? "untiled" : "tiled"); 32313496ba1Ssnj 32413496ba1Ssnj drm_intel_bo_disable_reuse(bo); 32513496ba1Ssnj 32613496ba1Ssnj *out_stride = stride; 32713496ba1Ssnj *out_tiling = tiling; 32813496ba1Ssnj return bo; 32913496ba1Ssnj} 33013496ba1Ssnj 331