/**************************************************************************

Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
Copyright © 2002 by David Dawes.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sub license, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 *   David Dawes <dawes@xfree86.org>
 *
 * Updated for Dual Head capabilities:
 *   Alan Hourihane <alanh@tungstengraphics.com>
 */

/**
 * @file intel_memory.c
 *
 * This is the video memory allocator.  Our memory allocation differs from
 * that of other graphics chips, where you have a fixed amount of graphics
 * memory available that you want to put to the best use.  Instead, we have
 * almost no memory pre-allocated, and we have to choose an appropriate
 * amount of system memory to use.
 *
 * The allocations we might do:
 *
 * - Ring buffer
 * - HW cursor block (either one block or four)
 * - Overlay registers
 * - Front buffer (screen 1)
 * - Front buffer (screen 2, only in zaphod mode)
 * - Back/depth buffer (3D only)
 * - Compatibility texture pool (optional, more is always better)
 * - New texture pool (optional, more is always better; aperture allocation
 *     only)
 *
 * The user may request a specific amount of memory to be used
 * (intel->pEnt->videoRam != 0), in which case allocations have to fit within
 * that much aperture.  If not, the individual allocations will be
 * automatically sized and made to fit within the maximum aperture size.
 * Only the memory actually used (not alignment padding) will have AGP
 * memory allocated for it.
 *
 * Given that the allocations listed are generally a page or larger, our
 * allocator only returns page-aligned offsets, simplifying the memory
 * binding process.  For smaller allocations, the acceleration architecture's
 * linear allocator is preferred.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>

#include "xorg-server.h"
#include "xf86.h"
#include "xf86_OSproc.h"

#include "intel.h"
#include "i915_drm.h"

/**
 * Returns the fence size for a tiled area of the given size.
 */
unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size)
{
	unsigned long i;
	unsigned long start;

	if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) {
		/* The 965 can have fences at any page boundary. */
		return ALIGN(size, 4096);
	} else {
		/* Round the size up to a power of two, starting from the
		 * minimum fence size for this generation.
		 */
		if (IS_GEN3(intel))
			start = MB(1);
		else
			start = KB(512);

		for (i = start; i < size; i <<= 1) ;

		return i;
	}
}
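
/*
 * Worked example (illustrative only, not used by the driver): a 300 KiB
 * tiled surface is already page aligned, so on gen4+ (or with relaxed
 * fencing) intel_get_fence_size() returns it unchanged, while on gen3 it
 * is rounded up to the 1 MiB minimum fence and on gen2 to 512 KiB.
 */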

/**
 * On some chips the pitch of a tiled surface has to be a power of two (at
 * least one tile wide), so calculate the required pitch here.
 */
unsigned long
intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch,
		     uint32_t tiling_mode)
{
	unsigned long i;
	unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512;

	if (tiling_mode == I915_TILING_NONE)
		return pitch;

	/* 965+ is flexible */
	if (INTEL_INFO(intel)->gen >= 040)
		return ALIGN(pitch, tile_width);

	/* Pre-965 needs a power-of-two pitch */
	for (i = tile_width; i < pitch; i <<= 1) ;

	return i;
}
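
/*
 * Worked example (illustrative only): an X-tiled surface with a 7000 byte
 * pitch is padded to 7168 bytes (the next 512 byte tile boundary) on gen4+,
 * but on pre-965 hardware it must grow to the next power of two, 8192 bytes.
 */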

Bool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int limit;

	/* The 8xx spec specifies an 8K limit, but tests show a larger limit
	   in non-tiling mode, which makes large monitors work. */
	if (tiling) {
		if (IS_GEN2(intel))
			limit = KB(8);
		else if (IS_GEN3(intel))
			limit = KB(8);
		else if (IS_GEN4(intel))
			limit = KB(16);
		else
			limit = KB(32);
	} else
		limit = KB(32);

	return stride <= limit;
}
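
/*
 * For example (illustrative only): a 2560 pixel wide, 32 bpp scanout has a
 * 10240 byte stride, which exceeds the 8 KiB tiled limit on gen2/3 but fits
 * the 32 KiB untiled limit, so it can still be displayed untiled.
 */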

static size_t
agp_aperture_size(struct pci_device *dev, int gen)
{
	return dev->regions[gen < 030 ? 0 : 2].size;
}
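
/*
 * Note (to the best of my knowledge): the graphics aperture (GMADR) lives in
 * PCI BAR 0 on gen2 and moved to BAR 2 on gen3 and later, which is why the
 * region index above depends on the generation.
 */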

void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	size_t agp_size = agp_aperture_size(xf86GetPciInfoForEntity(intel->pEnt->index),
					    INTEL_INFO(intel)->gen);

	/* The chances of being able to mmap an object larger than
	 * agp_size/2 are slim.  Moreover, we may be forced to fall back
	 * to GTT mappings for the source, a mask and the destination all
	 * at once, and every one of them needs to fit into the aperture.
	 */
	intel->max_gtt_map_size = agp_size / 4;

	/* Let objects be tiled up to the size where only 4 would fit in
	 * the aperture, presuming best case alignment.  Also if we
	 * cannot mmap it using the GTT we will be stuck. */
	intel->max_tiling_size = intel->max_gtt_map_size;

	/* Large BOs will tend to hit SW fallbacks frequently, and also will
	 * tend to fail to successfully map when doing SW fallbacks because we
	 * overcommit address space for BO access, or worse cause aperture
	 * thrashing.
	 */
	intel->max_bo_size = intel->max_gtt_map_size;
}
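
/*
 * For example (illustrative only): with a 256 MiB aperture, max_gtt_map_size,
 * max_tiling_size and max_bo_size are all capped at 64 MiB.
 */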

unsigned int
intel_compute_size(struct intel_screen_private *intel,
                   int w, int h, int bpp, unsigned usage,
                   uint32_t *tiling, int *stride)
{
	int pitch, size;

	if (*tiling != I915_TILING_NONE) {
		/* First check whether tiling is necessary. */
		pitch = (w * bpp + 7) / 8;
		pitch = ALIGN(pitch, 64);
		size = pitch * ALIGN(h, 2);
		if (INTEL_INFO(intel)->gen < 040) {
			/* Gen 2/3 has a maximum stride for tiling of
			 * 8192 bytes.
			 */
			if (pitch > KB(8))
				*tiling = I915_TILING_NONE;

			/* Narrower than half a tile? */
			if (pitch < 256)
				*tiling = I915_TILING_NONE;

			/* Older hardware requires fences to be power-of-two
			 * sized, with a minimum of 1 MiB, which causes
			 * massive overallocation for small textures.
			 */
			if (size < 1024*1024/2 && !intel->has_relaxed_fencing)
				*tiling = I915_TILING_NONE;
		} else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) {
			/* Disable tiling beneath a page size; we will not see
			 * any benefit from reducing TLB misses and instead
			 * just incur extra cost when we require a fence.
			 */
			*tiling = I915_TILING_NONE;
		}
	}

	pitch = (w * bpp + 7) / 8;
	if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256)
		*tiling = I915_TILING_NONE;

	if (*tiling != I915_TILING_NONE) {
		int aligned_h, tile_height;

		if (IS_GEN2(intel))
			tile_height = 16;
		else if (*tiling == I915_TILING_X)
			tile_height = 8;
		else
			tile_height = 32;
		aligned_h = ALIGN(h, tile_height);

		*stride = intel_get_fence_pitch(intel,
						ALIGN(pitch, 512),
						*tiling);

		/* Round the object up to the size of the fence it will live in
		 * if necessary.  We could potentially make the kernel allocate
		 * a larger aperture space and just bind the subset of pages in,
		 * but this is easier and also keeps us out of trouble (as much)
		 * with drm_intel_bufmgr_check_aperture().
		 */
		size = intel_get_fence_size(intel, *stride * aligned_h);

		if (size > intel->max_tiling_size)
			*tiling = I915_TILING_NONE;
	}

	if (*tiling == I915_TILING_NONE) {
		/* We only require a 64 byte alignment for scanouts, but
		 * a 256 byte alignment for sharing with PRIME.
		 */
		*stride = ALIGN(pitch, 256);
		/* Round the height up so that the GPU's access to a 2x2 aligned
		 * subspan doesn't address an invalid page offset beyond the
		 * end of the GTT.
		 */
		size = *stride * ALIGN(h, 2);
	}

	return size;
}
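
/*
 * Worked example (illustrative only): a 1920x1080 pixmap at 32 bpp wanting
 * X tiling on gen4+ ends up with a 7680 byte stride (already a multiple of
 * the 512 byte tile width) and a size of 7680 * 1080 = 8294400 bytes, which
 * is page aligned, so no further fence rounding is needed.
 */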

drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn,
					 int width, int height, int cpp,
					 int *out_stride,
					 uint32_t *out_tiling)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	uint32_t tiling;
	int stride, size;
	drm_intel_bo *bo;

	intel_set_gem_max_sizes(scrn);

	if (intel->tiling & INTEL_TILING_FB)
		tiling = I915_TILING_X;
	else
		tiling = I915_TILING_NONE;

retry:
	size = intel_compute_size(intel,
                                  width, height,
                                  intel->cpp*8, 0,
                                  &tiling, &stride);
	if (!intel_check_display_stride(scrn, stride, tiling)) {
		if (tiling != I915_TILING_NONE) {
			tiling = I915_TILING_NONE;
			goto retry;
		}

		xf86DrvMsg(scrn->scrnIndex, X_ERROR,
			   "Front buffer stride %d kB "
			   "exceeds display limit\n", stride / 1024);
		return NULL;
	}

	bo = drm_intel_bo_alloc(intel->bufmgr, "front buffer", size, 0);
	if (bo == NULL)
		return NULL;

	if (tiling != I915_TILING_NONE)
		drm_intel_bo_set_tiling(bo, &tiling, stride);

	xf86DrvMsg(scrn->scrnIndex, X_INFO,
		   "Allocated new frame buffer %dx%d stride %d, %s\n",
		   width, height, stride,
		   tiling == I915_TILING_NONE ? "untiled" : "tiled");

	drm_intel_bo_disable_reuse(bo);

	*out_stride = stride;
	*out_tiling = tiling;
	return bo;
}
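
/*
 * A minimal usage sketch (hypothetical caller, kept as a comment so it does
 * not affect the build):
 *
 *	int stride;
 *	uint32_t tiling;
 *	drm_intel_bo *front = intel_allocate_framebuffer(scrn,
 *							 scrn->virtualX,
 *							 scrn->virtualY,
 *							 intel->cpp,
 *							 &stride, &tiling);
 *	if (front == NULL)
 *		return FALSE;
 */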