/*	$NetBSD: i915_gem_tiling.c,v 1.3 2021/12/19 11:33:49 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_tiling.c,v 1.3 2021/12/19 11:33:49 riastradh Exp $");

#include <linux/bitmap.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"

/**
 * DOC: buffer object tiling
 *
 * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the
 * userspace interface to declare fence register requirements.
 *
 * In principle GEM doesn't care at all about the internal data layout of an
 * object, and hence it also doesn't care about tiling or swizzling. There are
 * two exceptions:
 *
 * - For X and Y tiling the hardware provides detilers for CPU access, the
 *   so-called fences. Since there is only a limited number of them the kernel
 *   must manage these, and therefore userspace must tell the kernel the object
 *   tiling if it wants to use fences for detiling.
 * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
 *   depends upon the physical page frame number. When swapping such objects
 *   the page frame number might change, and the kernel must be able to fix
 *   this up and hence must know the tiling. Note that on a subset of
 *   platforms with asymmetric memory channel population the swizzling pattern
 *   changes in an unknown way, and for those the kernel simply forbids
 *   swapping completely.
 *
 * Since neither of these applies to new tiling layouts on modern platforms
 * like W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y
 * tiled. Anything else can be handled entirely in userspace without the
 * kernel's involvement.
 */
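
/*
 * A minimal userspace sketch of the ioctl interface documented above,
 * assuming a DRM file descriptor `fd` and a GEM handle `handle` (both
 * hypothetical here) for a buffer whose stride is already tile-aligned:
 *
 *	struct drm_i915_gem_set_tiling set = {
 *		.handle = handle,
 *		.tiling_mode = I915_TILING_X,
 *		.stride = 4096,
 *	};
 *
 *	// The kernel may demote the request to I915_TILING_NONE if it
 *	// cannot handle the swizzling, so re-read tiling_mode afterwards.
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set))
 *		err(1, "set_tiling");
 *	printf("tiling=%u swizzle=%u\n", set.tiling_mode, set.swizzle_mode);
 */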

/**
 * i915_gem_fence_size - required global GTT size for a fence
 * @i915: i915 device
 * @size: object size
 * @tiling: tiling mode
 * @stride: tiling stride
 *
 * Return the required global GTT size for a fence (view of a tiled object),
 * taking into account potential fence register mapping.
 */
u32 i915_gem_fence_size(struct drm_i915_private *i915,
			u32 size, unsigned int tiling, unsigned int stride)
{
	u32 ggtt_size;

	GEM_BUG_ON(!size);

	if (tiling == I915_TILING_NONE)
		return size;

	GEM_BUG_ON(!stride);

	if (INTEL_GEN(i915) >= 4) {
		stride *= i915_gem_tile_height(tiling);
		GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
		return roundup(size, stride);
	}

	/* Previous chips need a power-of-two fence region when tiling */
	if (IS_GEN(i915, 3))
		ggtt_size = 1024*1024;
	else
		ggtt_size = 512*1024;

	while (ggtt_size < size)
		ggtt_size <<= 1;

	return ggtt_size;
}
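
/*
 * Worked example (illustrative numbers, assuming i915_gem_tile_height()
 * returns 8 rows for X tiling): on gen4+, an X-tiled object of 1000000
 * bytes with a 4096-byte stride rounds up to a multiple of
 * 4096 * 8 = 32768 bytes, i.e. a 1015808-byte fence region. On gen3 the
 * same object instead needs the next power-of-two region of at least
 * 1 MiB that covers it, i.e. exactly 1 MiB here.
 */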

/**
 * i915_gem_fence_alignment - required global GTT alignment for a fence
 * @i915: i915 device
 * @size: object size
 * @tiling: tiling mode
 * @stride: tiling stride
 *
 * Return the required global GTT alignment for a fence (a view of a tiled
 * object), taking into account potential fence register mapping.
 */
u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
			     unsigned int tiling, unsigned int stride)
{
	GEM_BUG_ON(!size);

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (tiling == I915_TILING_NONE)
		return I915_GTT_MIN_ALIGNMENT;

	if (INTEL_GEN(i915) >= 4)
		return I965_FENCE_PAGE;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_fence_size(i915, size, tiling, stride);
}
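
/*
 * Continuing the illustrative example above: the gen3 object must also be
 * placed on a 1 MiB boundary in the global GTT, because pre-gen4 fence
 * regions must be naturally aligned to their (power-of-two) size, whereas
 * on gen4+ a 4 KiB (I965_FENCE_PAGE) alignment suffices for any tiled
 * object.
 */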

/* Check pitch constraints for all chips & tiling formats */
static bool
i915_tiling_ok(struct drm_i915_gem_object *obj,
	       unsigned int tiling, unsigned int stride)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	unsigned int tile_width;

	/* Linear is always fine */
	if (tiling == I915_TILING_NONE)
		return true;

	if (tiling > I915_TILING_LAST)
		return false;

	/*
	 * Check maximum stride & object size. i965+ stores the end address
	 * of the GTT mapping in the fence reg, so don't bother to check the
	 * size.
	 */
	if (INTEL_GEN(i915) >= 7) {
		if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
			return false;
	} else if (INTEL_GEN(i915) >= 4) {
		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
			return false;
	} else {
		if (stride > 8192)
			return false;

		if (!is_power_of_2(stride))
			return false;
	}

	if (IS_GEN(i915, 2) ||
	    (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
		tile_width = 128;
	else
		tile_width = 512;

	if (!stride || !IS_ALIGNED(stride, tile_width))
		return false;

	return true;
}
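
/*
 * Illustrative results (not exhaustive): on a gen3 part, a 4096-byte
 * X-tiled stride is accepted (a power of two, <= 8192, and a multiple of
 * the 512-byte tile width), while 5120 is rejected for not being a power
 * of two. On gen4+ the power-of-two requirement is dropped, so 5120
 * becomes a valid X-tiled stride (still a multiple of the tile width) as
 * long as it stays under the fence register's maximum pitch.
 */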

static bool i915_vma_fence_prepare(struct i915_vma *vma,
				   int tiling_mode, unsigned int stride)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	u32 size, alignment;

	if (!i915_vma_is_map_and_fenceable(vma))
		return true;

	size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
	if (vma->node.size < size)
		return false;

	alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
	if (!IS_ALIGNED(vma->node.start, alignment))
		return false;

	return true;
}

/* Make the current GTT allocation valid for the change in tiling. */
static int
i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
			      int tiling_mode, unsigned int stride)
{
	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
	struct i915_vma *vma;
	int ret = 0;

	if (tiling_mode == I915_TILING_NONE)
		return 0;

	mutex_lock(&ggtt->vm.mutex);
	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
			continue;

		ret = __i915_vma_unbind(vma);
		if (ret)
			break;
	}
	mutex_unlock(&ggtt->vm.mutex);

	return ret;
}

int
i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
			   unsigned int tiling, unsigned int stride)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int err;

	/* Make sure we don't cross-contaminate obj->tiling_and_stride */
	BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);

	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));

	if ((tiling | stride) == obj->tiling_and_stride)
		return 0;

	if (i915_gem_object_is_framebuffer(obj))
		return -EBUSY;

	/*
	 * We need to rebind the object if its current allocation no longer
	 * meets the alignment restrictions for its new tiling mode.
	 * Otherwise we can just leave it alone, but we need to ensure that
	 * any fence register is updated before the next fenced access
	 * (either through the GTT or by the BLT unit on older GPUs).
	 *
	 * After updating the tiling parameters, we then flag whether we need
	 * to update an associated fence register. Note this has to also
	 * include the unfenced register the GPU uses whilst executing a
	 * fenced command for an untiled object.
	 */

	i915_gem_object_lock(obj);
	if (i915_gem_object_is_framebuffer(obj)) {
		i915_gem_object_unlock(obj);
		return -EBUSY;
	}

	err = i915_gem_object_fence_prepare(obj, tiling, stride);
	if (err) {
		i915_gem_object_unlock(obj);
		return err;
	}

	/*
	 * If the memory has unknown (i.e. varying) swizzling, we pin the
	 * pages to prevent them being swapped out and causing corruption
	 * due to the change in swizzling.
	 */
	mutex_lock(&obj->mm.lock);
	if (i915_gem_object_has_pages(obj) &&
	    obj->mm.madv == I915_MADV_WILLNEED &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (tiling == I915_TILING_NONE) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (!i915_gem_object_is_tiled(obj)) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}
	mutex_unlock(&obj->mm.lock);

	for_each_ggtt_vma(vma, obj) {
		vma->fence_size =
			i915_gem_fence_size(i915, vma->size, tiling, stride);
		vma->fence_alignment =
			i915_gem_fence_alignment(i915,
						 vma->size, tiling, stride);

		if (vma->fence)
			vma->fence->dirty = true;
	}

	obj->tiling_and_stride = tiling | stride;
	i915_gem_object_unlock(obj);

	/* Force the fence to be reacquired for GTT access */
	i915_gem_object_release_mmap(obj);

	/* Try to preallocate memory required to save swizzling on put-pages */
	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		if (!obj->bit_17) {
			obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT,
						    GFP_KERNEL);
		}
	} else {
		bitmap_free(obj->bit_17);
		obj->bit_17 = NULL;
	}

	return 0;
}
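
/*
 * A hedged kernel-internal sketch of the helper above (the object setup is
 * hypothetical; only the call itself is taken from this file): given an
 * `obj` the caller already holds a reference to, switching it to X tiling
 * with a 4096-byte stride looks like
 *
 *	err = i915_gem_object_set_tiling(obj, I915_TILING_X, 4096);
 *	if (err)
 *		return err;	// e.g. -EBUSY if obj is a live framebuffer
 */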

/**
 * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode
 * @dev: DRM device
 * @data: data pointer for the ioctl
 * @file: DRM file for the ioctl call
 *
 * Sets the tiling mode of an object, returning the required swizzling of
 * bit 6 of addresses in the object.
 *
 * Called by the user via ioctl.
 *
 * Returns:
 * Zero on success, negative errno on failure.
 */
int
i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_set_tiling *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	if (!dev_priv->ggtt.num_fences)
		return -EOPNOTSUPP;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The tiling mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto err;
	}

	if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) {
		err = -EINVAL;
		goto err;
	}

	if (args->tiling_mode == I915_TILING_NONE) {
		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		args->stride = 0;
	} else {
		if (args->tiling_mode == I915_TILING_X)
			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
		else
			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;

		/*
		 * Hide bit 17 swizzling from the user. This prevents old Mesa
		 * from aborting the application on sw fallbacks to bit 17,
		 * and we use the pread/pwrite bit17 paths to swizzle for it.
		 * If there was a user that was relying on the swizzle
		 * information for drm_intel_bo_map()ed reads/writes this would
		 * break it, but we don't have any of those.
		 */
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
			args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
			args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

		/* If we can't handle the swizzling, make it untiled. */
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
			args->tiling_mode = I915_TILING_NONE;
			args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
			args->stride = 0;
		}
	}

	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);

	/* We have to maintain this existing ABI... */
	args->stride = i915_gem_object_get_stride(obj);
	args->tiling_mode = i915_gem_object_get_tiling(obj);

err:
	i915_gem_object_put(obj);
	return err;
}
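
/*
 * A minimal userspace sketch of the query side, assuming the same
 * hypothetical `fd` and `handle` as in the set_tiling example above:
 *
 *	struct drm_i915_gem_get_tiling get = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get))
 *		err(1, "get_tiling");
 *	// A phys_swizzle_mode that differs from swizzle_mode indicates
 *	// bit-17 swizzling that the kernel hides behind pread/pwrite.
 *	printf("tiling=%u swizzle=%u phys=%u\n",
 *	    get.tiling_mode, get.swizzle_mode, get.phys_swizzle_mode);
 */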

/**
 * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode
 * @dev: DRM device
 * @data: data pointer for the ioctl
 * @file: DRM file for the ioctl call
 *
 * Returns the current tiling mode and required bit 6 swizzling for the object.
 *
 * Called by the user via ioctl.
 *
 * Returns:
 * Zero on success, negative errno on failure.
 */
int
i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_get_tiling *args = data;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj;
	int err = -ENOENT;

	if (!dev_priv->ggtt.num_fences)
		return -EOPNOTSUPP;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (obj) {
		args->tiling_mode =
			READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
		err = 0;
	}
	rcu_read_unlock();
	if (unlikely(err))
		return err;

	switch (args->tiling_mode) {
	case I915_TILING_X:
		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
		break;
	case I915_TILING_Y:
		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
		break;
	default:
	case I915_TILING_NONE:
		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		break;
	}

	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
	else
		args->phys_swizzle_mode = args->swizzle_mode;
	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
		args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

	return 0;
}
    454