      1 /*	$NetBSD: i915_gem_fence_reg.c,v 1.6 2021/12/19 11:33:49 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright © 2008-2015 Intel Corporation
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice (including the next
     14  * paragraph) shall be included in all copies or substantial portions of the
     15  * Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  */
     25 
     26 #include <sys/cdefs.h>
     27 __KERNEL_RCSID(0, "$NetBSD: i915_gem_fence_reg.c,v 1.6 2021/12/19 11:33:49 riastradh Exp $");
     28 
     29 #include <linux/bitmap.h>
     30 #include <drm/i915_drm.h>
     31 
     32 #include "i915_drv.h"
     33 #include "i915_scatterlist.h"
     34 #include "i915_vgpu.h"
     35 
     36 /**
     37  * DOC: fence register handling
     38  *
     39  * Important to avoid confusion: "fences" in the i915 driver are not execution
     40  * fences used to track command completion but hardware detiler objects which
     41  * wrap a given range of the global GTT. Each platform has only a fairly limited
     42  * set of these objects.
     43  *
     44  * Fences are used to detile GTT memory mappings. They're also connected to the
     45  * hardware frontbuffer render tracking and hence interact with frontbuffer
     46  * compression. Furthermore on older platforms fences are required for tiled
     47  * objects used by the display engine. They can also be used by the render
     48  * engine - they're required for blitter commands and are optional for render
     49  * commands. But on gen4+ both display (with the exception of fbc) and rendering
     50  * have their own tiling state bits and don't need fences.
     51  *
     52  * Also note that fences only support X and Y tiling and hence can't be used for
     53  * the fancier new tiling formats like W, Ys and Yf.
     54  *
     55  * Finally note that because fences are such a restricted resource they're
     56  * dynamically associated with objects. Furthermore fence state is committed to
     57  * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
     58  * explicitly call i915_gem_object_get_fence() to synchronize fencing status
     59  * for cpu access. Also note that some code wants an unfenced view, for those
     60  * cases the fence can be removed forcefully with i915_gem_object_put_fence().
     61  *
     62  * Internally these functions will synchronize with userspace access by removing
     63  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
     64  */
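
/*
 * Illustrative sketch only, not called anywhere in the driver: one way a
 * kernel user might bracket detiled CPU access through a GTT mmap with a
 * fence.  It assumes the vma is already pinned map-and-fenceable in the
 * GGTT, that a runtime PM wakeref is held, and that the usual
 * i915_vma_unpin_fence() helper from i915_vma.h is available.
 */
static inline int __maybe_unused
example_fenced_cpu_access(struct i915_vma *vma)
{
	int err;

	/* Attach (or reuse) a fence register for this vma. */
	err = i915_vma_pin_fence(vma);
	if (err)
		return err;

	/*
	 * For a tiled object, CPU access through the aperture mapping of
	 * vma is now detiled by the hardware; perform the access here.
	 */

	/* Drop the pin so the fence can be reclaimed under pressure. */
	i915_vma_unpin_fence(vma);
	return 0;
}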
     65 
     66 #define pipelined 0
     67 
     68 static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
     69 {
     70 	return fence->ggtt->vm.i915;
     71 }
     72 
     73 static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
     74 {
     75 	return fence->ggtt->vm.gt->uncore;
     76 }
     77 
     78 static void i965_write_fence_reg(struct i915_fence_reg *fence,
     79 				 struct i915_vma *vma)
     80 {
     81 	i915_reg_t fence_reg_lo, fence_reg_hi;
     82 	int fence_pitch_shift;
     83 	u64 val;
     84 
     85 	if (INTEL_GEN(fence_to_i915(fence)) >= 6) {
     86 		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
     87 		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
     88 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
     89 
     90 	} else {
     91 		fence_reg_lo = FENCE_REG_965_LO(fence->id);
     92 		fence_reg_hi = FENCE_REG_965_HI(fence->id);
     93 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
     94 	}
     95 
     96 	val = 0;
     97 	if (vma) {
     98 		unsigned int stride = i915_gem_object_get_stride(vma->obj);
     99 
    100 		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
    101 		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
    102 		GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
    103 		GEM_BUG_ON(!IS_ALIGNED(stride, 128));
    104 
    105 		val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
    106 		val |= vma->node.start;
    107 		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
    108 		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
    109 			val |= BIT(I965_FENCE_TILING_Y_SHIFT);
    110 		val |= I965_FENCE_REG_VALID;
    111 	}
    112 
    113 	if (!pipelined) {
    114 		struct intel_uncore *uncore = fence_to_uncore(fence);
    115 
    116 		/*
    117 		 * To w/a incoherency with non-atomic 64-bit register updates,
    118 		 * we split the 64-bit update into two 32-bit writes. In order
    119 		 * for a partial fence not to be evaluated between writes, we
    120 		 * precede the update with write to turn off the fence register,
    121 		 * and only enable the fence as the last step.
    122 		 *
    123 		 * For extra levels of paranoia, we make sure each step lands
    124 		 * before applying the next step.
    125 		 */
    126 		intel_uncore_write_fw(uncore, fence_reg_lo, 0);
    127 		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
    128 
    129 		intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
    130 		intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
    131 		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
    132 	}
    133 }
    134 
    135 static void i915_write_fence_reg(struct i915_fence_reg *fence,
    136 				 struct i915_vma *vma)
    137 {
    138 	u32 val;
    139 
    140 	val = 0;
    141 	if (vma) {
    142 		unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
    143 		bool is_y_tiled = tiling == I915_TILING_Y;
    144 		unsigned int stride = i915_gem_object_get_stride(vma->obj);
    145 
    146 		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
    147 		GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK);
    148 		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
    149 		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
    150 
    151 		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence)))
    152 			stride /= 128;
    153 		else
    154 			stride /= 512;
    155 		GEM_BUG_ON(!is_power_of_2(stride));
    156 
    157 		val = vma->node.start;
    158 		if (is_y_tiled)
    159 			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
    160 		val |= I915_FENCE_SIZE_BITS(vma->fence_size);
    161 		val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT;
    162 
    163 		val |= I830_FENCE_REG_VALID;
    164 	}
    165 
    166 	if (!pipelined) {
    167 		struct intel_uncore *uncore = fence_to_uncore(fence);
    168 		i915_reg_t reg = FENCE_REG(fence->id);
    169 
    170 		intel_uncore_write_fw(uncore, reg, val);
    171 		intel_uncore_posting_read_fw(uncore, reg);
    172 	}
    173 }
    174 
    175 static void i830_write_fence_reg(struct i915_fence_reg *fence,
    176 				 struct i915_vma *vma)
    177 {
    178 	u32 val;
    179 
    180 	val = 0;
    181 	if (vma) {
    182 		unsigned int stride = i915_gem_object_get_stride(vma->obj);
    183 
    184 		GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
    185 		GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK);
    186 		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
    187 		GEM_BUG_ON(!is_power_of_2(stride / 128));
    188 		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
    189 
    190 		val = vma->node.start;
    191 		if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
    192 			val |= BIT(I830_FENCE_TILING_Y_SHIFT);
    193 		val |= I830_FENCE_SIZE_BITS(vma->fence_size);
    194 		val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT;
    195 		val |= I830_FENCE_REG_VALID;
    196 	}
    197 
    198 	if (!pipelined) {
    199 		struct intel_uncore *uncore = fence_to_uncore(fence);
    200 		i915_reg_t reg = FENCE_REG(fence->id);
    201 
    202 		intel_uncore_write_fw(uncore, reg, val);
    203 		intel_uncore_posting_read_fw(uncore, reg);
    204 	}
    205 }
    206 
    207 static void fence_write(struct i915_fence_reg *fence,
    208 			struct i915_vma *vma)
    209 {
    210 	struct drm_i915_private *i915 = fence_to_i915(fence);
    211 
    212 	/*
    213 	 * Previous access through the fence register is marshalled by
    214 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
    215 	 * and explicitly managed for internal users.
    216 	 */
    217 
    218 	if (IS_GEN(i915, 2))
    219 		i830_write_fence_reg(fence, vma);
    220 	else if (IS_GEN(i915, 3))
    221 		i915_write_fence_reg(fence, vma);
    222 	else
    223 		i965_write_fence_reg(fence, vma);
    224 
    225 	/*
    226 	 * Access through the fenced region afterwards is
    227 	 * ordered by the posting reads whilst writing the registers.
    228 	 */
    229 
    230 	fence->dirty = false;
    231 }
    232 
    233 static int fence_update(struct i915_fence_reg *fence,
    234 			struct i915_vma *vma)
    235 {
    236 	struct i915_ggtt *ggtt = fence->ggtt;
    237 	struct intel_uncore *uncore = fence_to_uncore(fence);
    238 	intel_wakeref_t wakeref;
    239 	struct i915_vma *old;
    240 	int ret;
    241 
    242 	if (vma) {
    243 		if (!i915_vma_is_map_and_fenceable(vma))
    244 			return -EINVAL;
    245 
    246 		if (WARN(!i915_gem_object_get_stride(vma->obj) ||
    247 			 !i915_gem_object_get_tiling(vma->obj),
    248 			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
    249 			 i915_gem_object_get_stride(vma->obj),
    250 			 i915_gem_object_get_tiling(vma->obj)))
    251 			return -EINVAL;
    252 
    253 		ret = i915_vma_sync(vma);
    254 		if (ret)
    255 			return ret;
    256 	}
    257 
    258 	old = xchg(&fence->vma, NULL);
    259 	if (old) {
    260 		/* XXX Ideally we would move the waiting to outside the mutex */
    261 		ret = i915_vma_sync(old);
    262 		if (ret) {
    263 			fence->vma = old;
    264 			return ret;
    265 		}
    266 
    267 		i915_vma_flush_writes(old);
    268 
    269 		/*
    270 		 * Ensure that all userspace CPU access is completed before
    271 		 * stealing the fence.
    272 		 */
    273 		if (old != vma) {
    274 			GEM_BUG_ON(old->fence != fence);
    275 			i915_vma_revoke_mmap(old);
    276 			old->fence = NULL;
    277 		}
    278 
    279 		list_move(&fence->link, &ggtt->fence_list);
    280 	}
    281 
    282 	/*
    283 	 * We only need to update the register itself if the device is awake.
    284 	 * If the device is currently powered down, we will defer the write
    285 	 * to the runtime resume, see i915_gem_restore_fences().
    286 	 *
    287 	 * This only works for removing the fence register, on acquisition
    288 	 * the caller must hold the rpm wakeref. The fence register must
    289 	 * be cleared before we can use any other fences to ensure that
    290 	 * the new fences do not overlap the elided clears, confusing HW.
    291 	 */
    292 	wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm);
    293 	if (!wakeref) {
    294 		GEM_BUG_ON(vma);
    295 		return 0;
    296 	}
    297 
    298 	WRITE_ONCE(fence->vma, vma);
    299 	fence_write(fence, vma);
    300 
    301 	if (vma) {
    302 		vma->fence = fence;
    303 		list_move_tail(&fence->link, &ggtt->fence_list);
    304 	}
    305 
    306 	intel_runtime_pm_put(uncore->rpm, wakeref);
    307 	return 0;
    308 }
    309 
    310 /**
    311  * i915_vma_revoke_fence - force-remove fence for a VMA
    312  * @vma: vma to map linearly (not through a fence reg)
    313  *
    314  * This function force-removes any fence from the given object, which is useful
    315  * if the kernel wants to do untiled GTT access.
    316  *
    317  * Returns:
    318  *
    319  * 0 on success, negative error code on failure.
    320  */
    321 int i915_vma_revoke_fence(struct i915_vma *vma)
    322 {
    323 	struct i915_fence_reg *fence = vma->fence;
    324 
    325 	lockdep_assert_held(&vma->vm->mutex);
    326 	if (!fence)
    327 		return 0;
    328 
    329 	if (atomic_read(&fence->pin_count))
    330 		return -EBUSY;
    331 
    332 	return fence_update(fence, NULL);
    333 }
    334 
    335 static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
    336 {
    337 	struct i915_fence_reg *fence;
    338 
    339 	list_for_each_entry(fence, &ggtt->fence_list, link) {
    340 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
    341 
    342 		if (atomic_read(&fence->pin_count))
    343 			continue;
    344 
    345 		return fence;
    346 	}
    347 
    348 	/* Wait for completion of pending flips which consume fences */
    349 	if (intel_has_pending_fb_unpin(ggtt->vm.i915))
    350 		return ERR_PTR(-EAGAIN);
    351 
    352 	return ERR_PTR(-EDEADLK);
    353 }
    354 
    355 int __i915_vma_pin_fence(struct i915_vma *vma)
    356 {
    357 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
    358 	struct i915_fence_reg *fence;
    359 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
    360 	int err;
    361 
    362 	lockdep_assert_held(&vma->vm->mutex);
    363 
    364 	/* Just update our place in the LRU if our fence is getting reused. */
    365 	if (vma->fence) {
    366 		fence = vma->fence;
    367 		GEM_BUG_ON(fence->vma != vma);
    368 		atomic_inc(&fence->pin_count);
    369 		if (!fence->dirty) {
    370 			list_move_tail(&fence->link, &ggtt->fence_list);
    371 			return 0;
    372 		}
    373 	} else if (set) {
    374 		fence = fence_find(ggtt);
    375 		if (IS_ERR(fence))
    376 			return PTR_ERR(fence);
    377 
    378 		GEM_BUG_ON(atomic_read(&fence->pin_count));
    379 		atomic_inc(&fence->pin_count);
    380 	} else {
    381 		return 0;
    382 	}
    383 
    384 	err = fence_update(fence, set);
    385 	if (err)
    386 		goto out_unpin;
    387 
    388 	GEM_BUG_ON(fence->vma != set);
    389 	GEM_BUG_ON(vma->fence != (set ? fence : NULL));
    390 
    391 	if (set)
    392 		return 0;
    393 
    394 out_unpin:
    395 	atomic_dec(&fence->pin_count);
    396 	return err;
    397 }
    398 
    399 /**
    400  * i915_vma_pin_fence - set up fencing for a vma
    401  * @vma: vma to map through a fence reg
    402  *
    403  * When mapping objects through the GTT, userspace wants to be able to write
    404  * to them without having to worry about swizzling if the object is tiled.
    405  * This function walks the fence regs looking for a free one for @obj,
    406  * stealing one if it can't find any.
    407  *
    408  * It then sets up the reg based on the object's properties: address, pitch
    409  * and tiling format.
    410  *
    411  * For an untiled surface, this removes any existing fence.
    412  *
    413  * Returns:
    414  *
    415  * 0 on success, negative error code on failure.
    416  */
    417 int i915_vma_pin_fence(struct i915_vma *vma)
    418 {
    419 	int err;
    420 
    421 	if (!vma->fence && !i915_gem_object_is_tiled(vma->obj))
    422 		return 0;
    423 
    424 	/*
    425 	 * Note that we revoke fences on runtime suspend. Therefore the user
    426 	 * must keep the device awake whilst using the fence.
    427 	 */
    428 	assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
    429 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
    430 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
    431 
    432 	err = mutex_lock_interruptible(&vma->vm->mutex);
    433 	if (err)
    434 		return err;
    435 
    436 	err = __i915_vma_pin_fence(vma);
    437 	mutex_unlock(&vma->vm->mutex);
    438 
    439 	return err;
    440 }
    441 
    442 /**
    443  * i915_reserve_fence - Reserve a fence for vGPU
    444  * @ggtt: Global GTT
    445  *
     446  * This function walks the fence regs looking for a free one and removes
     447  * it from the fence_list. It is used to reserve a fence for vGPU to use.
    448  */
    449 struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
    450 {
    451 	struct i915_fence_reg *fence;
    452 	int count;
    453 	int ret;
    454 
    455 	lockdep_assert_held(&ggtt->vm.mutex);
    456 
    457 	/* Keep at least one fence available for the display engine. */
    458 	count = 0;
    459 	list_for_each_entry(fence, &ggtt->fence_list, link)
    460 		count += !atomic_read(&fence->pin_count);
    461 	if (count <= 1)
    462 		return ERR_PTR(-ENOSPC);
    463 
    464 	fence = fence_find(ggtt);
    465 	if (IS_ERR(fence))
    466 		return fence;
    467 
    468 	if (fence->vma) {
    469 		/* Force-remove fence from VMA */
    470 		ret = fence_update(fence, NULL);
    471 		if (ret)
    472 			return ERR_PTR(ret);
    473 	}
    474 
    475 	list_del(&fence->link);
    476 
    477 	return fence;
    478 }
    479 
    480 /**
    481  * i915_unreserve_fence - Reclaim a reserved fence
    482  * @fence: the fence reg
    483  *
     484  * This function adds a reserved fence register from vGPU back to the fence_list.
    485  */
    486 void i915_unreserve_fence(struct i915_fence_reg *fence)
    487 {
    488 	struct i915_ggtt *ggtt = fence->ggtt;
    489 
    490 	lockdep_assert_held(&ggtt->vm.mutex);
    491 
    492 	list_add(&fence->link, &ggtt->fence_list);
    493 }
    494 
    495 /**
    496  * i915_gem_restore_fences - restore fence state
    497  * @ggtt: Global GTT
    498  *
    499  * Restore the hw fence state to match the software tracking again, to be called
    500  * after a gpu reset and on resume. Note that on runtime suspend we only cancel
    501  * the fences, to be reacquired by the user later.
    502  */
    503 void i915_gem_restore_fences(struct i915_ggtt *ggtt)
    504 {
    505 	int i;
    506 
    507 	rcu_read_lock(); /* keep obj alive as we dereference */
    508 	for (i = 0; i < ggtt->num_fences; i++) {
    509 		struct i915_fence_reg *reg = &ggtt->fence_regs[i];
    510 		struct i915_vma *vma = READ_ONCE(reg->vma);
    511 
    512 		GEM_BUG_ON(vma && vma->fence != reg);
    513 
    514 		/*
    515 		 * Commit delayed tiling changes if we have an object still
    516 		 * attached to the fence, otherwise just clear the fence.
    517 		 */
    518 		if (vma && !i915_gem_object_is_tiled(vma->obj))
    519 			vma = NULL;
    520 
    521 		fence_write(reg, vma);
    522 	}
    523 	rcu_read_unlock();
    524 }
    525 
    526 /**
    527  * DOC: tiling swizzling details
    528  *
    529  * The idea behind tiling is to increase cache hit rates by rearranging
    530  * pixel data so that a group of pixel accesses are in the same cacheline.
     531  * Performance improvements from doing this on the back/depth buffer are on
    532  * the order of 30%.
    533  *
    534  * Intel architectures make this somewhat more complicated, though, by
    535  * adjustments made to addressing of data when the memory is in interleaved
     536  * mode (matched pairs of DIMMs) to improve memory bandwidth.
    537  * For interleaved memory, the CPU sends every sequential 64 bytes
    538  * to an alternate memory channel so it can get the bandwidth from both.
    539  *
    540  * The GPU also rearranges its accesses for increased bandwidth to interleaved
    541  * memory, and it matches what the CPU does for non-tiled.  However, when tiled
    542  * it does it a little differently, since one walks addresses not just in the
    543  * X direction but also Y.  So, along with alternating channels when bit
    544  * 6 of the address flips, it also alternates when other bits flip --  Bits 9
    545  * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
    546  * are common to both the 915 and 965-class hardware.
    547  *
    548  * The CPU also sometimes XORs in higher bits as well, to improve
    549  * bandwidth doing strided access like we do so frequently in graphics.  This
    550  * is called "Channel XOR Randomization" in the MCH documentation.  The result
    551  * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
    552  * decode.
    553  *
    554  * All of this bit 6 XORing has an effect on our memory management,
    555  * as we need to make sure that the 3d driver can correctly address object
    556  * contents.
    557  *
    558  * If we don't have interleaved memory, all tiling is safe and no swizzling is
    559  * required.
    560  *
    561  * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
    562  * 17 is not just a page offset, so as we page an object out and back in,
    563  * individual pages in it will have different bit 17 addresses, resulting in
    564  * each 64 bytes being swapped with its neighbor!
    565  *
    566  * Otherwise, if interleaved, we have to tell the 3d driver what the address
    567  * swizzling it needs to do is, since it's writing with the CPU to the pages
    568  * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
    569  * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
    570  * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
    571  * to match what the GPU expects.
    572  */
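
/*
 * Illustrative sketch only, not used by the driver: one way to apply the
 * cumulative bit 6 swizzle described above to a linear byte offset, for
 * the common 9/10 and 9/10/11 modes.  The helper name and the bool
 * parameter are local to this example.
 */
static inline unsigned long __maybe_unused
example_swizzle_bit6(unsigned long offset, bool cpu_xors_bit11)
{
	unsigned long bit6 = (offset >> 6) & 1;

	/* The GPU folds bits 9 and 10 into bit 6 for tiled accesses. */
	bit6 ^= (offset >> 9) & 1;
	bit6 ^= (offset >> 10) & 1;

	/* Some CPUs additionally XOR in bit 11 (channel XOR randomization). */
	if (cpu_xors_bit11)
		bit6 ^= (offset >> 11) & 1;

	/* Substitute the swizzled bit 6 back into the offset. */
	return (offset & ~BIT(6)) | (bit6 << 6);
}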
    573 
    574 /**
    575  * detect_bit_6_swizzle - detect bit 6 swizzling pattern
    576  * @ggtt: Global GGTT
    577  *
    578  * Detects bit 6 swizzling of address lookup between IGD access and CPU
    579  * access through main memory.
    580  */
    581 static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
    582 {
    583 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
    584 	struct drm_i915_private *i915 = ggtt->vm.i915;
    585 	u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
    586 	u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
    587 
    588 	if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
    589 		/*
    590 		 * On BDW+, swizzling is not used. We leave the CPU memory
    591 		 * controller in charge of optimizing memory accesses without
    592 		 * the extra address manipulation GPU side.
    593 		 *
    594 		 * VLV and CHV don't have GPU swizzling.
    595 		 */
    596 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    597 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    598 	} else if (INTEL_GEN(i915) >= 6) {
    599 		if (i915->preserve_bios_swizzle) {
    600 			if (intel_uncore_read(uncore, DISP_ARB_CTL) &
    601 			    DISP_TILE_SURFACE_SWIZZLING) {
    602 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
    603 				swizzle_y = I915_BIT_6_SWIZZLE_9;
    604 			} else {
    605 				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    606 				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    607 			}
    608 		} else {
    609 			u32 dimm_c0, dimm_c1;
    610 			dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
    611 			dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
    612 			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
    613 			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
    614 			/*
    615 			 * Enable swizzling when the channels are populated
    616 			 * with identically sized dimms. We don't need to check
    617 			 * the 3rd channel because no cpu with gpu attached
    618 			 * ships in that configuration. Also, swizzling only
    619 			 * makes sense for 2 channels anyway.
    620 			 */
    621 			if (dimm_c0 == dimm_c1) {
    622 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
    623 				swizzle_y = I915_BIT_6_SWIZZLE_9;
    624 			} else {
    625 				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    626 				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    627 			}
    628 		}
    629 	} else if (IS_GEN(i915, 5)) {
    630 		/*
     631 		 * On Ironlake, whatever the DRAM config, the GPU always
     632 		 * does the same swizzling setup.
    633 		 */
    634 		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
    635 		swizzle_y = I915_BIT_6_SWIZZLE_9;
    636 	} else if (IS_GEN(i915, 2)) {
    637 		/*
    638 		 * As far as we know, the 865 doesn't have these bit 6
    639 		 * swizzling issues.
    640 		 */
    641 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    642 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    643 	} else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
    644 		/*
    645 		 * The 965, G33, and newer, have a very flexible memory
    646 		 * configuration.  It will enable dual-channel mode
    647 		 * (interleaving) on as much memory as it can, and the GPU
    648 		 * will additionally sometimes enable different bit 6
    649 		 * swizzling for tiled objects from the CPU.
    650 		 *
    651 		 * Here's what I found on the G965:
    652 		 *    slot fill         memory size  swizzling
    653 		 * 0A   0B   1A   1B    1-ch   2-ch
    654 		 * 512  0    0    0     512    0     O
    655 		 * 512  0    512  0     16     1008  X
    656 		 * 512  0    0    512   16     1008  X
    657 		 * 0    512  0    512   16     1008  X
    658 		 * 1024 1024 1024 0     2048   1024  O
    659 		 *
    660 		 * We could probably detect this based on either the DRB
    661 		 * matching, which was the case for the swizzling required in
    662 		 * the table above, or from the 1-ch value being less than
    663 		 * the minimum size of a rank.
    664 		 *
    665 		 * Reports indicate that the swizzling actually
    666 		 * varies depending upon page placement inside the
    667 		 * channels, i.e. we see swizzled pages where the
    668 		 * banks of memory are paired and unswizzled on the
    669 		 * uneven portion, so leave that as unknown.
    670 		 */
    671 		if (intel_uncore_read(uncore, C0DRB3) ==
    672 		    intel_uncore_read(uncore, C1DRB3)) {
    673 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
    674 			swizzle_y = I915_BIT_6_SWIZZLE_9;
    675 		}
    676 	} else {
    677 		u32 dcc = intel_uncore_read(uncore, DCC);
    678 
    679 		/*
    680 		 * On 9xx chipsets, channel interleave by the CPU is
    681 		 * determined by DCC.  For single-channel, neither the CPU
    682 		 * nor the GPU do swizzling.  For dual channel interleaved,
    683 		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
    684 		 * 9 for Y tiled.  The CPU's interleave is independent, and
    685 		 * can be based on either bit 11 (haven't seen this yet) or
    686 		 * bit 17 (common).
    687 		 */
    688 		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
    689 		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
    690 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
    691 			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    692 			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    693 			break;
    694 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
    695 			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
    696 				/*
    697 				 * This is the base swizzling by the GPU for
    698 				 * tiled buffers.
    699 				 */
    700 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
    701 				swizzle_y = I915_BIT_6_SWIZZLE_9;
    702 			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
    703 				/* Bit 11 swizzling by the CPU in addition. */
    704 				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
    705 				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
    706 			} else {
    707 				/* Bit 17 swizzling by the CPU in addition. */
    708 				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
    709 				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
    710 			}
    711 			break;
    712 		}
    713 
    714 		/* check for L-shaped memory aka modified enhanced addressing */
    715 		if (IS_GEN(i915, 4) &&
    716 		    !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
    717 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
    718 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
    719 		}
    720 
    721 		if (dcc == 0xffffffff) {
    722 			DRM_ERROR("Couldn't read from MCHBAR.  "
    723 				  "Disabling tiling.\n");
    724 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
    725 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
    726 		}
    727 	}
    728 
    729 	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
    730 	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
    731 		/*
    732 		 * Userspace likes to explode if it sees unknown swizzling,
    733 		 * so lie. We will finish the lie when reporting through
    734 		 * the get-tiling-ioctl by reporting the physical swizzle
    735 		 * mode as unknown instead.
    736 		 *
    737 		 * As we don't strictly know what the swizzling is, it may be
    738 		 * bit17 dependent, and so we need to also prevent the pages
    739 		 * from being moved.
    740 		 */
    741 		i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
    742 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
    743 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
    744 	}
    745 
    746 	i915->ggtt.bit_6_swizzle_x = swizzle_x;
    747 	i915->ggtt.bit_6_swizzle_y = swizzle_y;
    748 }
    749 
    750 /*
    751  * Swap every 64 bytes of this page around, to account for it having a new
    752  * bit 17 of its physical address and therefore being interpreted differently
    753  * by the GPU.
    754  */
    755 static void i915_gem_swizzle_page(struct page *page)
    756 {
    757 	char temp[64];
    758 	char *vaddr;
    759 	int i;
    760 
    761 	vaddr = kmap(page);
    762 
    763 	for (i = 0; i < PAGE_SIZE; i += 128) {
    764 		memcpy(temp, &vaddr[i], 64);
    765 		memcpy(&vaddr[i], &vaddr[i + 64], 64);
    766 		memcpy(&vaddr[i + 64], temp, 64);
    767 	}
    768 
    769 	kunmap(page);
    770 }
    771 
    772 /**
    773  * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
    774  * @obj: i915 GEM buffer object
    775  * @pages: the scattergather list of physical pages
    776  *
    777  * This function fixes up the swizzling in case any page frame number for this
    778  * object has changed in bit 17 since that state has been saved with
    779  * i915_gem_object_save_bit_17_swizzle().
    780  *
    781  * This is called when pinning backing storage again, since the kernel is free
    782  * to move unpinned backing storage around (either by directly moving pages or
    783  * by swapping them out and back in again).
    784  */
    785 void
    786 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
    787 				  struct sg_table *pages)
    788 {
    789 	struct sgt_iter sgt_iter;
    790 	struct page *page;
    791 	int i;
    792 
    793 	if (obj->bit_17 == NULL)
    794 		return;
    795 
    796 	i = 0;
    797 	for_each_sgt_page(page, sgt_iter, pages) {
    798 		char new_bit_17 = page_to_phys(page) >> 17;
    799 		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
    800 			i915_gem_swizzle_page(page);
    801 			set_page_dirty(page);
    802 		}
    803 		i++;
    804 	}
    805 }
    806 
    807 /**
    808  * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
    809  * @obj: i915 GEM buffer object
    810  * @pages: the scattergather list of physical pages
    811  *
    812  * This function saves the bit 17 of each page frame number so that swizzling
    813  * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
    814  * be called before the backing storage can be unpinned.
    815  */
    816 void
    817 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
    818 				    struct sg_table *pages)
    819 {
    820 	const unsigned int page_count = obj->base.size >> PAGE_SHIFT;
    821 	struct sgt_iter sgt_iter;
    822 	struct page *page;
    823 	int i;
    824 
    825 	if (obj->bit_17 == NULL) {
    826 		obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
    827 		if (obj->bit_17 == NULL) {
    828 			DRM_ERROR("Failed to allocate memory for bit 17 "
    829 				  "record\n");
    830 			return;
    831 		}
    832 	}
    833 
    834 	i = 0;
    835 
    836 	for_each_sgt_page(page, sgt_iter, pages) {
    837 		if (page_to_phys(page) & (1 << 17))
    838 			__set_bit(i, obj->bit_17);
    839 		else
    840 			__clear_bit(i, obj->bit_17);
    841 		i++;
    842 	}
    843 }
    844 
    845 void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
    846 {
    847 	struct drm_i915_private *i915 = ggtt->vm.i915;
    848 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
    849 	int num_fences;
    850 	int i;
    851 
    852 	INIT_LIST_HEAD(&ggtt->fence_list);
    853 	INIT_LIST_HEAD(&ggtt->userfault_list);
    854 	intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm);
    855 
    856 	detect_bit_6_swizzle(ggtt);
    857 
    858 	if (!i915_ggtt_has_aperture(ggtt))
    859 		num_fences = 0;
    860 	else if (INTEL_GEN(i915) >= 7 &&
    861 		 !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
    862 		num_fences = 32;
    863 	else if (INTEL_GEN(i915) >= 4 ||
    864 		 IS_I945G(i915) || IS_I945GM(i915) ||
    865 		 IS_G33(i915) || IS_PINEVIEW(i915))
    866 		num_fences = 16;
    867 	else
    868 		num_fences = 8;
    869 
    870 	if (intel_vgpu_active(i915))
    871 		num_fences = intel_uncore_read(uncore,
    872 					       vgtif_reg(avail_rs.fence_num));
    873 
    874 	/* Initialize fence registers to zero */
    875 	for (i = 0; i < num_fences; i++) {
    876 		struct i915_fence_reg *fence = &ggtt->fence_regs[i];
    877 
    878 		fence->ggtt = ggtt;
    879 		fence->id = i;
    880 		list_add_tail(&fence->link, &ggtt->fence_list);
    881 	}
    882 	ggtt->num_fences = num_fences;
    883 
    884 	i915_gem_restore_fences(ggtt);
    885 }
    886 
    887 void intel_gt_init_swizzling(struct intel_gt *gt)
    888 {
    889 	struct drm_i915_private *i915 = gt->i915;
    890 	struct intel_uncore *uncore = gt->uncore;
    891 
    892 	if (INTEL_GEN(i915) < 5 ||
    893 	    i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
    894 		return;
    895 
    896 	intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
    897 
    898 	if (IS_GEN(i915, 5))
    899 		return;
    900 
    901 	intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL);
    902 
    903 	if (IS_GEN(i915, 6))
    904 		intel_uncore_write(uncore,
    905 				   ARB_MODE,
    906 				   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
    907 	else if (IS_GEN(i915, 7))
    908 		intel_uncore_write(uncore,
    909 				   ARB_MODE,
    910 				   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
    911 	else if (IS_GEN(i915, 8))
    912 		intel_uncore_write(uncore,
    913 				   GAMTARBMODE,
    914 				   _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
    915 	else
    916 		MISSING_CASE(INTEL_GEN(i915));
    917 }
    918