/*	$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_domain.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

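/*
 * Usage sketch (illustrative only, not lifted from an existing caller):
 * the domain setters in this file expect the object lock to be held
 * (assert_object_held above), so a caller would do roughly
 *
 *	i915_gem_object_lock(obj);
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 *
 * The same pattern applies to the GTT and CPU variants below.
 */
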
/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

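/*
 * Usage sketch (illustrative only): a caller that needs scanout-coherent
 * backing storage picks an uncached or write-through level, much as
 * i915_gem_object_pin_to_display_plane() does further down:
 *
 *	err = i915_gem_object_set_cache_level(obj,
 *					      HAS_WT(i915) ? I915_CACHE_WT
 *							   : I915_CACHE_NONE);
 *
 * The existing bindings are dropped by the unbind above and pick up the
 * new cache bits in their PTEs when they are next rebound.
 */
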
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

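/*
 * Userspace sketch (assumes libdrm's drmIoctl() and the uAPI definitions
 * from <drm/i915_drm.h>; illustrative only):
 *
 *	struct drm_i915_gem_caching arg = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg) == 0)
 *		printf("caching mode: %u\n", arg.caching);
 */
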
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}

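/*
 * Userspace sketch (assumes libdrm's drmIoctl() and <drm/i915_drm.h>;
 * illustrative only, error handling elided):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		perror("DRM_IOCTL_I915_GEM_SET_CACHING");
 */
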
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	if (!atomic_read(&obj->bind_count))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	assert_object_held(obj);

	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}

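/*
 * Usage sketch (illustrative only): display code pins a framebuffer object
 * for scanout and later releases it under the object lock, roughly
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... scan out from vma ...
 *	i915_gem_object_lock(obj);
 *	i915_gem_object_unpin_from_display_plane(vma);
 *	i915_gem_object_unlock(obj);
 */
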
/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

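/*
 * Userspace sketch (assumes libdrm's drmIoctl() and <drm/i915_drm.h>;
 * illustrative only): make a buffer coherent for CPU writes before
 * filling it through an mmap'ed view.
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
 *		perror("DRM_IOCTL_I915_GEM_SET_DOMAIN");
 */
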
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourselves into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourselves into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
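
/*
 * Usage sketch (illustrative only; vaddr and len are placeholders): a
 * pwrite-style caller flushes around its CPU access as directed by
 * *needs_clflush and then releases the access:
 *
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *	if (needs_clflush & CLFLUSH_BEFORE)
 *		drm_clflush_virt_range(vaddr, len);
 *	... copy the user's data into the mapping at vaddr ...
 *	if (needs_clflush & CLFLUSH_AFTER)
 *		drm_clflush_virt_range(vaddr, len);
 *	i915_gem_object_finish_access(obj);
 *
 * i915_gem_object_finish_access() is the assumed counterpart declared in
 * i915_gem_object.h, releasing the page pin (and object lock) taken here.
 * A read path uses i915_gem_object_prepare_read() the same way, with only
 * CLFLUSH_BEFORE ever being set.
 */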