      1 /*	$NetBSD: i915_gem_userptr.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
      6  * Copyright © 2012-2014 Intel Corporation
      7  */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: i915_gem_userptr.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $");
     11 
     12 #include <linux/mmu_context.h>
     13 #include <linux/mmu_notifier.h>
     14 #include <linux/mempolicy.h>
     15 #include <linux/swap.h>
     16 #include <linux/sched/mm.h>
     17 
     18 #include <drm/i915_drm.h>
     19 
     20 #include "i915_drv.h"
     21 #include "i915_gem_ioctls.h"
     22 #include "i915_gem_object.h"
     23 #include "i915_scatterlist.h"
     24 
     25 #include <linux/nbsd-namespace.h>
     26 
     27 struct i915_mm_struct {
     28 #ifdef __NetBSD__
     29 	struct vmspace *mm;
     30 #else
     31 	struct mm_struct *mm;
     32 #endif
     33 	struct drm_i915_private *i915;
     34 	struct i915_mmu_notifier *mn;
     35 	struct hlist_node node;
     36 	struct kref kref;
     37 	struct work_struct work;
     38 };
     39 
     40 #if defined(CONFIG_MMU_NOTIFIER)
     41 #include <linux/interval_tree.h>
     42 
     43 struct i915_mmu_notifier {
     44 	spinlock_t lock;
     45 	struct hlist_node node;
     46 	struct mmu_notifier mn;
     47 	struct rb_root_cached objects;
     48 	struct i915_mm_struct *mm;
     49 };
     50 
     51 struct i915_mmu_object {
     52 	struct i915_mmu_notifier *mn;
     53 	struct drm_i915_gem_object *obj;
     54 	struct interval_tree_node it;
     55 };
     56 
     57 static void add_object(struct i915_mmu_object *mo)
     58 {
     59 	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
     60 	interval_tree_insert(&mo->it, &mo->mn->objects);
     61 }
     62 
     63 static void del_object(struct i915_mmu_object *mo)
     64 {
     65 	if (RB_EMPTY_NODE(&mo->it.rb))
     66 		return;
     67 
     68 	interval_tree_remove(&mo->it, &mo->mn->objects);
     69 	RB_CLEAR_NODE(&mo->it.rb);
     70 }
     71 
     72 static void
     73 __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
     74 {
     75 	struct i915_mmu_object *mo = obj->userptr.mmu_object;
     76 
     77 	/*
     78 	 * During mm_invalidate_range we need to cancel any userptr that
     79 	 * overlaps the range being invalidated. Doing so requires the
     80 	 * struct_mutex, and that risks recursion. In order to cause
     81 	 * recursion, the user must alias the userptr address space with
     82 	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
      83 	 * to invalidate that mmapping, mm_invalidate_range is called with
     84 	 * the userptr address *and* the struct_mutex held.  To prevent that
     85 	 * we set a flag under the i915_mmu_notifier spinlock to indicate
     86 	 * whether this object is valid.
     87 	 */
     88 	if (!mo)
     89 		return;
     90 
     91 	spin_lock(&mo->mn->lock);
     92 	if (value)
     93 		add_object(mo);
     94 	else
     95 		del_object(mo);
     96 	spin_unlock(&mo->mn->lock);
     97 }
     98 
     99 static int
    100 userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
    101 				  const struct mmu_notifier_range *range)
    102 {
    103 	struct i915_mmu_notifier *mn =
    104 		container_of(_mn, struct i915_mmu_notifier, mn);
    105 	struct interval_tree_node *it;
    106 	unsigned long end;
    107 	int ret = 0;
    108 
    109 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
    110 		return 0;
    111 
    112 	/* interval ranges are inclusive, but invalidate range is exclusive */
    113 	end = range->end - 1;
    114 
    115 	spin_lock(&mn->lock);
    116 	it = interval_tree_iter_first(&mn->objects, range->start, end);
    117 	while (it) {
    118 		struct drm_i915_gem_object *obj;
    119 
    120 		if (!mmu_notifier_range_blockable(range)) {
    121 			ret = -EAGAIN;
    122 			break;
    123 		}
    124 
    125 		/*
    126 		 * The mmu_object is released late when destroying the
    127 		 * GEM object so it is entirely possible to gain a
    128 		 * reference on an object in the process of being freed
    129 		 * since our serialisation is via the spinlock and not
    130 		 * the struct_mutex - and consequently use it after it
    131 		 * is freed and then double free it. To prevent that
    132 		 * use-after-free we only acquire a reference on the
    133 		 * object if it is not in the process of being destroyed.
    134 		 */
    135 		obj = container_of(it, struct i915_mmu_object, it)->obj;
    136 		if (!kref_get_unless_zero(&obj->base.refcount)) {
    137 			it = interval_tree_iter_next(it, range->start, end);
    138 			continue;
    139 		}
    140 		spin_unlock(&mn->lock);
    141 
    142 		ret = i915_gem_object_unbind(obj,
    143 					     I915_GEM_OBJECT_UNBIND_ACTIVE |
    144 					     I915_GEM_OBJECT_UNBIND_BARRIER);
    145 		if (ret == 0)
    146 			ret = __i915_gem_object_put_pages(obj);
    147 		i915_gem_object_put(obj);
    148 		if (ret)
    149 			return ret;
    150 
    151 		spin_lock(&mn->lock);
    152 
    153 		/*
    154 		 * As we do not (yet) protect the mmu from concurrent insertion
    155 		 * over this range, there is no guarantee that this search will
     156 		 * terminate given a pathological workload.
    157 		 */
    158 		it = interval_tree_iter_first(&mn->objects, range->start, end);
    159 	}
    160 	spin_unlock(&mn->lock);
    161 
    162 	return ret;
    163 
    164 }
    165 
    166 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
    167 	.invalidate_range_start = userptr_mn_invalidate_range_start,
    168 };
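
/*
 * Roughly how the callback above is reached in the Linux configuration:
 * once the notifier is registered on the process address space (see
 * i915_mmu_notifier_find() below), the core VM calls
 * mmu_notifier_invalidate_range_start() around operations that tear down
 * or rewrite PTEs in that space (munmap, fork's write-protection, page
 * migration, and so on), which invokes userptr_mn_invalidate_range_start();
 * that in turn walks the interval tree for overlapping userptr objects.
 */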
    169 
    170 static struct i915_mmu_notifier *
    171 i915_mmu_notifier_create(struct i915_mm_struct *mm)
    172 {
    173 	struct i915_mmu_notifier *mn;
    174 
    175 	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
    176 	if (mn == NULL)
    177 		return ERR_PTR(-ENOMEM);
    178 
    179 	spin_lock_init(&mn->lock);
    180 	mn->mn.ops = &i915_gem_userptr_notifier;
    181 	mn->objects = RB_ROOT_CACHED;
    182 	mn->mm = mm;
    183 
    184 	return mn;
    185 }
    186 
    187 static void
    188 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
    189 {
    190 	struct i915_mmu_object *mo;
    191 
    192 	mo = fetch_and_zero(&obj->userptr.mmu_object);
    193 	if (!mo)
    194 		return;
    195 
    196 	spin_lock(&mo->mn->lock);
    197 	del_object(mo);
    198 	spin_unlock(&mo->mn->lock);
    199 	kfree(mo);
    200 }
    201 
    202 static struct i915_mmu_notifier *
    203 i915_mmu_notifier_find(struct i915_mm_struct *mm)
    204 {
    205 	struct i915_mmu_notifier *mn;
    206 	int err = 0;
    207 
    208 	mn = mm->mn;
    209 	if (mn)
    210 		return mn;
    211 
    212 	mn = i915_mmu_notifier_create(mm);
    213 	if (IS_ERR(mn))
    214 		err = PTR_ERR(mn);
    215 
    216 	down_write(&mm->mm->mmap_sem);
    217 	mutex_lock(&mm->i915->mm_lock);
    218 	if (mm->mn == NULL && !err) {
    219 		/* Protected by mmap_sem (write-lock) */
    220 		err = __mmu_notifier_register(&mn->mn, mm->mm);
    221 		if (!err) {
    222 			/* Protected by mm_lock */
    223 			mm->mn = fetch_and_zero(&mn);
    224 		}
    225 	} else if (mm->mn) {
    226 		/*
    227 		 * Someone else raced and successfully installed the mmu
    228 		 * notifier, we can cancel our own errors.
    229 		 */
    230 		err = 0;
    231 	}
    232 	mutex_unlock(&mm->i915->mm_lock);
    233 	up_write(&mm->mm->mmap_sem);
    234 
    235 	if (mn && !IS_ERR(mn)) {
    236 		spin_lock_destroy(&mn->lock);
    237 		kfree(mn);
    238 	}
    239 
    240 	return err ? ERR_PTR(err) : mm->mn;
    241 }
    242 
    243 static int
    244 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
    245 				    unsigned flags)
    246 {
    247 	struct i915_mmu_notifier *mn;
    248 	struct i915_mmu_object *mo;
    249 
    250 	if (flags & I915_USERPTR_UNSYNCHRONIZED)
    251 		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
    252 
    253 	if (WARN_ON(obj->userptr.mm == NULL))
    254 		return -EINVAL;
    255 
    256 	mn = i915_mmu_notifier_find(obj->userptr.mm);
    257 	if (IS_ERR(mn))
    258 		return PTR_ERR(mn);
    259 
    260 	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
    261 	if (!mo)
    262 		return -ENOMEM;
    263 
    264 	mo->mn = mn;
    265 	mo->obj = obj;
    266 	mo->it.start = obj->userptr.ptr;
    267 	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
    268 	RB_CLEAR_NODE(&mo->it.rb);
    269 
    270 	obj->userptr.mmu_object = mo;
    271 	return 0;
    272 }
    273 
    274 static void
    275 #ifdef __NetBSD__
    276 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
    277 		       struct vmspace *mm)
    278 #else
    279 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
    280 		       struct mm_struct *mm)
    281 #endif
    282 {
    283 	if (mn == NULL)
    284 		return;
    285 
    286 	mmu_notifier_unregister(&mn->mn, mm);
    287 	spin_lock_destroy(&mn->lock);
    288 	kfree(mn);
    289 }
    290 
    291 #else
    292 
    293 static void
    294 __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
    295 {
    296 }
    297 
    298 static void
    299 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
    300 {
    301 }
    302 
    303 static int
    304 i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
    305 				    unsigned flags)
    306 {
    307 	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
    308 		return -ENODEV;
    309 
    310 	if (!capable(CAP_SYS_ADMIN))
    311 		return -EPERM;
    312 
    313 	return 0;
    314 }
    315 
    316 static void
    317 #ifdef __NetBSD__
    318 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
    319 		       struct vmspace *mm)
    320 #else
    321 i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
    322 		       struct mm_struct *mm)
    323 #endif
    324 {
    325 }
    326 
    327 #endif
    328 
    329 static struct i915_mm_struct *
    330 #ifdef __NetBSD__
    331 __i915_mm_struct_find(struct drm_i915_private *dev_priv, struct vmspace *real)
    332 #else
    333 __i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
    334 #endif
    335 {
    336 	struct i915_mm_struct *mm;
    337 
    338 	/* Protected by dev_priv->mm_lock */
    339 	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
    340 		if (mm->mm == real)
    341 			return mm;
    342 
    343 	return NULL;
    344 }
    345 
    346 static int
    347 i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
    348 {
    349 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
    350 	struct i915_mm_struct *mm;
    351 	int ret = 0;
    352 
    353 	/* During release of the GEM object we hold the struct_mutex. This
    354 	 * precludes us from calling mmput() at that time as that may be
    355 	 * the last reference and so call exit_mmap(). exit_mmap() will
    356 	 * attempt to reap the vma, and if we were holding a GTT mmap
    357 	 * would then call drm_gem_vm_close() and attempt to reacquire
    358 	 * the struct mutex. So in order to avoid that recursion, we have
    359 	 * to defer releasing the mm reference until after we drop the
    360 	 * struct_mutex, i.e. we need to schedule a worker to do the clean
    361 	 * up.
    362 	 */
    363 	mutex_lock(&dev_priv->mm_lock);
    364 #ifdef __NetBSD__
    365 	mm = __i915_mm_struct_find(dev_priv, curproc->p_vmspace);
    366 #else
    367 	mm = __i915_mm_struct_find(dev_priv, current->mm);
    368 #endif
    369 	if (mm == NULL) {
    370 		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
    371 		if (mm == NULL) {
    372 			ret = -ENOMEM;
    373 			goto out;
    374 		}
    375 
    376 		kref_init(&mm->kref);
    377 		mm->i915 = to_i915(obj->base.dev);
    378 
    379 #ifdef __NetBSD__
    380 		mm->mm = curproc->p_vmspace;
    381 #else
    382 		mm->mm = current->mm;
    383 #endif
    384 		mmgrab(mm->mm);
    385 
    386 		mm->mn = NULL;
    387 
    388 		/* Protected by dev_priv->mm_lock */
    389 		hash_add(dev_priv->mm_structs,
    390 			 &mm->node, (unsigned long)mm->mm);
    391 	} else
    392 		kref_get(&mm->kref);
    393 
    394 	obj->userptr.mm = mm;
    395 out:
    396 	mutex_unlock(&dev_priv->mm_lock);
    397 	return ret;
    398 }
    399 
    400 static void
    401 __i915_mm_struct_free__worker(struct work_struct *work)
    402 {
    403 	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
    404 	i915_mmu_notifier_free(mm->mn, mm->mm);
    405 	mmdrop(mm->mm);
    406 	kfree(mm);
    407 }
    408 
    409 static void
    410 __i915_mm_struct_free(struct kref *kref)
    411 {
    412 	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
    413 
    414 	/* Protected by dev_priv->mm_lock */
    415 	hash_del(&mm->node);
    416 	mutex_unlock(&mm->i915->mm_lock);
    417 
    418 	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
    419 	queue_work(mm->i915->mm.userptr_wq, &mm->work);
    420 }
    421 
    422 static void
    423 i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
    424 {
    425 	if (obj->userptr.mm == NULL)
    426 		return;
    427 
    428 	kref_put_mutex(&obj->userptr.mm->kref,
    429 		       __i915_mm_struct_free,
    430 		       &to_i915(obj->base.dev)->mm_lock);
    431 	obj->userptr.mm = NULL;
    432 }
    433 
    434 struct get_pages_work {
    435 	struct work_struct work;
    436 	struct drm_i915_gem_object *obj;
    437 	struct task_struct *task;
    438 };
    439 
    440 static struct sg_table *
    441 __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
    442 			       struct page **pvec, unsigned long num_pages)
    443 {
    444 	unsigned int max_segment = i915_sg_segment_size();
    445 	struct sg_table *st;
    446 	unsigned int sg_page_sizes;
    447 	int ret;
    448 
    449 	st = kmalloc(sizeof(*st), GFP_KERNEL);
    450 	if (!st)
    451 		return ERR_PTR(-ENOMEM);
    452 
    453 alloc_table:
    454 	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
    455 					  0, num_pages << PAGE_SHIFT,
    456 					  max_segment,
    457 					  GFP_KERNEL);
    458 	if (ret) {
    459 		kfree(st);
    460 		return ERR_PTR(ret);
    461 	}
    462 
    463 	ret = i915_gem_gtt_prepare_pages(obj, st);
    464 	if (ret) {
    465 		sg_free_table(st);
    466 
    467 		if (max_segment > PAGE_SIZE) {
    468 			max_segment = PAGE_SIZE;
    469 			goto alloc_table;
    470 		}
    471 
    472 		kfree(st);
    473 		return ERR_PTR(ret);
    474 	}
    475 
    476 	sg_page_sizes = i915_sg_page_sizes(st->sgl);
    477 
    478 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
    479 
    480 	return st;
    481 }
    482 
    483 static void
    484 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
    485 {
    486 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
    487 	struct drm_i915_gem_object *obj = work->obj;
    488 	const unsigned long npages = obj->base.size >> PAGE_SHIFT;
    489 	unsigned long pinned;
    490 	struct page **pvec;
    491 	int ret;
    492 
    493 	ret = -ENOMEM;
    494 	pinned = 0;
    495 
    496 	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
    497 	if (pvec != NULL) {
    498 #ifdef __NetBSD__
    499 		struct vmspace *mm = obj->userptr.mm->mm;
    500 #else
    501 		struct mm_struct *mm = obj->userptr.mm->mm;
    502 #endif
    503 		unsigned int flags = 0;
    504 		int locked = 0;
    505 
    506 		if (!i915_gem_object_is_readonly(obj))
    507 			flags |= FOLL_WRITE;
    508 
    509 		ret = -EFAULT;
    510 		if (mmget_not_zero(mm)) {
    511 			while (pinned < npages) {
    512 				if (!locked) {
    513 					down_read(&mm->mmap_sem);
    514 					locked = 1;
    515 				}
    516 				ret = get_user_pages_remote
    517 					(work->task, mm,
    518 					 obj->userptr.ptr + pinned * PAGE_SIZE,
    519 					 npages - pinned,
    520 					 flags,
    521 					 pvec + pinned, NULL, &locked);
    522 				if (ret < 0)
    523 					break;
    524 
    525 				pinned += ret;
    526 			}
    527 			if (locked)
    528 				up_read(&mm->mmap_sem);
    529 			mmput(mm);
    530 		}
    531 	}
    532 
    533 	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
    534 	if (obj->userptr.work == &work->work) {
    535 		struct sg_table *pages = ERR_PTR(ret);
    536 
    537 		if (pinned == npages) {
    538 			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
    539 							       npages);
    540 			if (!IS_ERR(pages)) {
    541 				pinned = 0;
    542 				pages = NULL;
    543 			}
    544 		}
    545 
    546 		obj->userptr.work = ERR_CAST(pages);
    547 		if (IS_ERR(pages))
    548 			__i915_gem_userptr_set_active(obj, false);
    549 	}
    550 	mutex_unlock(&obj->mm.lock);
    551 
    552 	release_pages(pvec, pinned);
    553 	kvfree(pvec);
    554 
    555 	i915_gem_object_put(obj);
    556 	put_task_struct(work->task);
    557 	kfree(work);
    558 }
    559 
    560 static struct sg_table *
    561 __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
    562 {
    563 	struct get_pages_work *work;
    564 
    565 	/* Spawn a worker so that we can acquire the
    566 	 * user pages without holding our mutex. Access
    567 	 * to the user pages requires mmap_sem, and we have
    568 	 * a strict lock ordering of mmap_sem, struct_mutex -
    569 	 * we already hold struct_mutex here and so cannot
    570 	 * call gup without encountering a lock inversion.
    571 	 *
    572 	 * Userspace will keep on repeating the operation
    573 	 * (thanks to EAGAIN) until either we hit the fast
    574 	 * path or the worker completes. If the worker is
    575 	 * cancelled or superseded, the task is still run
    576 	 * but the results ignored. (This leads to
    577 	 * complications that we may have a stray object
    578 	 * refcount that we need to be wary of when
    579 	 * checking for existing objects during creation.)
    580 	 * If the worker encounters an error, it reports
    581 	 * that error back to this function through
    582 	 * obj->userptr.work = ERR_PTR.
    583 	 */
    584 	work = kmalloc(sizeof(*work), GFP_KERNEL);
    585 	if (work == NULL)
    586 		return ERR_PTR(-ENOMEM);
    587 
    588 	obj->userptr.work = &work->work;
    589 
    590 	work->obj = i915_gem_object_get(obj);
    591 
    592 	work->task = current;
    593 	get_task_struct(work->task);
    594 
    595 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
    596 	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
    597 
    598 	return ERR_PTR(-EAGAIN);
    599 }
    600 
    601 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
    602 {
    603 	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
    604 #ifdef __NetBSD__
    605 	struct vmspace *mm = obj->userptr.mm->mm;
    606 #else
    607 	struct mm_struct *mm = obj->userptr.mm->mm;
    608 #endif
    609 	struct page **pvec;
    610 	struct sg_table *pages;
    611 	bool active;
    612 	int pinned;
    613 
    614 	/* If userspace should engineer that these pages are replaced in
    615 	 * the vma between us binding this page into the GTT and completion
    616 	 * of rendering... Their loss. If they change the mapping of their
    617 	 * pages they need to create a new bo to point to the new vma.
    618 	 *
    619 	 * However, that still leaves open the possibility of the vma
    620 	 * being copied upon fork. Which falls under the same userspace
    621 	 * synchronisation issue as a regular bo, except that this time
    622 	 * the process may not be expecting that a particular piece of
    623 	 * memory is tied to the GPU.
    624 	 *
    625 	 * Fortunately, we can hook into the mmu_notifier in order to
    626 	 * discard the page references prior to anything nasty happening
    627 	 * to the vma (discard or cloning) which should prevent the more
    628 	 * egregious cases from causing harm.
    629 	 */
    630 
    631 	if (obj->userptr.work) {
    632 		/* active flag should still be held for the pending work */
    633 		if (IS_ERR(obj->userptr.work))
    634 			return PTR_ERR(obj->userptr.work);
    635 		else
    636 			return -EAGAIN;
    637 	}
    638 
    639 	pvec = NULL;
    640 	pinned = 0;
    641 
    642 #ifdef __NetBSD__
    643 	if (mm == curproc->p_vmspace)
    644 #else
    645 	if (mm == current->mm)
    646 #endif
    647 	{
    648 		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
    649 				      GFP_KERNEL |
    650 				      __GFP_NORETRY |
    651 				      __GFP_NOWARN);
    652 		if (pvec) /* defer to worker if malloc fails */
    653 			pinned = __get_user_pages_fast(obj->userptr.ptr,
    654 						       num_pages,
    655 						       !i915_gem_object_is_readonly(obj),
    656 						       pvec);
    657 	}
    658 
    659 	active = false;
    660 	if (pinned < 0) {
    661 		pages = ERR_PTR(pinned);
    662 		pinned = 0;
    663 	} else if (pinned < num_pages) {
    664 		pages = __i915_gem_userptr_get_pages_schedule(obj);
    665 		active = pages == ERR_PTR(-EAGAIN);
    666 	} else {
    667 		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
    668 		active = !IS_ERR(pages);
    669 	}
    670 	if (active)
    671 		__i915_gem_userptr_set_active(obj, true);
    672 
    673 	if (IS_ERR(pages))
    674 		release_pages(pvec, pinned);
    675 	kvfree(pvec);
    676 
    677 	return PTR_ERR_OR_ZERO(pages);
    678 }
    679 
    680 static void
    681 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
    682 			   struct sg_table *pages)
    683 {
    684 	struct sgt_iter sgt_iter;
    685 	struct page *page;
    686 
     687 	/* Cancel any in-flight work and force it to restart its gup */
    688 	obj->userptr.work = NULL;
    689 	__i915_gem_userptr_set_active(obj, false);
    690 	if (!pages)
    691 		return;
    692 
    693 	__i915_gem_object_release_shmem(obj, pages, true);
    694 	i915_gem_gtt_finish_pages(obj, pages);
    695 
    696 	/*
    697 	 * We always mark objects as dirty when they are used by the GPU,
    698 	 * just in case. However, if we set the vma as being read-only we know
    699 	 * that the object will never have been written to.
    700 	 */
    701 	if (i915_gem_object_is_readonly(obj))
    702 		obj->mm.dirty = false;
    703 
    704 	for_each_sgt_page(page, sgt_iter, pages) {
    705 		if (obj->mm.dirty && trylock_page(page)) {
    706 			/*
    707 			 * As this may not be anonymous memory (e.g. shmem)
     708 			 * but may exist on a real mapping, we have to lock
    709 			 * the page in order to dirty it -- holding
    710 			 * the page reference is not sufficient to
    711 			 * prevent the inode from being truncated.
    712 			 * Play safe and take the lock.
    713 			 *
    714 			 * However...!
    715 			 *
    716 			 * The mmu-notifier can be invalidated for a
     717 			 * migrate_page that is already holding the lock
    718 			 * on the page. Such a try_to_unmap() will result
    719 			 * in us calling put_pages() and so recursively try
    720 			 * to lock the page. We avoid that deadlock with
    721 			 * a trylock_page() and in exchange we risk missing
    722 			 * some page dirtying.
    723 			 */
    724 			set_page_dirty(page);
    725 			unlock_page(page);
    726 		}
    727 
    728 		mark_page_accessed(page);
    729 		put_page(page);
    730 	}
    731 	obj->mm.dirty = false;
    732 
    733 	sg_free_table(pages);
    734 	kfree(pages);
    735 }
    736 
    737 static void
    738 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
    739 {
    740 	i915_gem_userptr_release__mmu_notifier(obj);
    741 	i915_gem_userptr_release__mm_struct(obj);
    742 }
    743 
    744 static int
    745 i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
    746 {
    747 	if (obj->userptr.mmu_object)
    748 		return 0;
    749 
    750 	return i915_gem_userptr_init__mmu_notifier(obj, 0);
    751 }
    752 
    753 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
    754 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
    755 		 I915_GEM_OBJECT_IS_SHRINKABLE |
    756 		 I915_GEM_OBJECT_NO_GGTT |
    757 		 I915_GEM_OBJECT_ASYNC_CANCEL,
    758 	.get_pages = i915_gem_userptr_get_pages,
    759 	.put_pages = i915_gem_userptr_put_pages,
    760 	.dmabuf_export = i915_gem_userptr_dmabuf_export,
    761 	.release = i915_gem_userptr_release,
    762 };
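
/*
 * These hooks are not called directly; they are reached through the
 * generic GEM object paths.  Pinning the object's backing store (e.g.
 * via i915_gem_object_pin_pages()) roughly ends up in .get_pages, and
 * releasing it (__i915_gem_object_put_pages(), as used by the
 * invalidate callback above) roughly ends up in .put_pages.
 */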
    763 
    764 /*
    765  * Creates a new mm object that wraps some normal memory from the process
    766  * context - user memory.
    767  *
    768  * We impose several restrictions upon the memory being mapped
    769  * into the GPU.
     770  * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
    771  * 2. It must be normal system memory, not a pointer into another map of IO
    772  *    space (e.g. it must not be a GTT mmapping of another object).
    773  * 3. We only allow a bo as large as we could in theory map into the GTT,
    774  *    that is we limit the size to the total size of the GTT.
    775  * 4. The bo is marked as being snoopable. The backing pages are left
    776  *    accessible directly by the CPU, but reads and writes by the GPU may
    777  *    incur the cost of a snoop (unless you have an LLC architecture).
    778  *
    779  * Synchronisation between multiple users and the GPU is left to userspace
    780  * through the normal set-domain-ioctl. The kernel will enforce that the
    781  * GPU relinquishes the VMA before it is returned back to the system
    782  * i.e. upon free(), munmap() or process termination. However, the userspace
    783  * malloc() library may not immediately relinquish the VMA after free() and
    784  * instead reuse it whilst the GPU is still reading and writing to the VMA.
    785  * Caveat emptor.
    786  *
    787  * Also note, that the object created here is not currently a "first class"
    788  * object, in that several ioctls are banned. These are the CPU access
    789  * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
    790  * direct access via your pointer rather than use those ioctls. Another
    791  * restriction is that we do not allow userptr surfaces to be pinned to the
    792  * hardware and so we reject any attempt to create a framebuffer out of a
    793  * userptr.
    794  *
    795  * If you think this is a good interface to use to pass GPU memory between
    796  * drivers, please use dma-buf instead. In fact, wherever possible use
    797  * dma-buf instead.
    798  */
    799 int
    800 i915_gem_userptr_ioctl(struct drm_device *dev,
    801 		       void *data,
    802 		       struct drm_file *file)
    803 {
    804 	static struct lock_class_key lock_class;
    805 	struct drm_i915_private *dev_priv = to_i915(dev);
    806 	struct drm_i915_gem_userptr *args = data;
    807 	struct drm_i915_gem_object *obj;
    808 	int ret;
    809 	u32 handle;
    810 
    811 	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
    812 		/* We cannot support coherent userptr objects on hw without
     813 		 * LLC and with broken snooping.
    814 		 */
    815 		return -ENODEV;
    816 	}
    817 
    818 	if (args->flags & ~(I915_USERPTR_READ_ONLY |
    819 			    I915_USERPTR_UNSYNCHRONIZED))
    820 		return -EINVAL;
    821 
    822 	if (!args->user_size)
    823 		return -EINVAL;
    824 
    825 	if (offset_in_page(args->user_ptr | args->user_size))
    826 		return -EINVAL;
    827 
    828 	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
    829 		return -EFAULT;
    830 
    831 	if (args->flags & I915_USERPTR_READ_ONLY) {
    832 		/*
    833 		 * On almost all of the older hw, we cannot tell the GPU that
    834 		 * a page is readonly.
    835 		 */
    836 		if (!dev_priv->gt.vm->has_read_only)
    837 			return -ENODEV;
    838 	}
    839 
    840 	obj = i915_gem_object_alloc();
    841 	if (obj == NULL)
    842 		return -ENOMEM;
    843 
    844 	drm_gem_private_object_init(dev, &obj->base, args->user_size);
    845 	i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
    846 	obj->read_domains = I915_GEM_DOMAIN_CPU;
    847 	obj->write_domain = I915_GEM_DOMAIN_CPU;
    848 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
    849 
    850 	obj->userptr.ptr = args->user_ptr;
    851 	if (args->flags & I915_USERPTR_READ_ONLY)
    852 		i915_gem_object_set_readonly(obj);
    853 
    854 	/* And keep a pointer to the current->mm for resolving the user pages
    855 	 * at binding. This means that we need to hook into the mmu_notifier
    856 	 * in order to detect if the mmu is destroyed.
    857 	 */
    858 	ret = i915_gem_userptr_init__mm_struct(obj);
    859 	if (ret == 0)
    860 		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
    861 	if (ret == 0)
    862 		ret = drm_gem_handle_create(file, &obj->base, &handle);
    863 
    864 	/* drop reference from allocate - handle holds it now */
    865 	i915_gem_object_put(obj);
    866 	if (ret)
    867 		return ret;
    868 
    869 	args->handle = handle;
    870 	return 0;
    871 }
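
/*
 * Illustrative userspace sketch (not part of this file's build): roughly
 * how a client could reach this ioctl through libdrm.  It assumes an
 * already-open render-node fd, libdrm's drmIoctl() from <xf86drm.h>, the
 * uapi struct from <drm/i915_drm.h>, plus <errno.h>/<stdint.h>; the helper
 * name and its arguments are made up for the example.  ptr and size must
 * be page aligned, flags may include I915_USERPTR_READ_ONLY, and on
 * success the new GEM handle is returned in arg.handle.
 *
 *	static int
 *	create_userptr_bo(int fd, void *ptr, uint64_t size, uint32_t *handle)
 *	{
 *		struct drm_i915_gem_userptr arg = {
 *			.user_ptr = (uintptr_t)ptr,
 *			.user_size = size,
 *			.flags = 0,
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
 *			return -errno;
 *		*handle = arg.handle;
 *		return 0;
 *	}
 */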
    872 
    873 int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
    874 {
    875 	mutex_init(&dev_priv->mm_lock);
    876 	hash_init(dev_priv->mm_structs);
    877 
    878 	dev_priv->mm.userptr_wq =
    879 		alloc_workqueue("i915-userptr-acquire",
    880 				WQ_HIGHPRI | WQ_UNBOUND,
    881 				0);
    882 	if (!dev_priv->mm.userptr_wq)
    883 		return -ENOMEM;
    884 
    885 	return 0;
    886 }
    887 
    888 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
    889 {
    890 	destroy_workqueue(dev_priv->mm.userptr_wq);
    891 	mutex_destroy(&dev_priv->mm_lock);
    892 }
    893