      1 /*	$NetBSD: linux_dma_resv.c,v 1.22 2022/02/15 22:51:03 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.22 2022/02/15 22:51:03 riastradh Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/poll.h>
     37 #include <sys/select.h>
     38 
     39 #include <linux/dma-fence.h>
     40 #include <linux/dma-resv.h>
     41 #include <linux/seqlock.h>
     42 #include <linux/ww_mutex.h>
     43 
     44 DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);
     45 
     46 static struct dma_resv_list *
     47 objlist_tryalloc(uint32_t n)
     48 {
     49 	struct dma_resv_list *list;
     50 
     51 	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
     52 	if (list == NULL)
     53 		return NULL;
     54 	list->shared_max = n;
     55 
     56 	return list;
     57 }
     58 
     59 static struct dma_resv_list *
     60 objlist_alloc(uint32_t n)
     61 {
     62 	struct dma_resv_list *list;
     63 
     64 	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_SLEEP);
     65 	list->shared_max = n;
     66 
     67 	return list;
     68 }
     69 
     70 static void
     71 objlist_free(struct dma_resv_list *list)
     72 {
     73 	uint32_t n = list->shared_max;
     74 
     75 	kmem_free(list, offsetof(typeof(*list), shared[n]));
     76 }
     77 
     78 static void
     79 objlist_free_cb(struct rcu_head *rcu)
     80 {
     81 	struct dma_resv_list *list = container_of(rcu,
     82 	    struct dma_resv_list, rol_rcu);
     83 
     84 	objlist_free(list);
     85 }
     86 
     87 static void
     88 objlist_defer_free(struct dma_resv_list *list)
     89 {
     90 
     91 	call_rcu(&list->rol_rcu, objlist_free_cb);
     92 }
     93 
     94 /*
     95  * dma_resv_init(robj)
     96  *
     97  *	Initialize a reservation object.  Caller must later destroy it
     98  *	with dma_resv_fini.
     99  */
    100 void
    101 dma_resv_init(struct dma_resv *robj)
    102 {
    103 
    104 	ww_mutex_init(&robj->lock, &reservation_ww_class);
    105 	seqcount_init(&robj->seq);
    106 	robj->fence_excl = NULL;
    107 	robj->fence = NULL;
    108 	robj->robj_prealloc = NULL;
    109 }
    110 
    111 /*
    112  * dma_resv_fini(robj)
    113  *
    114  *	Destroy a reservation object, freeing any memory that had been
    115  *	allocated for it.  Caller must have exclusive access to it.
    116  */
    117 void
    118 dma_resv_fini(struct dma_resv *robj)
    119 {
    120 	unsigned i;
    121 
    122 	if (robj->robj_prealloc) {
    123 		objlist_free(robj->robj_prealloc);
    124 		robj->robj_prealloc = NULL; /* paranoia */
    125 	}
    126 	if (robj->fence) {
    127 		for (i = 0; i < robj->fence->shared_count; i++) {
    128 			dma_fence_put(robj->fence->shared[i]);
    129 			robj->fence->shared[i] = NULL; /* paranoia */
    130 		}
    131 		objlist_free(robj->fence);
    132 		robj->fence = NULL; /* paranoia */
    133 	}
    134 	if (robj->fence_excl) {
    135 		dma_fence_put(robj->fence_excl);
    136 		robj->fence_excl = NULL; /* paranoia */
    137 	}
    138 	ww_mutex_destroy(&robj->lock);
    139 }
    140 
    141 /*
    142  * dma_resv_lock(robj, ctx)
    143  *
    144  *	Acquire a reservation object's lock.  Return 0 on success,
    145  *	-EALREADY if caller already holds it, -EDEADLK if a
    146  *	higher-priority owner holds it and the caller must back out and
    147  *	retry.
    148  */
    149 int
    150 dma_resv_lock(struct dma_resv *robj,
    151     struct ww_acquire_ctx *ctx)
    152 {
    153 
    154 	return ww_mutex_lock(&robj->lock, ctx);
    155 }
    156 
    157 /*
    158  * dma_resv_lock_slow(robj, ctx)
    159  *
    160  *	Acquire a reservation object's lock.  Caller must not hold
    161  *	this lock or any others -- this is to be used in slow paths
    162  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    163  *	and the caller has backed out all other locks.
    164  */
    165 void
    166 dma_resv_lock_slow(struct dma_resv *robj,
    167     struct ww_acquire_ctx *ctx)
    168 {
    169 
    170 	ww_mutex_lock_slow(&robj->lock, ctx);
    171 }
    172 
    173 /*
    174  * dma_resv_lock_interruptible(robj, ctx)
    175  *
    176  *	Acquire a reservation object's lock.  Return 0 on success,
    177  *	-EALREADY if caller already holds it, -EDEADLK if a
    178  *	higher-priority owner holds it and the caller must back out and
    179  *	retry, -EINTR if interrupted.
    180  */
    181 int
    182 dma_resv_lock_interruptible(struct dma_resv *robj,
    183     struct ww_acquire_ctx *ctx)
    184 {
    185 
    186 	return ww_mutex_lock_interruptible(&robj->lock, ctx);
    187 }
    188 
    189 /*
    190  * dma_resv_lock_slow_interruptible(robj, ctx)
    191  *
    192  *	Acquire a reservation object's lock.  Caller must not hold
    193  *	this lock or any others -- this is to be used in slow paths
    194  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    195  *	and the caller has backed out all other locks.  Return 0 on
    196  *	success, -EINTR if interrupted.
    197  */
    198 int
    199 dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    200     struct ww_acquire_ctx *ctx)
    201 {
    202 
    203 	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
    204 }
    205 
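/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): the back-out-and-retry dance described in the
 * comments above, for two reservation objects.  The helper name
 * example_lock_pair and the two-object scenario are hypothetical;
 * ww_acquire_init/done/fini are the ww_mutex API these wrappers
 * assume.
 */
#if 0
static int
example_lock_pair(struct dma_resv *a, struct dma_resv *b)
{
	struct ww_acquire_ctx ctx;
	struct dma_resv *contended = NULL;
	int ret;

	ww_acquire_init(&ctx, &reservation_ww_class);
retry:
	/* If we backed out last time, wait for the lock we lost on.  */
	if (contended)
		dma_resv_lock_slow(contended, &ctx);
	if (a != contended) {
		ret = dma_resv_lock(a, &ctx);
		if (ret == -EDEADLK) {
			if (contended)
				dma_resv_unlock(contended);
			contended = a;
			goto retry;
		} else if (ret) {
			goto fail;
		}
	}
	if (b != contended) {
		ret = dma_resv_lock(b, &ctx);
		if (ret == -EDEADLK) {
			dma_resv_unlock(a);
			contended = b;
			goto retry;
		} else if (ret) {
			dma_resv_unlock(a);
			goto fail;
		}
	}
	ww_acquire_done(&ctx);

	/* ...both a and b are now locked; do the work...  */

	dma_resv_unlock(b);
	dma_resv_unlock(a);
	ww_acquire_fini(&ctx);
	return 0;

fail:	if (contended != NULL && contended != a)
		dma_resv_unlock(contended);
	ww_acquire_fini(&ctx);
	return ret;
}
#endif
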
    206 /*
    207  * dma_resv_trylock(robj)
    208  *
    209  *	Try to acquire a reservation object's lock without blocking.
    210  *	Return true on success, false on failure.
    211  */
    212 bool
    213 dma_resv_trylock(struct dma_resv *robj)
    214 {
    215 
    216 	return ww_mutex_trylock(&robj->lock);
    217 }
    218 
    219 /*
    220  * dma_resv_locking_ctx(robj)
    221  *
    222  *	Return a pointer to the ww_acquire_ctx used by the owner of
    223  *	the reservation object's lock, or NULL if it is not owned or
    224  *	is locked without an acquire context.
    225  */
    226 struct ww_acquire_ctx *
    227 dma_resv_locking_ctx(struct dma_resv *robj)
    228 {
    229 
    230 	return ww_mutex_locking_ctx(&robj->lock);
    231 }
    232 
    233 /*
    234  * dma_resv_unlock(robj)
    235  *
    236  *	Release a reservation object's lock.
    237  */
    238 void
    239 dma_resv_unlock(struct dma_resv *robj)
    240 {
    241 
    242 	return ww_mutex_unlock(&robj->lock);
    243 }
    244 
    245 /*
    246  * dma_resv_is_locked(robj)
    247  *
    248  *	True if robj is locked.
    249  */
    250 bool
    251 dma_resv_is_locked(struct dma_resv *robj)
    252 {
    253 
    254 	return ww_mutex_is_locked(&robj->lock);
    255 }
    256 
    257 /*
    258  * dma_resv_held(robj)
    259  *
    260  *	True if robj is locked.
    261  */
    262 bool
    263 dma_resv_held(struct dma_resv *robj)
    264 {
    265 
    266 	return ww_mutex_is_locked(&robj->lock);
    267 }
    268 
    269 /*
    270  * dma_resv_assert_held(robj)
    271  *
    272  *	Panic if robj is not held, in DIAGNOSTIC builds.
    273  */
    274 void
    275 dma_resv_assert_held(struct dma_resv *robj)
    276 {
    277 
    278 	KASSERT(dma_resv_held(robj));
    279 }
    280 
    281 /*
    282  * dma_resv_get_excl(robj)
    283  *
    284  *	Return a pointer to the exclusive fence of the reservation
    285  *	object robj.
    286  *
    287  *	Caller must have robj locked.
    288  */
    289 struct dma_fence *
    290 dma_resv_get_excl(struct dma_resv *robj)
    291 {
    292 
    293 	KASSERT(dma_resv_held(robj));
    294 	return robj->fence_excl;
    295 }
    296 
    297 /*
    298  * dma_resv_get_list(robj)
    299  *
    300  *	Return a pointer to the shared fence list of the reservation
    301  *	object robj.
    302  *
    303  *	Caller must have robj locked.
    304  */
    305 struct dma_resv_list *
    306 dma_resv_get_list(struct dma_resv *robj)
    307 {
    308 
    309 	KASSERT(dma_resv_held(robj));
    310 	return robj->fence;
    311 }
    312 
    313 /*
    314  * dma_resv_reserve_shared(robj, num_fences)
    315  *
    316  *	Reserve space in robj to add num_fences shared fences.  To be
    317  *	used only once before calling dma_resv_add_shared_fence.
    318  *
    319  *	Caller must have robj locked.
    320  *
    321  *	Internally, we start with room for four entries and double when
    322  *	we need more, but this growth policy is not guaranteed.
    323  */
    324 int
    325 dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
    326 {
    327 	struct dma_resv_list *list, *prealloc;
    328 	uint32_t n, nalloc;
    329 
    330 	KASSERT(dma_resv_held(robj));
    331 
    332 	list = robj->fence;
    333 	prealloc = robj->robj_prealloc;
    334 
    335 	/* If there's an existing list, check it for space.  */
    336 	if (list) {
    337 		/* If there are too many already, give up.  */
    338 		if (list->shared_count > UINT32_MAX - num_fences)
    339 			return -ENOMEM;
    340 
    341 		/* Add some more. */
    342 		n = list->shared_count + num_fences;
    343 
    344 		/* If there's already enough room, we're done.  */
    345 		if (n <= list->shared_max)
    346 			return 0;
    347 	} else {
    348 		/* No list already.  We need space for num_fences.  */
    349 		n = num_fences;
    350 	}
    351 
    352 	/* If not, maybe there's a preallocated list ready.  */
    353 	if (prealloc != NULL) {
    354 		/* If there's enough room in it, stop here.  */
    355 		if (n <= prealloc->shared_max)
    356 			return 0;
    357 
    358 		/* Allocate room for double what we need now.  */
    359 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
    360 		prealloc = objlist_alloc(nalloc);
    361 
    362 		/* Swap the new preallocated list and free the old one.  */
    363 		objlist_free(robj->robj_prealloc);
    364 		robj->robj_prealloc = prealloc;
    365 	} else {
    366 		/* Start with some spare.  */
    367 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
    368 		prealloc = objlist_alloc(nalloc);
    369 
    370 		/* Save the new preallocated list.  */
    371 		robj->robj_prealloc = prealloc;
    372 	}
    373 
    374 	/* Success!  */
    375 	return 0;
    376 }
    377 
    378 struct dma_resv_write_ticket {
    379 };
    380 
    381 /*
    382  * dma_resv_write_begin(robj, ticket)
    383  *
    384  *	Begin an atomic batch of writes to robj, and initialize opaque
    385  *	ticket for it.  The ticket must be passed to
    386  *	dma_resv_write_commit to commit the writes.
    387  *
    388  *	Caller must have robj locked.
    389  *
    390  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    391  *	NOT serve as an acquire operation, however.
    392  */
    393 static void
    394 dma_resv_write_begin(struct dma_resv *robj,
    395     struct dma_resv_write_ticket *ticket)
    396 {
    397 
    398 	KASSERT(dma_resv_held(robj));
    399 
    400 	write_seqcount_begin(&robj->seq);
    401 }
    402 
    403 /*
    404  * dma_resv_write_commit(robj, ticket)
    405  *
    406  *	Commit an atomic batch of writes to robj begun with the call to
    407  *	dma_resv_write_begin that returned ticket.
    408  *
    409  *	Caller must have robj locked.
    410  *
    411  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    412  *	NOT serve as a release operation, however.
    413  */
    414 static void
    415 dma_resv_write_commit(struct dma_resv *robj,
    416     struct dma_resv_write_ticket *ticket)
    417 {
    418 
    419 	KASSERT(dma_resv_held(robj));
    420 
    421 	write_seqcount_end(&robj->seq);
    422 }
    423 
    424 struct dma_resv_read_ticket {
    425 	unsigned version;
    426 };
    427 
    428 /*
    429  * dma_resv_read_begin(robj, ticket)
    430  *
    431  *	Begin a read section, and initialize opaque ticket for it.  The
    432  *	ticket must be passed to dma_resv_read_valid, and the
    433  *	caller must be prepared to retry reading if it fails.
    434  */
    435 static void
    436 dma_resv_read_begin(const struct dma_resv *robj,
    437     struct dma_resv_read_ticket *ticket)
    438 {
    439 
    440 	ticket->version = read_seqcount_begin(&robj->seq);
    441 }
    442 
    443 /*
    444  * dma_resv_read_valid(robj, ticket)
    445  *
    446  *	Test whether the read section is still valid.  Return true on
    447  *	success, or false on failure if the read ticket has been
    448  *	invalidated.
    449  */
    450 static bool
    451 dma_resv_read_valid(const struct dma_resv *robj,
    452     struct dma_resv_read_ticket *ticket)
    453 {
    454 
    455 	return !read_seqcount_retry(&robj->seq, ticket->version);
    456 }
    457 
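/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): the retry loop readers build around
 * dma_resv_read_begin/dma_resv_read_valid, here merely snapshotting
 * the shared fence count.  Real readers also take fence references
 * before leaving the RCU read section, as the functions below do.
 */
#if 0
static unsigned
example_shared_count_snapshot(const struct dma_resv *robj)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	unsigned count;

	rcu_read_lock();
	do {
		dma_resv_read_begin(robj, &ticket);
		list = atomic_load_consume(&robj->fence);
		count = list ? atomic_load_relaxed(&list->shared_count) : 0;
	} while (!dma_resv_read_valid(robj, &ticket));
	rcu_read_unlock();

	return count;
}
#endif
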
    458 /*
    459  * dma_resv_get_shared_reader(robj, listp, shared_countp, ticket)
    460  *
    461  *	Set *listp and *shared_countp to a snapshot of the pointer to
    462  *	and length of the shared fence list of robj and return true, or
    463  *	set them to NULL/0 and return false if a writer intervened so
    464  *	the caller must start over.
    465  *
    466  *	Both *listp and *shared_countp are unconditionally initialized
    467  *	on return.  They may be NULL/0 even on success, if there is no
    468  *	shared list at the moment.  Does not take any fence references.
    469  */
    470 static bool
    471 dma_resv_get_shared_reader(const struct dma_resv *robj,
    472     const struct dma_resv_list **listp, unsigned *shared_countp,
    473     struct dma_resv_read_ticket *ticket)
    474 {
    475 	struct dma_resv_list *list;
    476 	unsigned shared_count = 0;
    477 
    478 	/*
    479 	 * Get the list and, if it is present, its length.  If the list
    480 	 * is present, it has a valid length.  The atomic_load_consume
    481 	 * pairs with the membar_producer in dma_resv_write_begin.
    482 	 */
    483 	list = atomic_load_consume(&robj->fence);
    484 	shared_count = list ? atomic_load_relaxed(&list->shared_count) : 0;
    485 
    486 	/*
    487 	 * We are done reading from robj and list.  Validate our
    488 	 * parking ticket.  If it's invalid, do not pass go and do not
    489 	 * collect $200.
    490 	 */
    491 	if (!dma_resv_read_valid(robj, ticket))
    492 		goto fail;
    493 
    494 	/* Success!  */
    495 	*listp = list;
    496 	*shared_countp = shared_count;
    497 	return true;
    498 
    499 fail:	*listp = NULL;
    500 	*shared_countp = 0;
    501 	return false;
    502 }
    503 
    504 /*
    505  * dma_resv_get_excl_reader(robj, fencep, ticket)
    506  *
    507  *	Set *fencep to the exclusive fence of robj and return true, or
    508  *	set it to NULL and return false if either
    509  *	(a) a writer intervened, or
    510  *	(b) the fence is scheduled to be destroyed after this RCU grace
    511  *	    period,
    512  *	in either case meaning the caller must restart.
    513  *
    514  *	The value of *fencep is unconditionally initialized on return.
    515  *	It may be NULL, if there is no exclusive fence at the moment.
    516  *	If nonnull, *fencep is referenced; caller must dma_fence_put.
    517  */
    518 static bool
    519 dma_resv_get_excl_reader(const struct dma_resv *robj,
    520     struct dma_fence **fencep,
    521     struct dma_resv_read_ticket *ticket)
    522 {
    523 	struct dma_fence *fence;
    524 
    525 	/*
    526 	 * Get the candidate fence pointer.  The atomic_load_consume
    527 	 * pairs with the membar_producer in dma_resv_write_begin.
    528 	 */
    529 	fence = atomic_load_consume(&robj->fence_excl);
    530 
    531 	/*
    532 	 * The load of robj->fence_excl is atomic, but the caller may
    533 	 * have previously loaded the shared fence list and should
    534 	 * restart if its view of the entire dma_resv object is not a
    535 	 * consistent snapshot.
    536 	 */
    537 	if (!dma_resv_read_valid(robj, ticket))
    538 		goto fail;
    539 
    540 	/*
    541 	 * If the fence is already scheduled to go away after this RCU
    542 	 * read section, give up.  Otherwise, take a reference so it
    543 	 * won't go away until after dma_fence_put.
    544 	 */
    545 	if (fence != NULL &&
    546 	    (fence = dma_fence_get_rcu(fence)) == NULL)
    547 		goto fail;
    548 
    549 	/* Success!  */
    550 	*fencep = fence;
    551 	return true;
    552 
    553 fail:	*fencep = NULL;
    554 	return false;
    555 }
    556 
    557 /*
    558  * dma_resv_add_excl_fence(robj, fence)
    559  *
    560  *	Empty and release all of robj's shared fences, and clear and
    561  *	release its exclusive fence.  If fence is nonnull, acquire a
    562  *	reference to it and save it as robj's exclusive fence.
    563  *
    564  *	Caller must have robj locked.
    565  */
    566 void
    567 dma_resv_add_excl_fence(struct dma_resv *robj,
    568     struct dma_fence *fence)
    569 {
    570 	struct dma_fence *old_fence = robj->fence_excl;
    571 	struct dma_resv_list *old_list = robj->fence;
    572 	uint32_t old_shared_count;
    573 	struct dma_resv_write_ticket ticket;
    574 
    575 	KASSERT(dma_resv_held(robj));
    576 
    577 	/*
    578 	 * If we are setting rather than just removing a fence, acquire
    579 	 * a reference for ourselves.
    580 	 */
    581 	if (fence)
    582 		(void)dma_fence_get(fence);
    583 
    584 	/* If there are any shared fences, remember how many.  */
    585 	if (old_list)
    586 		old_shared_count = old_list->shared_count;
    587 
    588 	/* Begin an update.  Implies membar_producer for fence.  */
    589 	dma_resv_write_begin(robj, &ticket);
    590 
    591 	/* Replace the fence and zero the shared count.  */
    592 	atomic_store_relaxed(&robj->fence_excl, fence);
    593 	if (old_list)
    594 		old_list->shared_count = 0;
    595 
    596 	/* Commit the update.  */
    597 	dma_resv_write_commit(robj, &ticket);
    598 
    599 	/* Release the old exclusive fence, if any.  */
    600 	if (old_fence) {
    601 		dma_fence_put(old_fence);
    602 		old_fence = NULL; /* paranoia */
    603 	}
    604 
    605 	/* Release any old shared fences.  */
    606 	if (old_list) {
    607 		while (old_shared_count--) {
    608 			dma_fence_put(old_list->shared[old_shared_count]);
    609 			/* paranoia */
    610 			old_list->shared[old_shared_count] = NULL;
    611 		}
    612 	}
    613 }
    614 
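/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): publishing a new exclusive (write) fence for a
 * buffer after submitting work that writes it.  The helper name and
 * the job_fence parameter are hypothetical; locking with a null
 * acquire context is assumed to be the simple mode mentioned in
 * dma_resv_locking_ctx above.
 */
#if 0
static void
example_publish_write_fence(struct dma_resv *robj,
    struct dma_fence *job_fence)
{

	/* With a null acquire context the lock simply blocks.  */
	(void)dma_resv_lock(robj, NULL);

	/* dma_resv_add_excl_fence takes its own reference.  */
	dma_resv_add_excl_fence(robj, job_fence);

	dma_resv_unlock(robj);
}
#endif
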
    615 /*
    616  * dma_resv_add_shared_fence(robj, fence)
    617  *
    618  *	Acquire a reference to fence and add it to robj's shared list.
    619  *	If any fence was already added with the same context number,
    620  *	release it and replace it by this one.
    621  *
    622  *	Caller must have robj locked, and must have first reserved
    623  *	space with a call to dma_resv_reserve_shared for each shared
    624  *	fence added.
    625  */
    626 void
    627 dma_resv_add_shared_fence(struct dma_resv *robj,
    628     struct dma_fence *fence)
    629 {
    630 	struct dma_resv_list *list = robj->fence;
    631 	struct dma_resv_list *prealloc = robj->robj_prealloc;
    632 	struct dma_resv_write_ticket ticket;
    633 	struct dma_fence *replace = NULL;
    634 	uint32_t i;
    635 
    636 	KASSERT(dma_resv_held(robj));
    637 
    638 	/* Acquire a reference to the fence.  */
    639 	KASSERT(fence != NULL);
    640 	(void)dma_fence_get(fence);
    641 
    642 	/* Check for a preallocated replacement list.  */
    643 	if (prealloc == NULL) {
    644 		/*
    645 		 * If there is no preallocated replacement list, then
    646 		 * there must be room in the current list.
    647 		 */
    648 		KASSERT(list != NULL);
    649 		KASSERT(list->shared_count < list->shared_max);
    650 
    651 		/* Begin an update.  Implies membar_producer for fence.  */
    652 		dma_resv_write_begin(robj, &ticket);
    653 
    654 		/* Find a fence with the same context number.  */
    655 		for (i = 0; i < list->shared_count; i++) {
    656 			if (list->shared[i]->context == fence->context) {
    657 				replace = list->shared[i];
    658 				atomic_store_relaxed(&list->shared[i], fence);
    659 				break;
    660 			}
    661 		}
    662 
    663 		/* If we didn't find one, add it at the end.  */
    664 		if (i == list->shared_count) {
    665 			atomic_store_relaxed(&list->shared[list->shared_count],
    666 			    fence);
    667 			atomic_store_relaxed(&list->shared_count,
    668 			    list->shared_count + 1);
    669 		}
    670 
    671 		/* Commit the update.  */
    672 		dma_resv_write_commit(robj, &ticket);
    673 	} else {
    674 		/*
    675 		 * There is a preallocated replacement list.  There may
    676 		 * not be a current list.  If not, treat it as a zero-
    677 		 * length list.
    678 		 */
    679 		uint32_t shared_count = (list == NULL ? 0 : list->shared_count);
    680 
    681 		/* There had better be room in the preallocated list.  */
    682 		KASSERT(shared_count < prealloc->shared_max);
    683 
    684 		/*
    685 		 * Copy the fences over, but replace if we find one
    686 		 * with the same context number.
    687 		 */
    688 		for (i = 0; i < shared_count; i++) {
    689 			if (replace == NULL &&
    690 			    list->shared[i]->context == fence->context) {
    691 				replace = list->shared[i];
    692 				prealloc->shared[i] = fence;
    693 			} else {
    694 				prealloc->shared[i] = list->shared[i];
    695 			}
    696 		}
    697 		prealloc->shared_count = shared_count;
    698 
    699 		/* If we didn't find one, add it at the end.  */
    700 		if (replace == NULL) {
    701 			KASSERT(prealloc->shared_count < prealloc->shared_max);
    702 			prealloc->shared[prealloc->shared_count++] = fence;
    703 		}
    704 
    705 		/*
    706 		 * Now ready to replace the list.  Begin an update.
    707 		 * Implies membar_producer for fence and prealloc.
    708 		 */
    709 		dma_resv_write_begin(robj, &ticket);
    710 
    711 		/* Replace the list.  */
    712 		atomic_store_relaxed(&robj->fence, prealloc);
    713 		robj->robj_prealloc = NULL;
    714 
    715 		/* Commit the update.  */
    716 		dma_resv_write_commit(robj, &ticket);
    717 
    718 		/*
    719 		 * If there is an old list, free it when convenient.
    720 		 * (We are not in a position at this point to sleep
    721 		 * waiting for activity on all CPUs.)
    722 		 */
    723 		if (list)
    724 			objlist_defer_free(list);
    725 	}
    726 
    727 	/* Release a fence if we replaced it.  */
    728 	if (replace) {
    729 		dma_fence_put(replace);
    730 		replace = NULL;	/* paranoia */
    731 	}
    732 }
    733 
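/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): the reserve-then-add protocol for shared (read)
 * fences described above.  The helper name and the read_fence
 * parameter are hypothetical.
 */
#if 0
static int
example_publish_read_fence(struct dma_resv *robj,
    struct dma_fence *read_fence)
{
	int ret;

	ret = dma_resv_lock(robj, NULL);
	if (ret)
		return ret;

	/* Make sure there is room before adding the fence.  */
	ret = dma_resv_reserve_shared(robj, 1);
	if (ret == 0)
		dma_resv_add_shared_fence(robj, read_fence);

	dma_resv_unlock(robj);
	return ret;
}
#endif
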
    734 /*
    735  * dma_resv_get_excl_rcu(robj)
    736  *
    737  *	Note: Caller need not call this from an RCU read section.
    738  */
    739 struct dma_fence *
    740 dma_resv_get_excl_rcu(const struct dma_resv *robj)
    741 {
    742 	struct dma_fence *fence;
    743 
    744 	rcu_read_lock();
    745 	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
    746 	rcu_read_unlock();
    747 
    748 	return fence;
    749 }
    750 
    751 /*
    752  * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
    753  *
    754  *	Get a snapshot of the exclusive and shared fences of robj.  The
    755  *	shared fences are returned via *sharedp as an array of
    756  *	*nsharedp elements, to be freed by the caller with kfree.
    757  *	If fencep is null, then add the exclusive fence, if any, at the
    758  *	end of the array instead.
    759  *
    760  *	Returns zero on success, negative (Linux-style) error code on
    761  *	failure.  On failure, *fencep, *nsharedp, and *sharedp are
    762  *	untouched.
    763  */
    764 int
    765 dma_resv_get_fences_rcu(const struct dma_resv *robj,
    766     struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
    767 {
    768 	const struct dma_resv_list *list = NULL;
    769 	struct dma_fence *fence = NULL;
    770 	struct dma_fence **shared = NULL;
    771 	unsigned shared_alloc = 0, shared_count, i;
    772 	struct dma_resv_read_ticket ticket;
    773 
    774 top:	KASSERT(fence == NULL);
    775 
    776 	/* Enter an RCU read section and get a read ticket.  */
    777 	rcu_read_lock();
    778 	dma_resv_read_begin(robj, &ticket);
    779 
    780 	/* If there is a shared list, grab it.  */
    781 	if (!dma_resv_get_shared_reader(robj, &list, &shared_count, &ticket))
    782 		goto restart;
    783 	if (list != NULL) {
    784 
    785 		/*
    786 		 * Avoid arithmetic overflow with `+ 1' below.
    787 		 * Strictly speaking we don't need this if the caller
    788 		 * specified fencep or if there is no exclusive fence,
    789 		 * but it is simpler to not have to consider those
    790 		 * cases.
    791 		 */
    792 		KASSERT(shared_count <= list->shared_max);
    793 		if (list->shared_max == UINT_MAX) {
    794 			rcu_read_unlock();
        			kfree(shared);
        			return -ENOMEM;
        		}
    795 
    796 		/* Check whether we have a buffer.  */
    797 		if (shared == NULL) {
    798 			/*
    799 			 * We don't have a buffer yet.  Try to allocate
    800 			 * one without waiting.
    801 			 */
    802 			shared_alloc = list->shared_max + 1;
    803 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    804 			    GFP_NOWAIT);
    805 			if (shared == NULL) {
    806 				/*
    807 				 * Couldn't do it immediately.  Back
    808 				 * out of RCU and allocate one with
    809 				 * waiting.
    810 				 */
    811 				rcu_read_unlock();
    812 				shared = kcalloc(shared_alloc,
    813 				    sizeof(shared[0]), GFP_KERNEL);
    814 				if (shared == NULL)
    815 					return -ENOMEM;
    816 				goto top;
    817 			}
    818 		} else if (shared_alloc < list->shared_max + 1) {
    819 			/*
    820 			 * We have a buffer but it's too small.  We're
    821 			 * already racing in this case, so just back
    822 			 * out and wait to allocate a bigger one.
    823 			 */
    824 			shared_alloc = list->shared_max + 1;
    825 			rcu_read_unlock();
    826 			kfree(shared);
    827 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    828 			    GFP_KERNEL);
    829 			if (shared == NULL)
    830 				return -ENOMEM;
    831 			goto top;
    832 		}
    833 
    834 		/*
    835 		 * We got a buffer large enough.  Copy into the buffer
    836 		 * and record the number of elements.  Could safely use
    837 		 * memcpy here, because even if we race with a writer
    838 		 * it'll invalidate the read ticket and we'll start
    839 		 * over, but atomic_load in a loop will pacify kcsan.
    840 		 */
    841 		for (i = 0; i < shared_count; i++)
    842 			shared[i] = atomic_load_relaxed(&list->shared[i]);
    843 
    844 		/* If anything changed while we were copying, restart.  */
    845 		if (!dma_resv_read_valid(robj, &ticket))
    846 			goto restart;
    847 	}
    848 
    849 	/* If there is an exclusive fence, grab it.  */
    850 	KASSERT(fence == NULL);
    851 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
    852 		goto restart;
    853 
    854 	/*
    855 	 * Try to get a reference to all of the shared fences.
    856 	 */
    857 	for (i = 0; i < shared_count; i++) {
    858 		if (dma_fence_get_rcu(atomic_load_relaxed(&shared[i])) == NULL)
    859 			goto put_restart;
    860 	}
    861 
    862 	/* Success!  */
    863 	rcu_read_unlock();
    864 	KASSERT(shared_count <= shared_alloc);
    865 	KASSERT(shared_alloc == 0 || shared_count < shared_alloc);
    866 	KASSERT(shared_alloc <= UINT_MAX);
    867 	if (fencep) {
    868 		*fencep = fence;
    869 	} else if (fence) {
    870 		if (shared_count) {
    871 			shared[shared_count++] = fence;
    872 		} else {
    873 			shared = kmalloc(sizeof(shared[0]), GFP_KERNEL);
    874 			shared[0] = fence;
    875 			shared_count = 1;
    876 		}
    877 	}
    878 	*nsharedp = shared_count;
    879 	*sharedp = shared;
    880 	return 0;
    881 
    882 put_restart:
    883 	/* Back out.  */
    884 	while (i --> 0) {
    885 		dma_fence_put(shared[i]);
    886 		shared[i] = NULL; /* paranoia */
    887 	}
    888 	if (fence) {
    889 		dma_fence_put(fence);
    890 		fence = NULL;
    891 	}
    892 
    893 restart:
    894 	KASSERT(fence == NULL);
    895 	rcu_read_unlock();
    896 	goto top;
    897 }
    898 
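/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): taking a snapshot with dma_resv_get_fences_rcu,
 * waiting for each fence, and releasing the references and the
 * array.  The helper name is hypothetical.
 */
#if 0
static int
example_wait_for_snapshot(const struct dma_resv *robj)
{
	struct dma_fence *excl = NULL, **shared = NULL;
	unsigned nshared = 0, i;
	long lret;
	int ret;

	ret = dma_resv_get_fences_rcu(robj, &excl, &nshared, &shared);
	if (ret)
		return ret;

	/* Wait for the shared fences, then the exclusive one.  */
	for (i = 0; i < nshared; i++) {
		lret = dma_fence_wait(shared[i], /*intr*/true);
		if (lret < 0) {
			ret = (int)lret;
			break;
		}
	}
	if (ret == 0 && excl != NULL) {
		lret = dma_fence_wait(excl, /*intr*/true);
		if (lret < 0)
			ret = (int)lret;
	}

	/* Drop the snapshot's references and free the array.  */
	for (i = 0; i < nshared; i++)
		dma_fence_put(shared[i]);
	kfree(shared);
	if (excl != NULL)
		dma_fence_put(excl);

	return ret;
}
#endif
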
    899 /*
    900  * dma_resv_copy_fences(dst, src)
    901  *
    902  *	Copy the exclusive fence and all the shared fences from src to
    903  *	dst.
    904  *
    905  *	Caller must have dst locked.
    906  */
    907 int
    908 dma_resv_copy_fences(struct dma_resv *dst_robj,
    909     const struct dma_resv *src_robj)
    910 {
    911 	const struct dma_resv_list *src_list;
    912 	struct dma_resv_list *dst_list = NULL;
    913 	struct dma_resv_list *old_list;
    914 	struct dma_fence *fence = NULL;
    915 	struct dma_fence *old_fence;
    916 	uint32_t shared_count, i;
    917 	struct dma_resv_read_ticket read_ticket;
    918 	struct dma_resv_write_ticket write_ticket;
    919 
    920 	KASSERT(dma_resv_held(dst_robj));
    921 
    922 top:	KASSERT(fence == NULL);
    923 
    924 	/* Enter an RCU read section and get a read ticket.  */
    925 	rcu_read_lock();
    926 	dma_resv_read_begin(src_robj, &read_ticket);
    927 
    928 	/* Get the shared list.  */
    929 	if (!dma_resv_get_shared_reader(src_robj, &src_list, &shared_count,
    930 		&read_ticket))
    931 		goto restart;
    932 	if (src_list) {
    933 		/* Allocate a new list, if necessary.  */
    934 		if (dst_list == NULL)
    935 			dst_list = objlist_tryalloc(shared_count);
    936 		if (dst_list == NULL || dst_list->shared_max < shared_count) {
    937 			rcu_read_unlock();
    938 			if (dst_list) {
    939 				objlist_free(dst_list);
    940 				dst_list = NULL;
    941 			}
    942 			dst_list = objlist_alloc(shared_count);
    943 			dst_list->shared_count = 0; /* paranoia */
    944 			goto top;
    945 		}
    946 
    947 		/* Copy over all fences that are not yet signalled.  */
    948 		dst_list->shared_count = 0;
    949 		for (i = 0; i < shared_count; i++) {
    950 			KASSERT(fence == NULL);
    951 			fence = atomic_load_relaxed(&src_list->shared[i]);
    952 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
    953 				goto restart;
    954 			if (dma_fence_is_signaled(fence)) {
    955 				dma_fence_put(fence);
    956 				fence = NULL;
    957 				continue;
    958 			}
    959 			dst_list->shared[dst_list->shared_count++] = fence;
    960 			fence = NULL;
    961 		}
    962 
    963 		/* If anything changed while we were copying, restart.  */
    964 		if (!dma_resv_read_valid(src_robj, &read_ticket))
    965 			goto restart;
    966 	}
    967 
    968 	/* Get the exclusive fence.  */
    969 	KASSERT(fence == NULL);
    970 	if (!dma_resv_get_excl_reader(src_robj, &fence, &read_ticket))
    971 		goto restart;
    972 
    973 	/* All done with src; exit the RCU read section.  */
    974 	rcu_read_unlock();
    975 
    976 	/*
    977 	 * We now have a snapshot of the shared and exclusive fences of
    978 	 * src_robj and we have acquired references to them so they
    979 	 * won't go away.  Transfer them over to dst_robj, releasing
    980 	 * references to any that were there.
    981 	 */
    982 
    983 	/* Get the old shared and exclusive fences, if any.  */
    984 	old_list = dst_robj->fence;
    985 	old_fence = dst_robj->fence_excl;
    986 
    987 	/*
    988 	 * Begin an update.  Implies membar_producer for dst_list and
    989 	 * fence.
    990 	 */
    991 	dma_resv_write_begin(dst_robj, &write_ticket);
    992 
    993 	/* Replace the fences.  */
    994 	atomic_store_relaxed(&dst_robj->fence, dst_list);
    995 	atomic_store_relaxed(&dst_robj->fence_excl, fence);
    996 
    997 	/* Commit the update.  */
    998 	dma_resv_write_commit(dst_robj, &write_ticket);
    999 
   1000 	/* Release the old exclusive fence, if any.  */
   1001 	if (old_fence) {
   1002 		dma_fence_put(old_fence);
   1003 		old_fence = NULL; /* paranoia */
   1004 	}
   1005 
   1006 	/* Release any old shared fences.  */
   1007 	if (old_list) {
   1008 		for (i = old_list->shared_count; i --> 0;) {
   1009 			dma_fence_put(old_list->shared[i]);
   1010 			old_list->shared[i] = NULL; /* paranoia */
   1011 		}
   1012 		objlist_free(old_list);
   1013 		old_list = NULL; /* paranoia */
   1014 	}
   1015 
   1016 	/* Success!  */
   1017 	return 0;
   1018 
   1019 restart:
   1020 	KASSERT(fence == NULL);
   1021 	rcu_read_unlock();
   1022 	if (dst_list) {
   1023 		for (i = dst_list->shared_count; i --> 0;) {
   1024 			dma_fence_put(dst_list->shared[i]);
   1025 			dst_list->shared[i] = NULL; /* paranoia */
   1026 		}
   1027 		/* reuse dst_list allocation for the next attempt */
   1028 	}
   1029 	goto top;
   1030 }
   1031 
   1032 /*
   1033  * dma_resv_test_signaled_rcu(robj, shared)
   1034  *
   1035  *	If shared is true, test whether all of the shared fences are
   1036  *	signalled, or if there are none, test whether the exclusive
   1037  *	fence is signalled.  If shared is false, test only whether the
   1038  *	exclusive fence is signalled.
   1039  *
   1040  *	XXX Why, when shared is true, does this test the exclusive
   1041  *	fence only if there are no shared fences?  This makes no sense.
   1042  */
   1043 bool
   1044 dma_resv_test_signaled_rcu(const struct dma_resv *robj,
   1045     bool shared)
   1046 {
   1047 	struct dma_resv_read_ticket ticket;
   1048 	const struct dma_resv_list *list;
   1049 	struct dma_fence *fence = NULL;
   1050 	uint32_t i, shared_count;
   1051 	bool signaled = true;
   1052 
   1053 top:	KASSERT(fence == NULL);
   1054 
   1055 	/* Enter an RCU read section and get a read ticket.  */
   1056 	rcu_read_lock();
   1057 	dma_resv_read_begin(robj, &ticket);
   1058 
   1059 	/* If shared is requested and there is a shared list, test it.  */
   1060 	if (shared) {
   1061 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1062 			&ticket))
   1063 			goto restart;
   1064 	} else {
   1065 		list = NULL;
   1066 		shared_count = 0;
   1067 	}
   1068 	if (list != NULL) {
   1069 		/*
   1070 		 * For each fence, if it is going away, restart.
   1071 		 * Otherwise, acquire a reference to it to test whether
   1072 		 * it is signalled.  Stop if we find any that is not
   1073 		 * signalled.
   1074 		 */
   1075 		for (i = 0; i < shared_count; i++) {
   1076 			KASSERT(fence == NULL);
   1077 			fence = atomic_load_relaxed(&list->shared[i]);
   1078 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1079 				goto restart;
   1080 			signaled &= dma_fence_is_signaled(fence);
   1081 			dma_fence_put(fence);
   1082 			fence = NULL;
   1083 			if (!signaled)
   1084 				goto out;
   1085 		}
   1086 
   1087 		/* If anything changed while we were testing, restart.  */
   1088 		if (!dma_resv_read_valid(robj, &ticket))
   1089 			goto restart;
   1090 	}
   1091 	if (shared_count)
   1092 		goto out;
   1093 
   1094 	/* If there is an exclusive fence, test it.  */
   1095 	KASSERT(fence == NULL);
   1096 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1097 		goto restart;
   1098 	if (fence != NULL) {
   1099 		/* Test whether it is signalled.  If no, stop.  */
   1100 		signaled &= dma_fence_is_signaled(fence);
   1101 		dma_fence_put(fence);
   1102 		fence = NULL;
   1103 		if (!signaled)
   1104 			goto out;
   1105 	}
   1106 
   1107 out:	KASSERT(fence == NULL);
   1108 	rcu_read_unlock();
   1109 	return signaled;
   1110 
   1111 restart:
   1112 	KASSERT(fence == NULL);
   1113 	rcu_read_unlock();
   1114 	goto top;
   1115 }
   1116 
   1117 /*
   1118  * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
   1119  *
   1120  *	If shared is true, wait for all of the shared fences to be
   1121  *	signalled, or if there are none, wait for the exclusive fence
   1122  *	to be signalled.  If shared is false, wait only for the
   1123  *	exclusive fence to be signalled.  If timeout is zero, don't
   1124  *	wait, only test.
   1125  *
   1126  *	XXX Why, when shared is true, does this wait for the exclusive
   1127  *	fence only if there are no shared fences?  This makes no
   1128  *	sense.
   1129  */
   1130 long
   1131 dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
   1132     bool shared, bool intr, unsigned long timeout)
   1133 {
   1134 	struct dma_resv_read_ticket ticket;
   1135 	const struct dma_resv_list *list;
   1136 	struct dma_fence *fence = NULL;
   1137 	uint32_t i, shared_count;
   1138 	long ret;
   1139 
   1140 	if (timeout == 0)
   1141 		return dma_resv_test_signaled_rcu(robj, shared);
   1142 
   1143 top:	KASSERT(fence == NULL);
   1144 
   1145 	/* Enter an RCU read section and get a read ticket.  */
   1146 	rcu_read_lock();
   1147 	dma_resv_read_begin(robj, &ticket);
   1148 
   1149 	/* If shared is requested and there is a shared list, wait on it.  */
   1150 	if (shared) {
   1151 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1152 			&ticket))
   1153 			goto restart;
   1154 	} else {
   1155 		list = NULL;
   1156 		shared_count = 0;
   1157 	}
   1158 	if (list != NULL) {
   1159 		/*
   1160 		 * For each fence, if it is going away, restart.
   1161 		 * Otherwise, acquire a reference to it to test whether
   1162 		 * it is signalled.  Stop and wait if we find any that
   1163 		 * is not signalled.
   1164 		 */
   1165 		for (i = 0; i < shared_count; i++) {
   1166 			KASSERT(fence == NULL);
   1167 			fence = atomic_load_relaxed(&list->shared[i]);
   1168 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1169 				goto restart;
   1170 			if (!dma_fence_is_signaled(fence))
   1171 				goto wait;
   1172 			dma_fence_put(fence);
   1173 			fence = NULL;
   1174 		}
   1175 
   1176 		/* If anything changed while we were testing, restart.  */
   1177 		if (!dma_resv_read_valid(robj, &ticket))
   1178 			goto restart;
   1179 	}
   1180 	if (shared_count)
   1181 		goto out;
   1182 
   1183 	/* If there is an exclusive fence, test it.  */
   1184 	KASSERT(fence == NULL);
   1185 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1186 		goto restart;
   1187 	if (fence != NULL) {
   1188 		/* Test whether it is signalled.  If no, wait.  */
   1189 		if (!dma_fence_is_signaled(fence))
   1190 			goto wait;
   1191 		dma_fence_put(fence);
   1192 		fence = NULL;
   1193 	}
   1194 
   1195 out:	/* Success!  Return the number of ticks left.  */
   1196 	rcu_read_unlock();
   1197 	KASSERT(fence == NULL);
   1198 	return timeout;
   1199 
   1200 restart:
   1201 	KASSERT(fence == NULL);
   1202 	rcu_read_unlock();
   1203 	goto top;
   1204 
   1205 wait:
   1206 	/*
   1207 	 * Exit the RCU read section, wait for it, and release the
   1208 	 * fence when we're done.  If we time out or fail, bail.
   1209 	 * Otherwise, go back to the top.
   1210 	 */
   1211 	KASSERT(fence != NULL);
   1212 	rcu_read_unlock();
   1213 	ret = dma_fence_wait_timeout(fence, intr, timeout);
   1214 	dma_fence_put(fence);
   1215 	fence = NULL;
   1216 	if (ret <= 0)
   1217 		return ret;
   1218 	KASSERT(ret <= timeout);
   1219 	timeout = ret;
   1220 	goto top;
   1221 }
   1222 
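/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): waiting up to roughly five seconds for a buffer to
 * become idle before CPU access.  The helper name, the five-second
 * figure, and the use of mstohz to express the tick count are
 * assumptions of the example.
 */
#if 0
static int
example_wait_idle(const struct dma_resv *robj)
{
	long ret;

	/* Wait interruptibly for all fences, shared and exclusive.  */
	ret = dma_resv_wait_timeout_rcu(robj, /*shared*/true, /*intr*/true,
	    mstohz(5000));
	if (ret < 0)
		return (int)ret;	/* error, e.g. -EINTR */
	if (ret == 0)
		return -ETIMEDOUT;	/* timed out */
	return 0;			/* idle; ret is the ticks left */
}
#endif
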
   1223 /*
   1224  * dma_resv_poll_init(rpoll)
   1225  *
   1226  *	Initialize reservation poll state.
   1227  */
   1228 void
   1229 dma_resv_poll_init(struct dma_resv_poll *rpoll)
   1230 {
   1231 
   1232 	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
   1233 	selinit(&rpoll->rp_selq);
   1234 	rpoll->rp_claimed = 0;
   1235 }
   1236 
   1237 /*
   1238  * dma_resv_poll_fini(rpoll)
   1239  *
   1240  *	Release any resource associated with reservation poll state.
   1241  */
   1242 void
   1243 dma_resv_poll_fini(struct dma_resv_poll *rpoll)
   1244 {
   1245 
   1246 	KASSERT(rpoll->rp_claimed == 0);
   1247 	seldestroy(&rpoll->rp_selq);
   1248 	mutex_destroy(&rpoll->rp_lock);
   1249 }
   1250 
   1251 /*
   1252  * dma_resv_poll_cb(fence, fcb)
   1253  *
   1254  *	Callback to notify a reservation poll that a fence has
   1255  *	completed.  Notify any waiters and allow the next poller to
   1256  *	claim the callback.
   1257  *
   1258  *	If one thread is waiting for the exclusive fence only, and we
   1259  *	spuriously notify them about a shared fence, tough.
   1260  */
   1261 static void
   1262 dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
   1263 {
   1264 	struct dma_resv_poll *rpoll = container_of(fcb,
   1265 	    struct dma_resv_poll, rp_fcb);
   1266 
   1267 	mutex_enter(&rpoll->rp_lock);
   1268 	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
   1269 	rpoll->rp_claimed = 0;
   1270 	mutex_exit(&rpoll->rp_lock);
   1271 }
   1272 
   1273 /*
   1274  * dma_resv_do_poll(robj, events, rpoll)
   1275  *
   1276  *	Poll for reservation object events using the reservation poll
   1277  *	state in rpoll:
   1278  *
   1279  *	- POLLOUT	wait for all fences shared and exclusive
   1280  *	- POLLIN	wait for the exclusive fence
   1281  *
   1282  *	Return the subset of events in events that are ready.  If any
   1283  *	are requested but not ready, arrange to be notified with
   1284  *	selnotify when they are.
   1285  */
   1286 int
   1287 dma_resv_do_poll(const struct dma_resv *robj, int events,
   1288     struct dma_resv_poll *rpoll)
   1289 {
   1290 	struct dma_resv_read_ticket ticket;
   1291 	const struct dma_resv_list *list;
   1292 	struct dma_fence *fence = NULL;
   1293 	uint32_t i, shared_count;
   1294 	int revents;
   1295 	bool recorded = false;	/* curlwp is on the selq */
   1296 	bool claimed = false;	/* we claimed the callback */
   1297 	bool callback = false;	/* we requested a callback */
   1298 
   1299 	/*
   1300 	 * Start with the maximal set of events that could be ready.
   1301 	 * We will eliminate the events that are definitely not ready
   1302 	 * as we go at the same time as we add callbacks to notify us
   1303 	 * that they may be ready.
   1304 	 */
   1305 	revents = events & (POLLIN|POLLOUT);
   1306 	if (revents == 0)
   1307 		return 0;
   1308 
   1309 top:	KASSERT(fence == NULL);
   1310 
   1311 	/* Enter an RCU read section and get a read ticket.  */
   1312 	rcu_read_lock();
   1313 	dma_resv_read_begin(robj, &ticket);
   1314 
   1315 	/* If we want to wait for all fences, get the shared list.  */
   1316 	if (events & POLLOUT) {
   1317 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1318 			&ticket))
   1319 			goto restart;
   1320 	} else {
   1321 		list = NULL;
   1322 		shared_count = 0;
   1323 	}
   1324 	if (list != NULL) do {
   1325 		/*
   1326 		 * For each fence, if it is going away, restart.
   1327 		 * Otherwise, acquire a reference to it to test whether
   1328 		 * it is signalled.  Stop and request a callback if we
   1329 		 * find any that is not signalled.
   1330 		 */
   1331 		for (i = 0; i < shared_count; i++) {
   1332 			KASSERT(fence == NULL);
   1333 			fence = atomic_load_relaxed(&list->shared[i]);
   1334 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1335 				goto restart;
   1336 			if (!dma_fence_is_signaled(fence)) {
   1337 				dma_fence_put(fence);
   1338 				fence = NULL;
   1339 				break;
   1340 			}
   1341 			dma_fence_put(fence);
   1342 			fence = NULL;
   1343 		}
   1344 
   1345 		/* If all shared fences have been signalled, move on.  */
   1346 		if (i == shared_count)
   1347 			break;
   1348 
   1349 		/* Put ourselves on the selq if we haven't already.  */
   1350 		if (!recorded)
   1351 			goto record;
   1352 
   1353 		/*
   1354 		 * If someone else claimed the callback, or we already
   1355 		 * requested it, we're guaranteed to be notified, so
   1356 		 * assume the event is not ready.
   1357 		 */
   1358 		if (!claimed || callback) {
   1359 			revents &= ~POLLOUT;
   1360 			break;
   1361 		}
   1362 
   1363 		/*
   1364 		 * Otherwise, find the first fence that is not
   1365 		 * signalled, request the callback, and clear POLLOUT
   1366 		 * from the possible ready events.  If they are all
   1367 		 * signalled, leave POLLOUT set; we will simulate the
   1368 		 * callback later.
   1369 		 */
   1370 		for (i = 0; i < shared_count; i++) {
   1371 			KASSERT(fence == NULL);
   1372 			fence = atomic_load_relaxed(&list->shared[i]);
   1373 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1374 				goto restart;
   1375 			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1376 				dma_resv_poll_cb)) {
   1377 				dma_fence_put(fence);
   1378 				fence = NULL;
   1379 				revents &= ~POLLOUT;
   1380 				callback = true;
   1381 				break;
   1382 			}
   1383 			dma_fence_put(fence);
   1384 			fence = NULL;
   1385 		}
   1386 	} while (0);
   1387 
   1388 	/* We always wait for at least the exclusive fence, so get it.  */
   1389 	KASSERT(fence == NULL);
   1390 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1391 		goto restart;
   1392 	if (fence != NULL) do {
   1393 		/*
   1394 		 * Test whether it is signalled.  If not, stop and
   1395 		 * request a callback.
   1396 		 */
   1397 		if (dma_fence_is_signaled(fence))
   1398 			break;
   1399 
   1400 		/* Put ourselves on the selq if we haven't already.  */
   1401 		if (!recorded) {
   1402 			dma_fence_put(fence);
   1403 			fence = NULL;
   1404 			goto record;
   1405 		}
   1406 
   1407 		/*
   1408 		 * If someone else claimed the callback, or we already
   1409 		 * requested it, we're guaranteed to be notified, so
   1410 		 * assume the event is not ready.
   1411 		 */
   1412 		if (!claimed || callback) {
   1413 			revents = 0;
   1414 			break;
   1415 		}
   1416 
   1417 		/*
   1418 		 * Otherwise, try to request the callback, and clear
   1419 		 * all possible ready events.  If the fence has been
   1420 		 * signalled in the interim, leave the events set; we
   1421 		 * will simulate the callback later.
   1422 		 */
   1423 		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1424 			dma_resv_poll_cb)) {
   1425 			revents = 0;
   1426 			callback = true;
   1427 			break;
   1428 		}
   1429 	} while (0);
   1430 	if (fence != NULL) {
   1431 		dma_fence_put(fence);
   1432 		fence = NULL;
   1433 	}
   1434 
   1435 	/* All done reading the fences.  */
   1436 	rcu_read_unlock();
   1437 
   1438 	if (claimed && !callback) {
   1439 		/*
   1440 		 * We claimed the callback but we didn't actually
   1441 		 * request it because a fence was signalled while we
   1442 		 * were claiming it.  Call it ourselves now.  The
   1443 		 * callback doesn't use the fence nor rely on holding
   1444 		 * any of the fence locks, so this is safe.
   1445 		 */
   1446 		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
   1447 	}
   1448 	return revents;
   1449 
   1450 restart:
   1451 	KASSERT(fence == NULL);
   1452 	rcu_read_unlock();
   1453 	goto top;
   1454 
   1455 record:
   1456 	KASSERT(fence == NULL);
   1457 	rcu_read_unlock();
   1458 	mutex_enter(&rpoll->rp_lock);
   1459 	selrecord(curlwp, &rpoll->rp_selq);
   1460 	if (!rpoll->rp_claimed)
   1461 		claimed = rpoll->rp_claimed = true;
   1462 	mutex_exit(&rpoll->rp_lock);
   1463 	recorded = true;
   1464 	goto top;
   1465 }
   1466 
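/*
 * Example (editorial sketch, not compiled and not part of the
 * original file): wiring dma_resv_do_poll into a driver's poll
 * method.  The softc layout and function names are hypothetical.
 */
#if 0
struct example_softc {
	struct dma_resv		*sc_resv;	/* buffer's reservation */
	struct dma_resv_poll	sc_rpoll;	/* per-device poll state */
};

static int
example_poll(struct example_softc *sc, int events)
{

	/* POLLIN: exclusive fence done; POLLOUT: all fences done.  */
	return dma_resv_do_poll(sc->sc_resv, events, &sc->sc_rpoll);
}
#endif
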
   1467 /*
   1468  * dma_resv_kqfilter(robj, kn, rpoll)
   1469  *
   1470  *	Kqueue filter for reservation objects.  Currently not
   1471  *	implemented because the logic to implement it is nontrivial,
   1472  *	and userland will presumably never use it, so it would be
   1473  *	dangerous to add never-tested complex code paths to the kernel.
   1474  */
   1475 int
   1476 dma_resv_kqfilter(const struct dma_resv *robj,
   1477     struct knote *kn, struct dma_resv_poll *rpoll)
   1478 {
   1479 
   1480 	return EINVAL;
   1481 }
   1482