Home | History | Annotate | Line # | Download | only in linux
linux_dma_resv.c revision 1.17
      1 /*	$NetBSD: linux_dma_resv.c,v 1.17 2021/12/19 12:31:34 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.17 2021/12/19 12:31:34 riastradh Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/poll.h>
     37 #include <sys/select.h>
     38 
     39 #include <linux/dma-fence.h>
     40 #include <linux/dma-resv.h>
     41 #include <linux/seqlock.h>
     42 #include <linux/ww_mutex.h>
     43 
     44 DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);
     45 
     46 static struct dma_resv_list *
     47 objlist_tryalloc(uint32_t n)
     48 {
     49 	struct dma_resv_list *list;
     50 
     51 	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
     52 	if (list == NULL)
     53 		return NULL;
     54 	list->shared_max = n;
     55 
     56 	return list;
     57 }
     58 
     59 static void
     60 objlist_free(struct dma_resv_list *list)
     61 {
     62 	uint32_t n = list->shared_max;
     63 
     64 	kmem_free(list, offsetof(typeof(*list), shared[n]));
     65 }
     66 
     67 static void
     68 objlist_free_cb(struct rcu_head *rcu)
     69 {
     70 	struct dma_resv_list *list = container_of(rcu,
     71 	    struct dma_resv_list, rol_rcu);
     72 
     73 	objlist_free(list);
     74 }
     75 
     76 static void
     77 objlist_defer_free(struct dma_resv_list *list)
     78 {
     79 
     80 	call_rcu(&list->rol_rcu, objlist_free_cb);
     81 }
     82 
     83 /*
     84  * dma_resv_init(robj)
     85  *
     86  *	Initialize a reservation object.  Caller must later destroy it
     87  *	with dma_resv_fini.
     88  */
     89 void
     90 dma_resv_init(struct dma_resv *robj)
     91 {
     92 
     93 	ww_mutex_init(&robj->lock, &reservation_ww_class);
     94 	seqcount_init(&robj->seq);
     95 	robj->fence_excl = NULL;
     96 	robj->fence = NULL;
     97 	robj->robj_prealloc = NULL;
     98 }
     99 
    100 /*
    101  * dma_resv_fini(robj)
    102  *
    103  *	Destroy a reservation object, freeing any memory that had been
    104  *	allocated for it.  Caller must have exclusive access to it.
    105  */
    106 void
    107 dma_resv_fini(struct dma_resv *robj)
    108 {
    109 	unsigned i;
    110 
    111 	if (robj->robj_prealloc) {
    112 		objlist_free(robj->robj_prealloc);
    113 		robj->robj_prealloc = NULL; /* paranoia */
    114 	}
    115 	if (robj->fence) {
    116 		for (i = 0; i < robj->fence->shared_count; i++) {
    117 			dma_fence_put(robj->fence->shared[i]);
    118 			robj->fence->shared[i] = NULL; /* paranoia */
    119 		}
    120 		objlist_free(robj->fence);
    121 		robj->fence = NULL; /* paranoia */
    122 	}
    123 	if (robj->fence_excl) {
    124 		dma_fence_put(robj->fence_excl);
    125 		robj->fence_excl = NULL; /* paranoia */
    126 	}
    127 	ww_mutex_destroy(&robj->lock);
    128 }
    129 
    130 /*
    131  * dma_resv_lock(robj, ctx)
    132  *
    133  *	Acquire a reservation object's lock.  Return 0 on success,
    134  *	-EALREADY if caller already holds it, -EDEADLK if a
    135  *	higher-priority owner holds it and the caller must back out and
    136  *	retry.
    137  */
    138 int
    139 dma_resv_lock(struct dma_resv *robj,
    140     struct ww_acquire_ctx *ctx)
    141 {
    142 
    143 	return ww_mutex_lock(&robj->lock, ctx);
    144 }
    145 
    146 /*
    147  * dma_resv_lock_slow(robj, ctx)
    148  *
    149  *	Acquire a reservation object's lock.  Caller must not hold
    150  *	this lock or any others -- this is to be used in slow paths
    151  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    152  *	and the caller has backed out all other locks.
    153  */
    154 void
    155 dma_resv_lock_slow(struct dma_resv *robj,
    156     struct ww_acquire_ctx *ctx)
    157 {
    158 
    159 	ww_mutex_lock_slow(&robj->lock, ctx);
    160 }
    161 
    162 /*
    163  * dma_resv_lock_interruptible(robj, ctx)
    164  *
    165  *	Acquire a reservation object's lock.  Return 0 on success,
    166  *	-EALREADY if caller already holds it, -EDEADLK if a
    167  *	higher-priority owner holds it and the caller must back out and
    168  *	retry, -ERESTART/-EINTR if interrupted.
    169  */
    170 int
    171 dma_resv_lock_interruptible(struct dma_resv *robj,
    172     struct ww_acquire_ctx *ctx)
    173 {
    174 
    175 	return ww_mutex_lock_interruptible(&robj->lock, ctx);
    176 }
    177 
    178 /*
    179  * dma_resv_lock_slow_interruptible(robj, ctx)
    180  *
    181  *	Acquire a reservation object's lock.  Caller must not hold
    182  *	this lock or any others -- this is to be used in slow paths
    183  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    184  *	and the caller has backed out all other locks.  Return 0 on
    185  *	success, -ERESTART/-EINTR if interrupted.
    186  */
    187 int
    188 dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    189     struct ww_acquire_ctx *ctx)
    190 {
    191 
    192 	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
    193 }
    194 
    195 /*
    196  * dma_resv_trylock(robj)
    197  *
    198  *	Try to acquire a reservation object's lock without blocking.
    199  *	Return true on success, false on failure.
    200  */
    201 bool
    202 dma_resv_trylock(struct dma_resv *robj)
    203 {
    204 
    205 	return ww_mutex_trylock(&robj->lock);
    206 }
    207 
    208 /*
    209  * dma_resv_locking_ctx(robj)
    210  *
    211  *	Return a pointer to the ww_acquire_ctx used by the owner of
    212  *	the reservation object's lock, or NULL if it is either not
    213  *	owned or if it is locked without context.
    214  */
    215 struct ww_acquire_ctx *
    216 dma_resv_locking_ctx(struct dma_resv *robj)
    217 {
    218 
    219 	return ww_mutex_locking_ctx(&robj->lock);
    220 }
    221 
    222 /*
    223  * dma_resv_unlock(robj)
    224  *
    225  *	Release a reservation object's lock.
    226  */
    227 void
    228 dma_resv_unlock(struct dma_resv *robj)
    229 {
    230 
    231 	return ww_mutex_unlock(&robj->lock);
    232 }
    233 
    234 /*
    235  * dma_resv_is_locked(robj)
    236  *
    237  *	True if robj is locked.
    238  */
    239 bool
    240 dma_resv_is_locked(struct dma_resv *robj)
    241 {
    242 
    243 	return ww_mutex_is_locked(&robj->lock);
    244 }
    245 
    246 /*
    247  * dma_resv_held(robj)
    248  *
    249  *	True if robj is locked.
    250  */
    251 bool
    252 dma_resv_held(struct dma_resv *robj)
    253 {
    254 
    255 	return ww_mutex_is_locked(&robj->lock);
    256 }
    257 
    258 /*
    259  * dma_resv_assert_held(robj)
    260  *
    261  *	Panic if robj is not held, in DIAGNOSTIC builds.
    262  */
    263 void
    264 dma_resv_assert_held(struct dma_resv *robj)
    265 {
    266 
    267 	KASSERT(dma_resv_held(robj));
    268 }
    269 
    270 /*
    271  * dma_resv_get_excl(robj)
    272  *
    273  *	Return a pointer to the exclusive fence of the reservation
    274  *	object robj.
    275  *
    276  *	Caller must have robj locked.
    277  */
    278 struct dma_fence *
    279 dma_resv_get_excl(struct dma_resv *robj)
    280 {
    281 
    282 	KASSERT(dma_resv_held(robj));
    283 	return robj->fence_excl;
    284 }
    285 
    286 /*
    287  * dma_resv_get_list(robj)
    288  *
    289  *	Return a pointer to the shared fence list of the reservation
    290  *	object robj.
    291  *
    292  *	Caller must have robj locked.
    293  */
    294 struct dma_resv_list *
    295 dma_resv_get_list(struct dma_resv *robj)
    296 {
    297 
    298 	KASSERT(dma_resv_held(robj));
    299 	return robj->fence;
    300 }
    301 
    302 /*
    303  * dma_resv_reserve_shared(robj)
    304  *
    305  *	Reserve space in robj to add a shared fence.  To be used only
    306  *	once before calling dma_resv_add_shared_fence.
    307  *
    308  *	Caller must have robj locked.
    309  *
    310  *	Internally, we start with room for four entries and double if
    311  *	we don't have enough.  This is not guaranteed.
    312  */
    313 int
    314 dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
    315 {
    316 	struct dma_resv_list *list, *prealloc;
    317 	uint32_t n, nalloc;
    318 
    319 	KASSERT(dma_resv_held(robj));
    320 	KASSERT(num_fences == 1);
    321 
    322 	list = robj->fence;
    323 	prealloc = robj->robj_prealloc;
    324 
    325 	/* If there's an existing list, check it for space.  */
    326 	if (list) {
    327 		/* If there's too many already, give up.  */
    328 		if (list->shared_count == UINT32_MAX)
    329 			return -ENOMEM;
    330 
    331 		/* Add one more. */
    332 		n = list->shared_count + 1;
    333 
    334 		/* If there's enough for one more, we're done.  */
    335 		if (n <= list->shared_max)
    336 			return 0;
    337 	} else {
    338 		/* No list already.  We need space for 1.  */
    339 		n = 1;
    340 	}
    341 
    342 	/* If not, maybe there's a preallocated list ready.  */
    343 	if (prealloc != NULL) {
    344 		/* If there's enough room in it, stop here.  */
    345 		if (n <= prealloc->shared_max)
    346 			return 0;
    347 
    348 		/* Try to double its capacity.  */
    349 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
    350 		prealloc = objlist_tryalloc(nalloc);
    351 		if (prealloc == NULL)
    352 			return -ENOMEM;
    353 
    354 		/* Swap the new preallocated list and free the old one.  */
    355 		objlist_free(robj->robj_prealloc);
    356 		robj->robj_prealloc = prealloc;
    357 	} else {
    358 		/* Start with some spare.  */
    359 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
    360 		prealloc = objlist_tryalloc(nalloc);
    361 		if (prealloc == NULL)
    362 			return -ENOMEM;
    363 		/* Save the new preallocated list.  */
    364 		robj->robj_prealloc = prealloc;
    365 	}
    366 
    367 	/* Success!  */
    368 	return 0;
    369 }
    370 
    371 struct dma_resv_write_ticket {
    372 };
    373 
    374 /*
    375  * dma_resv_write_begin(robj, ticket)
    376  *
    377  *	Begin an atomic batch of writes to robj, and initialize opaque
    378  *	ticket for it.  The ticket must be passed to
    379  *	dma_resv_write_commit to commit the writes.
    380  *
    381  *	Caller must have robj locked.
    382  *
    383  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    384  *	NOT serve as an acquire operation, however.
    385  */
    386 static void
    387 dma_resv_write_begin(struct dma_resv *robj,
    388     struct dma_resv_write_ticket *ticket)
    389 {
    390 
    391 	KASSERT(dma_resv_held(robj));
    392 
    393 	write_seqcount_begin(&robj->seq);
    394 }
    395 
    396 /*
    397  * dma_resv_write_commit(robj, ticket)
    398  *
    399  *	Commit an atomic batch of writes to robj begun with the call to
    400  *	dma_resv_write_begin that returned ticket.
    401  *
    402  *	Caller must have robj locked.
    403  *
    404  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    405  *	NOT serve as a release operation, however.
    406  */
    407 static void
    408 dma_resv_write_commit(struct dma_resv *robj,
    409     struct dma_resv_write_ticket *ticket)
    410 {
    411 
    412 	KASSERT(dma_resv_held(robj));
    413 
    414 	write_seqcount_end(&robj->seq);
    415 }
    416 
    417 struct dma_resv_read_ticket {
    418 	unsigned version;
    419 };
    420 
    421 /*
    422  * dma_resv_read_begin(robj, ticket)
    423  *
    424  *	Begin a read section, and initialize opaque ticket for it.  The
    425  *	ticket must be passed to dma_resv_read_exit, and the
    426  *	caller must be prepared to retry reading if it fails.
    427  */
    428 static void
    429 dma_resv_read_begin(const struct dma_resv *robj,
    430     struct dma_resv_read_ticket *ticket)
    431 {
    432 
    433 	ticket->version = read_seqcount_begin(&robj->seq);
    434 }
    435 
    436 /*
    437  * dma_resv_read_valid(robj, ticket)
    438  *
    439  *	Test whether the read sections are valid.  Return true on
    440  *	success, or false on failure if the read ticket has been
    441  *	invalidated.
    442  */
    443 static bool
    444 dma_resv_read_valid(const struct dma_resv *robj,
    445     struct dma_resv_read_ticket *ticket)
    446 {
    447 
    448 	return !read_seqcount_retry(&robj->seq, ticket->version);
    449 }
    450 
    451 /*
    452  * dma_resv_get_shared_reader(robj, listp, shared_countp, ticket)
    453  *
    454  *	Set *listp and *shared_countp to a snapshot of the pointer to
    455  *	and length of the shared fence list of robj and return true, or
    456  *	set them to NULL/0 and return false if a writer intervened so
    457  *	the caller must start over.
    458  *
    459  *	Both *listp and *shared_countp are unconditionally initialized
    460  *	on return.  They may be NULL/0 even on success, if there is no
    461  *	shared list at the moment.  Does not take any fence references.
    462  */
    463 static bool
    464 dma_resv_get_shared_reader(const struct dma_resv *robj,
    465     const struct dma_resv_list **listp, unsigned *shared_countp,
    466     struct dma_resv_read_ticket *ticket)
    467 {
    468 	struct dma_resv_list *list;
    469 	unsigned shared_count = 0;
    470 
    471 	/*
    472 	 * Get the list and, if it is present, its length.  If the list
    473 	 * is present, it has a valid length.  The atomic_load_consume
    474 	 * pairs with the membar_producer in dma_resv_write_begin.
    475 	 */
    476 	list = atomic_load_consume(&robj->fence);
    477 	shared_count = list ? atomic_load_relaxed(&list->shared_count) : 0;
    478 
    479 	/*
    480 	 * We are done reading from robj and list.  Validate our
    481 	 * parking ticket.  If it's invalid, do not pass go and do not
    482 	 * collect $200.
    483 	 */
    484 	if (!dma_resv_read_valid(robj, ticket))
    485 		goto fail;
    486 
    487 	/* Success!  */
    488 	*listp = list;
    489 	*shared_countp = shared_count;
    490 	return true;
    491 
    492 fail:	*listp = NULL;
    493 	*shared_countp = 0;
    494 	return false;
    495 }
    496 
    497 /*
    498  * dma_resv_get_excl_reader(robj, fencep, ticket)
    499  *
    500  *	Set *fencep to the exclusive fence of robj and return true, or
    501  *	set it to NULL and return false if either
    502  *	(a) a writer intervened, or
    503  *	(b) the fence is scheduled to be destroyed after this RCU grace
    504  *	    period,
    505  *	in either case meaning the caller must restart.
    506  *
    507  *	The value of *fencep is unconditionally initialized on return.
    508  *	It may be NULL, if there is no exclusive fence at the moment.
    509  *	If nonnull, *fencep is referenced; caller must dma_fence_put.
    510  */
    511 static bool
    512 dma_resv_get_excl_reader(const struct dma_resv *robj,
    513     struct dma_fence **fencep,
    514     struct dma_resv_read_ticket *ticket)
    515 {
    516 	struct dma_fence *fence;
    517 
    518 	/*
    519 	 * Get the candidate fence pointer.  The atomic_load_consume
    520 	 * pairs with the membar_consumer in dma_resv_write_begin.
    521 	 */
    522 	fence = atomic_load_consume(&robj->fence_excl);
    523 
    524 	/*
    525 	 * The load of robj->fence_excl is atomic, but the caller may
    526 	 * have previously loaded the shared fence list and should
    527 	 * restart if its view of the entire dma_resv object is not a
    528 	 * consistent snapshot.
    529 	 */
    530 	if (!dma_resv_read_valid(robj, ticket))
    531 		goto fail;
    532 
    533 	/*
    534 	 * If the fence is already scheduled to away after this RCU
    535 	 * read section, give up.  Otherwise, take a reference so it
    536 	 * won't go away until after dma_fence_put.
    537 	 */
    538 	if (fence != NULL &&
    539 	    (fence = dma_fence_get_rcu(fence)) == NULL)
    540 		goto fail;
    541 
    542 	/* Success!  */
    543 	*fencep = fence;
    544 	return true;
    545 
    546 fail:	*fencep = NULL;
    547 	return false;
    548 }
    549 
    550 /*
    551  * dma_resv_add_excl_fence(robj, fence)
    552  *
    553  *	Empty and release all of robj's shared fences, and clear and
    554  *	release its exclusive fence.  If fence is nonnull, acquire a
    555  *	reference to it and save it as robj's exclusive fence.
    556  *
    557  *	Caller must have robj locked.
    558  */
    559 void
    560 dma_resv_add_excl_fence(struct dma_resv *robj,
    561     struct dma_fence *fence)
    562 {
    563 	struct dma_fence *old_fence = robj->fence_excl;
    564 	struct dma_resv_list *old_list = robj->fence;
    565 	uint32_t old_shared_count;
    566 	struct dma_resv_write_ticket ticket;
    567 
    568 	KASSERT(dma_resv_held(robj));
    569 
    570 	/*
    571 	 * If we are setting rather than just removing a fence, acquire
    572 	 * a reference for ourselves.
    573 	 */
    574 	if (fence)
    575 		(void)dma_fence_get(fence);
    576 
    577 	/* If there are any shared fences, remember how many.  */
    578 	if (old_list)
    579 		old_shared_count = old_list->shared_count;
    580 
    581 	/* Begin an update.  Implies membar_producer for fence.  */
    582 	dma_resv_write_begin(robj, &ticket);
    583 
    584 	/* Replace the fence and zero the shared count.  */
    585 	atomic_store_relaxed(&robj->fence_excl, fence);
    586 	if (old_list)
    587 		old_list->shared_count = 0;
    588 
    589 	/* Commit the update.  */
    590 	dma_resv_write_commit(robj, &ticket);
    591 
    592 	/* Release the old exclusive fence, if any.  */
    593 	if (old_fence) {
    594 		dma_fence_put(old_fence);
    595 		old_fence = NULL; /* paranoia */
    596 	}
    597 
    598 	/* Release any old shared fences.  */
    599 	if (old_list) {
    600 		while (old_shared_count--) {
    601 			dma_fence_put(old_list->shared[old_shared_count]);
    602 			/* paranoia */
    603 			old_list->shared[old_shared_count] = NULL;
    604 		}
    605 	}
    606 }
    607 
    608 /*
    609  * dma_resv_add_shared_fence(robj, fence)
    610  *
    611  *	Acquire a reference to fence and add it to robj's shared list.
    612  *	If any fence was already added with the same context number,
    613  *	release it and replace it by this one.
    614  *
    615  *	Caller must have robj locked, and must have preceded with a
    616  *	call to dma_resv_reserve_shared for each shared fence
    617  *	added.
    618  */
    619 void
    620 dma_resv_add_shared_fence(struct dma_resv *robj,
    621     struct dma_fence *fence)
    622 {
    623 	struct dma_resv_list *list = robj->fence;
    624 	struct dma_resv_list *prealloc = robj->robj_prealloc;
    625 	struct dma_resv_write_ticket ticket;
    626 	struct dma_fence *replace = NULL;
    627 	uint32_t i;
    628 
    629 	KASSERT(dma_resv_held(robj));
    630 
    631 	/* Acquire a reference to the fence.  */
    632 	KASSERT(fence != NULL);
    633 	(void)dma_fence_get(fence);
    634 
    635 	/* Check for a preallocated replacement list.  */
    636 	if (prealloc == NULL) {
    637 		/*
    638 		 * If there is no preallocated replacement list, then
    639 		 * there must be room in the current list.
    640 		 */
    641 		KASSERT(list != NULL);
    642 		KASSERT(list->shared_count < list->shared_max);
    643 
    644 		/* Begin an update.  Implies membar_producer for fence.  */
    645 		dma_resv_write_begin(robj, &ticket);
    646 
    647 		/* Find a fence with the same context number.  */
    648 		for (i = 0; i < list->shared_count; i++) {
    649 			if (list->shared[i]->context == fence->context) {
    650 				replace = list->shared[i];
    651 				atomic_store_relaxed(&list->shared[i], fence);
    652 				break;
    653 			}
    654 		}
    655 
    656 		/* If we didn't find one, add it at the end.  */
    657 		if (i == list->shared_count) {
    658 			atomic_store_relaxed(&list->shared[list->shared_count],
    659 			    fence);
    660 			atomic_store_relaxed(&list->shared_count,
    661 			    list->shared_count + 1);
    662 		}
    663 
    664 		/* Commit the update.  */
    665 		dma_resv_write_commit(robj, &ticket);
    666 	} else {
    667 		/*
    668 		 * There is a preallocated replacement list.  There may
    669 		 * not be a current list.  If not, treat it as a zero-
    670 		 * length list.
    671 		 */
    672 		uint32_t shared_count = (list == NULL? 0 : list->shared_count);
    673 
    674 		/* There had better be room in the preallocated list.  */
    675 		KASSERT(shared_count < prealloc->shared_max);
    676 
    677 		/*
    678 		 * Copy the fences over, but replace if we find one
    679 		 * with the same context number.
    680 		 */
    681 		for (i = 0; i < shared_count; i++) {
    682 			if (replace == NULL &&
    683 			    list->shared[i]->context == fence->context) {
    684 				replace = list->shared[i];
    685 				prealloc->shared[i] = fence;
    686 			} else {
    687 				prealloc->shared[i] = list->shared[i];
    688 			}
    689 		}
    690 		prealloc->shared_count = shared_count;
    691 
    692 		/* If we didn't find one, add it at the end.  */
    693 		if (replace == NULL)
    694 			prealloc->shared[prealloc->shared_count++] = fence;
    695 
    696 		/*
    697 		 * Now ready to replace the list.  Begin an update.
    698 		 * Implies membar_producer for fence and prealloc.
    699 		 */
    700 		dma_resv_write_begin(robj, &ticket);
    701 
    702 		/* Replace the list.  */
    703 		atomic_store_relaxed(&robj->fence, prealloc);
    704 		robj->robj_prealloc = NULL;
    705 
    706 		/* Commit the update.  */
    707 		dma_resv_write_commit(robj, &ticket);
    708 
    709 		/*
    710 		 * If there is an old list, free it when convenient.
    711 		 * (We are not in a position at this point to sleep
    712 		 * waiting for activity on all CPUs.)
    713 		 */
    714 		if (list)
    715 			objlist_defer_free(list);
    716 	}
    717 
    718 	/* Release a fence if we replaced it.  */
    719 	if (replace) {
    720 		dma_fence_put(replace);
    721 		replace = NULL;	/* paranoia */
    722 	}
    723 }
    724 
    725 /*
    726  * dma_resv_get_excl_rcu(robj)
    727  *
    728  *	Note: Caller need not call this from an RCU read section.
    729  */
    730 struct dma_fence *
    731 dma_resv_get_excl_rcu(const struct dma_resv *robj)
    732 {
    733 	struct dma_fence *fence;
    734 
    735 	rcu_read_lock();
    736 	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
    737 	rcu_read_unlock();
    738 
    739 	return fence;
    740 }
    741 
    742 /*
    743  * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
    744  *
    745  *	Get a snapshot of the exclusive and shared fences of robj.  The
    746  *	shared fences are returned as a pointer *sharedp to an array,
    747  *	to be freed by the caller with kfree, of *nsharedp elements.
    748  *	If fencep is null, then add the exclusive fence, if any, at the
    749  *	end of the array instead.
    750  *
    751  *	Returns zero on success, negative (Linux-style) error code on
    752  *	failure.  On failure, *fencep, *nsharedp, and *sharedp are
    753  *	untouched.
    754  */
    755 int
    756 dma_resv_get_fences_rcu(const struct dma_resv *robj,
    757     struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
    758 {
    759 	const struct dma_resv_list *list = NULL;
    760 	struct dma_fence *fence = NULL;
    761 	struct dma_fence **shared = NULL;
    762 	unsigned shared_alloc, shared_count, i;
    763 	struct dma_resv_read_ticket ticket;
    764 
    765 top:	KASSERT(fence == NULL);
    766 
    767 	/* Enter an RCU read section and get a read ticket.  */
    768 	rcu_read_lock();
    769 	dma_resv_read_begin(robj, &ticket);
    770 
    771 	/* If there is a shared list, grab it.  */
    772 	if (!dma_resv_get_shared_reader(robj, &list, &shared_count, &ticket))
    773 		goto restart;
    774 	if (list != NULL) {
    775 
    776 		/*
    777 		 * Avoid arithmetic overflow with `+ 1' below.
    778 		 * Strictly speaking we don't need this if the caller
    779 		 * specified fencep or if there is no exclusive fence,
    780 		 * but it is simpler to not have to consider those
    781 		 * cases.
    782 		 */
    783 		KASSERT(shared_count <= list->shared_max);
    784 		if (list->shared_max == UINT_MAX)
    785 			return -ENOMEM;
    786 
    787 		/* Check whether we have a buffer.  */
    788 		if (shared == NULL) {
    789 			/*
    790 			 * We don't have a buffer yet.  Try to allocate
    791 			 * one without waiting.
    792 			 */
    793 			shared_alloc = list->shared_max + 1;
    794 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    795 			    GFP_NOWAIT);
    796 			if (shared == NULL) {
    797 				/*
    798 				 * Couldn't do it immediately.  Back
    799 				 * out of RCU and allocate one with
    800 				 * waiting.
    801 				 */
    802 				rcu_read_unlock();
    803 				shared = kcalloc(shared_alloc,
    804 				    sizeof(shared[0]), GFP_KERNEL);
    805 				if (shared == NULL)
    806 					return -ENOMEM;
    807 				goto top;
    808 			}
    809 		} else if (shared_alloc < list->shared_max + 1) {
    810 			/*
    811 			 * We have a buffer but it's too small.  We're
    812 			 * already racing in this case, so just back
    813 			 * out and wait to allocate a bigger one.
    814 			 */
    815 			shared_alloc = list->shared_max + 1;
    816 			rcu_read_unlock();
    817 			kfree(shared);
    818 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    819 			    GFP_KERNEL);
    820 			if (shared == NULL)
    821 				return -ENOMEM;
    822 		}
    823 
    824 		/*
    825 		 * We got a buffer large enough.  Copy into the buffer
    826 		 * and record the number of elements.  Could safely use
    827 		 * memcpy here, because even if we race with a writer
    828 		 * it'll invalidate the read ticket and we'll start
    829 		 * over, but atomic_load in a loop will pacify kcsan.
    830 		 */
    831 		for (i = 0; i < shared_count; i++)
    832 			shared[i] = atomic_load_relaxed(&list->shared[i]);
    833 
    834 		/* If anything changed while we were copying, restart.  */
    835 		if (!dma_resv_read_valid(robj, &ticket))
    836 			goto restart;
    837 	}
    838 
    839 	/* If there is an exclusive fence, grab it.  */
    840 	KASSERT(fence == NULL);
    841 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
    842 		goto restart;
    843 
    844 	/*
    845 	 * Try to get a reference to all of the shared fences.
    846 	 */
    847 	for (i = 0; i < shared_count; i++) {
    848 		if (dma_fence_get_rcu(atomic_load_relaxed(&shared[i])) == NULL)
    849 			goto put_restart;
    850 	}
    851 
    852 	/* Success!  */
    853 	rcu_read_unlock();
    854 	if (fencep) {
    855 		*fencep = fence;
    856 	} else if (fence) {
    857 		KASSERT(shared_count < UINT_MAX);
    858 		shared[shared_count++] = fence;
    859 	}
    860 	*nsharedp = shared_count;
    861 	*sharedp = shared;
    862 	return 0;
    863 
    864 put_restart:
    865 	/* Back out.  */
    866 	while (i --> 0) {
    867 		dma_fence_put(shared[i]);
    868 		shared[i] = NULL; /* paranoia */
    869 	}
    870 	if (fence) {
    871 		dma_fence_put(fence);
    872 		fence = NULL;
    873 	}
    874 
    875 restart:
    876 	KASSERT(fence == NULL);
    877 	rcu_read_unlock();
    878 	goto top;
    879 }
    880 
    881 /*
    882  * dma_resv_copy_fences(dst, src)
    883  *
    884  *	Copy the exclusive fence and all the shared fences from src to
    885  *	dst.
    886  *
    887  *	Caller must have dst locked.
    888  */
    889 int
    890 dma_resv_copy_fences(struct dma_resv *dst_robj,
    891     const struct dma_resv *src_robj)
    892 {
    893 	const struct dma_resv_list *src_list;
    894 	struct dma_resv_list *dst_list = NULL;
    895 	struct dma_resv_list *old_list;
    896 	struct dma_fence *fence = NULL;
    897 	struct dma_fence *old_fence;
    898 	uint32_t shared_count, i;
    899 	struct dma_resv_read_ticket read_ticket;
    900 	struct dma_resv_write_ticket write_ticket;
    901 
    902 	KASSERT(dma_resv_held(dst_robj));
    903 
    904 top:	KASSERT(fence == NULL);
    905 
    906 	/* Enter an RCU read section and get a read ticket.  */
    907 	rcu_read_lock();
    908 	dma_resv_read_begin(src_robj, &read_ticket);
    909 
    910 	/* Get the shared list.  */
    911 	if (!dma_resv_get_shared_reader(src_robj, &src_list, &shared_count,
    912 		&read_ticket))
    913 		goto restart;
    914 	if (src_list != NULL) {
    915 		/* Allocate a new list.  */
    916 		dst_list = objlist_tryalloc(shared_count);
    917 		if (dst_list == NULL)
    918 			return -ENOMEM;
    919 
    920 		/* Copy over all fences that are not yet signalled.  */
    921 		dst_list->shared_count = 0;
    922 		for (i = 0; i < shared_count; i++) {
    923 			KASSERT(fence == NULL);
    924 			fence = atomic_load_relaxed(&src_list->shared[i]);
    925 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
    926 				goto restart;
    927 			if (dma_fence_is_signaled(fence)) {
    928 				dma_fence_put(fence);
    929 				fence = NULL;
    930 				continue;
    931 			}
    932 			dst_list->shared[dst_list->shared_count++] = fence;
    933 			fence = NULL;
    934 		}
    935 
    936 		/* If anything changed while we were copying, restart.  */
    937 		if (!dma_resv_read_valid(src_robj, &read_ticket))
    938 			goto restart;
    939 	}
    940 
    941 	/* Get the exclusive fence.  */
    942 	KASSERT(fence == NULL);
    943 	if (!dma_resv_get_excl_reader(src_robj, &fence, &read_ticket))
    944 		goto restart;
    945 
    946 	/* All done with src; exit the RCU read section.  */
    947 	rcu_read_unlock();
    948 
    949 	/*
    950 	 * We now have a snapshot of the shared and exclusive fences of
    951 	 * src_robj and we have acquired references to them so they
    952 	 * won't go away.  Transfer them over to dst_robj, releasing
    953 	 * references to any that were there.
    954 	 */
    955 
    956 	/* Get the old shared and exclusive fences, if any.  */
    957 	old_list = dst_robj->fence;
    958 	old_fence = dst_robj->fence_excl;
    959 
    960 	/*
    961 	 * Begin an update.  Implies membar_producer for dst_list and
    962 	 * fence.
    963 	 */
    964 	dma_resv_write_begin(dst_robj, &write_ticket);
    965 
    966 	/* Replace the fences.  */
    967 	atomic_store_relaxed(&dst_robj->fence, dst_list);
    968 	atomic_store_relaxed(&dst_robj->fence_excl, fence);
    969 
    970 	/* Commit the update.  */
    971 	dma_resv_write_commit(dst_robj, &write_ticket);
    972 
    973 	/* Release the old exclusive fence, if any.  */
    974 	if (old_fence) {
    975 		dma_fence_put(old_fence);
    976 		old_fence = NULL; /* paranoia */
    977 	}
    978 
    979 	/* Release any old shared fences.  */
    980 	if (old_list) {
    981 		for (i = old_list->shared_count; i --> 0;) {
    982 			dma_fence_put(old_list->shared[i]);
    983 			old_list->shared[i] = NULL; /* paranoia */
    984 		}
    985 		objlist_free(old_list);
    986 		old_list = NULL; /* paranoia */
    987 	}
    988 
    989 	/* Success!  */
    990 	return 0;
    991 
    992 restart:
    993 	KASSERT(fence == NULL);
    994 	rcu_read_unlock();
    995 	if (dst_list) {
    996 		for (i = dst_list->shared_count; i --> 0;) {
    997 			dma_fence_put(dst_list->shared[i]);
    998 			dst_list->shared[i] = NULL; /* paranoia */
    999 		}
   1000 		objlist_free(dst_list);
   1001 		dst_list = NULL;
   1002 	}
   1003 	goto top;
   1004 }
   1005 
   1006 /*
   1007  * dma_resv_test_signaled_rcu(robj, shared)
   1008  *
   1009  *	If shared is true, test whether all of the shared fences are
   1010  *	signalled, or if there are none, test whether the exclusive
   1011  *	fence is signalled.  If shared is false, test only whether the
   1012  *	exclusive fence is signalled.
   1013  *
   1014  *	XXX Why does this _not_ test the exclusive fence if shared is
   1015  *	true only if there are no shared fences?  This makes no sense.
   1016  */
   1017 bool
   1018 dma_resv_test_signaled_rcu(const struct dma_resv *robj,
   1019     bool shared)
   1020 {
   1021 	struct dma_resv_read_ticket ticket;
   1022 	const struct dma_resv_list *list;
   1023 	struct dma_fence *fence = NULL;
   1024 	uint32_t i, shared_count;
   1025 	bool signaled = true;
   1026 
   1027 top:	KASSERT(fence == NULL);
   1028 
   1029 	/* Enter an RCU read section and get a read ticket.  */
   1030 	rcu_read_lock();
   1031 	dma_resv_read_begin(robj, &ticket);
   1032 
   1033 	/* If shared is requested and there is a shared list, test it.  */
   1034 	if (shared) {
   1035 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1036 			&ticket))
   1037 			goto restart;
   1038 	} else {
   1039 		list = NULL;
   1040 		shared_count = 0;
   1041 	}
   1042 	if (list != NULL) {
   1043 		/*
   1044 		 * For each fence, if it is going away, restart.
   1045 		 * Otherwise, acquire a reference to it to test whether
   1046 		 * it is signalled.  Stop if we find any that is not
   1047 		 * signalled.
   1048 		 */
   1049 		for (i = 0; i < shared_count; i++) {
   1050 			KASSERT(fence == NULL);
   1051 			fence = atomic_load_relaxed(&list->shared[i]);
   1052 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1053 				goto restart;
   1054 			signaled &= dma_fence_is_signaled(fence);
   1055 			dma_fence_put(fence);
   1056 			fence = NULL;
   1057 			if (!signaled)
   1058 				goto out;
   1059 		}
   1060 
   1061 		/* If anything changed while we were testing, restart.  */
   1062 		if (!dma_resv_read_valid(robj, &ticket))
   1063 			goto restart;
   1064 	}
   1065 	if (shared_count)
   1066 		goto out;
   1067 
   1068 	/* If there is an exclusive fence, test it.  */
   1069 	KASSERT(fence == NULL);
   1070 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1071 		goto restart;
   1072 	if (fence != NULL) {
   1073 		/* Test whether it is signalled.  If no, stop.  */
   1074 		signaled &= dma_fence_is_signaled(fence);
   1075 		dma_fence_put(fence);
   1076 		fence = NULL;
   1077 		if (!signaled)
   1078 			goto out;
   1079 	}
   1080 
   1081 out:	KASSERT(fence == NULL);
   1082 	rcu_read_unlock();
   1083 	return signaled;
   1084 
   1085 restart:
   1086 	KASSERT(fence == NULL);
   1087 	rcu_read_unlock();
   1088 	goto top;
   1089 }
   1090 
   1091 /*
   1092  * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
   1093  *
   1094  *	If shared is true, wait for all of the shared fences to be
   1095  *	signalled, or if there are none, wait for the exclusive fence
   1096  *	to be signalled.  If shared is false, wait only for the
   1097  *	exclusive fence to be signalled.  If timeout is zero, don't
   1098  *	wait, only test.
   1099  *
   1100  *	XXX Why does this _not_ wait for the exclusive fence if shared
   1101  *	is true only if there are no shared fences?  This makes no
   1102  *	sense.
   1103  */
   1104 long
   1105 dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
   1106     bool shared, bool intr, unsigned long timeout)
   1107 {
   1108 	struct dma_resv_read_ticket ticket;
   1109 	const struct dma_resv_list *list;
   1110 	struct dma_fence *fence = NULL;
   1111 	uint32_t i, shared_count;
   1112 	long ret;
   1113 
   1114 	if (timeout == 0)
   1115 		return dma_resv_test_signaled_rcu(robj, shared);
   1116 
   1117 top:	KASSERT(fence == NULL);
   1118 
   1119 	/* Enter an RCU read section and get a read ticket.  */
   1120 	rcu_read_lock();
   1121 	dma_resv_read_begin(robj, &ticket);
   1122 
   1123 	/* If shared is requested and there is a shared list, wait on it.  */
   1124 	if (shared) {
   1125 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1126 			&ticket))
   1127 			goto restart;
   1128 	} else {
   1129 		list = NULL;
   1130 		shared_count = 0;
   1131 	}
   1132 	if (list != NULL) {
   1133 		/*
   1134 		 * For each fence, if it is going away, restart.
   1135 		 * Otherwise, acquire a reference to it to test whether
   1136 		 * it is signalled.  Stop and wait if we find any that
   1137 		 * is not signalled.
   1138 		 */
   1139 		for (i = 0; i < shared_count; i++) {
   1140 			KASSERT(fence == NULL);
   1141 			fence = atomic_load_relaxed(&list->shared[i]);
   1142 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1143 				goto restart;
   1144 			if (!dma_fence_is_signaled(fence))
   1145 				goto wait;
   1146 			dma_fence_put(fence);
   1147 			fence = NULL;
   1148 		}
   1149 
   1150 		/* If anything changed while we were testing, restart.  */
   1151 		if (!dma_resv_read_valid(robj, &ticket))
   1152 			goto restart;
   1153 	}
   1154 	if (shared_count)
   1155 		goto out;
   1156 
   1157 	/* If there is an exclusive fence, test it.  */
   1158 	KASSERT(fence == NULL);
   1159 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1160 		goto restart;
   1161 	if (fence != NULL) {
   1162 		/* Test whether it is signalled.  If no, wait.  */
   1163 		if (!dma_fence_is_signaled(fence))
   1164 			goto wait;
   1165 		dma_fence_put(fence);
   1166 		fence = NULL;
   1167 	}
   1168 
   1169 out:	/* Success!  Return the number of ticks left.  */
   1170 	rcu_read_unlock();
   1171 	KASSERT(fence == NULL);
   1172 	return timeout;
   1173 
   1174 restart:
   1175 	KASSERT(fence == NULL);
   1176 	rcu_read_unlock();
   1177 	goto top;
   1178 
   1179 wait:
   1180 	/*
   1181 	 * Exit the RCU read section, wait for it, and release the
   1182 	 * fence when we're done.  If we time out or fail, bail.
   1183 	 * Otherwise, go back to the top.
   1184 	 */
   1185 	KASSERT(fence != NULL);
   1186 	rcu_read_unlock();
   1187 	ret = dma_fence_wait_timeout(fence, intr, timeout);
   1188 	dma_fence_put(fence);
   1189 	fence = NULL;
   1190 	if (ret <= 0)
   1191 		return ret;
   1192 	KASSERT(ret <= timeout);
   1193 	timeout = ret;
   1194 	goto top;
   1195 }
   1196 
   1197 /*
   1198  * dma_resv_poll_init(rpoll, lock)
   1199  *
   1200  *	Initialize reservation poll state.
   1201  */
   1202 void
   1203 dma_resv_poll_init(struct dma_resv_poll *rpoll)
   1204 {
   1205 
   1206 	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
   1207 	selinit(&rpoll->rp_selq);
   1208 	rpoll->rp_claimed = 0;
   1209 }
   1210 
   1211 /*
   1212  * dma_resv_poll_fini(rpoll)
   1213  *
   1214  *	Release any resource associated with reservation poll state.
   1215  */
   1216 void
   1217 dma_resv_poll_fini(struct dma_resv_poll *rpoll)
   1218 {
   1219 
   1220 	KASSERT(rpoll->rp_claimed == 0);
   1221 	seldestroy(&rpoll->rp_selq);
   1222 	mutex_destroy(&rpoll->rp_lock);
   1223 }
   1224 
   1225 /*
   1226  * dma_resv_poll_cb(fence, fcb)
   1227  *
   1228  *	Callback to notify a reservation poll that a fence has
   1229  *	completed.  Notify any waiters and allow the next poller to
   1230  *	claim the callback.
   1231  *
   1232  *	If one thread is waiting for the exclusive fence only, and we
   1233  *	spuriously notify them about a shared fence, tough.
   1234  */
   1235 static void
   1236 dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
   1237 {
   1238 	struct dma_resv_poll *rpoll = container_of(fcb,
   1239 	    struct dma_resv_poll, rp_fcb);
   1240 
   1241 	mutex_enter(&rpoll->rp_lock);
   1242 	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
   1243 	rpoll->rp_claimed = 0;
   1244 	mutex_exit(&rpoll->rp_lock);
   1245 }
   1246 
   1247 /*
   1248  * dma_resv_do_poll(robj, events, rpoll)
   1249  *
   1250  *	Poll for reservation object events using the reservation poll
   1251  *	state in rpoll:
   1252  *
   1253  *	- POLLOUT	wait for all fences shared and exclusive
   1254  *	- POLLIN	wait for the exclusive fence
   1255  *
   1256  *	Return the subset of events in events that are ready.  If any
   1257  *	are requested but not ready, arrange to be notified with
   1258  *	selnotify when they are.
   1259  */
   1260 int
   1261 dma_resv_do_poll(const struct dma_resv *robj, int events,
   1262     struct dma_resv_poll *rpoll)
   1263 {
   1264 	struct dma_resv_read_ticket ticket;
   1265 	const struct dma_resv_list *list;
   1266 	struct dma_fence *fence = NULL;
   1267 	uint32_t i, shared_count;
   1268 	int revents;
   1269 	bool recorded = false;	/* curlwp is on the selq */
   1270 	bool claimed = false;	/* we claimed the callback */
   1271 	bool callback = false;	/* we requested a callback */
   1272 
   1273 	/*
   1274 	 * Start with the maximal set of events that could be ready.
   1275 	 * We will eliminate the events that are definitely not ready
   1276 	 * as we go at the same time as we add callbacks to notify us
   1277 	 * that they may be ready.
   1278 	 */
   1279 	revents = events & (POLLIN|POLLOUT);
   1280 	if (revents == 0)
   1281 		return 0;
   1282 
   1283 top:	KASSERT(fence == NULL);
   1284 
   1285 	/* Enter an RCU read section and get a read ticket.  */
   1286 	rcu_read_lock();
   1287 	dma_resv_read_begin(robj, &ticket);
   1288 
   1289 	/* If we want to wait for all fences, get the shared list.  */
   1290 	if (events & POLLOUT) {
   1291 		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
   1292 			&ticket))
   1293 			goto restart;
   1294 	} else {
   1295 		list = NULL;
   1296 		shared_count = 0;
   1297 	}
   1298 	if (list != NULL) do {
   1299 		/*
   1300 		 * For each fence, if it is going away, restart.
   1301 		 * Otherwise, acquire a reference to it to test whether
   1302 		 * it is signalled.  Stop and request a callback if we
   1303 		 * find any that is not signalled.
   1304 		 */
   1305 		for (i = 0; i < shared_count; i++) {
   1306 			KASSERT(fence == NULL);
   1307 			fence = atomic_load_relaxed(&list->shared[i]);
   1308 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1309 				goto restart;
   1310 			if (!dma_fence_is_signaled(fence)) {
   1311 				dma_fence_put(fence);
   1312 				fence = NULL;
   1313 				break;
   1314 			}
   1315 			dma_fence_put(fence);
   1316 			fence = NULL;
   1317 		}
   1318 
   1319 		/* If all shared fences have been signalled, move on.  */
   1320 		if (i == shared_count)
   1321 			break;
   1322 
   1323 		/* Put ourselves on the selq if we haven't already.  */
   1324 		if (!recorded)
   1325 			goto record;
   1326 
   1327 		/*
   1328 		 * If someone else claimed the callback, or we already
   1329 		 * requested it, we're guaranteed to be notified, so
   1330 		 * assume the event is not ready.
   1331 		 */
   1332 		if (!claimed || callback) {
   1333 			revents &= ~POLLOUT;
   1334 			break;
   1335 		}
   1336 
   1337 		/*
   1338 		 * Otherwise, find the first fence that is not
   1339 		 * signalled, request the callback, and clear POLLOUT
   1340 		 * from the possible ready events.  If they are all
   1341 		 * signalled, leave POLLOUT set; we will simulate the
   1342 		 * callback later.
   1343 		 */
   1344 		for (i = 0; i < shared_count; i++) {
   1345 			KASSERT(fence == NULL);
   1346 			fence = atomic_load_relaxed(&list->shared[i]);
   1347 			if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1348 				goto restart;
   1349 			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1350 				dma_resv_poll_cb)) {
   1351 				dma_fence_put(fence);
   1352 				fence = NULL;
   1353 				revents &= ~POLLOUT;
   1354 				callback = true;
   1355 				break;
   1356 			}
   1357 			dma_fence_put(fence);
   1358 			fence = NULL;
   1359 		}
   1360 	} while (0);
   1361 
   1362 	/* We always wait for at least the exclusive fence, so get it.  */
   1363 	KASSERT(fence == NULL);
   1364 	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
   1365 		goto restart;
   1366 	if (fence != NULL) do {
   1367 		/*
   1368 		 * Test whether it is signalled.  If not, stop and
   1369 		 * request a callback.
   1370 		 */
   1371 		if (dma_fence_is_signaled(fence))
   1372 			break;
   1373 
   1374 		/* Put ourselves on the selq if we haven't already.  */
   1375 		if (!recorded) {
   1376 			dma_fence_put(fence);
   1377 			fence = NULL;
   1378 			goto record;
   1379 		}
   1380 
   1381 		/*
   1382 		 * If someone else claimed the callback, or we already
   1383 		 * requested it, we're guaranteed to be notified, so
   1384 		 * assume the event is not ready.
   1385 		 */
   1386 		if (!claimed || callback) {
   1387 			revents = 0;
   1388 			break;
   1389 		}
   1390 
   1391 		/*
   1392 		 * Otherwise, try to request the callback, and clear
   1393 		 * all possible ready events.  If the fence has been
   1394 		 * signalled in the interim, leave the events set; we
   1395 		 * will simulate the callback later.
   1396 		 */
   1397 		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1398 			dma_resv_poll_cb)) {
   1399 			revents = 0;
   1400 			callback = true;
   1401 			break;
   1402 		}
   1403 	} while (0);
   1404 	if (fence != NULL) {
   1405 		dma_fence_put(fence);
   1406 		fence = NULL;
   1407 	}
   1408 
   1409 	/* All done reading the fences.  */
   1410 	rcu_read_unlock();
   1411 
   1412 	if (claimed && !callback) {
   1413 		/*
   1414 		 * We claimed the callback but we didn't actually
   1415 		 * request it because a fence was signalled while we
   1416 		 * were claiming it.  Call it ourselves now.  The
   1417 		 * callback doesn't use the fence nor rely on holding
   1418 		 * any of the fence locks, so this is safe.
   1419 		 */
   1420 		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
   1421 	}
   1422 	return revents;
   1423 
   1424 restart:
   1425 	KASSERT(fence == NULL);
   1426 	rcu_read_unlock();
   1427 	goto top;
   1428 
   1429 record:
   1430 	KASSERT(fence == NULL);
   1431 	rcu_read_unlock();
   1432 	mutex_enter(&rpoll->rp_lock);
   1433 	selrecord(curlwp, &rpoll->rp_selq);
   1434 	if (!rpoll->rp_claimed)
   1435 		claimed = rpoll->rp_claimed = true;
   1436 	mutex_exit(&rpoll->rp_lock);
   1437 	recorded = true;
   1438 	goto top;
   1439 }
   1440 
   1441 /*
   1442  * dma_resv_kqfilter(robj, kn, rpoll)
   1443  *
   1444  *	Kqueue filter for reservation objects.  Currently not
   1445  *	implemented because the logic to implement it is nontrivial,
   1446  *	and userland will presumably never use it, so it would be
   1447  *	dangerous to add never-tested complex code paths to the kernel.
   1448  */
   1449 int
   1450 dma_resv_kqfilter(const struct dma_resv *robj,
   1451     struct knote *kn, struct dma_resv_poll *rpoll)
   1452 {
   1453 
   1454 	return EINVAL;
   1455 }
   1456