/*	$NetBSD: linux_dma_resv.c,v 1.2 2021/12/19 10:36:55 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.2 2021/12/19 10:36:55 riastradh Exp $");

#include <sys/param.h>
#include <sys/poll.h>
#include <sys/select.h>

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>
#include <linux/seqlock.h>
#include <linux/ww_mutex.h>

DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);

static struct dma_resv_list *
objlist_tryalloc(uint32_t n)
{
	struct dma_resv_list *list;

	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
	if (list == NULL)
		return NULL;
	list->shared_max = n;

	return list;
}

static void
objlist_free(struct dma_resv_list *list)
{
	uint32_t n = list->shared_max;

	kmem_free(list, offsetof(typeof(*list), shared[n]));
}

static void
objlist_free_cb(struct rcu_head *rcu)
{
	struct dma_resv_list *list = container_of(rcu,
	    struct dma_resv_list, rol_rcu);

	objlist_free(list);
}

static void
objlist_defer_free(struct dma_resv_list *list)
{

	call_rcu(&list->rol_rcu, objlist_free_cb);
}
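
/*
 * These helpers implement the usual RCU publish/retire pattern for
 * the shared fence list: a writer publishes a new list and retires
 * the old one with objlist_defer_free, so readers still traversing
 * the old list under rcu_read_lock are not pulled out from under
 * them.  Writer side, as an illustrative sketch (not compiled; robj
 * is assumed locked):
 *
 *	struct dma_resv_list *old = robj->fence;
 *
 *	robj->fence = newlist;		// publish the replacement
 *	if (old)
 *		objlist_defer_free(old);	// free after a grace period
 */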

/*
 * dma_resv_init(robj)
 *
 *	Initialize a reservation object.  Caller must later destroy it
 *	with dma_resv_fini.
 */
void
dma_resv_init(struct dma_resv *robj)
{

	ww_mutex_init(&robj->lock, &reservation_ww_class);
	seqcount_init(&robj->seq);
	robj->fence_excl = NULL;
	robj->fence = NULL;
	robj->robj_prealloc = NULL;
}

/*
 * dma_resv_fini(robj)
 *
 *	Destroy a reservation object, freeing any memory that had been
 *	allocated for it.  Caller must have exclusive access to it.
 */
void
dma_resv_fini(struct dma_resv *robj)
{
	unsigned i;

	if (robj->robj_prealloc)
		objlist_free(robj->robj_prealloc);
	if (robj->fence) {
		for (i = 0; i < robj->fence->shared_count; i++)
			dma_fence_put(robj->fence->shared[i]);
		objlist_free(robj->fence);
	}
	if (robj->fence_excl)
		dma_fence_put(robj->fence_excl);
	ww_mutex_destroy(&robj->lock);
}
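
/*
 * Typical lifecycle, as an illustrative sketch (not compiled; `obj'
 * is a hypothetical driver structure embedding a dma_resv):
 *
 *	dma_resv_init(&obj->resv);
 *	...
 *	if (dma_resv_lock(&obj->resv, ctx) == 0) {
 *		... add or read fences ...
 *		dma_resv_unlock(&obj->resv);
 *	}
 *	...
 *	dma_resv_fini(&obj->resv);	// no other users may remain
 */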

/*
 * dma_resv_lock(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry.
 */
int
dma_resv_lock(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks.
 */
void
dma_resv_lock_slow(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	ww_mutex_lock_slow(&robj->lock, ctx);
}

/*
 * dma_resv_lock_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_interruptible(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks.  Return 0 on
 *	success, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
}
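
/*
 * Deadlock avoidance with multiple reservation objects, as an
 * illustrative sketch (not compiled): on -EDEADLK the caller backs
 * out, reacquires the contended lock with the slow path, and
 * retries.  Assumes the ww_acquire_ctx API from <linux/ww_mutex.h>.
 *
 *	struct ww_acquire_ctx ctx;
 *	int ret;
 *
 *	ww_acquire_init(&ctx, &reservation_ww_class);
 *	ret = dma_resv_lock(a, &ctx);
 *	if (ret)
 *		goto fail;
 *	ret = dma_resv_lock(b, &ctx);
 *	if (ret == -EDEADLK) {
 *		dma_resv_unlock(a);
 *		dma_resv_lock_slow(b, &ctx);	// always succeeds
 *		ret = dma_resv_lock(a, &ctx);	// may fail again;
 *		...				// if so, repeat swapped
 *	}
 *	ww_acquire_done(&ctx);
 *	... a and b both locked ...
 *	dma_resv_unlock(b);
 *	dma_resv_unlock(a);
 *	ww_acquire_fini(&ctx);
 */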

/*
 * dma_resv_trylock(robj)
 *
 *	Try to acquire a reservation object's lock without blocking.
 *	Return true on success, false on failure.
 */
bool
dma_resv_trylock(struct dma_resv *robj)
{

	return ww_mutex_trylock(&robj->lock);
}

/*
 * dma_resv_unlock(robj)
 *
 *	Release a reservation object's lock.
 */
void
dma_resv_unlock(struct dma_resv *robj)
{

	ww_mutex_unlock(&robj->lock);
}

/*
 * dma_resv_held(robj)
 *
 *	True if robj is locked.
 */
bool
dma_resv_held(struct dma_resv *robj)
{

	return ww_mutex_is_locked(&robj->lock);
}

/*
 * dma_resv_assert_held(robj)
 *
 *	Panic if robj is not held, in DIAGNOSTIC builds.
 */
void
dma_resv_assert_held(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
}

/*
 * dma_resv_get_excl(robj)
 *
 *	Return a pointer to the exclusive fence of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_fence *
dma_resv_get_excl(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence_excl;
}

/*
 * dma_resv_get_list(robj)
 *
 *	Return a pointer to the shared fence list of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_resv_list *
dma_resv_get_list(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence;
}

/*
 * dma_resv_reserve_shared(robj)
 *
 *	Reserve space in robj to add a shared fence.  To be used only
 *	once before calling dma_resv_add_shared_fence.
 *
 *	Caller must have robj locked.
 *
 *	Internally, we start with room for four entries and double the
 *	capacity when we run out.  The allocation is done without
 *	blocking and may fail, in which case we return -ENOMEM.
 */
int
dma_resv_reserve_shared(struct dma_resv *robj)
{
	struct dma_resv_list *list, *prealloc;
	uint32_t n, nalloc;

	KASSERT(dma_resv_held(robj));

	list = robj->fence;
	prealloc = robj->robj_prealloc;

	/* If there's an existing list, check it for space.  */
	if (list) {
		/* If there are too many already, give up.  */
		if (list->shared_count == UINT32_MAX)
			return -ENOMEM;

		/* Add one more.  */
		n = list->shared_count + 1;

		/* If there's enough for one more, we're done.  */
		if (n <= list->shared_max)
			return 0;
	} else {
		/* No list already.  We need space for 1.  */
		n = 1;
	}

	/* If not, maybe there's a preallocated list ready.  */
	if (prealloc != NULL) {
		/* If there's enough room in it, stop here.  */
		if (n <= prealloc->shared_max)
			return 0;

		/* Try to allocate double the required size.  */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;

		/* Swap in the new preallocated list and free the old one.  */
		objlist_free(robj->robj_prealloc);
		robj->robj_prealloc = prealloc;
	} else {
		/* Start with some spare.  */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;
		/* Save the new preallocated list.  */
		robj->robj_prealloc = prealloc;
	}

	/* Success!  */
	return 0;
}
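
/*
 * The reserve/add protocol, as an illustrative sketch (not
 * compiled): every dma_resv_add_shared_fence call must be preceded
 * by a successful dma_resv_reserve_shared call under the same lock
 * hold, because adding itself is not allowed to fail:
 *
 *	dma_resv_lock(robj, ctx);
 *	if (dma_resv_reserve_shared(robj) != 0)
 *		goto fail;	// -ENOMEM, no room for another fence
 *	dma_resv_add_shared_fence(robj, fence);	// cannot fail now
 *	dma_resv_unlock(robj);
 */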

struct dma_resv_write_ticket {
};

/*
 * dma_resv_write_begin(robj, ticket)
 *
 *	Begin an atomic batch of writes to robj, and initialize opaque
 *	ticket for it.  The ticket must be passed to
 *	dma_resv_write_commit to commit the writes.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier.  Does
 *	NOT serve as an acquire operation, however.
 */
static void
dma_resv_write_begin(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_write_commit(robj, ticket)
 *
 *	Commit an atomic batch of writes to robj begun with the call to
 *	dma_resv_write_begin that returned ticket.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier.  Does
 *	NOT serve as a release operation, however.
 */
static void
dma_resv_write_commit(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_end(&robj->seq);
}

struct dma_resv_read_ticket {
	unsigned version;
};

/*
 * dma_resv_read_begin(robj, ticket)
 *
 *	Begin a read section, and initialize opaque ticket for it.  The
 *	ticket must be passed to dma_resv_read_valid to validate the
 *	read section, and the caller must be prepared to retry reading
 *	if it fails.
 */
static void
dma_resv_read_begin(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	ticket->version = read_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_read_valid(robj, ticket)
 *
 *	Test whether the read section is valid.  Return true on
 *	success, or false on failure if the read ticket has been
 *	invalidated.
 */
static bool
dma_resv_read_valid(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	return !read_seqcount_retry(&robj->seq, ticket->version);
}
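
/*
 * Together with an RCU read section, these tickets form the usual
 * seqlock retry loop used by the readers below, as an illustrative
 * sketch (not compiled):
 *
 *	struct dma_resv_read_ticket ticket;
 *
 *   top:
 *	rcu_read_lock();
 *	dma_resv_read_begin(robj, &ticket);
 *	... read the fence pointers, no side effects ...
 *	if (!dma_resv_read_valid(robj, &ticket)) {
 *		rcu_read_unlock();
 *		goto top;	// raced with a writer; retry
 *	}
 *	rcu_read_unlock();
 */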

/*
 * dma_resv_add_excl_fence(robj, fence)
 *
 *	Empty and release all of robj's shared fences, and clear and
 *	release its exclusive fence.  If fence is nonnull, acquire a
 *	reference to it and save it as robj's exclusive fence.
 *
 *	Caller must have robj locked.
 */
void
dma_resv_add_excl_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_fence *old_fence = robj->fence_excl;
	struct dma_resv_list *old_list = robj->fence;
	uint32_t old_shared_count;
	struct dma_resv_write_ticket ticket;

	KASSERT(dma_resv_held(robj));

	/*
	 * If we are setting rather than just removing a fence, acquire
	 * a reference for ourselves.
	 */
	if (fence)
		(void)dma_fence_get(fence);

	/* If there are any shared fences, remember how many.  */
	if (old_list)
		old_shared_count = old_list->shared_count;

	/* Begin an update.  */
	dma_resv_write_begin(robj, &ticket);

	/* Replace the fence and zero the shared count.  */
	robj->fence_excl = fence;
	if (old_list)
		old_list->shared_count = 0;

	/* Commit the update.  */
	dma_resv_write_commit(robj, &ticket);

	/* Release the old exclusive fence, if any.  */
	if (old_fence)
		dma_fence_put(old_fence);

	/* Release any old shared fences.  */
	if (old_list) {
		while (old_shared_count--)
			dma_fence_put(old_list->shared[old_shared_count]);
	}
}

/*
 * dma_resv_add_shared_fence(robj, fence)
 *
 *	Acquire a reference to fence and add it to robj's shared list.
 *	If any fence was already added with the same context number,
 *	release it and replace it by this one.
 *
 *	Caller must have robj locked, and must have preceded with a
 *	call to dma_resv_reserve_shared for each shared fence
 *	added.
 */
void
dma_resv_add_shared_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_resv_list *list = robj->fence;
	struct dma_resv_list *prealloc = robj->robj_prealloc;
	struct dma_resv_write_ticket ticket;
	struct dma_fence *replace = NULL;
	uint32_t i;

	KASSERT(dma_resv_held(robj));

	/* Acquire a reference to the fence.  */
	KASSERT(fence != NULL);
	(void)dma_fence_get(fence);

	/* Check for a preallocated replacement list.  */
	if (prealloc == NULL) {
		/*
		 * If there is no preallocated replacement list, then
		 * there must be room in the current list.
		 */
		KASSERT(list != NULL);
		KASSERT(list->shared_count < list->shared_max);

		/* Begin an update.  Implies membar_producer for fence.  */
		dma_resv_write_begin(robj, &ticket);

		/* Find a fence with the same context number.  */
		for (i = 0; i < list->shared_count; i++) {
			if (list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				list->shared[i] = fence;
				break;
			}
		}

		/* If we didn't find one, add it at the end.  */
		if (i == list->shared_count)
			list->shared[list->shared_count++] = fence;

		/* Commit the update.  */
		dma_resv_write_commit(robj, &ticket);
	} else {
		/*
		 * There is a preallocated replacement list.  There may
		 * not be a current list.  If not, treat it as a zero-
		 * length list.
		 */
		uint32_t shared_count = (list == NULL ? 0 : list->shared_count);

		/* There had better be room in the preallocated list.  */
		KASSERT(shared_count < prealloc->shared_max);

		/*
		 * Copy the fences over, but replace if we find one
		 * with the same context number.
		 */
		for (i = 0; i < shared_count; i++) {
			if (replace == NULL &&
			    list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				prealloc->shared[i] = fence;
			} else {
				prealloc->shared[i] = list->shared[i];
			}
		}
		prealloc->shared_count = shared_count;

		/* If we didn't find one, add it at the end.  */
		if (replace == NULL)
			prealloc->shared[prealloc->shared_count++] = fence;

		/*
		 * Now ready to replace the list.  Begin an update.
		 * Implies membar_producer for fence and prealloc.
		 */
		dma_resv_write_begin(robj, &ticket);

		/* Replace the list.  */
		robj->fence = prealloc;
		robj->robj_prealloc = NULL;

		/* Commit the update.  */
		dma_resv_write_commit(robj, &ticket);

		/*
		 * If there is an old list, free it when convenient.
		 * (We are not in a position at this point to sleep
		 * waiting for activity on all CPUs.)
		 */
		if (list)
			objlist_defer_free(list);
	}

	/* Release a fence if we replaced it.  */
	if (replace)
		dma_fence_put(replace);
}

/*
 * dma_resv_get_excl_rcu(robj)
 *
 *	Return a reference to the exclusive fence of the reservation
 *	object robj, or NULL if there is none.  Caller must release it
 *	with dma_fence_put when done.
 *
 *	Note: Caller need not call this from an RCU read section.
 */
struct dma_fence *
dma_resv_get_excl_rcu(const struct dma_resv *robj)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
	rcu_read_unlock();

	return fence;
}

/*
 * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
 *
 *	Get a snapshot of the fences of robj without taking the lock:
 *	store the exclusive fence, or NULL, in *fencep, and store an
 *	array of the shared fences in *sharedp with its length in
 *	*nsharedp.  References are acquired to all fences returned;
 *	caller must release them and free the array with kfree when
 *	done.  Return 0 on success, -ENOMEM on allocation failure.
 */
int
dma_resv_get_fences_rcu(const struct dma_resv *robj,
    struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
{
	const struct dma_resv_list *list;
	struct dma_fence *fence;
	struct dma_fence **shared = NULL;
	unsigned shared_alloc, shared_count, i;
	struct dma_resv_read_ticket ticket;

top:
	/* Enter an RCU read section and get a read ticket.  */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If there is a shared list, grab it.  */
	list = robj->fence;
	__insn_barrier();
	if (list) {
		/* Make sure the content of the list has been published.  */
		membar_datadep_consumer();

		/* Check whether we have a buffer.  */
		if (shared == NULL) {
			/*
			 * We don't have a buffer yet.  Try to allocate
			 * one without waiting.
			 */
			shared_alloc = list->shared_max;
			__insn_barrier();
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_NOWAIT);
			if (shared == NULL) {
				/*
				 * Couldn't do it immediately.  Back
				 * out of RCU and allocate one with
				 * waiting.
				 */
				rcu_read_unlock();
				shared = kcalloc(shared_alloc,
				    sizeof(shared[0]), GFP_KERNEL);
				if (shared == NULL)
					return -ENOMEM;
				goto top;
			}
		} else if (shared_alloc < list->shared_max) {
			/*
			 * We have a buffer but it's too small.  We're
			 * already racing in this case, so just back
			 * out and wait to allocate a bigger one.
			 */
			shared_alloc = list->shared_max;
			__insn_barrier();
			rcu_read_unlock();
			kfree(shared);
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_KERNEL);
			if (shared == NULL)
				return -ENOMEM;
			goto top;
		}

		/*
		 * We got a buffer large enough.  Copy into the buffer
		 * and record the number of elements.
		 */
		memcpy(shared, list->shared, shared_alloc * sizeof(shared[0]));
		shared_count = list->shared_count;
	} else {
		/* No shared list: shared count is zero.  */
		shared_count = 0;
	}

	/* If there is an exclusive fence, grab it.  */
	fence = robj->fence_excl;
	__insn_barrier();
	if (fence) {
		/* Make sure the content of the fence has been published.  */
		membar_datadep_consumer();
	}

	/*
	 * We are done reading from robj and list.  Validate our
	 * parking ticket.  If it's invalid, do not pass go and do not
	 * collect $200.
	 */
	if (!dma_resv_read_valid(robj, &ticket))
		goto restart;

	/*
	 * Try to get a reference to the exclusive fence, if there is
	 * one.  If we can't, start over.
	 */
	if (fence) {
		if (dma_fence_get_rcu(fence) == NULL)
			goto restart;
	}

	/*
	 * Try to get a reference to all of the shared fences.
	 */
	for (i = 0; i < shared_count; i++) {
		if (dma_fence_get_rcu(shared[i]) == NULL)
			goto put_restart;
	}

	/* Success!  */
	rcu_read_unlock();
	*fencep = fence;
	*nsharedp = shared_count;
	*sharedp = shared;
	return 0;

put_restart:
	/* Back out.  */
	while (i --> 0) {
		dma_fence_put(shared[i]);
		shared[i] = NULL;	/* paranoia */
	}
	if (fence) {
		dma_fence_put(fence);
		fence = NULL;	/* paranoia */
	}

restart:
	rcu_read_unlock();
	goto top;
}
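
/*
 * The caller owns the references and the array returned by
 * dma_resv_get_fences_rcu, as in this illustrative sketch (not
 * compiled):
 *
 *	struct dma_fence *excl = NULL, **shared = NULL;
 *	unsigned nshared, i;
 *
 *	if (dma_resv_get_fences_rcu(robj, &excl, &nshared, &shared))
 *		return;		// -ENOMEM
 *	... use excl (may be NULL) and shared[0..nshared-1] ...
 *	for (i = 0; i < nshared; i++)
 *		dma_fence_put(shared[i]);
 *	kfree(shared);
 *	if (excl)
 *		dma_fence_put(excl);
 */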

/*
 * dma_resv_copy_fences(dst, src)
 *
 *	Copy the exclusive fence and all the shared fences from src to
 *	dst.
 *
 *	Caller must have dst locked.
 */
int
dma_resv_copy_fences(struct dma_resv *dst_robj,
    const struct dma_resv *src_robj)
{
	const struct dma_resv_list *src_list;
	struct dma_resv_list *dst_list = NULL;
	struct dma_resv_list *old_list;
	struct dma_fence *fence = NULL;
	struct dma_fence *old_fence;
	uint32_t shared_count, i;
	struct dma_resv_read_ticket read_ticket;
	struct dma_resv_write_ticket write_ticket;

	KASSERT(dma_resv_held(dst_robj));

top:
	/* Enter an RCU read section and get a read ticket.  */
	rcu_read_lock();
	dma_resv_read_begin(src_robj, &read_ticket);

	/* Get the shared list.  */
	src_list = src_robj->fence;
	__insn_barrier();
	if (src_list) {
		/* Make sure the content of the list has been published.  */
		membar_datadep_consumer();

		/* Find out how long it is.  */
		shared_count = src_list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(src_robj, &read_ticket))
			goto restart;

		/* Allocate a new list; if we can't, back out of RCU.  */
		dst_list = objlist_tryalloc(shared_count);
		if (dst_list == NULL) {
			rcu_read_unlock();
			return -ENOMEM;
		}

		/* Copy over all fences that are not yet signalled.  */
		dst_list->shared_count = 0;
		for (i = 0; i < shared_count; i++) {
			if ((fence = dma_fence_get_rcu(src_list->shared[i]))
			    == NULL)
				goto restart;
			if (dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				continue;
			}
			dst_list->shared[dst_list->shared_count++] = fence;
			fence = NULL;
		}
	}

	/* Get the exclusive fence.  */
	fence = src_robj->fence_excl;
	__insn_barrier();
	if (fence != NULL) {
		/* Make sure the content of the fence has been published.  */
		membar_datadep_consumer();

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(src_robj, &read_ticket)) {
			fence = NULL;
			goto restart;
		}

		/*
		 * If it is going away, restart.  Otherwise, acquire a
		 * reference to it.
		 */
		if (!dma_fence_get_rcu(fence)) {
			fence = NULL;
			goto restart;
		}
	}

	/* All done with src; exit the RCU read section.  */
	rcu_read_unlock();

	/*
	 * We now have a snapshot of the shared and exclusive fences of
	 * src_robj and we have acquired references to them so they
	 * won't go away.  Transfer them over to dst_robj, releasing
	 * references to any that were there.
	 */

	/* Get the old shared and exclusive fences, if any.  */
	old_list = dst_robj->fence;
	old_fence = dst_robj->fence_excl;

	/* Begin an update.  */
	dma_resv_write_begin(dst_robj, &write_ticket);

	/* Replace the fences.  */
	dst_robj->fence = dst_list;
	dst_robj->fence_excl = fence;

	/* Commit the update.  */
	dma_resv_write_commit(dst_robj, &write_ticket);

	/* Release the old exclusive fence, if any.  */
	if (old_fence)
		dma_fence_put(old_fence);

	/* Release any old shared fences.  */
	if (old_list) {
		for (i = old_list->shared_count; i --> 0;)
			dma_fence_put(old_list->shared[i]);
	}

	/* Success!  */
	return 0;

restart:
	rcu_read_unlock();
	if (dst_list) {
		for (i = dst_list->shared_count; i --> 0;) {
			dma_fence_put(dst_list->shared[i]);
			dst_list->shared[i] = NULL;
		}
		objlist_free(dst_list);
		dst_list = NULL;
	}
	if (fence) {
		dma_fence_put(fence);
		fence = NULL;
	}
	goto top;
}

/*
 * dma_resv_test_signaled_rcu(robj, shared)
 *
 *	If shared is true, test whether all of the shared fences are
 *	signalled, or if there are none, test whether the exclusive
 *	fence is signalled.  If shared is false, test only whether the
 *	exclusive fence is signalled.
 *
 *	XXX Why does this _not_ test the exclusive fence if shared is
 *	true only if there are no shared fences?  This makes no sense.
 */
bool
dma_resv_test_signaled_rcu(const struct dma_resv *robj,
    bool shared)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence;
	uint32_t i, shared_count;
	bool signaled = true;

top:
	/* Enter an RCU read section and get a read ticket.  */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, test it.  */
	if (!shared)
		goto excl;
	list = robj->fence;
	__insn_barrier();
	if (list) {
		/* Make sure the content of the list has been published.  */
		membar_datadep_consumer();

		/* Find out how long it is.  */
		shared_count = list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop if we find any that is not
		 * signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			fence = dma_fence_get_rcu(list->shared[i]);
			if (fence == NULL)
				goto restart;
			signaled &= dma_fence_is_signaled(fence);
			dma_fence_put(fence);
			if (!signaled)
				goto out;
		}
	}

excl:
	/* If there is an exclusive fence, test it.  */
	fence = robj->fence_excl;
	__insn_barrier();
	if (fence) {
		/* Make sure the content of the fence has been published.  */
		membar_datadep_consumer();

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * If it is going away, restart.  Otherwise, acquire a
		 * reference to it to test whether it is signalled.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		signaled &= dma_fence_is_signaled(fence);
		dma_fence_put(fence);
		if (!signaled)
			goto out;
	}

out:	rcu_read_unlock();
	return signaled;

restart:
	rcu_read_unlock();
	goto top;
}

/*
 * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
 *
 *	If shared is true, wait for all of the shared fences to be
 *	signalled, or if there are none, wait for the exclusive fence
 *	to be signalled.  If shared is false, wait only for the
 *	exclusive fence to be signalled.  If timeout is zero, don't
 *	wait, only test.
 *
 *	XXX Why does this _not_ wait for the exclusive fence if shared
 *	is true only if there are no shared fences?  This makes no
 *	sense.
 */
long
dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
    bool shared, bool intr, unsigned long timeout)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence;
	uint32_t i, shared_count;
	long ret;

	if (timeout == 0)
		return dma_resv_test_signaled_rcu(robj, shared);

top:
	/* Enter an RCU read section and get a read ticket.  */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, wait on it.  */
	if (!shared)
		goto excl;
	list = robj->fence;
	__insn_barrier();
	if (list) {
		/* Make sure the content of the list has been published.  */
		membar_datadep_consumer();

		/* Find out how long it is.  */
		shared_count = list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop and wait if we find any that
		 * is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			fence = dma_fence_get_rcu(list->shared[i]);
			if (fence == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence))
				goto wait;
			dma_fence_put(fence);
		}
	}

excl:
	/* If there is an exclusive fence, test it.  */
	fence = robj->fence_excl;
	__insn_barrier();
	if (fence) {
		/* Make sure the content of the fence has been published.  */
		membar_datadep_consumer();

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * If it is going away, restart.  Otherwise, acquire a
		 * reference to it to test whether it is signalled.  If
		 * not, wait for it.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		if (!dma_fence_is_signaled(fence))
			goto wait;
		dma_fence_put(fence);
	}

	/* Success!  Return the number of ticks left.  */
	rcu_read_unlock();
	return timeout;

restart:
	rcu_read_unlock();
	goto top;

wait:
	/*
	 * Exit the RCU read section and wait for it.  If we time out
	 * or fail, bail.  Otherwise, go back to the top.
	 */
	KASSERT(fence != NULL);
	rcu_read_unlock();
	ret = dma_fence_wait_timeout(fence, intr, timeout);
	dma_fence_put(fence);
	if (ret <= 0)
		return ret;
	KASSERT(ret <= timeout);
	timeout = ret;
	goto top;
}
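
/*
 * A typical caller waits for a buffer to go idle before reusing it,
 * as in this illustrative sketch (not compiled; the timeout is in
 * ticks, and the -ETIMEDOUT mapping is a hypothetical choice of the
 * caller):
 *
 *	long ret;
 *
 *	ret = dma_resv_wait_timeout_rcu(robj, true, true, mstohz(1000));
 *	if (ret < 0)
 *		return ret;		// interrupted or error
 *	if (ret == 0)
 *		return -ETIMEDOUT;	// timed out
 *	... all requested fences signalled; safe to proceed ...
 */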

/*
 * dma_resv_poll_init(rpoll)
 *
 *	Initialize reservation poll state.
 */
void
dma_resv_poll_init(struct dma_resv_poll *rpoll)
{

	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
	selinit(&rpoll->rp_selq);
	rpoll->rp_claimed = 0;
}

/*
 * dma_resv_poll_fini(rpoll)
 *
 *	Release any resource associated with reservation poll state.
 */
void
dma_resv_poll_fini(struct dma_resv_poll *rpoll)
{

	KASSERT(rpoll->rp_claimed == 0);
	seldestroy(&rpoll->rp_selq);
	mutex_destroy(&rpoll->rp_lock);
}

/*
 * dma_resv_poll_cb(fence, fcb)
 *
 *	Callback to notify a reservation poll that a fence has
 *	completed.  Notify any waiters and allow the next poller to
 *	claim the callback.
 *
 *	If one thread is waiting for the exclusive fence only, and we
 *	spuriously notify them about a shared fence, tough.
 */
static void
dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
{
	struct dma_resv_poll *rpoll = container_of(fcb,
	    struct dma_resv_poll, rp_fcb);

	mutex_enter(&rpoll->rp_lock);
	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
	rpoll->rp_claimed = 0;
	mutex_exit(&rpoll->rp_lock);
}

/*
 * dma_resv_do_poll(robj, events, rpoll)
 *
 *	Poll for reservation object events using the reservation poll
 *	state in rpoll:
 *
 *	- POLLOUT	wait for all fences shared and exclusive
 *	- POLLIN	wait for the exclusive fence
 *
 *	Return the subset of events in events that are ready.  If any
 *	are requested but not ready, arrange to be notified with
 *	selnotify when they are.
 */
int
dma_resv_do_poll(const struct dma_resv *robj, int events,
    struct dma_resv_poll *rpoll)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence;
	uint32_t i, shared_count;
	int revents;
	bool recorded = false;	/* curlwp is on the selq */
	bool claimed = false;	/* we claimed the callback */
	bool callback = false;	/* we requested a callback */

	/*
	 * Start with the maximal set of events that could be ready.
	 * We will eliminate the events that are definitely not ready
	 * as we go at the same time as we add callbacks to notify us
	 * that they may be ready.
	 */
	revents = events & (POLLIN|POLLOUT);
	if (revents == 0)
		return 0;

top:
	/* Enter an RCU read section and get a read ticket.  */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If we want to wait for all fences, get the shared list.  */
	if (!(events & POLLOUT))
		goto excl;
	list = robj->fence;
	__insn_barrier();
	if (list) do {
		/* Make sure the content of the list has been published.  */
		membar_datadep_consumer();

		/* Find out how long it is.  */
		shared_count = list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop and request a callback if we
		 * find any that is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			fence = dma_fence_get_rcu(list->shared[i]);
			if (fence == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				break;
			}
			dma_fence_put(fence);
		}

		/* If all shared fences have been signalled, move on.  */
		if (i == shared_count)
			break;

		/* Put ourselves on the selq if we haven't already.  */
		if (!recorded)
			goto record;

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			revents &= ~POLLOUT;
			break;
		}

		/*
		 * Otherwise, find the first fence that is not
		 * signalled, request the callback, and clear POLLOUT
		 * from the possible ready events.  If they are all
		 * signalled, leave POLLOUT set; we will simulate the
		 * callback later.
		 */
		for (i = 0; i < shared_count; i++) {
			fence = dma_fence_get_rcu(list->shared[i]);
			if (fence == NULL)
				goto restart;
			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
				dma_resv_poll_cb)) {
				dma_fence_put(fence);
				revents &= ~POLLOUT;
				callback = true;
				break;
			}
			dma_fence_put(fence);
		}
	} while (0);

excl:
	/* We always wait for at least the exclusive fence, so get it.  */
	fence = robj->fence_excl;
	__insn_barrier();
	if (fence) do {
		/* Make sure the content of the fence has been published.  */
		membar_datadep_consumer();

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * If it is going away, restart.  Otherwise, acquire a
		 * reference to it to test whether it is signalled.  If
		 * not, stop and request a callback.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		if (dma_fence_is_signaled(fence)) {
			dma_fence_put(fence);
			break;
		}

		/* Put ourselves on the selq if we haven't already.  */
		if (!recorded) {
			dma_fence_put(fence);
			goto record;
		}

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			dma_fence_put(fence);
			revents = 0;
			break;
		}

		/*
		 * Otherwise, try to request the callback, and clear
		 * all possible ready events.  If the fence has been
		 * signalled in the interim, leave the events set; we
		 * will simulate the callback later.
		 */
		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
			dma_resv_poll_cb)) {
			dma_fence_put(fence);
			revents = 0;
			callback = true;
			break;
		}
		dma_fence_put(fence);
	} while (0);

	/* All done reading the fences.  */
	rcu_read_unlock();

	if (claimed && !callback) {
		/*
		 * We claimed the callback but we didn't actually
		 * request it because a fence was signalled while we
		 * were claiming it.  Call it ourselves now.  The
		 * callback doesn't use the fence nor rely on holding
		 * any of the fence locks, so this is safe.
		 */
		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
	}
	return revents;

restart:
	rcu_read_unlock();
	goto top;

record:
	rcu_read_unlock();
	mutex_enter(&rpoll->rp_lock);
	selrecord(curlwp, &rpoll->rp_selq);
	if (!rpoll->rp_claimed)
		claimed = rpoll->rp_claimed = true;
	mutex_exit(&rpoll->rp_lock);
	recorded = true;
	goto top;
}
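
/*
 * A driver's poll entry point would typically just delegate here, as
 * in this illustrative sketch (not compiled; mydrv_obj, its embedded
 * resv and rpoll members, and mydrv_lookup are all hypothetical):
 *
 *	static int
 *	mydrv_poll(dev_t dev, int events, struct lwp *l)
 *	{
 *		struct mydrv_obj *obj = mydrv_lookup(dev);
 *
 *		return dma_resv_do_poll(&obj->resv, events, &obj->rpoll);
 *	}
 */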

/*
 * dma_resv_kqfilter(robj, kn, rpoll)
 *
 *	Kqueue filter for reservation objects.  Currently not
 *	implemented because the logic to implement it is nontrivial,
 *	and userland will presumably never use it, so it would be
 *	dangerous to add never-tested complex code paths to the kernel.
 */
int
dma_resv_kqfilter(const struct dma_resv *robj,
    struct knote *kn, struct dma_resv_poll *rpoll)
{

	return EINVAL;
}