      1 /*	$NetBSD: linux_dma_resv.c,v 1.3 2021/12/19 10:37:47 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.3 2021/12/19 10:37:47 riastradh Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/poll.h>
     37 #include <sys/select.h>
     38 
     39 #include <linux/dma-fence.h>
     40 #include <linux/dma-resv.h>
     41 #include <linux/seqlock.h>
     42 #include <linux/ww_mutex.h>
     43 
     44 DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);
     45 
     46 static struct dma_resv_list *
     47 objlist_tryalloc(uint32_t n)
     48 {
     49 	struct dma_resv_list *list;
     50 
     51 	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
     52 	if (list == NULL)
     53 		return NULL;
     54 	list->shared_max = n;
     55 
     56 	return list;
     57 }
     58 
     59 static void
     60 objlist_free(struct dma_resv_list *list)
     61 {
     62 	uint32_t n = list->shared_max;
     63 
     64 	kmem_free(list, offsetof(typeof(*list), shared[n]));
     65 }
     66 
     67 static void
     68 objlist_free_cb(struct rcu_head *rcu)
     69 {
     70 	struct dma_resv_list *list = container_of(rcu,
     71 	    struct dma_resv_list, rol_rcu);
     72 
     73 	objlist_free(list);
     74 }
     75 
     76 static void
     77 objlist_defer_free(struct dma_resv_list *list)
     78 {
     79 
     80 	call_rcu(&list->rol_rcu, objlist_free_cb);
     81 }
     82 
     83 /*
     84  * dma_resv_init(robj)
     85  *
     86  *	Initialize a reservation object.  Caller must later destroy it
     87  *	with dma_resv_fini.
     88  */
     89 void
     90 dma_resv_init(struct dma_resv *robj)
     91 {
     92 
     93 	ww_mutex_init(&robj->lock, &reservation_ww_class);
     94 	seqcount_init(&robj->seq);
     95 	robj->fence_excl = NULL;
     96 	robj->fence = NULL;
     97 	robj->robj_prealloc = NULL;
     98 }
     99 
    100 /*
    101  * dma_resv_fini(robj)
    102  *
    103  *	Destroy a reservation object, freeing any memory that had been
    104  *	allocated for it.  Caller must have exclusive access to it.
    105  */
    106 void
    107 dma_resv_fini(struct dma_resv *robj)
    108 {
    109 	unsigned i;
    110 
    111 	if (robj->robj_prealloc)
    112 		objlist_free(robj->robj_prealloc);
    113 	if (robj->fence) {
    114 		for (i = 0; i < robj->fence->shared_count; i++)
    115 			dma_fence_put(robj->fence->shared[i]);
    116 		objlist_free(robj->fence);
    117 	}
    118 	if (robj->fence_excl)
    119 		dma_fence_put(robj->fence_excl);
    120 	ww_mutex_destroy(&robj->lock);
    121 }
    122 
    123 /*
    124  * dma_resv_lock(robj, ctx)
    125  *
    126  *	Acquire a reservation object's lock.  Return 0 on success,
    127  *	-EALREADY if caller already holds it, -EDEADLK if a
    128  *	higher-priority owner holds it and the caller must back out and
    129  *	retry.
    130  */
    131 int
    132 dma_resv_lock(struct dma_resv *robj,
    133     struct ww_acquire_ctx *ctx)
    134 {
    135 
    136 	return ww_mutex_lock(&robj->lock, ctx);
    137 }
    138 
    139 /*
    140  * dma_resv_lock_slow(robj, ctx)
    141  *
    142  *	Acquire a reservation object's lock.  Caller must not hold
    143  *	this lock or any others -- this is to be used in slow paths
    144  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    145  *	and the caller has backed out all other locks.
    146  */
    147 void
    148 dma_resv_lock_slow(struct dma_resv *robj,
    149     struct ww_acquire_ctx *ctx)
    150 {
    151 
    152 	ww_mutex_lock_slow(&robj->lock, ctx);
    153 }
    154 
    155 /*
    156  * dma_resv_lock_interruptible(robj, ctx)
    157  *
    158  *	Acquire a reservation object's lock.  Return 0 on success,
    159  *	-EALREADY if caller already holds it, -EDEADLK if a
    160  *	higher-priority owner holds it and the caller must back out and
    161  *	retry, -ERESTART/-EINTR if interrupted.
    162  */
    163 int
    164 dma_resv_lock_interruptible(struct dma_resv *robj,
    165     struct ww_acquire_ctx *ctx)
    166 {
    167 
    168 	return ww_mutex_lock_interruptible(&robj->lock, ctx);
    169 }
    170 
    171 /*
    172  * dma_resv_lock_slow_interruptible(robj, ctx)
    173  *
    174  *	Acquire a reservation object's lock.  Caller must not hold
    175  *	this lock or any others -- this is to be used in slow paths
    176  *	after dma_resv_lock or dma_resv_lock_interruptible has failed
    177  *	and the caller has backed out all other locks.  Return 0 on
    178  *	success, -ERESTART/-EINTR if interrupted.
    179  */
    180 int
    181 dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    182     struct ww_acquire_ctx *ctx)
    183 {
    184 
    185 	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
    186 }
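
/*
 * An illustrative sketch, not part of the original file: the usual
 * wait/wound dance for locking two reservation objects together.  On
 * -EDEADLK the loser backs everything out, reacquires the contended
 * object with dma_resv_lock_slow, and retries in the new order.  The
 * function name is hypothetical.
 */
#if 0	/* example only */
static int
example_lock_pair(struct dma_resv *a, struct dma_resv *b,
    struct ww_acquire_ctx *ctx)
{
	struct dma_resv *tmp;
	int ret;

	KASSERT(a != b);

	ww_acquire_init(ctx, &reservation_ww_class);

	ret = dma_resv_lock(a, ctx);
	if (ret)
		goto fail;
	while ((ret = dma_resv_lock(b, ctx)) == -EDEADLK) {
		/*
		 * Deadlock avoidance kicked in: drop what we hold,
		 * sleep for the contended object, and retry with the
		 * roles swapped so the object we now hold comes first.
		 */
		dma_resv_unlock(a);
		dma_resv_lock_slow(b, ctx);
		tmp = a; a = b; b = tmp;
	}
	if (ret) {
		dma_resv_unlock(a);
		goto fail;
	}

	/* Both are locked; caller unlocks both and calls ww_acquire_fini.  */
	ww_acquire_done(ctx);
	return 0;

fail:	ww_acquire_fini(ctx);
	return ret;
}
#endif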
    187 
    188 /*
    189  * dma_resv_trylock(robj)
    190  *
    191  *	Try to acquire a reservation object's lock without blocking.
    192  *	Return true on success, false on failure.
    193  */
    194 bool
    195 dma_resv_trylock(struct dma_resv *robj)
    196 {
    197 
    198 	return ww_mutex_trylock(&robj->lock);
    199 }
    200 
    201 /*
    202  * dma_resv_unlock(robj)
    203  *
    204  *	Release a reservation object's lock.
    205  */
    206 void
    207 dma_resv_unlock(struct dma_resv *robj)
    208 {
    209 
    210 	return ww_mutex_unlock(&robj->lock);
    211 }
    212 
    213 /*
    214  * dma_resv_held(robj)
    215  *
    216  *	True if robj is locked.
    217  */
    218 bool
    219 dma_resv_held(struct dma_resv *robj)
    220 {
    221 
    222 	return ww_mutex_is_locked(&robj->lock);
    223 }
    224 
    225 /*
    226  * dma_resv_assert_held(robj)
    227  *
    228  *	Panic if robj is not held, in DIAGNOSTIC builds.
    229  */
    230 void
    231 dma_resv_assert_held(struct dma_resv *robj)
    232 {
    233 
    234 	KASSERT(dma_resv_held(robj));
    235 }
    236 
    237 /*
    238  * dma_resv_get_excl(robj)
    239  *
    240  *	Return a pointer to the exclusive fence of the reservation
    241  *	object robj.
    242  *
    243  *	Caller must have robj locked.
    244  */
    245 struct dma_fence *
    246 dma_resv_get_excl(struct dma_resv *robj)
    247 {
    248 
    249 	KASSERT(dma_resv_held(robj));
    250 	return robj->fence_excl;
    251 }
    252 
    253 /*
    254  * dma_resv_get_list(robj)
    255  *
    256  *	Return a pointer to the shared fence list of the reservation
    257  *	object robj.
    258  *
    259  *	Caller must have robj locked.
    260  */
    261 struct dma_resv_list *
    262 dma_resv_get_list(struct dma_resv *robj)
    263 {
    264 
    265 	KASSERT(dma_resv_held(robj));
    266 	return robj->fence;
    267 }
    268 
    269 /*
    270  * dma_resv_reserve_shared(robj)
    271  *
    272  *	Reserve space in robj to add a shared fence.  To be used only
    273  *	once before calling dma_resv_add_shared_fence.
    274  *
    275  *	Caller must have robj locked.
    276  *
    277  *	Internally, we start with room for four entries and double if
    278  *	we don't have enough.  This is not guaranteed.
    279  */
    280 int
    281 dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
    282 {
    283 	struct dma_resv_list *list, *prealloc;
    284 	uint32_t n, nalloc;
    285 
    286 	KASSERT(dma_resv_held(robj));
    287 	KASSERT(num_fences == 1);
    288 
    289 	list = robj->fence;
    290 	prealloc = robj->robj_prealloc;
    291 
    292 	/* If there's an existing list, check it for space.  */
    293 	if (list) {
    294 		/* If there's too many already, give up.  */
    295 		if (list->shared_count == UINT32_MAX)
    296 			return -ENOMEM;
    297 
    298 		/* Add one more. */
    299 		n = list->shared_count + 1;
    300 
    301 		/* If there's enough for one more, we're done.  */
    302 		if (n <= list->shared_max)
    303 			return 0;
    304 	} else {
    305 		/* No list already.  We need space for 1.  */
    306 		n = 1;
    307 	}
    308 
    309 	/* If not, maybe there's a preallocated list ready.  */
    310 	if (prealloc != NULL) {
    311 		/* If there's enough room in it, stop here.  */
    312 		if (n <= prealloc->shared_max)
    313 			return 0;
    314 
    315 		/* Try to double its capacity.  */
    316 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
    317 		prealloc = objlist_tryalloc(nalloc);
    318 		if (prealloc == NULL)
    319 			return -ENOMEM;
    320 
    321 		/* Swap the new preallocated list and free the old one.  */
    322 		objlist_free(robj->robj_prealloc);
    323 		robj->robj_prealloc = prealloc;
    324 	} else {
    325 		/* Start with some spare.  */
    326 		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
    327 		prealloc = objlist_tryalloc(nalloc);
    328 		if (prealloc == NULL)
    329 			return -ENOMEM;
    330 		/* Save the new preallocated list.  */
    331 		robj->robj_prealloc = prealloc;
    332 	}
    333 
    334 	/* Success!  */
    335 	return 0;
    336 }
    337 
    338 struct dma_resv_write_ticket {
    339 };
    340 
    341 /*
    342  * dma_resv_write_begin(robj, ticket)
    343  *
    344  *	Begin an atomic batch of writes to robj, and initialize opaque
    345  *	ticket for it.  The ticket must be passed to
    346  *	dma_resv_write_commit to commit the writes.
    347  *
    348  *	Caller must have robj locked.
    349  *
    350  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    351  *	NOT serve as an acquire operation, however.
    352  */
    353 static void
    354 dma_resv_write_begin(struct dma_resv *robj,
    355     struct dma_resv_write_ticket *ticket)
    356 {
    357 
    358 	KASSERT(dma_resv_held(robj));
    359 
    360 	write_seqcount_begin(&robj->seq);
    361 }
    362 
    363 /*
    364  * dma_resv_write_commit(robj, ticket)
    365  *
    366  *	Commit an atomic batch of writes to robj begun with the call to
    367  *	dma_resv_write_begin that returned ticket.
    368  *
    369  *	Caller must have robj locked.
    370  *
    371  *	Implies membar_producer, i.e. store-before-store barrier.  Does
    372  *	NOT serve as a release operation, however.
    373  */
    374 static void
    375 dma_resv_write_commit(struct dma_resv *robj,
    376     struct dma_resv_write_ticket *ticket)
    377 {
    378 
    379 	KASSERT(dma_resv_held(robj));
    380 
    381 	write_seqcount_end(&robj->seq);
    382 }
    383 
    384 struct dma_resv_read_ticket {
    385 	unsigned version;
    386 };
    387 
    388 /*
    389  * dma_resv_read_begin(robj, ticket)
    390  *
    391  *	Begin a read section, and initialize opaque ticket for it.  The
     392  *	ticket must be passed to dma_resv_read_valid, and the
    393  *	caller must be prepared to retry reading if it fails.
    394  */
    395 static void
    396 dma_resv_read_begin(const struct dma_resv *robj,
    397     struct dma_resv_read_ticket *ticket)
    398 {
    399 
    400 	ticket->version = read_seqcount_begin(&robj->seq);
    401 }
    402 
    403 /*
    404  * dma_resv_read_valid(robj, ticket)
    405  *
     406  *	Test whether the read section begun with ticket is still
     407  *	valid.  Return true if so, or false if the read ticket has
     408  *	been invalidated and the caller must retry the read.
    409  */
    410 static bool
    411 dma_resv_read_valid(const struct dma_resv *robj,
    412     struct dma_resv_read_ticket *ticket)
    413 {
    414 
    415 	return !read_seqcount_retry(&robj->seq, ticket->version);
    416 }
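
/*
 * An illustrative sketch, not part of the original file: the retry
 * idiom the readers below build out of a read ticket, here used to
 * take a consistent snapshot of the shared fence count without
 * taking the lock.  The function name is hypothetical.
 */
#if 0	/* example only */
static uint32_t
example_shared_count(const struct dma_resv *robj)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	uint32_t count;

	rcu_read_lock();
	do {
		dma_resv_read_begin(robj, &ticket);
		list = robj->fence;
		__insn_barrier();
		if (list == NULL) {
			count = 0;
		} else {
			/* Pairs with the writer's membar_producer.  */
			membar_datadep_consumer();
			count = list->shared_count;
		}
	} while (!dma_resv_read_valid(robj, &ticket));
	rcu_read_unlock();

	return count;
}
#endif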
    417 
    418 /*
    419  * dma_resv_add_excl_fence(robj, fence)
    420  *
    421  *	Empty and release all of robj's shared fences, and clear and
    422  *	release its exclusive fence.  If fence is nonnull, acquire a
    423  *	reference to it and save it as robj's exclusive fence.
    424  *
    425  *	Caller must have robj locked.
    426  */
    427 void
    428 dma_resv_add_excl_fence(struct dma_resv *robj,
    429     struct dma_fence *fence)
    430 {
    431 	struct dma_fence *old_fence = robj->fence_excl;
    432 	struct dma_resv_list *old_list = robj->fence;
    433 	uint32_t old_shared_count;
    434 	struct dma_resv_write_ticket ticket;
    435 
    436 	KASSERT(dma_resv_held(robj));
    437 
    438 	/*
    439 	 * If we are setting rather than just removing a fence, acquire
    440 	 * a reference for ourselves.
    441 	 */
    442 	if (fence)
    443 		(void)dma_fence_get(fence);
    444 
    445 	/* If there are any shared fences, remember how many.  */
    446 	if (old_list)
    447 		old_shared_count = old_list->shared_count;
    448 
    449 	/* Begin an update.  */
    450 	dma_resv_write_begin(robj, &ticket);
    451 
    452 	/* Replace the fence and zero the shared count.  */
    453 	robj->fence_excl = fence;
    454 	if (old_list)
    455 		old_list->shared_count = 0;
    456 
    457 	/* Commit the update.  */
    458 	dma_resv_write_commit(robj, &ticket);
    459 
    460 	/* Release the old exclusive fence, if any.  */
    461 	if (old_fence)
    462 		dma_fence_put(old_fence);
    463 
    464 	/* Release any old shared fences.  */
    465 	if (old_list) {
    466 		while (old_shared_count--)
    467 			dma_fence_put(old_list->shared[old_shared_count]);
    468 	}
    469 }
    470 
    471 /*
    472  * dma_resv_add_shared_fence(robj, fence)
    473  *
    474  *	Acquire a reference to fence and add it to robj's shared list.
    475  *	If any fence was already added with the same context number,
    476  *	release it and replace it by this one.
    477  *
    478  *	Caller must have robj locked, and must have preceded with a
    479  *	call to dma_resv_reserve_shared for each shared fence
    480  *	added.
    481  */
    482 void
    483 dma_resv_add_shared_fence(struct dma_resv *robj,
    484     struct dma_fence *fence)
    485 {
    486 	struct dma_resv_list *list = robj->fence;
    487 	struct dma_resv_list *prealloc = robj->robj_prealloc;
    488 	struct dma_resv_write_ticket ticket;
    489 	struct dma_fence *replace = NULL;
    490 	uint32_t i;
    491 
    492 	KASSERT(dma_resv_held(robj));
    493 
    494 	/* Acquire a reference to the fence.  */
    495 	KASSERT(fence != NULL);
    496 	(void)dma_fence_get(fence);
    497 
    498 	/* Check for a preallocated replacement list.  */
    499 	if (prealloc == NULL) {
    500 		/*
    501 		 * If there is no preallocated replacement list, then
    502 		 * there must be room in the current list.
    503 		 */
    504 		KASSERT(list != NULL);
    505 		KASSERT(list->shared_count < list->shared_max);
    506 
    507 		/* Begin an update.  Implies membar_producer for fence.  */
    508 		dma_resv_write_begin(robj, &ticket);
    509 
    510 		/* Find a fence with the same context number.  */
    511 		for (i = 0; i < list->shared_count; i++) {
    512 			if (list->shared[i]->context == fence->context) {
    513 				replace = list->shared[i];
    514 				list->shared[i] = fence;
    515 				break;
    516 			}
    517 		}
    518 
    519 		/* If we didn't find one, add it at the end.  */
    520 		if (i == list->shared_count)
    521 			list->shared[list->shared_count++] = fence;
    522 
    523 		/* Commit the update.  */
    524 		dma_resv_write_commit(robj, &ticket);
    525 	} else {
    526 		/*
    527 		 * There is a preallocated replacement list.  There may
    528 		 * not be a current list.  If not, treat it as a zero-
    529 		 * length list.
    530 		 */
    531 		uint32_t shared_count = (list == NULL? 0 : list->shared_count);
    532 
    533 		/* There had better be room in the preallocated list.  */
    534 		KASSERT(shared_count < prealloc->shared_max);
    535 
    536 		/*
    537 		 * Copy the fences over, but replace if we find one
    538 		 * with the same context number.
    539 		 */
    540 		for (i = 0; i < shared_count; i++) {
    541 			if (replace == NULL &&
    542 			    list->shared[i]->context == fence->context) {
    543 				replace = list->shared[i];
    544 				prealloc->shared[i] = fence;
    545 			} else {
    546 				prealloc->shared[i] = list->shared[i];
    547 			}
    548 		}
    549 		prealloc->shared_count = shared_count;
    550 
    551 		/* If we didn't find one, add it at the end.  */
    552 		if (replace == NULL)
    553 			prealloc->shared[prealloc->shared_count++] = fence;
    554 
    555 		/*
    556 		 * Now ready to replace the list.  Begin an update.
    557 		 * Implies membar_producer for fence and prealloc.
    558 		 */
    559 		dma_resv_write_begin(robj, &ticket);
    560 
    561 		/* Replace the list.  */
    562 		robj->fence = prealloc;
    563 		robj->robj_prealloc = NULL;
    564 
    565 		/* Commit the update.  */
    566 		dma_resv_write_commit(robj, &ticket);
    567 
    568 		/*
    569 		 * If there is an old list, free it when convenient.
    570 		 * (We are not in a position at this point to sleep
    571 		 * waiting for activity on all CPUs.)
    572 		 */
    573 		if (list)
    574 			objlist_defer_free(list);
    575 	}
    576 
    577 	/* Release a fence if we replaced it.  */
    578 	if (replace)
    579 		dma_fence_put(replace);
    580 }
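
/*
 * An illustrative sketch, not part of the original file: a driver
 * publishing a job's fence on a buffer's reservation, exclusive for
 * writers (which also drops the shared fences), shared for readers.
 * The function name and the write flag are hypothetical.
 */
#if 0	/* example only */
static int
example_publish_fence(struct dma_resv *robj, struct dma_fence *fence,
    bool write)
{
	int ret;

	KASSERT(dma_resv_held(robj));

	if (write) {
		dma_resv_add_excl_fence(robj, fence);
		return 0;
	}

	/* Readers must reserve a shared slot before adding.  */
	ret = dma_resv_reserve_shared(robj, 1);
	if (ret)
		return ret;	/* -ENOMEM */
	dma_resv_add_shared_fence(robj, fence);
	return 0;
}
#endif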
    581 
    582 /*
    583  * dma_resv_get_excl_rcu(robj)
    584  *
    585  *	Note: Caller need not call this from an RCU read section.
    586  */
    587 struct dma_fence *
    588 dma_resv_get_excl_rcu(const struct dma_resv *robj)
    589 {
    590 	struct dma_fence *fence;
    591 
    592 	rcu_read_lock();
    593 	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
    594 	rcu_read_unlock();
    595 
    596 	return fence;
    597 }
    598 
     599 /*
     600  * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
          *
          *	Get a snapshot of the fences of robj: return the exclusive
          *	fence, if any, in *fencep, and the shared fences in *sharedp,
          *	an array of *nsharedp elements allocated with kcalloc.
          *	References are acquired to all returned fences; the caller
          *	must release them and kfree the array.  Return 0 on success,
          *	-ENOMEM on allocation failure.
     601  */
    602 int
    603 dma_resv_get_fences_rcu(const struct dma_resv *robj,
    604     struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
    605 {
    606 	const struct dma_resv_list *list;
    607 	struct dma_fence *fence;
    608 	struct dma_fence **shared = NULL;
    609 	unsigned shared_alloc, shared_count, i;
    610 	struct dma_resv_read_ticket ticket;
    611 
    612 top:
    613 	/* Enter an RCU read section and get a read ticket.  */
    614 	rcu_read_lock();
    615 	dma_resv_read_begin(robj, &ticket);
    616 
    617 	/* If there is a shared list, grab it.  */
    618 	list = robj->fence;
    619 	__insn_barrier();
    620 	if (list) {
    621 		/* Make sure the content of the list has been published.  */
    622 		membar_datadep_consumer();
    623 
    624 		/* Check whether we have a buffer.  */
    625 		if (shared == NULL) {
    626 			/*
    627 			 * We don't have a buffer yet.  Try to allocate
    628 			 * one without waiting.
    629 			 */
    630 			shared_alloc = list->shared_max;
    631 			__insn_barrier();
    632 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    633 			    GFP_NOWAIT);
    634 			if (shared == NULL) {
    635 				/*
    636 				 * Couldn't do it immediately.  Back
    637 				 * out of RCU and allocate one with
    638 				 * waiting.
    639 				 */
    640 				rcu_read_unlock();
    641 				shared = kcalloc(shared_alloc,
    642 				    sizeof(shared[0]), GFP_KERNEL);
    643 				if (shared == NULL)
    644 					return -ENOMEM;
    645 				goto top;
    646 			}
    647 		} else if (shared_alloc < list->shared_max) {
    648 			/*
    649 			 * We have a buffer but it's too small.  We're
    650 			 * already racing in this case, so just back
    651 			 * out and wait to allocate a bigger one.
    652 			 */
    653 			shared_alloc = list->shared_max;
    654 			__insn_barrier();
    655 			rcu_read_unlock();
    656 			kfree(shared);
    657 			shared = kcalloc(shared_alloc, sizeof(shared[0]),
    658 			    GFP_KERNEL);
     659 			if (shared == NULL)
     660 				return -ENOMEM;
         			goto top;
     661 		}
    662 
    663 		/*
    664 		 * We got a buffer large enough.  Copy into the buffer
    665 		 * and record the number of elements.
    666 		 */
    667 		memcpy(shared, list->shared, shared_alloc * sizeof(shared[0]));
    668 		shared_count = list->shared_count;
    669 	} else {
    670 		/* No shared list: shared count is zero.  */
    671 		shared_count = 0;
    672 	}
    673 
    674 	/* If there is an exclusive fence, grab it.  */
    675 	fence = robj->fence_excl;
    676 	__insn_barrier();
    677 	if (fence) {
    678 		/* Make sure the content of the fence has been published.  */
    679 		membar_datadep_consumer();
    680 	}
    681 
    682 	/*
    683 	 * We are done reading from robj and list.  Validate our
    684 	 * parking ticket.  If it's invalid, do not pass go and do not
    685 	 * collect $200.
    686 	 */
    687 	if (!dma_resv_read_valid(robj, &ticket))
    688 		goto restart;
    689 
    690 	/*
    691 	 * Try to get a reference to the exclusive fence, if there is
    692 	 * one.  If we can't, start over.
    693 	 */
    694 	if (fence) {
    695 		if (dma_fence_get_rcu(fence) == NULL)
    696 			goto restart;
    697 	}
    698 
    699 	/*
    700 	 * Try to get a reference to all of the shared fences.
    701 	 */
    702 	for (i = 0; i < shared_count; i++) {
    703 		if (dma_fence_get_rcu(shared[i]) == NULL)
    704 			goto put_restart;
    705 	}
    706 
    707 	/* Success!  */
    708 	rcu_read_unlock();
    709 	*fencep = fence;
    710 	*nsharedp = shared_count;
    711 	*sharedp = shared;
    712 	return 0;
    713 
    714 put_restart:
    715 	/* Back out.  */
    716 	while (i --> 0) {
    717 		dma_fence_put(shared[i]);
    718 		shared[i] = NULL; /* paranoia */
    719 	}
    720 	if (fence) {
    721 		dma_fence_put(fence);
    722 		fence = NULL;	/* paranoia */
    723 	}
    724 
    725 restart:
    726 	rcu_read_unlock();
    727 	goto top;
    728 }
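
/*
 * An illustrative sketch, not part of the original file: snapshotting
 * all of a reservation's fences with dma_resv_get_fences_rcu, waiting
 * for them, and releasing the references and the array.  The function
 * name is hypothetical.
 */
#if 0	/* example only */
static int
example_wait_all_fences(const struct dma_resv *robj)
{
	struct dma_fence *excl = NULL, **shared = NULL;
	unsigned nshared = 0, i;
	int ret;

	ret = dma_resv_get_fences_rcu(robj, &excl, &nshared, &shared);
	if (ret)
		return ret;

	for (i = 0; i < nshared; i++) {
		(void)dma_fence_wait(shared[i], /*intr*/false);
		dma_fence_put(shared[i]);
	}
	if (excl) {
		(void)dma_fence_wait(excl, /*intr*/false);
		dma_fence_put(excl);
	}
	kfree(shared);

	return 0;
}
#endif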
    729 
    730 /*
    731  * dma_resv_copy_fences(dst, src)
    732  *
    733  *	Copy the exclusive fence and all the shared fences from src to
    734  *	dst.
    735  *
    736  *	Caller must have dst locked.
    737  */
    738 int
    739 dma_resv_copy_fences(struct dma_resv *dst_robj,
    740     const struct dma_resv *src_robj)
    741 {
    742 	const struct dma_resv_list *src_list;
    743 	struct dma_resv_list *dst_list = NULL;
    744 	struct dma_resv_list *old_list;
    745 	struct dma_fence *fence = NULL;
    746 	struct dma_fence *old_fence;
    747 	uint32_t shared_count, i;
    748 	struct dma_resv_read_ticket read_ticket;
    749 	struct dma_resv_write_ticket write_ticket;
    750 
    751 	KASSERT(dma_resv_held(dst_robj));
    752 
    753 top:
    754 	/* Enter an RCU read section and get a read ticket.  */
    755 	rcu_read_lock();
    756 	dma_resv_read_begin(src_robj, &read_ticket);
    757 
    758 	/* Get the shared list.  */
    759 	src_list = src_robj->fence;
    760 	__insn_barrier();
    761 	if (src_list) {
    762 		/* Make sure the content of the list has been published.  */
    763 		membar_datadep_consumer();
    764 
    765 		/* Find out how long it is.  */
    766 		shared_count = src_list->shared_count;
    767 
    768 		/*
    769 		 * Make sure we saw a consistent snapshot of the list
    770 		 * pointer and length.
    771 		 */
    772 		if (!dma_resv_read_valid(src_robj, &read_ticket))
    773 			goto restart;
    774 
    775 		/* Allocate a new list.  */
    776 		dst_list = objlist_tryalloc(shared_count);
    777 		if (dst_list == NULL)
    778 			return -ENOMEM;
    779 
    780 		/* Copy over all fences that are not yet signalled.  */
    781 		dst_list->shared_count = 0;
    782 		for (i = 0; i < shared_count; i++) {
    783 			if ((fence = dma_fence_get_rcu(src_list->shared[i]))
     784 			    == NULL)
    785 				goto restart;
    786 			if (dma_fence_is_signaled(fence)) {
    787 				dma_fence_put(fence);
    788 				fence = NULL;
    789 				continue;
    790 			}
    791 			dst_list->shared[dst_list->shared_count++] = fence;
    792 			fence = NULL;
    793 		}
    794 	}
    795 
    796 	/* Get the exclusive fence.  */
    797 	fence = src_robj->fence_excl;
    798 	__insn_barrier();
    799 	if (fence != NULL) {
    800 		/* Make sure the content of the fence has been published.  */
    801 		membar_datadep_consumer();
    802 
    803 		/*
    804 		 * Make sure we saw a consistent snapshot of the fence.
    805 		 *
    806 		 * XXX I'm not actually sure this is necessary since
    807 		 * pointer writes are supposed to be atomic.
    808 		 */
    809 		if (!dma_resv_read_valid(src_robj, &read_ticket)) {
    810 			fence = NULL;
    811 			goto restart;
    812 		}
    813 
    814 		/*
    815 		 * If it is going away, restart.  Otherwise, acquire a
    816 		 * reference to it.
    817 		 */
    818 		if (!dma_fence_get_rcu(fence)) {
    819 			fence = NULL;
    820 			goto restart;
    821 		}
    822 	}
    823 
    824 	/* All done with src; exit the RCU read section.  */
    825 	rcu_read_unlock();
    826 
    827 	/*
    828 	 * We now have a snapshot of the shared and exclusive fences of
    829 	 * src_robj and we have acquired references to them so they
    830 	 * won't go away.  Transfer them over to dst_robj, releasing
    831 	 * references to any that were there.
    832 	 */
    833 
    834 	/* Get the old shared and exclusive fences, if any.  */
    835 	old_list = dst_robj->fence;
    836 	old_fence = dst_robj->fence_excl;
    837 
    838 	/* Begin an update.  */
    839 	dma_resv_write_begin(dst_robj, &write_ticket);
    840 
    841 	/* Replace the fences.  */
    842 	dst_robj->fence = dst_list;
    843 	dst_robj->fence_excl = fence;
    844 
    845 	/* Commit the update.  */
    846 	dma_resv_write_commit(dst_robj, &write_ticket);
    847 
    848 	/* Release the old exclusive fence, if any.  */
    849 	if (old_fence)
    850 		dma_fence_put(old_fence);
    851 
    852 	/* Release any old shared fences.  */
    853 	if (old_list) {
    854 		for (i = old_list->shared_count; i --> 0;)
    855 			dma_fence_put(old_list->shared[i]);
    856 	}
    857 
    858 	/* Success!  */
    859 	return 0;
    860 
    861 restart:
    862 	rcu_read_unlock();
    863 	if (dst_list) {
    864 		for (i = dst_list->shared_count; i --> 0;) {
    865 			dma_fence_put(dst_list->shared[i]);
    866 			dst_list->shared[i] = NULL;
    867 		}
    868 		objlist_free(dst_list);
    869 		dst_list = NULL;
    870 	}
    871 	if (fence) {
    872 		dma_fence_put(fence);
    873 		fence = NULL;
    874 	}
    875 	goto top;
    876 }
    877 
    878 /*
    879  * dma_resv_test_signaled_rcu(robj, shared)
    880  *
    881  *	If shared is true, test whether all of the shared fences are
    882  *	signalled, or if there are none, test whether the exclusive
    883  *	fence is signalled.  If shared is false, test only whether the
    884  *	exclusive fence is signalled.
    885  *
     886  *	XXX Why should this, when shared is true, test the exclusive
     887  *	fence only if there are no shared fences?  That makes no sense.
    888  */
    889 bool
    890 dma_resv_test_signaled_rcu(const struct dma_resv *robj,
    891     bool shared)
    892 {
    893 	struct dma_resv_read_ticket ticket;
    894 	struct dma_resv_list *list;
    895 	struct dma_fence *fence;
    896 	uint32_t i, shared_count;
    897 	bool signaled = true;
    898 
    899 top:
    900 	/* Enter an RCU read section and get a read ticket.  */
    901 	rcu_read_lock();
    902 	dma_resv_read_begin(robj, &ticket);
    903 
    904 	/* If shared is requested and there is a shared list, test it.  */
    905 	if (!shared)
    906 		goto excl;
    907 	list = robj->fence;
    908 	__insn_barrier();
    909 	if (list) {
    910 		/* Make sure the content of the list has been published.  */
    911 		membar_datadep_consumer();
    912 
    913 		/* Find out how long it is.  */
    914 		shared_count = list->shared_count;
    915 
    916 		/*
    917 		 * Make sure we saw a consistent snapshot of the list
    918 		 * pointer and length.
    919 		 */
    920 		if (!dma_resv_read_valid(robj, &ticket))
    921 			goto restart;
    922 
    923 		/*
    924 		 * For each fence, if it is going away, restart.
    925 		 * Otherwise, acquire a reference to it to test whether
    926 		 * it is signalled.  Stop if we find any that is not
    927 		 * signalled.
    928 		 */
    929 		for (i = 0; i < shared_count; i++) {
    930 			fence = dma_fence_get_rcu(list->shared[i]);
    931 			if (fence == NULL)
    932 				goto restart;
    933 			signaled &= dma_fence_is_signaled(fence);
    934 			dma_fence_put(fence);
    935 			if (!signaled)
    936 				goto out;
    937 		}
    938 	}
    939 
    940 excl:
    941 	/* If there is an exclusive fence, test it.  */
    942 	fence = robj->fence_excl;
    943 	__insn_barrier();
    944 	if (fence) {
    945 		/* Make sure the content of the fence has been published.  */
    946 		membar_datadep_consumer();
    947 
    948 		/*
    949 		 * Make sure we saw a consistent snapshot of the fence.
    950 		 *
    951 		 * XXX I'm not actually sure this is necessary since
    952 		 * pointer writes are supposed to be atomic.
    953 		 */
    954 		if (!dma_resv_read_valid(robj, &ticket))
    955 			goto restart;
    956 
    957 		/*
    958 		 * If it is going away, restart.  Otherwise, acquire a
    959 		 * reference to it to test whether it is signalled.
    960 		 */
    961 		if ((fence = dma_fence_get_rcu(fence)) == NULL)
    962 			goto restart;
    963 		signaled &= dma_fence_is_signaled(fence);
    964 		dma_fence_put(fence);
    965 		if (!signaled)
    966 			goto out;
    967 	}
    968 
    969 out:	rcu_read_unlock();
    970 	return signaled;
    971 
    972 restart:
    973 	rcu_read_unlock();
    974 	goto top;
    975 }
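
/*
 * An illustrative sketch, not part of the original file: a GEM_BUSY
 * style query that reports whether a buffer still has unsignalled
 * fences, without blocking and without taking the lock.
 */
#if 0	/* example only */
static bool
example_busy(const struct dma_resv *robj, bool check_readers)
{

	return !dma_resv_test_signaled_rcu(robj, /*shared*/check_readers);
}
#endif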
    976 
    977 /*
    978  * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
    979  *
    980  *	If shared is true, wait for all of the shared fences to be
    981  *	signalled, or if there are none, wait for the exclusive fence
    982  *	to be signalled.  If shared is false, wait only for the
    983  *	exclusive fence to be signalled.  If timeout is zero, don't
    984  *	wait, only test.
    985  *
     986  *	XXX Why should this, when shared is true, wait for the
     987  *	exclusive fence only if there are no shared fences?  That
     988  *	makes no sense.
    989  */
    990 long
    991 dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
    992     bool shared, bool intr, unsigned long timeout)
    993 {
    994 	struct dma_resv_read_ticket ticket;
    995 	struct dma_resv_list *list;
    996 	struct dma_fence *fence;
    997 	uint32_t i, shared_count;
    998 	long ret;
    999 
   1000 	if (timeout == 0)
   1001 		return dma_resv_test_signaled_rcu(robj, shared);
   1002 
   1003 top:
   1004 	/* Enter an RCU read section and get a read ticket.  */
   1005 	rcu_read_lock();
   1006 	dma_resv_read_begin(robj, &ticket);
   1007 
   1008 	/* If shared is requested and there is a shared list, wait on it.  */
   1009 	if (!shared)
   1010 		goto excl;
   1011 	list = robj->fence;
   1012 	__insn_barrier();
   1013 	if (list) {
   1014 		/* Make sure the content of the list has been published.  */
   1015 		membar_datadep_consumer();
   1016 
   1017 		/* Find out how long it is.  */
   1018 		shared_count = list->shared_count;
   1019 
   1020 		/*
   1021 		 * Make sure we saw a consistent snapshot of the list
   1022 		 * pointer and length.
   1023 		 */
   1024 		if (!dma_resv_read_valid(robj, &ticket))
   1025 			goto restart;
   1026 
   1027 		/*
   1028 		 * For each fence, if it is going away, restart.
   1029 		 * Otherwise, acquire a reference to it to test whether
   1030 		 * it is signalled.  Stop and wait if we find any that
   1031 		 * is not signalled.
   1032 		 */
   1033 		for (i = 0; i < shared_count; i++) {
   1034 			fence = dma_fence_get_rcu(list->shared[i]);
   1035 			if (fence == NULL)
   1036 				goto restart;
   1037 			if (!dma_fence_is_signaled(fence))
   1038 				goto wait;
   1039 			dma_fence_put(fence);
   1040 		}
   1041 	}
   1042 
   1043 excl:
   1044 	/* If there is an exclusive fence, test it.  */
   1045 	fence = robj->fence_excl;
   1046 	__insn_barrier();
   1047 	if (fence) {
   1048 		/* Make sure the content of the fence has been published.  */
   1049 		membar_datadep_consumer();
   1050 
   1051 		/*
   1052 		 * Make sure we saw a consistent snapshot of the fence.
   1053 		 *
   1054 		 * XXX I'm not actually sure this is necessary since
   1055 		 * pointer writes are supposed to be atomic.
   1056 		 */
   1057 		if (!dma_resv_read_valid(robj, &ticket))
   1058 			goto restart;
   1059 
   1060 		/*
   1061 		 * If it is going away, restart.  Otherwise, acquire a
   1062 		 * reference to it to test whether it is signalled.  If
   1063 		 * not, wait for it.
   1064 		 */
   1065 		if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1066 			goto restart;
   1067 		if (!dma_fence_is_signaled(fence))
   1068 			goto wait;
   1069 		dma_fence_put(fence);
   1070 	}
   1071 
   1072 	/* Success!  Return the number of ticks left.  */
   1073 	rcu_read_unlock();
   1074 	return timeout;
   1075 
   1076 restart:
   1077 	rcu_read_unlock();
   1078 	goto top;
   1079 
   1080 wait:
   1081 	/*
   1082 	 * Exit the RCU read section and wait for it.  If we time out
   1083 	 * or fail, bail.  Otherwise, go back to the top.
   1084 	 */
   1085 	KASSERT(fence != NULL);
   1086 	rcu_read_unlock();
   1087 	ret = dma_fence_wait_timeout(fence, intr, timeout);
   1088 	dma_fence_put(fence);
   1089 	if (ret <= 0)
   1090 		return ret;
   1091 	KASSERT(ret <= timeout);
   1092 	timeout = ret;
   1093 	goto top;
   1094 }
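
/*
 * An illustrative sketch, not part of the original file: waiting up
 * to a caller-supplied number of ticks for a buffer to go idle before
 * CPU access, interruptibly.  The function name is hypothetical.
 */
#if 0	/* example only */
static int
example_wait_idle(const struct dma_resv *robj, unsigned long timeout_ticks)
{
	long ret;

	ret = dma_resv_wait_timeout_rcu(robj, /*shared*/true, /*intr*/true,
	    timeout_ticks);
	if (ret < 0)
		return ret;	/* -ERESTART/-EINTR or other error */
	if (ret == 0)
		return -EBUSY;	/* timed out */
	return 0;		/* idle; ret ticks of the timeout remain */
}
#endif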
   1095 
   1096 /*
    1097  * dma_resv_poll_init(rpoll)
   1098  *
   1099  *	Initialize reservation poll state.
   1100  */
   1101 void
   1102 dma_resv_poll_init(struct dma_resv_poll *rpoll)
   1103 {
   1104 
   1105 	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
   1106 	selinit(&rpoll->rp_selq);
   1107 	rpoll->rp_claimed = 0;
   1108 }
   1109 
   1110 /*
   1111  * dma_resv_poll_fini(rpoll)
   1112  *
   1113  *	Release any resource associated with reservation poll state.
   1114  */
   1115 void
   1116 dma_resv_poll_fini(struct dma_resv_poll *rpoll)
   1117 {
   1118 
   1119 	KASSERT(rpoll->rp_claimed == 0);
   1120 	seldestroy(&rpoll->rp_selq);
   1121 	mutex_destroy(&rpoll->rp_lock);
   1122 }
   1123 
   1124 /*
   1125  * dma_resv_poll_cb(fence, fcb)
   1126  *
   1127  *	Callback to notify a reservation poll that a fence has
   1128  *	completed.  Notify any waiters and allow the next poller to
   1129  *	claim the callback.
   1130  *
   1131  *	If one thread is waiting for the exclusive fence only, and we
   1132  *	spuriously notify them about a shared fence, tough.
   1133  */
   1134 static void
   1135 dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
   1136 {
   1137 	struct dma_resv_poll *rpoll = container_of(fcb,
   1138 	    struct dma_resv_poll, rp_fcb);
   1139 
   1140 	mutex_enter(&rpoll->rp_lock);
   1141 	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
   1142 	rpoll->rp_claimed = 0;
   1143 	mutex_exit(&rpoll->rp_lock);
   1144 }
   1145 
   1146 /*
   1147  * dma_resv_do_poll(robj, events, rpoll)
   1148  *
   1149  *	Poll for reservation object events using the reservation poll
   1150  *	state in rpoll:
   1151  *
   1152  *	- POLLOUT	wait for all fences shared and exclusive
   1153  *	- POLLIN	wait for the exclusive fence
   1154  *
   1155  *	Return the subset of events in events that are ready.  If any
   1156  *	are requested but not ready, arrange to be notified with
   1157  *	selnotify when they are.
   1158  */
   1159 int
   1160 dma_resv_do_poll(const struct dma_resv *robj, int events,
   1161     struct dma_resv_poll *rpoll)
   1162 {
   1163 	struct dma_resv_read_ticket ticket;
   1164 	struct dma_resv_list *list;
   1165 	struct dma_fence *fence;
   1166 	uint32_t i, shared_count;
   1167 	int revents;
   1168 	bool recorded = false;	/* curlwp is on the selq */
   1169 	bool claimed = false;	/* we claimed the callback */
   1170 	bool callback = false;	/* we requested a callback */
   1171 
   1172 	/*
   1173 	 * Start with the maximal set of events that could be ready.
   1174 	 * We will eliminate the events that are definitely not ready
   1175 	 * as we go at the same time as we add callbacks to notify us
   1176 	 * that they may be ready.
   1177 	 */
   1178 	revents = events & (POLLIN|POLLOUT);
   1179 	if (revents == 0)
   1180 		return 0;
   1181 
   1182 top:
   1183 	/* Enter an RCU read section and get a read ticket.  */
   1184 	rcu_read_lock();
   1185 	dma_resv_read_begin(robj, &ticket);
   1186 
   1187 	/* If we want to wait for all fences, get the shared list.  */
   1188 	if (!(events & POLLOUT))
   1189 		goto excl;
   1190 	list = robj->fence;
   1191 	__insn_barrier();
   1192 	if (list) do {
   1193 		/* Make sure the content of the list has been published.  */
   1194 		membar_datadep_consumer();
   1195 
   1196 		/* Find out how long it is.  */
   1197 		shared_count = list->shared_count;
   1198 
   1199 		/*
   1200 		 * Make sure we saw a consistent snapshot of the list
   1201 		 * pointer and length.
   1202 		 */
   1203 		if (!dma_resv_read_valid(robj, &ticket))
   1204 			goto restart;
   1205 
   1206 		/*
   1207 		 * For each fence, if it is going away, restart.
   1208 		 * Otherwise, acquire a reference to it to test whether
   1209 		 * it is signalled.  Stop and request a callback if we
   1210 		 * find any that is not signalled.
   1211 		 */
   1212 		for (i = 0; i < shared_count; i++) {
   1213 			fence = dma_fence_get_rcu(list->shared[i]);
   1214 			if (fence == NULL)
   1215 				goto restart;
   1216 			if (!dma_fence_is_signaled(fence)) {
   1217 				dma_fence_put(fence);
   1218 				break;
   1219 			}
   1220 			dma_fence_put(fence);
   1221 		}
   1222 
   1223 		/* If all shared fences have been signalled, move on.  */
   1224 		if (i == shared_count)
   1225 			break;
   1226 
   1227 		/* Put ourselves on the selq if we haven't already.  */
   1228 		if (!recorded)
   1229 			goto record;
   1230 
   1231 		/*
   1232 		 * If someone else claimed the callback, or we already
   1233 		 * requested it, we're guaranteed to be notified, so
   1234 		 * assume the event is not ready.
   1235 		 */
   1236 		if (!claimed || callback) {
   1237 			revents &= ~POLLOUT;
   1238 			break;
   1239 		}
   1240 
   1241 		/*
   1242 		 * Otherwise, find the first fence that is not
   1243 		 * signalled, request the callback, and clear POLLOUT
   1244 		 * from the possible ready events.  If they are all
   1245 		 * signalled, leave POLLOUT set; we will simulate the
   1246 		 * callback later.
   1247 		 */
   1248 		for (i = 0; i < shared_count; i++) {
   1249 			fence = dma_fence_get_rcu(list->shared[i]);
   1250 			if (fence == NULL)
   1251 				goto restart;
   1252 			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1253 				dma_resv_poll_cb)) {
   1254 				dma_fence_put(fence);
   1255 				revents &= ~POLLOUT;
   1256 				callback = true;
   1257 				break;
   1258 			}
   1259 			dma_fence_put(fence);
   1260 		}
   1261 	} while (0);
   1262 
   1263 excl:
   1264 	/* We always wait for at least the exclusive fence, so get it.  */
   1265 	fence = robj->fence_excl;
   1266 	__insn_barrier();
   1267 	if (fence) do {
   1268 		/* Make sure the content of the fence has been published.  */
   1269 		membar_datadep_consumer();
   1270 
   1271 		/*
   1272 		 * Make sure we saw a consistent snapshot of the fence.
   1273 		 *
   1274 		 * XXX I'm not actually sure this is necessary since
   1275 		 * pointer writes are supposed to be atomic.
   1276 		 */
   1277 		if (!dma_resv_read_valid(robj, &ticket))
   1278 			goto restart;
   1279 
   1280 		/*
   1281 		 * If it is going away, restart.  Otherwise, acquire a
   1282 		 * reference to it to test whether it is signalled.  If
   1283 		 * not, stop and request a callback.
   1284 		 */
   1285 		if ((fence = dma_fence_get_rcu(fence)) == NULL)
   1286 			goto restart;
   1287 		if (dma_fence_is_signaled(fence)) {
   1288 			dma_fence_put(fence);
   1289 			break;
   1290 		}
   1291 
   1292 		/* Put ourselves on the selq if we haven't already.  */
   1293 		if (!recorded) {
   1294 			dma_fence_put(fence);
   1295 			goto record;
   1296 		}
   1297 
   1298 		/*
   1299 		 * If someone else claimed the callback, or we already
   1300 		 * requested it, we're guaranteed to be notified, so
   1301 		 * assume the event is not ready.
   1302 		 */
   1303 		if (!claimed || callback) {
   1304 			dma_fence_put(fence);
   1305 			revents = 0;
   1306 			break;
   1307 		}
   1308 
   1309 		/*
   1310 		 * Otherwise, try to request the callback, and clear
   1311 		 * all possible ready events.  If the fence has been
   1312 		 * signalled in the interim, leave the events set; we
   1313 		 * will simulate the callback later.
   1314 		 */
   1315 		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
   1316 			dma_resv_poll_cb)) {
   1317 			dma_fence_put(fence);
   1318 			revents = 0;
   1319 			callback = true;
   1320 			break;
   1321 		}
   1322 		dma_fence_put(fence);
   1323 	} while (0);
   1324 
   1325 	/* All done reading the fences.  */
   1326 	rcu_read_unlock();
   1327 
   1328 	if (claimed && !callback) {
   1329 		/*
   1330 		 * We claimed the callback but we didn't actually
   1331 		 * request it because a fence was signalled while we
   1332 		 * were claiming it.  Call it ourselves now.  The
   1333 		 * callback doesn't use the fence nor rely on holding
   1334 		 * any of the fence locks, so this is safe.
   1335 		 */
   1336 		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
   1337 	}
   1338 	return revents;
   1339 
   1340 restart:
   1341 	rcu_read_unlock();
   1342 	goto top;
   1343 
   1344 record:
   1345 	rcu_read_unlock();
   1346 	mutex_enter(&rpoll->rp_lock);
   1347 	selrecord(curlwp, &rpoll->rp_selq);
   1348 	if (!rpoll->rp_claimed)
   1349 		claimed = rpoll->rp_claimed = true;
   1350 	mutex_exit(&rpoll->rp_lock);
   1351 	recorded = true;
   1352 	goto top;
   1353 }
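
/*
 * An illustrative sketch, not part of the original file: how a driver
 * might wire dma_resv_do_poll into its poll method, with the poll
 * state initialized by dma_resv_poll_init at attach time and torn
 * down by dma_resv_poll_fini at detach.  The softc layout is
 * hypothetical.
 */
#if 0	/* example only */
struct example_softc {
	struct dma_resv		*sc_resv;
	struct dma_resv_poll	sc_rpoll;
};

static int
example_poll(struct example_softc *sc, int events)
{

	return dma_resv_do_poll(sc->sc_resv, events, &sc->sc_rpoll);
}
#endif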
   1354 
   1355 /*
   1356  * dma_resv_kqfilter(robj, kn, rpoll)
   1357  *
   1358  *	Kqueue filter for reservation objects.  Currently not
   1359  *	implemented because the logic to implement it is nontrivial,
   1360  *	and userland will presumably never use it, so it would be
   1361  *	dangerous to add never-tested complex code paths to the kernel.
   1362  */
   1363 int
   1364 dma_resv_kqfilter(const struct dma_resv *robj,
   1365     struct knote *kn, struct dma_resv_poll *rpoll)
   1366 {
   1367 
   1368 	return EINVAL;
   1369 }
   1370