/*	$NetBSD: linux_dma_resv.c,v 1.19 2021/12/19 12:33:34 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.19 2021/12/19 12:33:34 riastradh Exp $");

#include <sys/param.h>
#include <sys/poll.h>
#include <sys/select.h>

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>
#include <linux/seqlock.h>
#include <linux/ww_mutex.h>

DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);

static struct dma_resv_list *
objlist_tryalloc(uint32_t n)
{
	struct dma_resv_list *list;

	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
	if (list == NULL)
		return NULL;
	list->shared_max = n;

	return list;
}

static void
objlist_free(struct dma_resv_list *list)
{
	uint32_t n = list->shared_max;

	kmem_free(list, offsetof(typeof(*list), shared[n]));
}

static void
objlist_free_cb(struct rcu_head *rcu)
{
	struct dma_resv_list *list = container_of(rcu,
	    struct dma_resv_list, rol_rcu);

	objlist_free(list);
}

static void
objlist_defer_free(struct dma_resv_list *list)
{

	call_rcu(&list->rol_rcu, objlist_free_cb);
}

/*
 * dma_resv_init(robj)
 *
 *	Initialize a reservation object.  Caller must later destroy it
 *	with dma_resv_fini.
 */
void
dma_resv_init(struct dma_resv *robj)
{

	ww_mutex_init(&robj->lock, &reservation_ww_class);
	seqcount_init(&robj->seq);
	robj->fence_excl = NULL;
	robj->fence = NULL;
	robj->robj_prealloc = NULL;
}

/*
 * dma_resv_fini(robj)
 *
 *	Destroy a reservation object, freeing any memory that had been
 *	allocated for it.  Caller must have exclusive access to it.
 */
void
dma_resv_fini(struct dma_resv *robj)
{
	unsigned i;

	if (robj->robj_prealloc) {
		objlist_free(robj->robj_prealloc);
		robj->robj_prealloc = NULL;	/* paranoia */
	}
	if (robj->fence) {
		for (i = 0; i < robj->fence->shared_count; i++) {
			dma_fence_put(robj->fence->shared[i]);
			robj->fence->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(robj->fence);
		robj->fence = NULL;	/* paranoia */
	}
	if (robj->fence_excl) {
		dma_fence_put(robj->fence_excl);
		robj->fence_excl = NULL;	/* paranoia */
	}
	ww_mutex_destroy(&robj->lock);
}
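
/*
 * Example (illustrative sketch only, not part of the original source):
 * a driver-private buffer object might embed a dma_resv and tie its
 * lifetime to the object.  The structure and function names here are
 * hypothetical.
 *
 *	struct mydrv_bo {
 *		struct dma_resv	bo_resv;
 *		...
 *	};
 *
 *	static void
 *	mydrv_bo_ctor(struct mydrv_bo *bo)
 *	{
 *		dma_resv_init(&bo->bo_resv);
 *	}
 *
 *	static void
 *	mydrv_bo_dtor(struct mydrv_bo *bo)
 *	{
 *		dma_resv_fini(&bo->bo_resv);
 *	}
 */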

/*
 * dma_resv_lock(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry.
 */
int
dma_resv_lock(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks.
 */
void
dma_resv_lock_slow(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	ww_mutex_lock_slow(&robj->lock, ctx);
}

/*
 * dma_resv_lock_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_interruptible(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock.  Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks.  Return 0 on
 *	success, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
}
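
/*
 * Example (illustrative sketch only, not part of the original source):
 * the usual wait/wound backoff dance for taking two reservation
 * objects a and b under one acquire context.  ww_acquire_init,
 * ww_acquire_done, and ww_acquire_fini are assumed to be the usual
 * <linux/ww_mutex.h> API; error handling other than -EDEADLK is
 * omitted for brevity.
 *
 *	struct ww_acquire_ctx ctx;
 *	struct dma_resv *contended = NULL;
 *	int error;
 *
 *	ww_acquire_init(&ctx, &reservation_ww_class);
 * retry:
 *	if (contended != NULL)
 *		dma_resv_lock_slow(contended, &ctx);
 *	if (contended != a) {
 *		error = dma_resv_lock(a, &ctx);
 *		if (error == -EDEADLK) {
 *			if (contended != NULL)
 *				dma_resv_unlock(contended);
 *			contended = a;
 *			goto retry;
 *		}
 *	}
 *	if (contended != b) {
 *		error = dma_resv_lock(b, &ctx);
 *		if (error == -EDEADLK) {
 *			dma_resv_unlock(a);
 *			contended = b;
 *			goto retry;
 *		}
 *	}
 *	ww_acquire_done(&ctx);
 *	...use a and b...
 *	dma_resv_unlock(b);
 *	dma_resv_unlock(a);
 *	ww_acquire_fini(&ctx);
 */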

/*
 * dma_resv_trylock(robj)
 *
 *	Try to acquire a reservation object's lock without blocking.
 *	Return true on success, false on failure.
 */
bool
dma_resv_trylock(struct dma_resv *robj)
{

	return ww_mutex_trylock(&robj->lock);
}

/*
 * dma_resv_locking_ctx(robj)
 *
 *	Return a pointer to the ww_acquire_ctx used by the owner of
 *	the reservation object's lock, or NULL if it is not owned or
 *	is locked without a context.
 */
struct ww_acquire_ctx *
dma_resv_locking_ctx(struct dma_resv *robj)
{

	return ww_mutex_locking_ctx(&robj->lock);
}

/*
 * dma_resv_unlock(robj)
 *
 *	Release a reservation object's lock.
 */
void
dma_resv_unlock(struct dma_resv *robj)
{

	ww_mutex_unlock(&robj->lock);
}

/*
 * dma_resv_is_locked(robj)
 *
 *	True if robj is locked.
 */
bool
dma_resv_is_locked(struct dma_resv *robj)
{

	return ww_mutex_is_locked(&robj->lock);
}

/*
 * dma_resv_held(robj)
 *
 *	True if robj is locked.
 */
bool
dma_resv_held(struct dma_resv *robj)
{

	return ww_mutex_is_locked(&robj->lock);
}

/*
 * dma_resv_assert_held(robj)
 *
 *	Panic if robj is not held, in DIAGNOSTIC builds.
 */
void
dma_resv_assert_held(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
}

/*
 * dma_resv_get_excl(robj)
 *
 *	Return a pointer to the exclusive fence of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_fence *
dma_resv_get_excl(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence_excl;
}

/*
 * dma_resv_get_list(robj)
 *
 *	Return a pointer to the shared fence list of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_resv_list *
dma_resv_get_list(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence;
}

/*
 * dma_resv_reserve_shared(robj, num_fences)
 *
 *	Reserve space in robj to add num_fences shared fences.  To be
 *	used only once before calling dma_resv_add_shared_fence.
 *
 *	Caller must have robj locked.
 *
 *	Internally, we start with room for four entries and double if
 *	we don't have enough.  This is not guaranteed.
 */
int
dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
{
	struct dma_resv_list *list, *prealloc;
	uint32_t n, nalloc;

	KASSERT(dma_resv_held(robj));

	list = robj->fence;
	prealloc = robj->robj_prealloc;

	/* If there's an existing list, check it for space. */
	if (list) {
		/* If there's too many already, give up. */
		if (list->shared_count > UINT32_MAX - num_fences)
			return -ENOMEM;

		/* Add some more. */
		n = list->shared_count + num_fences;

		/* If there's enough for one more, we're done. */
		if (n <= list->shared_max)
			return 0;
	} else {
		/* No list already.  We need space for num_fences. */
		n = num_fences;
	}

	/* If not, maybe there's a preallocated list ready. */
	if (prealloc != NULL) {
		/* If there's enough room in it, stop here. */
		if (n <= prealloc->shared_max)
			return 0;

		/* Try to double its capacity. */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;

		/* Swap the new preallocated list and free the old one. */
		objlist_free(robj->robj_prealloc);
		robj->robj_prealloc = prealloc;
	} else {
		/* Start with some spare. */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;
		/* Save the new preallocated list. */
		robj->robj_prealloc = prealloc;
	}

	/* Success! */
	return 0;
}
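
/*
 * Example (illustrative sketch only, not part of the original source):
 * publishing a new shared fence for a job.  The reservation must be
 * locked, and space must be reserved before the fence is added;
 * "bo" and "job_fence" are hypothetical.
 *
 *	KASSERT(dma_resv_held(&bo->bo_resv));
 *	error = dma_resv_reserve_shared(&bo->bo_resv, 1);
 *	if (error)
 *		return error;
 *	dma_resv_add_shared_fence(&bo->bo_resv, job_fence);
 */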

struct dma_resv_write_ticket {
};

/*
 * dma_resv_write_begin(robj, ticket)
 *
 *	Begin an atomic batch of writes to robj, and initialize opaque
 *	ticket for it.  The ticket must be passed to
 *	dma_resv_write_commit to commit the writes.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier.  Does
 *	NOT serve as an acquire operation, however.
 */
static void
dma_resv_write_begin(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_write_commit(robj, ticket)
 *
 *	Commit an atomic batch of writes to robj begun with the call to
 *	dma_resv_write_begin that returned ticket.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier.  Does
 *	NOT serve as a release operation, however.
 */
static void
dma_resv_write_commit(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_end(&robj->seq);
}

struct dma_resv_read_ticket {
	unsigned version;
};

/*
 * dma_resv_read_begin(robj, ticket)
 *
 *	Begin a read section, and initialize opaque ticket for it.  The
 *	ticket must be passed to dma_resv_read_valid, and the
 *	caller must be prepared to retry reading if it fails.
 */
static void
dma_resv_read_begin(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	ticket->version = read_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_read_valid(robj, ticket)
 *
 *	Test whether the read section is valid.  Return true on
 *	success, or false on failure if the read ticket has been
 *	invalidated.
 */
static bool
dma_resv_read_valid(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	return !read_seqcount_retry(&robj->seq, ticket->version);
}
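
/*
 * Example (illustrative sketch only, not part of the original source):
 * the read-side pattern used by the RCU readers below -- take a
 * ticket, read, and retry from the top if a writer intervened.
 *
 *	struct dma_resv_read_ticket ticket;
 *
 * top:	rcu_read_lock();
 *	dma_resv_read_begin(robj, &ticket);
 *	...read robj->fence and/or robj->fence_excl with
 *	    atomic_load_consume, or use the helpers below...
 *	if (!dma_resv_read_valid(robj, &ticket)) {
 *		rcu_read_unlock();
 *		goto top;
 *	}
 *	rcu_read_unlock();
 */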

/*
 * dma_resv_get_shared_reader(robj, listp, shared_countp, ticket)
 *
 *	Set *listp and *shared_countp to a snapshot of the pointer to
 *	and length of the shared fence list of robj and return true, or
 *	set them to NULL/0 and return false if a writer intervened so
 *	the caller must start over.
 *
 *	Both *listp and *shared_countp are unconditionally initialized
 *	on return.  They may be NULL/0 even on success, if there is no
 *	shared list at the moment.  Does not take any fence references.
 */
static bool
dma_resv_get_shared_reader(const struct dma_resv *robj,
    const struct dma_resv_list **listp, unsigned *shared_countp,
    struct dma_resv_read_ticket *ticket)
{
	struct dma_resv_list *list;
	unsigned shared_count = 0;

	/*
	 * Get the list and, if it is present, its length.  If the list
	 * is present, it has a valid length.  The atomic_load_consume
	 * pairs with the membar_producer in dma_resv_write_begin.
	 */
	list = atomic_load_consume(&robj->fence);
	shared_count = list ? atomic_load_relaxed(&list->shared_count) : 0;

	/*
	 * We are done reading from robj and list.  Validate our
	 * parking ticket.  If it's invalid, do not pass go and do not
	 * collect $200.
	 */
	if (!dma_resv_read_valid(robj, ticket))
		goto fail;

	/* Success! */
	*listp = list;
	*shared_countp = shared_count;
	return true;

fail:	*listp = NULL;
	*shared_countp = 0;
	return false;
}

/*
 * dma_resv_get_excl_reader(robj, fencep, ticket)
 *
 *	Set *fencep to the exclusive fence of robj and return true, or
 *	set it to NULL and return false if either
 *	(a) a writer intervened, or
 *	(b) the fence is scheduled to be destroyed after this RCU grace
 *	    period,
 *	in either case meaning the caller must restart.
 *
 *	The value of *fencep is unconditionally initialized on return.
 *	It may be NULL, if there is no exclusive fence at the moment.
 *	If nonnull, *fencep is referenced; caller must dma_fence_put.
 */
static bool
dma_resv_get_excl_reader(const struct dma_resv *robj,
    struct dma_fence **fencep,
    struct dma_resv_read_ticket *ticket)
{
	struct dma_fence *fence;

	/*
	 * Get the candidate fence pointer.  The atomic_load_consume
	 * pairs with the membar_producer in dma_resv_write_begin.
	 */
	fence = atomic_load_consume(&robj->fence_excl);

	/*
	 * The load of robj->fence_excl is atomic, but the caller may
	 * have previously loaded the shared fence list and should
	 * restart if its view of the entire dma_resv object is not a
	 * consistent snapshot.
	 */
	if (!dma_resv_read_valid(robj, ticket))
		goto fail;

	/*
	 * If the fence is already scheduled to go away after this RCU
	 * read section, give up.  Otherwise, take a reference so it
	 * won't go away until after dma_fence_put.
	 */
	if (fence != NULL &&
	    (fence = dma_fence_get_rcu(fence)) == NULL)
		goto fail;

	/* Success! */
	*fencep = fence;
	return true;

fail:	*fencep = NULL;
	return false;
}

/*
 * dma_resv_add_excl_fence(robj, fence)
 *
 *	Empty and release all of robj's shared fences, and clear and
 *	release its exclusive fence.  If fence is nonnull, acquire a
 *	reference to it and save it as robj's exclusive fence.
 *
 *	Caller must have robj locked.
 */
void
dma_resv_add_excl_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_fence *old_fence = robj->fence_excl;
	struct dma_resv_list *old_list = robj->fence;
	uint32_t old_shared_count;
	struct dma_resv_write_ticket ticket;

	KASSERT(dma_resv_held(robj));

	/*
	 * If we are setting rather than just removing a fence, acquire
	 * a reference for ourselves.
	 */
	if (fence)
		(void)dma_fence_get(fence);

	/* If there are any shared fences, remember how many. */
	if (old_list)
		old_shared_count = old_list->shared_count;

	/* Begin an update.  Implies membar_producer for fence. */
	dma_resv_write_begin(robj, &ticket);

	/* Replace the fence and zero the shared count. */
	atomic_store_relaxed(&robj->fence_excl, fence);
	if (old_list)
		old_list->shared_count = 0;

	/* Commit the update. */
	dma_resv_write_commit(robj, &ticket);

	/* Release the old exclusive fence, if any. */
	if (old_fence) {
		dma_fence_put(old_fence);
		old_fence = NULL;	/* paranoia */
	}

	/* Release any old shared fences. */
	if (old_list) {
		while (old_shared_count--) {
			dma_fence_put(old_list->shared[old_shared_count]);
			/* paranoia */
			old_list->shared[old_shared_count] = NULL;
		}
	}
}
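
/*
 * Example (illustrative sketch only, not part of the original source):
 * installing an exclusive fence for a job that must supersede all
 * prior access; "bo" and "job_fence" are hypothetical.  Passing NULL
 * instead simply clears the exclusive fence and empties the shared
 * list, as described above.
 *
 *	KASSERT(dma_resv_held(&bo->bo_resv));
 *	dma_resv_add_excl_fence(&bo->bo_resv, job_fence);
 */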

/*
 * dma_resv_add_shared_fence(robj, fence)
 *
 *	Acquire a reference to fence and add it to robj's shared list.
 *	If any fence was already added with the same context number,
 *	release it and replace it by this one.
 *
 *	Caller must have robj locked, and must have preceded with a
 *	call to dma_resv_reserve_shared for each shared fence
 *	added.
 */
void
dma_resv_add_shared_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_resv_list *list = robj->fence;
	struct dma_resv_list *prealloc = robj->robj_prealloc;
	struct dma_resv_write_ticket ticket;
	struct dma_fence *replace = NULL;
	uint32_t i;

	KASSERT(dma_resv_held(robj));

	/* Acquire a reference to the fence. */
	KASSERT(fence != NULL);
	(void)dma_fence_get(fence);

	/* Check for a preallocated replacement list. */
	if (prealloc == NULL) {
		/*
		 * If there is no preallocated replacement list, then
		 * there must be room in the current list.
		 */
		KASSERT(list != NULL);
		KASSERT(list->shared_count < list->shared_max);

		/* Begin an update.  Implies membar_producer for fence. */
		dma_resv_write_begin(robj, &ticket);

		/* Find a fence with the same context number. */
		for (i = 0; i < list->shared_count; i++) {
			if (list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				atomic_store_relaxed(&list->shared[i], fence);
				break;
			}
		}

		/* If we didn't find one, add it at the end. */
		if (i == list->shared_count) {
			atomic_store_relaxed(&list->shared[list->shared_count],
			    fence);
			atomic_store_relaxed(&list->shared_count,
			    list->shared_count + 1);
		}

		/* Commit the update. */
		dma_resv_write_commit(robj, &ticket);
	} else {
		/*
		 * There is a preallocated replacement list.  There may
		 * not be a current list.  If not, treat it as a zero-
		 * length list.
		 */
		uint32_t shared_count = (list == NULL? 0 : list->shared_count);

		/* There had better be room in the preallocated list. */
		KASSERT(shared_count < prealloc->shared_max);

		/*
		 * Copy the fences over, but replace if we find one
		 * with the same context number.
		 */
		for (i = 0; i < shared_count; i++) {
			if (replace == NULL &&
			    list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				prealloc->shared[i] = fence;
			} else {
				prealloc->shared[i] = list->shared[i];
			}
		}
		prealloc->shared_count = shared_count;

		/* If we didn't find one, add it at the end. */
		if (replace == NULL)
			prealloc->shared[prealloc->shared_count++] = fence;

		/*
		 * Now ready to replace the list.  Begin an update.
		 * Implies membar_producer for fence and prealloc.
		 */
		dma_resv_write_begin(robj, &ticket);

		/* Replace the list. */
		atomic_store_relaxed(&robj->fence, prealloc);
		robj->robj_prealloc = NULL;

		/* Commit the update. */
		dma_resv_write_commit(robj, &ticket);

		/*
		 * If there is an old list, free it when convenient.
		 * (We are not in a position at this point to sleep
		 * waiting for activity on all CPUs.)
		 */
		if (list)
			objlist_defer_free(list);
	}

	/* Release a fence if we replaced it. */
	if (replace) {
		dma_fence_put(replace);
		replace = NULL;	/* paranoia */
	}
}

/*
 * dma_resv_get_excl_rcu(robj)
 *
 *	Return a reference to the exclusive fence of robj, if any, or
 *	NULL if there is none.  Caller must release it with
 *	dma_fence_put.
 *
 *	Note: Caller need not call this from an RCU read section.
 */
struct dma_fence *
dma_resv_get_excl_rcu(const struct dma_resv *robj)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
	rcu_read_unlock();

	return fence;
}
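
/*
 * Example (illustrative sketch only, not part of the original source):
 * peeking at the current exclusive fence without holding the
 * reservation lock; "bo" is hypothetical.
 *
 *	struct dma_fence *fence;
 *
 *	fence = dma_resv_get_excl_rcu(&bo->bo_resv);
 *	if (fence != NULL) {
 *		...inspect or wait for the fence...
 *		dma_fence_put(fence);
 *	}
 */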

/*
 * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
 *
 *	Get a snapshot of the exclusive and shared fences of robj.  The
 *	shared fences are returned as a pointer *sharedp to an array,
 *	to be freed by the caller with kfree, of *nsharedp elements.
 *	If fencep is null, then add the exclusive fence, if any, at the
 *	end of the array instead.
 *
 *	Returns zero on success, negative (Linux-style) error code on
 *	failure.  On failure, *fencep, *nsharedp, and *sharedp are
 *	untouched.
 */
int
dma_resv_get_fences_rcu(const struct dma_resv *robj,
    struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
{
	const struct dma_resv_list *list = NULL;
	struct dma_fence *fence = NULL;
	struct dma_fence **shared = NULL;
	unsigned shared_alloc, shared_count, i;
	struct dma_resv_read_ticket ticket;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If there is a shared list, grab it. */
	if (!dma_resv_get_shared_reader(robj, &list, &shared_count, &ticket))
		goto restart;
	if (list != NULL) {

		/*
		 * Avoid arithmetic overflow with `+ 1' below.
		 * Strictly speaking we don't need this if the caller
		 * specified fencep or if there is no exclusive fence,
		 * but it is simpler to not have to consider those
		 * cases.
		 */
		KASSERT(shared_count <= list->shared_max);
		if (list->shared_max == UINT_MAX) {
			rcu_read_unlock();
			if (shared != NULL)
				kfree(shared);
			return -ENOMEM;
		}

		/* Check whether we have a buffer. */
		if (shared == NULL) {
			/*
			 * We don't have a buffer yet.  Try to allocate
			 * one without waiting.
			 */
			shared_alloc = list->shared_max + 1;
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_NOWAIT);
			if (shared == NULL) {
				/*
				 * Couldn't do it immediately.  Back
				 * out of RCU and allocate one with
				 * waiting.
				 */
				rcu_read_unlock();
				shared = kcalloc(shared_alloc,
				    sizeof(shared[0]), GFP_KERNEL);
				if (shared == NULL)
					return -ENOMEM;
				goto top;
			}
		} else if (shared_alloc < list->shared_max + 1) {
			/*
			 * We have a buffer but it's too small.  We're
			 * already racing in this case, so just back
			 * out and wait to allocate a bigger one.
			 */
			shared_alloc = list->shared_max + 1;
			rcu_read_unlock();
			kfree(shared);
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_KERNEL);
			if (shared == NULL)
				return -ENOMEM;
			goto top;
		}

		/*
		 * We got a buffer large enough.  Copy into the buffer
		 * and record the number of elements.  Could safely use
		 * memcpy here, because even if we race with a writer
		 * it'll invalidate the read ticket and we'll start
		 * over, but atomic_load in a loop will pacify kcsan.
		 */
		for (i = 0; i < shared_count; i++)
			shared[i] = atomic_load_relaxed(&list->shared[i]);

		/* If anything changed while we were copying, restart. */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;
	}

	/* If there is an exclusive fence, grab it. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
		goto restart;

	/*
	 * Try to get a reference to all of the shared fences.
	 */
	for (i = 0; i < shared_count; i++) {
		if (dma_fence_get_rcu(atomic_load_relaxed(&shared[i])) == NULL)
			goto put_restart;
	}

	/* Success! */
	rcu_read_unlock();
	if (fencep) {
		*fencep = fence;
	} else if (fence) {
		KASSERT(shared_count < UINT_MAX);
		shared[shared_count++] = fence;
	}
	*nsharedp = shared_count;
	*sharedp = shared;
	return 0;

put_restart:
	/* Back out. */
	while (i --> 0) {
		dma_fence_put(shared[i]);
		shared[i] = NULL;	/* paranoia */
	}
	if (fence) {
		dma_fence_put(fence);
		fence = NULL;
	}

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;
}
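
/*
 * Example (illustrative sketch only, not part of the original source):
 * snapshotting all fences and then releasing them.  The shared array
 * is allocated by dma_resv_get_fences_rcu and must be freed with
 * kfree; each fence is referenced and must be released with
 * dma_fence_put.  "bo" is hypothetical.
 *
 *	struct dma_fence *excl = NULL, **shared = NULL;
 *	unsigned nshared = 0, i;
 *	int error;
 *
 *	error = dma_resv_get_fences_rcu(&bo->bo_resv, &excl, &nshared,
 *	    &shared);
 *	if (error)
 *		return error;
 *	for (i = 0; i < nshared; i++) {
 *		...examine shared[i]...
 *		dma_fence_put(shared[i]);
 *	}
 *	kfree(shared);
 *	if (excl != NULL)
 *		dma_fence_put(excl);
 */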

/*
 * dma_resv_copy_fences(dst, src)
 *
 *	Copy the exclusive fence and all the shared fences from src to
 *	dst.
 *
 *	Caller must have dst locked.
 */
int
dma_resv_copy_fences(struct dma_resv *dst_robj,
    const struct dma_resv *src_robj)
{
	const struct dma_resv_list *src_list;
	struct dma_resv_list *dst_list = NULL;
	struct dma_resv_list *old_list;
	struct dma_fence *fence = NULL;
	struct dma_fence *old_fence;
	uint32_t shared_count, i;
	struct dma_resv_read_ticket read_ticket;
	struct dma_resv_write_ticket write_ticket;

	KASSERT(dma_resv_held(dst_robj));

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(src_robj, &read_ticket);

	/* Get the shared list. */
	if (!dma_resv_get_shared_reader(src_robj, &src_list, &shared_count,
	    &read_ticket))
		goto restart;
	if (src_list != NULL) {
		/* Allocate a new list. */
		dst_list = objlist_tryalloc(shared_count);
		if (dst_list == NULL) {
			rcu_read_unlock();
			return -ENOMEM;
		}

		/* Copy over all fences that are not yet signalled. */
		dst_list->shared_count = 0;
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&src_list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				continue;
			}
			dst_list->shared[dst_list->shared_count++] = fence;
			fence = NULL;
		}

		/* If anything changed while we were copying, restart. */
		if (!dma_resv_read_valid(src_robj, &read_ticket))
			goto restart;
	}

	/* Get the exclusive fence. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(src_robj, &fence, &read_ticket))
		goto restart;

	/* All done with src; exit the RCU read section. */
	rcu_read_unlock();

	/*
	 * We now have a snapshot of the shared and exclusive fences of
	 * src_robj and we have acquired references to them so they
	 * won't go away.  Transfer them over to dst_robj, releasing
	 * references to any that were there.
	 */

	/* Get the old shared and exclusive fences, if any. */
	old_list = dst_robj->fence;
	old_fence = dst_robj->fence_excl;

	/*
	 * Begin an update.  Implies membar_producer for dst_list and
	 * fence.
	 */
	dma_resv_write_begin(dst_robj, &write_ticket);

	/* Replace the fences. */
	atomic_store_relaxed(&dst_robj->fence, dst_list);
	atomic_store_relaxed(&dst_robj->fence_excl, fence);

	/* Commit the update. */
	dma_resv_write_commit(dst_robj, &write_ticket);

	/* Release the old exclusive fence, if any. */
	if (old_fence) {
		dma_fence_put(old_fence);
		old_fence = NULL;	/* paranoia */
	}

	/* Release any old shared fences. */
	if (old_list) {
		for (i = old_list->shared_count; i --> 0;) {
			dma_fence_put(old_list->shared[i]);
			old_list->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(old_list);
		old_list = NULL;	/* paranoia */
	}

	/* Success! */
	return 0;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	if (dst_list) {
		for (i = dst_list->shared_count; i --> 0;) {
			dma_fence_put(dst_list->shared[i]);
			dst_list->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(dst_list);
		dst_list = NULL;
	}
	goto top;
}

/*
 * dma_resv_test_signaled_rcu(robj, shared)
 *
 *	If shared is true, test whether all of the shared fences are
 *	signalled, or if there are none, test whether the exclusive
 *	fence is signalled.  If shared is false, test only whether the
 *	exclusive fence is signalled.
 *
 *	XXX Why does this _not_ test the exclusive fence if shared is
 *	true only if there are no shared fences?  This makes no sense.
 */
bool
dma_resv_test_signaled_rcu(const struct dma_resv *robj,
    bool shared)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	bool signaled = true;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, test it. */
	if (shared) {
		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
		    &ticket))
			goto restart;
	} else {
		list = NULL;
		shared_count = 0;
	}
	if (list != NULL) {
		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop if we find any that is not
		 * signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			signaled &= dma_fence_is_signaled(fence);
			dma_fence_put(fence);
			fence = NULL;
			if (!signaled)
				goto out;
		}

		/* If anything changed while we were testing, restart. */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;
	}
	if (shared_count)
		goto out;

	/* If there is an exclusive fence, test it. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
		goto restart;
	if (fence != NULL) {
		/* Test whether it is signalled.  If no, stop. */
		signaled &= dma_fence_is_signaled(fence);
		dma_fence_put(fence);
		fence = NULL;
		if (!signaled)
			goto out;
	}

out:	KASSERT(fence == NULL);
	rcu_read_unlock();
	return signaled;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;
}
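
/*
 * Example (illustrative sketch only, not part of the original source):
 * a lockless busy check, e.g. for a GEM_BUSY-style ioctl, asking
 * whether the shared fences (or the exclusive fence, if there are
 * none) have all signalled; "bo" is hypothetical.
 *
 *	bool idle;
 *
 *	idle = dma_resv_test_signaled_rcu(&bo->bo_resv, true);
 */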

/*
 * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
 *
 *	If shared is true, wait for all of the shared fences to be
 *	signalled, or if there are none, wait for the exclusive fence
 *	to be signalled.  If shared is false, wait only for the
 *	exclusive fence to be signalled.  If timeout is zero, don't
 *	wait, only test.
 *
 *	XXX Why does this _not_ wait for the exclusive fence if shared
 *	is true only if there are no shared fences?  This makes no
 *	sense.
 */
long
dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
    bool shared, bool intr, unsigned long timeout)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	long ret;

	if (timeout == 0)
		return dma_resv_test_signaled_rcu(robj, shared);

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, wait on it. */
	if (shared) {
		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
		    &ticket))
			goto restart;
	} else {
		list = NULL;
		shared_count = 0;
	}
	if (list != NULL) {
		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop and wait if we find any that
		 * is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence))
				goto wait;
			dma_fence_put(fence);
			fence = NULL;
		}

		/* If anything changed while we were testing, restart. */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;
	}
	if (shared_count)
		goto out;

	/* If there is an exclusive fence, test it. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
		goto restart;
	if (fence != NULL) {
		/* Test whether it is signalled.  If no, wait. */
		if (!dma_fence_is_signaled(fence))
			goto wait;
		dma_fence_put(fence);
		fence = NULL;
	}

out:	/* Success!  Return the number of ticks left. */
	rcu_read_unlock();
	KASSERT(fence == NULL);
	return timeout;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;

wait:
	/*
	 * Exit the RCU read section, wait for it, and release the
	 * fence when we're done.  If we time out or fail, bail.
	 * Otherwise, go back to the top.
	 */
	KASSERT(fence != NULL);
	rcu_read_unlock();
	ret = dma_fence_wait_timeout(fence, intr, timeout);
	dma_fence_put(fence);
	fence = NULL;
	if (ret <= 0)
		return ret;
	KASSERT(ret <= timeout);
	timeout = ret;
	goto top;
}
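
/*
 * Example (illustrative sketch only, not part of the original source):
 * waiting up to one second, interruptibly, for all shared fences (or
 * the exclusive fence if there are none); "bo" is hypothetical.
 *
 *	long ret;
 *
 *	ret = dma_resv_wait_timeout_rcu(&bo->bo_resv, true, true, hz);
 *	if (ret < 0)
 *		return -ret;
 *	if (ret == 0)
 *		return ETIMEDOUT;
 *
 * A negative return is a Linux-style error code (e.g. interrupted),
 * zero means the wait timed out, and a positive value is the number
 * of ticks that were left.
 */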

/*
 * dma_resv_poll_init(rpoll)
 *
 *	Initialize reservation poll state.
 */
void
dma_resv_poll_init(struct dma_resv_poll *rpoll)
{

	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
	selinit(&rpoll->rp_selq);
	rpoll->rp_claimed = 0;
}

/*
 * dma_resv_poll_fini(rpoll)
 *
 *	Release any resource associated with reservation poll state.
 */
void
dma_resv_poll_fini(struct dma_resv_poll *rpoll)
{

	KASSERT(rpoll->rp_claimed == 0);
	seldestroy(&rpoll->rp_selq);
	mutex_destroy(&rpoll->rp_lock);
}

/*
 * dma_resv_poll_cb(fence, fcb)
 *
 *	Callback to notify a reservation poll that a fence has
 *	completed.  Notify any waiters and allow the next poller to
 *	claim the callback.
 *
 *	If one thread is waiting for the exclusive fence only, and we
 *	spuriously notify them about a shared fence, tough.
 */
static void
dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
{
	struct dma_resv_poll *rpoll = container_of(fcb,
	    struct dma_resv_poll, rp_fcb);

	mutex_enter(&rpoll->rp_lock);
	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
	rpoll->rp_claimed = 0;
	mutex_exit(&rpoll->rp_lock);
}

/*
 * dma_resv_do_poll(robj, events, rpoll)
 *
 *	Poll for reservation object events using the reservation poll
 *	state in rpoll:
 *
 *	- POLLOUT	wait for all fences shared and exclusive
 *	- POLLIN	wait for the exclusive fence
 *
 *	Return the subset of events in events that are ready.  If any
 *	are requested but not ready, arrange to be notified with
 *	selnotify when they are.
 */
int
dma_resv_do_poll(const struct dma_resv *robj, int events,
    struct dma_resv_poll *rpoll)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	int revents;
	bool recorded = false;	/* curlwp is on the selq */
	bool claimed = false;	/* we claimed the callback */
	bool callback = false;	/* we requested a callback */

	/*
	 * Start with the maximal set of events that could be ready.
	 * We will eliminate the events that are definitely not ready
	 * as we go at the same time as we add callbacks to notify us
	 * that they may be ready.
	 */
	revents = events & (POLLIN|POLLOUT);
	if (revents == 0)
		return 0;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If we want to wait for all fences, get the shared list. */
	if (events & POLLOUT) {
		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
		    &ticket))
			goto restart;
	} else {
		list = NULL;
		shared_count = 0;
	}
	if (list != NULL) do {
		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop and request a callback if we
		 * find any that is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}

		/* If all shared fences have been signalled, move on. */
		if (i == shared_count)
			break;

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded)
			goto record;

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			revents &= ~POLLOUT;
			break;
		}

		/*
		 * Otherwise, find the first fence that is not
		 * signalled, request the callback, and clear POLLOUT
		 * from the possible ready events.  If they are all
		 * signalled, leave POLLOUT set; we will simulate the
		 * callback later.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
			    dma_resv_poll_cb)) {
				dma_fence_put(fence);
				fence = NULL;
				revents &= ~POLLOUT;
				callback = true;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}
	} while (0);

	/* We always wait for at least the exclusive fence, so get it. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
		goto restart;
	if (fence != NULL) do {
		/*
		 * Test whether it is signalled.  If not, stop and
		 * request a callback.
		 */
		if (dma_fence_is_signaled(fence))
			break;

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded) {
			dma_fence_put(fence);
			fence = NULL;
			goto record;
		}

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			revents = 0;
			break;
		}

		/*
		 * Otherwise, try to request the callback, and clear
		 * all possible ready events.  If the fence has been
		 * signalled in the interim, leave the events set; we
		 * will simulate the callback later.
		 */
		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
		    dma_resv_poll_cb)) {
			revents = 0;
			callback = true;
			break;
		}
	} while (0);
	if (fence != NULL) {
		dma_fence_put(fence);
		fence = NULL;
	}

	/* All done reading the fences. */
	rcu_read_unlock();

	if (claimed && !callback) {
		/*
		 * We claimed the callback but we didn't actually
		 * request it because a fence was signalled while we
		 * were claiming it.  Call it ourselves now.  The
		 * callback doesn't use the fence nor rely on holding
		 * any of the fence locks, so this is safe.
		 */
		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
	}
	return revents;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;

record:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	mutex_enter(&rpoll->rp_lock);
	selrecord(curlwp, &rpoll->rp_selq);
	if (!rpoll->rp_claimed)
		claimed = rpoll->rp_claimed = true;
	mutex_exit(&rpoll->rp_lock);
	recorded = true;
	goto top;
}
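
/*
 * Example (illustrative sketch only, not part of the original source):
 * how a driver's poll entry point might use the reservation poll
 * state.  The softc and field names here are hypothetical; sc_rpoll
 * is assumed to have been set up with dma_resv_poll_init.
 *
 *	int
 *	mydrv_poll(struct mydrv_softc *sc, struct mydrv_bo *bo, int events)
 *	{
 *
 *		return dma_resv_do_poll(&bo->bo_resv, events,
 *		    &sc->sc_rpoll);
 *	}
 */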

/*
 * dma_resv_kqfilter(robj, kn, rpoll)
 *
 *	Kqueue filter for reservation objects.  Currently not
 *	implemented because the logic to implement it is nontrivial,
 *	and userland will presumably never use it, so it would be
 *	dangerous to add never-tested complex code paths to the kernel.
 */
int
dma_resv_kqfilter(const struct dma_resv *robj,
    struct knote *kn, struct dma_resv_poll *rpoll)
{

	return EINVAL;
}