/*	$NetBSD: linux_dma_resv.c,v 1.11 2021/12/19 12:21:30 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.11 2021/12/19 12:21:30 riastradh Exp $");

#include <sys/param.h>
#include <sys/poll.h>
#include <sys/select.h>

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>
#include <linux/seqlock.h>
#include <linux/ww_mutex.h>

DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);

static struct dma_resv_list *
objlist_tryalloc(uint32_t n)
{
	struct dma_resv_list *list;

	list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
	if (list == NULL)
		return NULL;
	list->shared_max = n;

	return list;
}

static void
objlist_free(struct dma_resv_list *list)
{
	uint32_t n = list->shared_max;

	kmem_free(list, offsetof(typeof(*list), shared[n]));
}

static void
objlist_free_cb(struct rcu_head *rcu)
{
	struct dma_resv_list *list = container_of(rcu,
	    struct dma_resv_list, rol_rcu);

	objlist_free(list);
}

static void
objlist_defer_free(struct dma_resv_list *list)
{

	call_rcu(&list->rol_rcu, objlist_free_cb);
}

/*
 * dma_resv_init(robj)
 *
 *	Initialize a reservation object. Caller must later destroy it
 *	with dma_resv_fini.
 */
void
dma_resv_init(struct dma_resv *robj)
{

	ww_mutex_init(&robj->lock, &reservation_ww_class);
	seqcount_init(&robj->seq);
	robj->fence_excl = NULL;
	robj->fence = NULL;
	robj->robj_prealloc = NULL;
}
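
/*
 * A reservation object is normally embedded in some larger driver
 * object and initialized and destroyed along with it; a minimal
 * sketch, assuming a hypothetical struct mydrv_obj with a dma_resv
 * member:
 *
 *	struct mydrv_obj {
 *		struct dma_resv	mo_resv;
 *	};
 *
 *	dma_resv_init(&obj->mo_resv);
 *	...use the object...
 *	dma_resv_fini(&obj->mo_resv);
 */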

/*
 * dma_resv_fini(robj)
 *
 *	Destroy a reservation object, freeing any memory that had been
 *	allocated for it. Caller must have exclusive access to it.
 */
void
dma_resv_fini(struct dma_resv *robj)
{
	unsigned i;

	if (robj->robj_prealloc) {
		objlist_free(robj->robj_prealloc);
		robj->robj_prealloc = NULL;	/* paranoia */
	}
	if (robj->fence) {
		for (i = 0; i < robj->fence->shared_count; i++) {
			dma_fence_put(robj->fence->shared[i]);
			robj->fence->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(robj->fence);
		robj->fence = NULL;	/* paranoia */
	}
	if (robj->fence_excl) {
		dma_fence_put(robj->fence_excl);
		robj->fence_excl = NULL;	/* paranoia */
	}
	ww_mutex_destroy(&robj->lock);
}

/*
 * dma_resv_lock(robj, ctx)
 *
 *	Acquire a reservation object's lock. Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry.
 */
int
dma_resv_lock(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow(robj, ctx)
 *
 *	Acquire a reservation object's lock. Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks.
 */
void
dma_resv_lock_slow(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	ww_mutex_lock_slow(&robj->lock, ctx);
}

/*
 * dma_resv_lock_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock. Return 0 on success,
 *	-EALREADY if caller already holds it, -EDEADLK if a
 *	higher-priority owner holds it and the caller must back out and
 *	retry, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_interruptible(&robj->lock, ctx);
}

/*
 * dma_resv_lock_slow_interruptible(robj, ctx)
 *
 *	Acquire a reservation object's lock. Caller must not hold
 *	this lock or any others -- this is to be used in slow paths
 *	after dma_resv_lock or dma_resv_lock_interruptible has failed
 *	and the caller has backed out all other locks. Return 0 on
 *	success, -ERESTART/-EINTR if interrupted.
 */
int
dma_resv_lock_slow_interruptible(struct dma_resv *robj,
    struct ww_acquire_ctx *ctx)
{

	return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
}
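
/*
 * The lock obeys the usual ww_mutex deadlock-avoidance protocol when
 * several reservation objects must be locked together. A minimal
 * sketch for two objects a and b, assuming the usual ww_acquire_ctx
 * setup; error paths other than -EDEADLK are elided:
 *
 *	struct ww_acquire_ctx ctx;
 *
 *	ww_acquire_init(&ctx, &reservation_ww_class);
 *	dma_resv_lock(a, &ctx);
 *	if (dma_resv_lock(b, &ctx) == -EDEADLK) {
 *		dma_resv_unlock(a);
 *		dma_resv_lock_slow(b, &ctx);
 *		dma_resv_lock(a, &ctx);
 *	}
 *	...both locked, do work...
 *	dma_resv_unlock(b);
 *	dma_resv_unlock(a);
 *	ww_acquire_fini(&ctx);
 *
 * A full implementation keeps backing out and retrying for as long as
 * dma_resv_lock returns -EDEADLK; only one round is shown here.
 */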

/*
 * dma_resv_trylock(robj)
 *
 *	Try to acquire a reservation object's lock without blocking.
 *	Return true on success, false on failure.
 */
bool
dma_resv_trylock(struct dma_resv *robj)
{

	return ww_mutex_trylock(&robj->lock);
}

/*
 * dma_resv_locking_ctx(robj)
 *
 *	Return a pointer to the ww_acquire_ctx used by the owner of
 *	the reservation object's lock, or NULL if it is either not
 *	owned or if it is locked without context.
 */
struct ww_acquire_ctx *
dma_resv_locking_ctx(struct dma_resv *robj)
{

	return ww_mutex_locking_ctx(&robj->lock);
}

/*
 * dma_resv_unlock(robj)
 *
 *	Release a reservation object's lock.
 */
void
dma_resv_unlock(struct dma_resv *robj)
{

	ww_mutex_unlock(&robj->lock);
}

/*
 * dma_resv_is_locked(robj)
 *
 *	True if robj is locked.
 */
bool
dma_resv_is_locked(struct dma_resv *robj)
{

	return ww_mutex_is_locked(&robj->lock);
}

/*
 * dma_resv_held(robj)
 *
 *	True if robj is locked.
 */
bool
dma_resv_held(struct dma_resv *robj)
{

	return ww_mutex_is_locked(&robj->lock);
}

/*
 * dma_resv_assert_held(robj)
 *
 *	Panic if robj is not held, in DIAGNOSTIC builds.
 */
void
dma_resv_assert_held(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
}

/*
 * dma_resv_get_excl(robj)
 *
 *	Return a pointer to the exclusive fence of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_fence *
dma_resv_get_excl(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence_excl;
}

/*
 * dma_resv_get_list(robj)
 *
 *	Return a pointer to the shared fence list of the reservation
 *	object robj.
 *
 *	Caller must have robj locked.
 */
struct dma_resv_list *
dma_resv_get_list(struct dma_resv *robj)
{

	KASSERT(dma_resv_held(robj));
	return robj->fence;
}

/*
 * dma_resv_reserve_shared(robj, num_fences)
 *
 *	Reserve space in robj to add a shared fence. To be used only
 *	once before calling dma_resv_add_shared_fence. Currently only
 *	num_fences == 1 is supported.
 *
 *	Caller must have robj locked.
 *
 *	Internally, we start with room for four entries and double the
 *	capacity whenever we run out. This sizing policy is an
 *	implementation detail, not a guarantee.
 */
int
dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
{
	struct dma_resv_list *list, *prealloc;
	uint32_t n, nalloc;

	KASSERT(dma_resv_held(robj));
	KASSERT(num_fences == 1);

	list = robj->fence;
	prealloc = robj->robj_prealloc;

	/* If there's an existing list, check it for space. */
	if (list) {
		/* If there's too many already, give up. */
		if (list->shared_count == UINT32_MAX)
			return -ENOMEM;

		/* Add one more. */
		n = list->shared_count + 1;

		/* If there's enough for one more, we're done. */
		if (n <= list->shared_max)
			return 0;
	} else {
		/* No list already. We need space for 1. */
		n = 1;
	}

	/* If not, maybe there's a preallocated list ready. */
	if (prealloc != NULL) {
		/* If there's enough room in it, stop here. */
		if (n <= prealloc->shared_max)
			return 0;

		/* Try to double its capacity. */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;

		/* Swap the new preallocated list and free the old one. */
		objlist_free(robj->robj_prealloc);
		robj->robj_prealloc = prealloc;
	} else {
		/* Start with some spare. */
		nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
		prealloc = objlist_tryalloc(nalloc);
		if (prealloc == NULL)
			return -ENOMEM;
		/* Save the new preallocated list. */
		robj->robj_prealloc = prealloc;
	}

	/* Success! */
	return 0;
}

struct dma_resv_write_ticket {
};

/*
 * dma_resv_write_begin(robj, ticket)
 *
 *	Begin an atomic batch of writes to robj, and initialize opaque
 *	ticket for it. The ticket must be passed to
 *	dma_resv_write_commit to commit the writes.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier. Does
 *	NOT serve as an acquire operation, however.
 */
static void
dma_resv_write_begin(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_write_commit(robj, ticket)
 *
 *	Commit an atomic batch of writes to robj begun with the call to
 *	dma_resv_write_begin that returned ticket.
 *
 *	Caller must have robj locked.
 *
 *	Implies membar_producer, i.e. store-before-store barrier. Does
 *	NOT serve as a release operation, however.
 */
static void
dma_resv_write_commit(struct dma_resv *robj,
    struct dma_resv_write_ticket *ticket)
{

	KASSERT(dma_resv_held(robj));

	write_seqcount_end(&robj->seq);
}

struct dma_resv_read_ticket {
	unsigned version;
};

/*
 * dma_resv_read_begin(robj, ticket)
 *
 *	Begin a read section, and initialize opaque ticket for it. The
 *	ticket must be passed to dma_resv_read_valid, and the caller
 *	must be prepared to retry reading if it fails.
 */
static void
dma_resv_read_begin(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	ticket->version = read_seqcount_begin(&robj->seq);
}

/*
 * dma_resv_read_valid(robj, ticket)
 *
 *	Test whether the read section is still valid. Return true on
 *	success, or false if the read ticket has been invalidated and
 *	the read must be retried.
 */
static bool
dma_resv_read_valid(const struct dma_resv *robj,
    struct dma_resv_read_ticket *ticket)
{

	return !read_seqcount_retry(&robj->seq, ticket->version);
}
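
/*
 * The read ticket is used in the canonical seqlock pattern: take a
 * snapshot inside an RCU read section, then check the ticket and retry
 * from the top if a writer raced with us. A minimal sketch of that
 * pattern, with a hypothetical consume() standing in for whatever the
 * caller does with the snapshot:
 *
 *	struct dma_resv_read_ticket ticket;
 *	struct dma_fence *fence;
 *
 * retry:
 *	rcu_read_lock();
 *	dma_resv_read_begin(robj, &ticket);
 *	fence = atomic_load_consume(&robj->fence_excl);
 *	if (!dma_resv_read_valid(robj, &ticket)) {
 *		rcu_read_unlock();
 *		goto retry;
 *	}
 *	consume(fence);
 *	rcu_read_unlock();
 */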

/*
 * dma_resv_add_excl_fence(robj, fence)
 *
 *	Empty and release all of robj's shared fences, and clear and
 *	release its exclusive fence. If fence is nonnull, acquire a
 *	reference to it and save it as robj's exclusive fence.
 *
 *	Caller must have robj locked.
 */
void
dma_resv_add_excl_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_fence *old_fence = robj->fence_excl;
	struct dma_resv_list *old_list = robj->fence;
	uint32_t old_shared_count;
	struct dma_resv_write_ticket ticket;

	KASSERT(dma_resv_held(robj));

	/*
	 * If we are setting rather than just removing a fence, acquire
	 * a reference for ourselves.
	 */
	if (fence)
		(void)dma_fence_get(fence);

	/* If there are any shared fences, remember how many. */
	if (old_list)
		old_shared_count = old_list->shared_count;

	/* Begin an update. Implies membar_producer for fence. */
	dma_resv_write_begin(robj, &ticket);

	/* Replace the fence and zero the shared count. */
	atomic_store_relaxed(&robj->fence_excl, fence);
	if (old_list)
		old_list->shared_count = 0;

	/* Commit the update. */
	dma_resv_write_commit(robj, &ticket);

	/* Release the old exclusive fence, if any. */
	if (old_fence) {
		dma_fence_put(old_fence);
		old_fence = NULL;	/* paranoia */
	}

	/* Release any old shared fences. */
	if (old_list) {
		while (old_shared_count--) {
			dma_fence_put(old_list->shared[old_shared_count]);
			/* paranoia */
			old_list->shared[old_shared_count] = NULL;
		}
	}
}
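
/*
 * Typical use, sketched with a hypothetical job_fence returned by some
 * driver's submission path (not part of this file): the writer takes
 * the reservation lock, installs the new exclusive fence, and drops
 * the lock; concurrent readers never see a half-updated object because
 * of the write ticket above.
 *
 *	dma_resv_lock(robj, NULL);
 *	dma_resv_add_excl_fence(robj, job_fence);
 *	dma_resv_unlock(robj);
 */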

/*
 * dma_resv_add_shared_fence(robj, fence)
 *
 *	Acquire a reference to fence and add it to robj's shared list.
 *	If any fence was already added with the same context number,
 *	release it and replace it by this one.
 *
 *	Caller must have robj locked, and must have preceded with a
 *	call to dma_resv_reserve_shared for each shared fence
 *	added.
 */
void
dma_resv_add_shared_fence(struct dma_resv *robj,
    struct dma_fence *fence)
{
	struct dma_resv_list *list = robj->fence;
	struct dma_resv_list *prealloc = robj->robj_prealloc;
	struct dma_resv_write_ticket ticket;
	struct dma_fence *replace = NULL;
	uint32_t i;

	KASSERT(dma_resv_held(robj));

	/* Acquire a reference to the fence. */
	KASSERT(fence != NULL);
	(void)dma_fence_get(fence);

	/* Check for a preallocated replacement list. */
	if (prealloc == NULL) {
		/*
		 * If there is no preallocated replacement list, then
		 * there must be room in the current list.
		 */
		KASSERT(list != NULL);
		KASSERT(list->shared_count < list->shared_max);

		/* Begin an update. Implies membar_producer for fence. */
		dma_resv_write_begin(robj, &ticket);

		/* Find a fence with the same context number. */
		for (i = 0; i < list->shared_count; i++) {
			if (list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				atomic_store_relaxed(&list->shared[i], fence);
				break;
			}
		}

		/* If we didn't find one, add it at the end. */
		if (i == list->shared_count) {
			atomic_store_relaxed(&list->shared[list->shared_count],
			    fence);
			atomic_store_relaxed(&list->shared_count,
			    list->shared_count + 1);
		}

		/* Commit the update. */
		dma_resv_write_commit(robj, &ticket);
	} else {
		/*
		 * There is a preallocated replacement list. There may
		 * not be a current list. If not, treat it as a zero-
		 * length list.
		 */
		uint32_t shared_count = (list == NULL? 0 : list->shared_count);

		/* There had better be room in the preallocated list. */
		KASSERT(shared_count < prealloc->shared_max);

		/*
		 * Copy the fences over, but replace if we find one
		 * with the same context number.
		 */
		for (i = 0; i < shared_count; i++) {
			if (replace == NULL &&
			    list->shared[i]->context == fence->context) {
				replace = list->shared[i];
				prealloc->shared[i] = fence;
			} else {
				prealloc->shared[i] = list->shared[i];
			}
		}
		prealloc->shared_count = shared_count;

		/* If we didn't find one, add it at the end. */
		if (replace == NULL)
			prealloc->shared[prealloc->shared_count++] = fence;

		/*
		 * Now ready to replace the list. Begin an update.
		 * Implies membar_producer for fence and prealloc.
		 */
		dma_resv_write_begin(robj, &ticket);

		/* Replace the list. */
		atomic_store_relaxed(&robj->fence, prealloc);
		robj->robj_prealloc = NULL;

		/* Commit the update. */
		dma_resv_write_commit(robj, &ticket);

		/*
		 * If there is an old list, free it when convenient.
		 * (We are not in a position at this point to sleep
		 * waiting for activity on all CPUs.)
		 */
		if (list)
			objlist_defer_free(list);
	}

	/* Release a fence if we replaced it. */
	if (replace) {
		dma_fence_put(replace);
		replace = NULL;	/* paranoia */
	}
}
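
/*
 * The reserve/add pair is normally used together under the lock; a
 * minimal sketch, assuming a hypothetical shared_fence the caller has
 * already obtained from somewhere:
 *
 *	int error;
 *
 *	dma_resv_lock(robj, NULL);
 *	error = dma_resv_reserve_shared(robj, 1);
 *	if (error == 0)
 *		dma_resv_add_shared_fence(robj, shared_fence);
 *	dma_resv_unlock(robj);
 *
 * dma_resv_reserve_shared may allocate and can fail, while
 * dma_resv_add_shared_fence never fails; that is why reservation and
 * insertion are split into two steps.
 */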

/*
 * dma_resv_get_excl_rcu(robj)
 *
 *	Return a reference to the exclusive fence of robj, or NULL if
 *	there is none. Caller must release it with dma_fence_put.
 *
 *	Note: Caller need not call this from an RCU read section.
 */
struct dma_fence *
dma_resv_get_excl_rcu(const struct dma_resv *robj)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&robj->fence_excl);
	rcu_read_unlock();

	return fence;
}

/*
 * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
 *
 *	Take a snapshot of robj's fences without holding its lock: on
 *	success, return 0, store a reference to the exclusive fence (or
 *	NULL) in *fencep, the number of shared fences in *nsharedp, and
 *	a newly allocated array of references to the shared fences in
 *	*sharedp. Caller must release the references and kfree the
 *	array. Return -ENOMEM on allocation failure.
 */
int
dma_resv_get_fences_rcu(const struct dma_resv *robj,
    struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
{
	const struct dma_resv_list *list = NULL;
	struct dma_fence *fence = NULL;
	struct dma_fence **shared = NULL;
	unsigned shared_alloc, shared_count, i;
	struct dma_resv_read_ticket ticket;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/*
	 * If there is a shared list, grab it. The atomic_load_consume
	 * here pairs with the membar_producer in dma_resv_write_begin
	 * to ensure the content of robj->fence is initialized before
	 * we witness the pointer.
	 */
	if ((list = atomic_load_consume(&robj->fence)) != NULL) {

		/* Check whether we have a buffer. */
		if (shared == NULL) {
			/*
			 * We don't have a buffer yet. Try to allocate
			 * one without waiting.
			 */
			shared_alloc = list->shared_max;
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_NOWAIT);
			if (shared == NULL) {
				/*
				 * Couldn't do it immediately. Back
				 * out of RCU and allocate one with
				 * waiting.
				 */
				rcu_read_unlock();
				shared = kcalloc(shared_alloc,
				    sizeof(shared[0]), GFP_KERNEL);
				if (shared == NULL)
					return -ENOMEM;
				goto top;
			}
		} else if (shared_alloc < list->shared_max) {
			/*
			 * We have a buffer but it's too small. We're
			 * already racing in this case, so just back
			 * out and wait to allocate a bigger one.
			 */
			shared_alloc = list->shared_max;
			rcu_read_unlock();
			kfree(shared);
			shared = kcalloc(shared_alloc, sizeof(shared[0]),
			    GFP_KERNEL);
			if (shared == NULL)
				return -ENOMEM;
		}

		/*
		 * We got a buffer large enough. Copy into the buffer
		 * and record the number of elements. Could safely use
		 * memcpy here, because even if we race with a writer
		 * it'll invalidate the read ticket and we'll start
		 * over, but atomic_load in a loop will pacify kcsan.
		 */
		shared_count = atomic_load_relaxed(&list->shared_count);
		for (i = 0; i < shared_count; i++)
			shared[i] = atomic_load_relaxed(&list->shared[i]);
	} else {
		/* No shared list: shared count is zero. */
		shared_count = 0;
	}

	/* If there is an exclusive fence, grab it. */
	KASSERT(fence == NULL);
	fence = atomic_load_consume(&robj->fence_excl);

	/*
	 * We are done reading from robj and list. Validate our
	 * parking ticket. If it's invalid, do not pass go and do not
	 * collect $200.
	 */
	if (!dma_resv_read_valid(robj, &ticket)) {
		fence = NULL;
		goto restart;
	}

	/*
	 * Try to get a reference to the exclusive fence, if there is
	 * one. If we can't, start over.
	 */
	if (fence) {
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
	}

	/*
	 * Try to get a reference to all of the shared fences.
	 */
	for (i = 0; i < shared_count; i++) {
		if (dma_fence_get_rcu(atomic_load_relaxed(&shared[i])) == NULL)
			goto put_restart;
	}

	/* Success! */
	rcu_read_unlock();
	*fencep = fence;
	*nsharedp = shared_count;
	*sharedp = shared;
	return 0;

put_restart:
	/* Back out. */
	while (i --> 0) {
		dma_fence_put(shared[i]);
		shared[i] = NULL;	/* paranoia */
	}
	if (fence) {
		dma_fence_put(fence);
		fence = NULL;
	}

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;
}
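
/*
 * A minimal consumer sketch: snapshot the fences, wait for each one,
 * then release the references and the array. Error handling beyond
 * the -ENOMEM case is elided.
 *
 *	struct dma_fence *excl, **shared;
 *	unsigned nshared, i;
 *
 *	if (dma_resv_get_fences_rcu(robj, &excl, &nshared, &shared))
 *		return;
 *	for (i = 0; i < nshared; i++) {
 *		(void)dma_fence_wait(shared[i], false);
 *		dma_fence_put(shared[i]);
 *	}
 *	if (excl) {
 *		(void)dma_fence_wait(excl, false);
 *		dma_fence_put(excl);
 *	}
 *	kfree(shared);
 */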

/*
 * dma_resv_copy_fences(dst, src)
 *
 *	Copy the exclusive fence and all the shared fences from src to
 *	dst.
 *
 *	Caller must have dst locked.
 */
int
dma_resv_copy_fences(struct dma_resv *dst_robj,
    const struct dma_resv *src_robj)
{
	const struct dma_resv_list *src_list;
	struct dma_resv_list *dst_list = NULL;
	struct dma_resv_list *old_list;
	struct dma_fence *fence = NULL;
	struct dma_fence *old_fence;
	uint32_t shared_count, i;
	struct dma_resv_read_ticket read_ticket;
	struct dma_resv_write_ticket write_ticket;

	KASSERT(dma_resv_held(dst_robj));

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(src_robj, &read_ticket);

	/* Get the shared list. */
	if ((src_list = atomic_load_consume(&src_robj->fence)) != NULL) {

		/* Find out how long it is. */
		shared_count = atomic_load_relaxed(&src_list->shared_count);

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(src_robj, &read_ticket))
			goto restart;

		/* Allocate a new list. */
		dst_list = objlist_tryalloc(shared_count);
		if (dst_list == NULL) {
			rcu_read_unlock();
			return -ENOMEM;
		}

		/* Copy over all fences that are not yet signalled. */
		dst_list->shared_count = 0;
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&src_list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				continue;
			}
			dst_list->shared[dst_list->shared_count++] = fence;
			fence = NULL;
		}
	}

	/* Get the exclusive fence. */
	KASSERT(fence == NULL);
	if ((fence = atomic_load_consume(&src_robj->fence_excl)) != NULL) {

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(src_robj, &read_ticket)) {
			fence = NULL;
			goto restart;
		}

		/*
		 * If it is going away, restart. Otherwise, acquire a
		 * reference to it.
		 */
		if (!dma_fence_get_rcu(fence)) {
			fence = NULL;
			goto restart;
		}
	}

	/* All done with src; exit the RCU read section. */
	rcu_read_unlock();

	/*
	 * We now have a snapshot of the shared and exclusive fences of
	 * src_robj and we have acquired references to them so they
	 * won't go away. Transfer them over to dst_robj, releasing
	 * references to any that were there.
	 */

	/* Get the old shared and exclusive fences, if any. */
	old_list = dst_robj->fence;
	old_fence = dst_robj->fence_excl;

	/*
	 * Begin an update. Implies membar_producer for dst_list and
	 * fence.
	 */
	dma_resv_write_begin(dst_robj, &write_ticket);

	/* Replace the fences. */
	atomic_store_relaxed(&dst_robj->fence, dst_list);
	atomic_store_relaxed(&dst_robj->fence_excl, fence);

	/* Commit the update. */
	dma_resv_write_commit(dst_robj, &write_ticket);

	/* Release the old exclusive fence, if any. */
	if (old_fence) {
		dma_fence_put(old_fence);
		old_fence = NULL;	/* paranoia */
	}

	/* Release any old shared fences. */
	if (old_list) {
		for (i = old_list->shared_count; i --> 0;) {
			dma_fence_put(old_list->shared[i]);
			old_list->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(old_list);
		old_list = NULL;	/* paranoia */
	}

	/* Success! */
	return 0;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	if (dst_list) {
		for (i = dst_list->shared_count; i --> 0;) {
			dma_fence_put(dst_list->shared[i]);
			dst_list->shared[i] = NULL;	/* paranoia */
		}
		objlist_free(dst_list);
		dst_list = NULL;
	}
	goto top;
}

/*
 * dma_resv_test_signaled_rcu(robj, shared)
 *
 *	If shared is true, test whether all of the shared fences are
 *	signalled, or if there are none, test whether the exclusive
 *	fence is signalled. If shared is false, test only whether the
 *	exclusive fence is signalled.
 *
 *	XXX Why does this _not_ test the exclusive fence if shared is
 *	true only if there are no shared fences? This makes no sense.
 */
bool
dma_resv_test_signaled_rcu(const struct dma_resv *robj,
    bool shared)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	bool signaled = true;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, test it. */
	if (shared && (list = atomic_load_consume(&robj->fence)) != NULL) {

		/* Find out how long it is. */
		shared_count = atomic_load_relaxed(&list->shared_count);

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled. Stop if we find any that is not
		 * signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			signaled &= dma_fence_is_signaled(fence);
			dma_fence_put(fence);
			fence = NULL;
			if (!signaled)
				goto out;
		}
	}

	/* If there is an exclusive fence, test it. */
	KASSERT(fence == NULL);
	if ((fence = atomic_load_consume(&robj->fence_excl)) != NULL) {

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket)) {
			fence = NULL;
			goto restart;
		}

		/*
		 * If it is going away, restart. Otherwise, acquire a
		 * reference to it to test whether it is signalled.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		signaled &= dma_fence_is_signaled(fence);
		dma_fence_put(fence);
		fence = NULL;
		if (!signaled)
			goto out;
	}

out:	KASSERT(fence == NULL);
	rcu_read_unlock();
	return signaled;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;
}

/*
 * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
 *
 *	If shared is true, wait for all of the shared fences to be
 *	signalled, or if there are none, wait for the exclusive fence
 *	to be signalled. If shared is false, wait only for the
 *	exclusive fence to be signalled. If timeout is zero, don't
 *	wait, only test.
 *
 *	XXX Why does this _not_ wait for the exclusive fence if shared
 *	is true only if there are no shared fences? This makes no
 *	sense.
 */
long
dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
    bool shared, bool intr, unsigned long timeout)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	long ret;

	if (timeout == 0)
		return dma_resv_test_signaled_rcu(robj, shared);

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If shared is requested and there is a shared list, wait on it. */
	if (shared && (list = atomic_load_consume(&robj->fence)) != NULL) {

		/* Find out how long it is. */
		shared_count = list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled. Stop and wait if we find any that
		 * is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence))
				goto wait;
			dma_fence_put(fence);
			fence = NULL;
		}
	}

	/* If there is an exclusive fence, test it. */
	KASSERT(fence == NULL);
	if ((fence = atomic_load_consume(&robj->fence_excl)) != NULL) {

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket)) {
			fence = NULL;
			goto restart;
		}

		/*
		 * If it is going away, restart. Otherwise, acquire a
		 * reference to it to test whether it is signalled. If
		 * not, wait for it.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		if (!dma_fence_is_signaled(fence))
			goto wait;
		dma_fence_put(fence);
		fence = NULL;
	}

	/* Success! Return the number of ticks left. */
	rcu_read_unlock();
	KASSERT(fence == NULL);
	return timeout;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;

wait:
	/*
	 * Exit the RCU read section, wait for it, and release the
	 * fence when we're done. If we time out or fail, bail.
	 * Otherwise, go back to the top.
	 */
	KASSERT(fence != NULL);
	rcu_read_unlock();
	ret = dma_fence_wait_timeout(fence, intr, timeout);
	dma_fence_put(fence);
	fence = NULL;
	if (ret <= 0)
		return ret;
	KASSERT(ret <= timeout);
	timeout = ret;
	goto top;
}
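
/*
 * Callers typically pass a relative timeout in ticks and classify the
 * result; a minimal sketch, with TIMEOUT_TICKS and error standing in
 * as hypothetical caller-side names:
 *
 *	long ret;
 *
 *	ret = dma_resv_wait_timeout_rcu(robj, true, true, TIMEOUT_TICKS);
 *	if (ret < 0)
 *		error = ret;
 *	else if (ret == 0)
 *		error = -ETIMEDOUT;
 *	else
 *		error = 0;
 *
 * A negative return is an error (e.g. -ERESTART/-EINTR when intr is
 * true), zero means the wait timed out, and a positive return is the
 * number of ticks remaining with all requested fences signalled.
 */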

/*
 * dma_resv_poll_init(rpoll)
 *
 *	Initialize reservation poll state.
 */
void
dma_resv_poll_init(struct dma_resv_poll *rpoll)
{

	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
	selinit(&rpoll->rp_selq);
	rpoll->rp_claimed = 0;
}

/*
 * dma_resv_poll_fini(rpoll)
 *
 *	Release any resource associated with reservation poll state.
 */
void
dma_resv_poll_fini(struct dma_resv_poll *rpoll)
{

	KASSERT(rpoll->rp_claimed == 0);
	seldestroy(&rpoll->rp_selq);
	mutex_destroy(&rpoll->rp_lock);
}

/*
 * dma_resv_poll_cb(fence, fcb)
 *
 *	Callback to notify a reservation poll that a fence has
 *	completed. Notify any waiters and allow the next poller to
 *	claim the callback.
 *
 *	If one thread is waiting for the exclusive fence only, and we
 *	spuriously notify them about a shared fence, tough.
 */
static void
dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
{
	struct dma_resv_poll *rpoll = container_of(fcb,
	    struct dma_resv_poll, rp_fcb);

	mutex_enter(&rpoll->rp_lock);
	selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
	rpoll->rp_claimed = 0;
	mutex_exit(&rpoll->rp_lock);
}

/*
 * dma_resv_do_poll(robj, events, rpoll)
 *
 *	Poll for reservation object events using the reservation poll
 *	state in rpoll:
 *
 *	- POLLOUT	wait for all fences shared and exclusive
 *	- POLLIN	wait for the exclusive fence
 *
 *	Return the subset of events in events that are ready. If any
 *	are requested but not ready, arrange to be notified with
 *	selnotify when they are.
 */
int
dma_resv_do_poll(const struct dma_resv *robj, int events,
    struct dma_resv_poll *rpoll)
{
	struct dma_resv_read_ticket ticket;
	struct dma_resv_list *list;
	struct dma_fence *fence = NULL;
	uint32_t i, shared_count;
	int revents;
	bool recorded = false;	/* curlwp is on the selq */
	bool claimed = false;	/* we claimed the callback */
	bool callback = false;	/* we requested a callback */

	/*
	 * Start with the maximal set of events that could be ready.
	 * We will eliminate the events that are definitely not ready
	 * as we go at the same time as we add callbacks to notify us
	 * that they may be ready.
	 */
	revents = events & (POLLIN|POLLOUT);
	if (revents == 0)
		return 0;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If we want to wait for all fences, get the shared list. */
	if ((events & POLLOUT) != 0 &&
	    (list = atomic_load_consume(&robj->fence)) != NULL) do {

		/* Find out how long it is. */
		shared_count = list->shared_count;

		/*
		 * Make sure we saw a consistent snapshot of the list
		 * pointer and length.
		 */
		if (!dma_resv_read_valid(robj, &ticket))
			goto restart;

		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled. Stop and request a callback if we
		 * find any that is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}

		/* If all shared fences have been signalled, move on. */
		if (i == shared_count)
			break;

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded)
			goto record;

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			revents &= ~POLLOUT;
			break;
		}

		/*
		 * Otherwise, find the first fence that is not
		 * signalled, request the callback, and clear POLLOUT
		 * from the possible ready events. If they are all
		 * signalled, leave POLLOUT set; we will simulate the
		 * callback later.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
				dma_resv_poll_cb)) {
				dma_fence_put(fence);
				fence = NULL;
				revents &= ~POLLOUT;
				callback = true;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}
	} while (0);

	/* We always wait for at least the exclusive fence, so get it. */
	KASSERT(fence == NULL);
	if ((fence = atomic_load_consume(&robj->fence_excl)) != NULL) do {

		/*
		 * Make sure we saw a consistent snapshot of the fence.
		 *
		 * XXX I'm not actually sure this is necessary since
		 * pointer writes are supposed to be atomic.
		 */
		if (!dma_resv_read_valid(robj, &ticket)) {
			fence = NULL;
			goto restart;
		}

		/*
		 * If it is going away, restart. Otherwise, acquire a
		 * reference to it to test whether it is signalled. If
		 * not, stop and request a callback.
		 */
		if ((fence = dma_fence_get_rcu(fence)) == NULL)
			goto restart;
		if (dma_fence_is_signaled(fence)) {
			dma_fence_put(fence);
			fence = NULL;
			break;
		}

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded) {
			dma_fence_put(fence);
			fence = NULL;
			goto record;
		}

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			dma_fence_put(fence);
			fence = NULL;
			revents = 0;
			break;
		}

		/*
		 * Otherwise, try to request the callback, and clear
		 * all possible ready events. If the fence has been
		 * signalled in the interim, leave the events set; we
		 * will simulate the callback later.
		 */
		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
			dma_resv_poll_cb)) {
			dma_fence_put(fence);
			fence = NULL;
			revents = 0;
			callback = true;
			break;
		}
		dma_fence_put(fence);
		fence = NULL;
	} while (0);
	KASSERT(fence == NULL);

	/* All done reading the fences. */
	rcu_read_unlock();

	if (claimed && !callback) {
		/*
		 * We claimed the callback but we didn't actually
		 * request it because a fence was signalled while we
		 * were claiming it. Call it ourselves now. The
		 * callback doesn't use the fence nor rely on holding
		 * any of the fence locks, so this is safe.
		 */
		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
	}
	return revents;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;

record:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	mutex_enter(&rpoll->rp_lock);
	selrecord(curlwp, &rpoll->rp_selq);
	if (!rpoll->rp_claimed)
		claimed = rpoll->rp_claimed = true;
	mutex_exit(&rpoll->rp_lock);
	recorded = true;
	goto top;
}
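
/*
 * A driver's poll entry point would normally keep one dma_resv_poll
 * per open file and hand it here; a minimal sketch, assuming a
 * hypothetical softc with sc_resv and sc_rpoll members:
 *
 *	static int
 *	mydrv_poll(struct mydrv_softc *sc, int events)
 *	{
 *
 *		return dma_resv_do_poll(&sc->sc_resv, events,
 *		    &sc->sc_rpoll);
 *	}
 */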

/*
 * dma_resv_kqfilter(robj, kn, rpoll)
 *
 *	Kqueue filter for reservation objects. Currently not
 *	implemented because the logic to implement it is nontrivial,
 *	and userland will presumably never use it, so it would be
 *	dangerous to add never-tested complex code paths to the kernel.
 */
int
dma_resv_kqfilter(const struct dma_resv *robj,
    struct knote *kn, struct dma_resv_poll *rpoll)
{

	return EINVAL;
}
