linux_dma_resv.c revision 1.15 1 /* $NetBSD: linux_dma_resv.c,v 1.15 2021/12/19 12:26:30 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Taylor R. Campbell.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_dma_resv.c,v 1.15 2021/12/19 12:26:30 riastradh Exp $");
34
35 #include <sys/param.h>
36 #include <sys/poll.h>
37 #include <sys/select.h>
38
39 #include <linux/dma-fence.h>
40 #include <linux/dma-resv.h>
41 #include <linux/seqlock.h>
42 #include <linux/ww_mutex.h>
43
44 DEFINE_WW_CLASS(reservation_ww_class __cacheline_aligned);
45
46 static struct dma_resv_list *
47 objlist_tryalloc(uint32_t n)
48 {
49 struct dma_resv_list *list;
50
51 list = kmem_alloc(offsetof(typeof(*list), shared[n]), KM_NOSLEEP);
52 if (list == NULL)
53 return NULL;
54 list->shared_max = n;
55
56 return list;
57 }
58
59 static void
60 objlist_free(struct dma_resv_list *list)
61 {
62 uint32_t n = list->shared_max;
63
64 kmem_free(list, offsetof(typeof(*list), shared[n]));
65 }
66
67 static void
68 objlist_free_cb(struct rcu_head *rcu)
69 {
70 struct dma_resv_list *list = container_of(rcu,
71 struct dma_resv_list, rol_rcu);
72
73 objlist_free(list);
74 }
75
/*
 * objlist_defer_free(list)
 *
 *	Schedule list to be freed once the current RCU grace period has
 *	elapsed, so concurrent RCU readers may still be traversing it.
 */
static void
objlist_defer_free(struct dma_resv_list *list)
{

	call_rcu(&list->rol_rcu, objlist_free_cb);
}
82
83 /*
84 * dma_resv_init(robj)
85 *
86 * Initialize a reservation object. Caller must later destroy it
87 * with dma_resv_fini.
88 */
89 void
90 dma_resv_init(struct dma_resv *robj)
91 {
92
93 ww_mutex_init(&robj->lock, &reservation_ww_class);
94 seqcount_init(&robj->seq);
95 robj->fence_excl = NULL;
96 robj->fence = NULL;
97 robj->robj_prealloc = NULL;
98 }
99
100 /*
101 * dma_resv_fini(robj)
102 *
103 * Destroy a reservation object, freeing any memory that had been
104 * allocated for it. Caller must have exclusive access to it.
105 */
106 void
107 dma_resv_fini(struct dma_resv *robj)
108 {
109 unsigned i;
110
111 if (robj->robj_prealloc) {
112 objlist_free(robj->robj_prealloc);
113 robj->robj_prealloc = NULL; /* paranoia */
114 }
115 if (robj->fence) {
116 for (i = 0; i < robj->fence->shared_count; i++) {
117 dma_fence_put(robj->fence->shared[i]);
118 robj->fence->shared[i] = NULL; /* paranoia */
119 }
120 objlist_free(robj->fence);
121 robj->fence = NULL; /* paranoia */
122 }
123 if (robj->fence_excl) {
124 dma_fence_put(robj->fence_excl);
125 robj->fence_excl = NULL; /* paranoia */
126 }
127 ww_mutex_destroy(&robj->lock);
128 }
129
130 /*
131 * dma_resv_lock(robj, ctx)
132 *
133 * Acquire a reservation object's lock. Return 0 on success,
134 * -EALREADY if caller already holds it, -EDEADLK if a
135 * higher-priority owner holds it and the caller must back out and
136 * retry.
137 */
138 int
139 dma_resv_lock(struct dma_resv *robj,
140 struct ww_acquire_ctx *ctx)
141 {
142
143 return ww_mutex_lock(&robj->lock, ctx);
144 }
145
146 /*
147 * dma_resv_lock_slow(robj, ctx)
148 *
149 * Acquire a reservation object's lock. Caller must not hold
150 * this lock or any others -- this is to be used in slow paths
151 * after dma_resv_lock or dma_resv_lock_interruptible has failed
152 * and the caller has backed out all other locks.
153 */
154 void
155 dma_resv_lock_slow(struct dma_resv *robj,
156 struct ww_acquire_ctx *ctx)
157 {
158
159 ww_mutex_lock_slow(&robj->lock, ctx);
160 }
161
162 /*
163 * dma_resv_lock_interruptible(robj, ctx)
164 *
165 * Acquire a reservation object's lock. Return 0 on success,
166 * -EALREADY if caller already holds it, -EDEADLK if a
167 * higher-priority owner holds it and the caller must back out and
168 * retry, -ERESTART/-EINTR if interrupted.
169 */
170 int
171 dma_resv_lock_interruptible(struct dma_resv *robj,
172 struct ww_acquire_ctx *ctx)
173 {
174
175 return ww_mutex_lock_interruptible(&robj->lock, ctx);
176 }
177
178 /*
179 * dma_resv_lock_slow_interruptible(robj, ctx)
180 *
181 * Acquire a reservation object's lock. Caller must not hold
182 * this lock or any others -- this is to be used in slow paths
183 * after dma_resv_lock or dma_resv_lock_interruptible has failed
184 * and the caller has backed out all other locks. Return 0 on
185 * success, -ERESTART/-EINTR if interrupted.
186 */
187 int
188 dma_resv_lock_slow_interruptible(struct dma_resv *robj,
189 struct ww_acquire_ctx *ctx)
190 {
191
192 return ww_mutex_lock_slow_interruptible(&robj->lock, ctx);
193 }
194
195 /*
196 * dma_resv_trylock(robj)
197 *
198 * Try to acquire a reservation object's lock without blocking.
199 * Return true on success, false on failure.
200 */
201 bool
202 dma_resv_trylock(struct dma_resv *robj)
203 {
204
205 return ww_mutex_trylock(&robj->lock);
206 }
207
208 /*
209 * dma_resv_locking_ctx(robj)
210 *
211 * Return a pointer to the ww_acquire_ctx used by the owner of
212 * the reservation object's lock, or NULL if it is either not
213 * owned or if it is locked without context.
214 */
215 struct ww_acquire_ctx *
216 dma_resv_locking_ctx(struct dma_resv *robj)
217 {
218
219 return ww_mutex_locking_ctx(&robj->lock);
220 }
221
222 /*
223 * dma_resv_unlock(robj)
224 *
225 * Release a reservation object's lock.
226 */
227 void
228 dma_resv_unlock(struct dma_resv *robj)
229 {
230
231 return ww_mutex_unlock(&robj->lock);
232 }
233
234 /*
235 * dma_resv_is_locked(robj)
236 *
237 * True if robj is locked.
238 */
239 bool
240 dma_resv_is_locked(struct dma_resv *robj)
241 {
242
243 return ww_mutex_is_locked(&robj->lock);
244 }
245
246 /*
247 * dma_resv_held(robj)
248 *
249 * True if robj is locked.
250 */
251 bool
252 dma_resv_held(struct dma_resv *robj)
253 {
254
255 return ww_mutex_is_locked(&robj->lock);
256 }
257
258 /*
259 * dma_resv_assert_held(robj)
260 *
261 * Panic if robj is not held, in DIAGNOSTIC builds.
262 */
263 void
264 dma_resv_assert_held(struct dma_resv *robj)
265 {
266
267 KASSERT(dma_resv_held(robj));
268 }
269
270 /*
271 * dma_resv_get_excl(robj)
272 *
273 * Return a pointer to the exclusive fence of the reservation
274 * object robj.
275 *
276 * Caller must have robj locked.
277 */
278 struct dma_fence *
279 dma_resv_get_excl(struct dma_resv *robj)
280 {
281
282 KASSERT(dma_resv_held(robj));
283 return robj->fence_excl;
284 }
285
286 /*
287 * dma_resv_get_list(robj)
288 *
289 * Return a pointer to the shared fence list of the reservation
290 * object robj.
291 *
292 * Caller must have robj locked.
293 */
294 struct dma_resv_list *
295 dma_resv_get_list(struct dma_resv *robj)
296 {
297
298 KASSERT(dma_resv_held(robj));
299 return robj->fence;
300 }
301
302 /*
303 * dma_resv_reserve_shared(robj)
304 *
305 * Reserve space in robj to add a shared fence. To be used only
306 * once before calling dma_resv_add_shared_fence.
307 *
308 * Caller must have robj locked.
309 *
310 * Internally, we start with room for four entries and double if
311 * we don't have enough. This is not guaranteed.
312 */
313 int
314 dma_resv_reserve_shared(struct dma_resv *robj, unsigned int num_fences)
315 {
316 struct dma_resv_list *list, *prealloc;
317 uint32_t n, nalloc;
318
319 KASSERT(dma_resv_held(robj));
320 KASSERT(num_fences == 1);
321
322 list = robj->fence;
323 prealloc = robj->robj_prealloc;
324
325 /* If there's an existing list, check it for space. */
326 if (list) {
327 /* If there's too many already, give up. */
328 if (list->shared_count == UINT32_MAX)
329 return -ENOMEM;
330
331 /* Add one more. */
332 n = list->shared_count + 1;
333
334 /* If there's enough for one more, we're done. */
335 if (n <= list->shared_max)
336 return 0;
337 } else {
338 /* No list already. We need space for 1. */
339 n = 1;
340 }
341
342 /* If not, maybe there's a preallocated list ready. */
343 if (prealloc != NULL) {
344 /* If there's enough room in it, stop here. */
345 if (n <= prealloc->shared_max)
346 return 0;
347
348 /* Try to double its capacity. */
349 nalloc = n > UINT32_MAX/2 ? UINT32_MAX : 2*n;
350 prealloc = objlist_tryalloc(nalloc);
351 if (prealloc == NULL)
352 return -ENOMEM;
353
354 /* Swap the new preallocated list and free the old one. */
355 objlist_free(robj->robj_prealloc);
356 robj->robj_prealloc = prealloc;
357 } else {
358 /* Start with some spare. */
359 nalloc = n > UINT32_MAX/2 ? UINT32_MAX : MAX(2*n, 4);
360 prealloc = objlist_tryalloc(nalloc);
361 if (prealloc == NULL)
362 return -ENOMEM;
363 /* Save the new preallocated list. */
364 robj->robj_prealloc = prealloc;
365 }
366
367 /* Success! */
368 return 0;
369 }
370
/*
 * Opaque token pairing a dma_resv_write_begin call with its
 * dma_resv_write_commit.  Currently carries no state; the seqcount
 * embedded in the dma_resv itself tracks the write section.
 */
struct dma_resv_write_ticket {
};
373
374 /*
375 * dma_resv_write_begin(robj, ticket)
376 *
377 * Begin an atomic batch of writes to robj, and initialize opaque
378 * ticket for it. The ticket must be passed to
379 * dma_resv_write_commit to commit the writes.
380 *
381 * Caller must have robj locked.
382 *
383 * Implies membar_producer, i.e. store-before-store barrier. Does
384 * NOT serve as an acquire operation, however.
385 */
386 static void
387 dma_resv_write_begin(struct dma_resv *robj,
388 struct dma_resv_write_ticket *ticket)
389 {
390
391 KASSERT(dma_resv_held(robj));
392
393 write_seqcount_begin(&robj->seq);
394 }
395
396 /*
397 * dma_resv_write_commit(robj, ticket)
398 *
399 * Commit an atomic batch of writes to robj begun with the call to
400 * dma_resv_write_begin that returned ticket.
401 *
402 * Caller must have robj locked.
403 *
404 * Implies membar_producer, i.e. store-before-store barrier. Does
405 * NOT serve as a release operation, however.
406 */
407 static void
408 dma_resv_write_commit(struct dma_resv *robj,
409 struct dma_resv_write_ticket *ticket)
410 {
411
412 KASSERT(dma_resv_held(robj));
413
414 write_seqcount_end(&robj->seq);
415 }
416
/*
 * Opaque token recording the seqcount generation observed at
 * dma_resv_read_begin, checked later by dma_resv_read_valid.
 */
struct dma_resv_read_ticket {
	unsigned version;	/* seqcount generation at read_begin */
};
420
421 /*
422 * dma_resv_read_begin(robj, ticket)
423 *
424 * Begin a read section, and initialize opaque ticket for it. The
425 * ticket must be passed to dma_resv_read_exit, and the
426 * caller must be prepared to retry reading if it fails.
427 */
428 static void
429 dma_resv_read_begin(const struct dma_resv *robj,
430 struct dma_resv_read_ticket *ticket)
431 {
432
433 ticket->version = read_seqcount_begin(&robj->seq);
434 }
435
436 /*
437 * dma_resv_read_valid(robj, ticket)
438 *
439 * Test whether the read sections are valid. Return true on
440 * success, or false on failure if the read ticket has been
441 * invalidated.
442 */
443 static bool
444 dma_resv_read_valid(const struct dma_resv *robj,
445 struct dma_resv_read_ticket *ticket)
446 {
447
448 return !read_seqcount_retry(&robj->seq, ticket->version);
449 }
450
451 /*
452 * dma_resv_get_shared_reader(robj, listp, shared_countp, ticket)
453 *
454 * Set *listp and *shared_countp to a snapshot of the pointer to
455 * and length of the shared fence list of robj and return true, or
456 * set them to NULL/0 and return false if a writer intervened so
457 * the caller must start over.
458 *
459 * Both *listp and *shared_countp are unconditionally initialized
460 * on return. They may be NULL/0 even on success, if there is no
461 * shared list at the moment. Does not take any fence references.
462 */
463 static bool
464 dma_resv_get_shared_reader(const struct dma_resv *robj,
465 const struct dma_resv_list **listp, unsigned *shared_countp,
466 struct dma_resv_read_ticket *ticket)
467 {
468 struct dma_resv_list *list;
469 unsigned shared_count = 0;
470
471 /*
472 * Get the list and, if it is present, its length. If the list
473 * is present, it has a valid length. The atomic_load_consume
474 * pairs with the membar_producer in dma_resv_write_begin.
475 */
476 list = atomic_load_consume(&robj->fence);
477 shared_count = list ? atomic_load_relaxed(&list->shared_count) : 0;
478
479 /*
480 * We are done reading from robj and list. Validate our
481 * parking ticket. If it's invalid, do not pass go and do not
482 * collect $200.
483 */
484 if (!dma_resv_read_valid(robj, ticket))
485 goto fail;
486
487 /* Success! */
488 *listp = list;
489 *shared_countp = shared_count;
490 return true;
491
492 fail: *listp = NULL;
493 *shared_countp = 0;
494 return false;
495 }
496
497 /*
498 * dma_resv_get_excl_reader(robj, fencep, ticket)
499 *
500 * Set *fencep to the exclusive fence of robj and return true, or
501 * set it to NULL and return false if either
502 * (a) a writer intervened, or
503 * (b) the fence is scheduled to be destroyed after this RCU grace
504 * period,
505 * in either case meaning the caller must restart.
506 *
507 * The value of *fencep is unconditionally initialized on return.
508 * It may be NULL, if there is no exclusive fence at the moment.
509 * If nonnull, *fencep is referenced; caller must dma_fence_put.
510 */
511 static bool
512 dma_resv_get_excl_reader(const struct dma_resv *robj,
513 struct dma_fence **fencep,
514 struct dma_resv_read_ticket *ticket)
515 {
516 struct dma_fence *fence;
517
518 /*
519 * Get the candidate fence pointer. The atomic_load_consume
520 * pairs with the membar_consumer in dma_resv_write_begin.
521 */
522 fence = atomic_load_consume(&robj->fence_excl);
523
524 /*
525 * The load of robj->fence_excl is atomic, but the caller may
526 * have previously loaded the shared fence list and should
527 * restart if its view of the entire dma_resv object is not a
528 * consistent snapshot.
529 */
530 if (!dma_resv_read_valid(robj, ticket))
531 goto fail;
532
533 /*
534 * If the fence is already scheduled to away after this RCU
535 * read section, give up. Otherwise, take a reference so it
536 * won't go away until after dma_fence_put.
537 */
538 if (fence != NULL &&
539 (fence = dma_fence_get_rcu(fence)) == NULL)
540 goto fail;
541
542 /* Success! */
543 *fencep = fence;
544 return true;
545
546 fail: *fencep = NULL;
547 return false;
548 }
549
550 /*
551 * dma_resv_add_excl_fence(robj, fence)
552 *
553 * Empty and release all of robj's shared fences, and clear and
554 * release its exclusive fence. If fence is nonnull, acquire a
555 * reference to it and save it as robj's exclusive fence.
556 *
557 * Caller must have robj locked.
558 */
559 void
560 dma_resv_add_excl_fence(struct dma_resv *robj,
561 struct dma_fence *fence)
562 {
563 struct dma_fence *old_fence = robj->fence_excl;
564 struct dma_resv_list *old_list = robj->fence;
565 uint32_t old_shared_count;
566 struct dma_resv_write_ticket ticket;
567
568 KASSERT(dma_resv_held(robj));
569
570 /*
571 * If we are setting rather than just removing a fence, acquire
572 * a reference for ourselves.
573 */
574 if (fence)
575 (void)dma_fence_get(fence);
576
577 /* If there are any shared fences, remember how many. */
578 if (old_list)
579 old_shared_count = old_list->shared_count;
580
581 /* Begin an update. Implies membar_producer for fence. */
582 dma_resv_write_begin(robj, &ticket);
583
584 /* Replace the fence and zero the shared count. */
585 atomic_store_relaxed(&robj->fence_excl, fence);
586 if (old_list)
587 old_list->shared_count = 0;
588
589 /* Commit the update. */
590 dma_resv_write_commit(robj, &ticket);
591
592 /* Release the old exclusive fence, if any. */
593 if (old_fence) {
594 dma_fence_put(old_fence);
595 old_fence = NULL; /* paranoia */
596 }
597
598 /* Release any old shared fences. */
599 if (old_list) {
600 while (old_shared_count--) {
601 dma_fence_put(old_list->shared[old_shared_count]);
602 /* paranoia */
603 old_list->shared[old_shared_count] = NULL;
604 }
605 }
606 }
607
608 /*
609 * dma_resv_add_shared_fence(robj, fence)
610 *
611 * Acquire a reference to fence and add it to robj's shared list.
612 * If any fence was already added with the same context number,
613 * release it and replace it by this one.
614 *
615 * Caller must have robj locked, and must have preceded with a
616 * call to dma_resv_reserve_shared for each shared fence
617 * added.
618 */
619 void
620 dma_resv_add_shared_fence(struct dma_resv *robj,
621 struct dma_fence *fence)
622 {
623 struct dma_resv_list *list = robj->fence;
624 struct dma_resv_list *prealloc = robj->robj_prealloc;
625 struct dma_resv_write_ticket ticket;
626 struct dma_fence *replace = NULL;
627 uint32_t i;
628
629 KASSERT(dma_resv_held(robj));
630
631 /* Acquire a reference to the fence. */
632 KASSERT(fence != NULL);
633 (void)dma_fence_get(fence);
634
635 /* Check for a preallocated replacement list. */
636 if (prealloc == NULL) {
637 /*
638 * If there is no preallocated replacement list, then
639 * there must be room in the current list.
640 */
641 KASSERT(list != NULL);
642 KASSERT(list->shared_count < list->shared_max);
643
644 /* Begin an update. Implies membar_producer for fence. */
645 dma_resv_write_begin(robj, &ticket);
646
647 /* Find a fence with the same context number. */
648 for (i = 0; i < list->shared_count; i++) {
649 if (list->shared[i]->context == fence->context) {
650 replace = list->shared[i];
651 atomic_store_relaxed(&list->shared[i], fence);
652 break;
653 }
654 }
655
656 /* If we didn't find one, add it at the end. */
657 if (i == list->shared_count) {
658 atomic_store_relaxed(&list->shared[list->shared_count],
659 fence);
660 atomic_store_relaxed(&list->shared_count,
661 list->shared_count + 1);
662 }
663
664 /* Commit the update. */
665 dma_resv_write_commit(robj, &ticket);
666 } else {
667 /*
668 * There is a preallocated replacement list. There may
669 * not be a current list. If not, treat it as a zero-
670 * length list.
671 */
672 uint32_t shared_count = (list == NULL? 0 : list->shared_count);
673
674 /* There had better be room in the preallocated list. */
675 KASSERT(shared_count < prealloc->shared_max);
676
677 /*
678 * Copy the fences over, but replace if we find one
679 * with the same context number.
680 */
681 for (i = 0; i < shared_count; i++) {
682 if (replace == NULL &&
683 list->shared[i]->context == fence->context) {
684 replace = list->shared[i];
685 prealloc->shared[i] = fence;
686 } else {
687 prealloc->shared[i] = list->shared[i];
688 }
689 }
690 prealloc->shared_count = shared_count;
691
692 /* If we didn't find one, add it at the end. */
693 if (replace == NULL)
694 prealloc->shared[prealloc->shared_count++] = fence;
695
696 /*
697 * Now ready to replace the list. Begin an update.
698 * Implies membar_producer for fence and prealloc.
699 */
700 dma_resv_write_begin(robj, &ticket);
701
702 /* Replace the list. */
703 atomic_store_relaxed(&robj->fence, prealloc);
704 robj->robj_prealloc = NULL;
705
706 /* Commit the update. */
707 dma_resv_write_commit(robj, &ticket);
708
709 /*
710 * If there is an old list, free it when convenient.
711 * (We are not in a position at this point to sleep
712 * waiting for activity on all CPUs.)
713 */
714 if (list)
715 objlist_defer_free(list);
716 }
717
718 /* Release a fence if we replaced it. */
719 if (replace) {
720 dma_fence_put(replace);
721 replace = NULL; /* paranoia */
722 }
723 }
724
725 /*
726 * dma_resv_get_excl_rcu(robj)
727 *
728 * Note: Caller need not call this from an RCU read section.
729 */
730 struct dma_fence *
731 dma_resv_get_excl_rcu(const struct dma_resv *robj)
732 {
733 struct dma_fence *fence;
734
735 rcu_read_lock();
736 fence = dma_fence_get_rcu_safe(&robj->fence_excl);
737 rcu_read_unlock();
738
739 return fence;
740 }
741
742 /*
743 * dma_resv_get_fences_rcu(robj, fencep, nsharedp, sharedp)
744 *
745 * Get a snapshot of the exclusive and shared fences of robj. The
746 * shared fences are returned as a pointer *sharedp to an array,
747 * to be freed by the caller with kfree, of *nsharedp elements.
748 *
749 * Returns zero on success, negative (Linux-style) error code on
750 * failure. On failure, *fencep, *nsharedp, and *sharedp are
751 * untouched.
752 */
753 int
754 dma_resv_get_fences_rcu(const struct dma_resv *robj,
755 struct dma_fence **fencep, unsigned *nsharedp, struct dma_fence ***sharedp)
756 {
757 const struct dma_resv_list *list = NULL;
758 struct dma_fence *fence = NULL;
759 struct dma_fence **shared = NULL;
760 unsigned shared_alloc, shared_count, i;
761 struct dma_resv_read_ticket ticket;
762
763 top: KASSERT(fence == NULL);
764
765 /* Enter an RCU read section and get a read ticket. */
766 rcu_read_lock();
767 dma_resv_read_begin(robj, &ticket);
768
769 /* If there is a shared list, grab it. */
770 if (!dma_resv_get_shared_reader(robj, &list, &shared_count, &ticket))
771 goto restart;
772 if (list != NULL) {
773
774 /* Check whether we have a buffer. */
775 if (shared == NULL) {
776 /*
777 * We don't have a buffer yet. Try to allocate
778 * one without waiting.
779 */
780 shared_alloc = list->shared_max;
781 shared = kcalloc(shared_alloc, sizeof(shared[0]),
782 GFP_NOWAIT);
783 if (shared == NULL) {
784 /*
785 * Couldn't do it immediately. Back
786 * out of RCU and allocate one with
787 * waiting.
788 */
789 rcu_read_unlock();
790 shared = kcalloc(shared_alloc,
791 sizeof(shared[0]), GFP_KERNEL);
792 if (shared == NULL)
793 return -ENOMEM;
794 goto top;
795 }
796 } else if (shared_alloc < list->shared_max) {
797 /*
798 * We have a buffer but it's too small. We're
799 * already racing in this case, so just back
800 * out and wait to allocate a bigger one.
801 */
802 shared_alloc = list->shared_max;
803 rcu_read_unlock();
804 kfree(shared);
805 shared = kcalloc(shared_alloc, sizeof(shared[0]),
806 GFP_KERNEL);
807 if (shared == NULL)
808 return -ENOMEM;
809 }
810
811 /*
812 * We got a buffer large enough. Copy into the buffer
813 * and record the number of elements. Could safely use
814 * memcpy here, because even if we race with a writer
815 * it'll invalidate the read ticket and we'll start
816 * over, but atomic_load in a loop will pacify kcsan.
817 */
818 for (i = 0; i < shared_count; i++)
819 shared[i] = atomic_load_relaxed(&list->shared[i]);
820
821 /* If anything changed while we were copying, restart. */
822 if (!dma_resv_read_valid(robj, &ticket))
823 goto restart;
824 }
825
826 /* If there is an exclusive fence, grab it. */
827 KASSERT(fence == NULL);
828 if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
829 goto restart;
830
831 /*
832 * Try to get a reference to all of the shared fences.
833 */
834 for (i = 0; i < shared_count; i++) {
835 if (dma_fence_get_rcu(atomic_load_relaxed(&shared[i])) == NULL)
836 goto put_restart;
837 }
838
839 /* Success! */
840 rcu_read_unlock();
841 *fencep = fence;
842 *nsharedp = shared_count;
843 *sharedp = shared;
844 return 0;
845
846 put_restart:
847 /* Back out. */
848 while (i --> 0) {
849 dma_fence_put(shared[i]);
850 shared[i] = NULL; /* paranoia */
851 }
852 if (fence) {
853 dma_fence_put(fence);
854 fence = NULL;
855 }
856
857 restart:
858 KASSERT(fence == NULL);
859 rcu_read_unlock();
860 goto top;
861 }
862
863 /*
864 * dma_resv_copy_fences(dst, src)
865 *
866 * Copy the exclusive fence and all the shared fences from src to
867 * dst.
868 *
869 * Caller must have dst locked.
870 */
871 int
872 dma_resv_copy_fences(struct dma_resv *dst_robj,
873 const struct dma_resv *src_robj)
874 {
875 const struct dma_resv_list *src_list;
876 struct dma_resv_list *dst_list = NULL;
877 struct dma_resv_list *old_list;
878 struct dma_fence *fence = NULL;
879 struct dma_fence *old_fence;
880 uint32_t shared_count, i;
881 struct dma_resv_read_ticket read_ticket;
882 struct dma_resv_write_ticket write_ticket;
883
884 KASSERT(dma_resv_held(dst_robj));
885
886 top: KASSERT(fence == NULL);
887
888 /* Enter an RCU read section and get a read ticket. */
889 rcu_read_lock();
890 dma_resv_read_begin(src_robj, &read_ticket);
891
892 /* Get the shared list. */
893 if (!dma_resv_get_shared_reader(src_robj, &src_list, &shared_count,
894 &read_ticket))
895 goto restart;
896 if (src_list != NULL) {
897 /* Allocate a new list. */
898 dst_list = objlist_tryalloc(shared_count);
899 if (dst_list == NULL)
900 return -ENOMEM;
901
902 /* Copy over all fences that are not yet signalled. */
903 dst_list->shared_count = 0;
904 for (i = 0; i < shared_count; i++) {
905 KASSERT(fence == NULL);
906 fence = atomic_load_relaxed(&src_list->shared[i]);
907 if ((fence = dma_fence_get_rcu(fence)) == NULL)
908 goto restart;
909 if (dma_fence_is_signaled(fence)) {
910 dma_fence_put(fence);
911 fence = NULL;
912 continue;
913 }
914 dst_list->shared[dst_list->shared_count++] = fence;
915 fence = NULL;
916 }
917
918 /* If anything changed while we were copying, restart. */
919 if (!dma_resv_read_valid(src_robj, &read_ticket))
920 goto restart;
921 }
922
923 /* Get the exclusive fence. */
924 KASSERT(fence == NULL);
925 if (!dma_resv_get_excl_reader(src_robj, &fence, &read_ticket))
926 goto restart;
927
928 /* All done with src; exit the RCU read section. */
929 rcu_read_unlock();
930
931 /*
932 * We now have a snapshot of the shared and exclusive fences of
933 * src_robj and we have acquired references to them so they
934 * won't go away. Transfer them over to dst_robj, releasing
935 * references to any that were there.
936 */
937
938 /* Get the old shared and exclusive fences, if any. */
939 old_list = dst_robj->fence;
940 old_fence = dst_robj->fence_excl;
941
942 /*
943 * Begin an update. Implies membar_producer for dst_list and
944 * fence.
945 */
946 dma_resv_write_begin(dst_robj, &write_ticket);
947
948 /* Replace the fences. */
949 atomic_store_relaxed(&dst_robj->fence, dst_list);
950 atomic_store_relaxed(&dst_robj->fence_excl, fence);
951
952 /* Commit the update. */
953 dma_resv_write_commit(dst_robj, &write_ticket);
954
955 /* Release the old exclusive fence, if any. */
956 if (old_fence) {
957 dma_fence_put(old_fence);
958 old_fence = NULL; /* paranoia */
959 }
960
961 /* Release any old shared fences. */
962 if (old_list) {
963 for (i = old_list->shared_count; i --> 0;) {
964 dma_fence_put(old_list->shared[i]);
965 old_list->shared[i] = NULL; /* paranoia */
966 }
967 objlist_free(old_list);
968 old_list = NULL; /* paranoia */
969 }
970
971 /* Success! */
972 return 0;
973
974 restart:
975 KASSERT(fence == NULL);
976 rcu_read_unlock();
977 if (dst_list) {
978 for (i = dst_list->shared_count; i --> 0;) {
979 dma_fence_put(dst_list->shared[i]);
980 dst_list->shared[i] = NULL; /* paranoia */
981 }
982 objlist_free(dst_list);
983 dst_list = NULL;
984 }
985 goto top;
986 }
987
988 /*
989 * dma_resv_test_signaled_rcu(robj, shared)
990 *
991 * If shared is true, test whether all of the shared fences are
992 * signalled, or if there are none, test whether the exclusive
993 * fence is signalled. If shared is false, test only whether the
994 * exclusive fence is signalled.
995 *
996 * XXX Why does this _not_ test the exclusive fence if shared is
997 * true only if there are no shared fences? This makes no sense.
998 */
999 bool
1000 dma_resv_test_signaled_rcu(const struct dma_resv *robj,
1001 bool shared)
1002 {
1003 struct dma_resv_read_ticket ticket;
1004 const struct dma_resv_list *list;
1005 struct dma_fence *fence = NULL;
1006 uint32_t i, shared_count;
1007 bool signaled = true;
1008
1009 top: KASSERT(fence == NULL);
1010
1011 /* Enter an RCU read section and get a read ticket. */
1012 rcu_read_lock();
1013 dma_resv_read_begin(robj, &ticket);
1014
1015 /* If shared is requested and there is a shared list, test it. */
1016 if (shared) {
1017 if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
1018 &ticket))
1019 goto restart;
1020 } else {
1021 list = NULL;
1022 shared_count = 0;
1023 }
1024 if (list != NULL) {
1025 /*
1026 * For each fence, if it is going away, restart.
1027 * Otherwise, acquire a reference to it to test whether
1028 * it is signalled. Stop if we find any that is not
1029 * signalled.
1030 */
1031 for (i = 0; i < shared_count; i++) {
1032 KASSERT(fence == NULL);
1033 fence = atomic_load_relaxed(&list->shared[i]);
1034 if ((fence = dma_fence_get_rcu(fence)) == NULL)
1035 goto restart;
1036 signaled &= dma_fence_is_signaled(fence);
1037 dma_fence_put(fence);
1038 fence = NULL;
1039 if (!signaled)
1040 goto out;
1041 }
1042
1043 /* If anything changed while we were testing, restart. */
1044 if (!dma_resv_read_valid(robj, &ticket))
1045 goto restart;
1046 }
1047 if (shared_count)
1048 goto out;
1049
1050 /* If there is an exclusive fence, test it. */
1051 KASSERT(fence == NULL);
1052 if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
1053 goto restart;
1054 if (fence != NULL) {
1055 /* Test whether it is signalled. If no, stop. */
1056 signaled &= dma_fence_is_signaled(fence);
1057 dma_fence_put(fence);
1058 fence = NULL;
1059 if (!signaled)
1060 goto out;
1061 }
1062
1063 out: KASSERT(fence == NULL);
1064 rcu_read_unlock();
1065 return signaled;
1066
1067 restart:
1068 KASSERT(fence == NULL);
1069 rcu_read_unlock();
1070 goto top;
1071 }
1072
1073 /*
1074 * dma_resv_wait_timeout_rcu(robj, shared, intr, timeout)
1075 *
1076 * If shared is true, wait for all of the shared fences to be
1077 * signalled, or if there are none, wait for the exclusive fence
1078 * to be signalled. If shared is false, wait only for the
1079 * exclusive fence to be signalled. If timeout is zero, don't
1080 * wait, only test.
1081 *
1082 * XXX Why does this _not_ wait for the exclusive fence if shared
1083 * is true only if there are no shared fences? This makes no
1084 * sense.
1085 */
1086 long
1087 dma_resv_wait_timeout_rcu(const struct dma_resv *robj,
1088 bool shared, bool intr, unsigned long timeout)
1089 {
1090 struct dma_resv_read_ticket ticket;
1091 const struct dma_resv_list *list;
1092 struct dma_fence *fence = NULL;
1093 uint32_t i, shared_count;
1094 long ret;
1095
1096 if (timeout == 0)
1097 return dma_resv_test_signaled_rcu(robj, shared);
1098
1099 top: KASSERT(fence == NULL);
1100
1101 /* Enter an RCU read section and get a read ticket. */
1102 rcu_read_lock();
1103 dma_resv_read_begin(robj, &ticket);
1104
1105 /* If shared is requested and there is a shared list, wait on it. */
1106 if (shared) {
1107 if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
1108 &ticket))
1109 goto restart;
1110 } else {
1111 list = NULL;
1112 shared_count = 0;
1113 }
1114 if (list != NULL) {
1115 /*
1116 * For each fence, if it is going away, restart.
1117 * Otherwise, acquire a reference to it to test whether
1118 * it is signalled. Stop and wait if we find any that
1119 * is not signalled.
1120 */
1121 for (i = 0; i < shared_count; i++) {
1122 KASSERT(fence == NULL);
1123 fence = atomic_load_relaxed(&list->shared[i]);
1124 if ((fence = dma_fence_get_rcu(fence)) == NULL)
1125 goto restart;
1126 if (!dma_fence_is_signaled(fence))
1127 goto wait;
1128 dma_fence_put(fence);
1129 fence = NULL;
1130 }
1131
1132 /* If anything changed while we were testing, restart. */
1133 if (!dma_resv_read_valid(robj, &ticket))
1134 goto restart;
1135 }
1136 if (shared_count)
1137 goto out;
1138
1139 /* If there is an exclusive fence, test it. */
1140 KASSERT(fence == NULL);
1141 if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
1142 goto restart;
1143 if (fence != NULL) {
1144 /* Test whether it is signalled. If no, wait. */
1145 if (!dma_fence_is_signaled(fence))
1146 goto wait;
1147 dma_fence_put(fence);
1148 fence = NULL;
1149 }
1150
1151 out: /* Success! Return the number of ticks left. */
1152 rcu_read_unlock();
1153 KASSERT(fence == NULL);
1154 return timeout;
1155
1156 restart:
1157 KASSERT(fence == NULL);
1158 rcu_read_unlock();
1159 goto top;
1160
1161 wait:
1162 /*
1163 * Exit the RCU read section, wait for it, and release the
1164 * fence when we're done. If we time out or fail, bail.
1165 * Otherwise, go back to the top.
1166 */
1167 KASSERT(fence != NULL);
1168 rcu_read_unlock();
1169 ret = dma_fence_wait_timeout(fence, intr, timeout);
1170 dma_fence_put(fence);
1171 fence = NULL;
1172 if (ret <= 0)
1173 return ret;
1174 KASSERT(ret <= timeout);
1175 timeout = ret;
1176 goto top;
1177 }
1178
/*
 * dma_resv_poll_init(rpoll)
 *
 *	Initialize reservation poll state: the lock, the select queue,
 *	and the callback-claim flag.  The lock is initialized at
 *	IPL_VM; dma_resv_poll_cb takes it, presumably because fence
 *	completion callbacks may run from interrupt context --
 *	NOTE(review): confirm required IPL against the fence
 *	signalling paths.
 */
void
dma_resv_poll_init(struct dma_resv_poll *rpoll)
{

	mutex_init(&rpoll->rp_lock, MUTEX_DEFAULT, IPL_VM);
	selinit(&rpoll->rp_selq);
	rpoll->rp_claimed = 0;	/* callback not claimed by any poller */
}
1192
/*
 * dma_resv_poll_fini(rpoll)
 *
 *	Release any resource associated with reservation poll state.
 *
 *	Caller must ensure no fence callback is still outstanding:
 *	rp_claimed must be zero, i.e. any callback claimed in
 *	dma_resv_do_poll has already fired (dma_resv_poll_cb clears
 *	the claim) or was relinquished.
 */
void
dma_resv_poll_fini(struct dma_resv_poll *rpoll)
{

	KASSERT(rpoll->rp_claimed == 0);
	/* Tear down in reverse order of dma_resv_poll_init. */
	seldestroy(&rpoll->rp_selq);
	mutex_destroy(&rpoll->rp_lock);
}
1206
1207 /*
1208 * dma_resv_poll_cb(fence, fcb)
1209 *
1210 * Callback to notify a reservation poll that a fence has
1211 * completed. Notify any waiters and allow the next poller to
1212 * claim the callback.
1213 *
1214 * If one thread is waiting for the exclusive fence only, and we
1215 * spuriously notify them about a shared fence, tough.
1216 */
1217 static void
1218 dma_resv_poll_cb(struct dma_fence *fence, struct dma_fence_cb *fcb)
1219 {
1220 struct dma_resv_poll *rpoll = container_of(fcb,
1221 struct dma_resv_poll, rp_fcb);
1222
1223 mutex_enter(&rpoll->rp_lock);
1224 selnotify(&rpoll->rp_selq, 0, NOTE_SUBMIT);
1225 rpoll->rp_claimed = 0;
1226 mutex_exit(&rpoll->rp_lock);
1227 }
1228
1229 /*
1230 * dma_resv_do_poll(robj, events, rpoll)
1231 *
1232 * Poll for reservation object events using the reservation poll
1233 * state in rpoll:
1234 *
1235 * - POLLOUT wait for all fences shared and exclusive
1236 * - POLLIN wait for the exclusive fence
1237 *
1238 * Return the subset of events in events that are ready. If any
1239 * are requested but not ready, arrange to be notified with
1240 * selnotify when they are.
1241 */
int
dma_resv_do_poll(const struct dma_resv *robj, int events,
    struct dma_resv_poll *rpoll)
{
	struct dma_resv_read_ticket ticket;
	const struct dma_resv_list *list;
	struct dma_fence *fence = NULL;	/* invariant: NULL at every label */
	uint32_t i, shared_count;
	int revents;
	bool recorded = false;	/* curlwp is on the selq */
	bool claimed = false;	/* we claimed the callback */
	bool callback = false;	/* we requested a callback */

	/*
	 * Start with the maximal set of events that could be ready.
	 * We will eliminate the events that are definitely not ready
	 * as we go at the same time as we add callbacks to notify us
	 * that they may be ready.
	 */
	revents = events & (POLLIN|POLLOUT);
	if (revents == 0)
		return 0;

top:	KASSERT(fence == NULL);

	/* Enter an RCU read section and get a read ticket. */
	rcu_read_lock();
	dma_resv_read_begin(robj, &ticket);

	/* If we want to wait for all fences, get the shared list. */
	if (events & POLLOUT) {
		if (!dma_resv_get_shared_reader(robj, &list, &shared_count,
			&ticket))
			goto restart;
	} else {
		list = NULL;
		shared_count = 0;
	}
	/* do { ... } while (0) so `break' can skip the rest of this stage. */
	if (list != NULL) do {
		/*
		 * For each fence, if it is going away, restart.
		 * Otherwise, acquire a reference to it to test whether
		 * it is signalled.  Stop and request a callback if we
		 * find any that is not signalled.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			if (!dma_fence_is_signaled(fence)) {
				dma_fence_put(fence);
				fence = NULL;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}

		/* If all shared fences have been signalled, move on. */
		if (i == shared_count)
			break;

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded)
			goto record;

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.  (rp_fcb is a single
		 * shared callback slot; only one poller may own it at
		 * a time, arbitrated by rp_claimed at `record' below.)
		 */
		if (!claimed || callback) {
			revents &= ~POLLOUT;
			break;
		}

		/*
		 * Otherwise, find the first fence that is not
		 * signalled, request the callback, and clear POLLOUT
		 * from the possible ready events.  If they are all
		 * signalled, leave POLLOUT set; we will simulate the
		 * callback later.
		 */
		for (i = 0; i < shared_count; i++) {
			KASSERT(fence == NULL);
			fence = atomic_load_relaxed(&list->shared[i]);
			if ((fence = dma_fence_get_rcu(fence)) == NULL)
				goto restart;
			/*
			 * dma_fence_add_callback returns false on
			 * success (callback installed), nonzero if the
			 * fence was already signalled.
			 */
			if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
				dma_resv_poll_cb)) {
				dma_fence_put(fence);
				fence = NULL;
				revents &= ~POLLOUT;
				callback = true;
				break;
			}
			dma_fence_put(fence);
			fence = NULL;
		}
	} while (0);

	/* We always wait for at least the exclusive fence, so get it. */
	KASSERT(fence == NULL);
	if (!dma_resv_get_excl_reader(robj, &fence, &ticket))
		goto restart;
	if (fence != NULL) do {
		/*
		 * Test whether it is signalled.  If not, stop and
		 * request a callback.
		 */
		if (dma_fence_is_signaled(fence)) {
			dma_fence_put(fence);
			fence = NULL;
			break;
		}

		/* Put ourselves on the selq if we haven't already. */
		if (!recorded) {
			dma_fence_put(fence);
			fence = NULL;
			goto record;
		}

		/*
		 * If someone else claimed the callback, or we already
		 * requested it, we're guaranteed to be notified, so
		 * assume the event is not ready.
		 */
		if (!claimed || callback) {
			dma_fence_put(fence);
			fence = NULL;
			revents = 0;
			break;
		}

		/*
		 * Otherwise, try to request the callback, and clear
		 * all possible ready events.  If the fence has been
		 * signalled in the interim, leave the events set; we
		 * will simulate the callback later.
		 */
		if (!dma_fence_add_callback(fence, &rpoll->rp_fcb,
			dma_resv_poll_cb)) {
			dma_fence_put(fence);
			fence = NULL;
			revents = 0;
			callback = true;
			break;
		}
		dma_fence_put(fence);
		fence = NULL;
	} while (0);
	KASSERT(fence == NULL);

	/* All done reading the fences. */
	rcu_read_unlock();

	if (claimed && !callback) {
		/*
		 * We claimed the callback but we didn't actually
		 * request it because a fence was signalled while we
		 * were claiming it.  Call it ourselves now.  The
		 * callback doesn't use the fence nor rely on holding
		 * any of the fence locks, so this is safe.
		 */
		dma_resv_poll_cb(NULL, &rpoll->rp_fcb);
	}
	return revents;

restart:
	KASSERT(fence == NULL);
	rcu_read_unlock();
	goto top;

record:
	/*
	 * Exit the RCU read section before taking rp_lock and
	 * selrecording; the read ticket is stale after this anyway,
	 * so we retry from the top with a fresh one.
	 */
	KASSERT(fence == NULL);
	rcu_read_unlock();
	mutex_enter(&rpoll->rp_lock);
	selrecord(curlwp, &rpoll->rp_selq);
	/* Claim the callback slot if nobody else holds it. */
	if (!rpoll->rp_claimed)
		claimed = rpoll->rp_claimed = true;
	mutex_exit(&rpoll->rp_lock);
	recorded = true;
	goto top;
}
1428
/*
 * dma_resv_kqfilter(robj, kn, rpoll)
 *
 *	Kqueue filter for reservation objects.  Currently not
 *	implemented because the logic to implement it is nontrivial,
 *	and userland will presumably never use it, so it would be
 *	dangerous to add never-tested complex code paths to the kernel.
 *
 *	Always fails with EINVAL; all arguments are ignored.
 */
int
dma_resv_kqfilter(const struct dma_resv *robj,
    struct knote *kn, struct dma_resv_poll *rpoll)
{

	return EINVAL;
}
1444