/* $NetBSD: radeon_fence.c,v 1.16 2020/02/14 04:35:20 riastradh Exp $ */

/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_fence.c,v 1.16 2020/02/14 04:35:20 riastradh Exp $");

#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#include <linux/nbsd-namespace.h>
/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */
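
/*
 * Illustrative only, not part of the driver: a caller that already holds
 * the ring would typically pair emit and wait roughly like this ("ring"
 * and the error handling are assumptions for the sketch):
 *
 *        struct radeon_fence *fence = NULL;
 *        int r;
 *
 *        r = radeon_fence_emit(rdev, &fence, ring);
 *        if (r == 0) {
 *                r = radeon_fence_wait(fence, false);
 *                radeon_fence_unref(&fence);
 *        }
 */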

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
                if (drv->cpu_addr) {
                        *drv->cpu_addr = cpu_to_le32(seq);
                }
        } else {
                WREG32(drv->scratch_reg, seq);
        }
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
        struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
        u32 seq = 0;

        if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
                if (drv->cpu_addr) {
                        seq = le32_to_cpu(*drv->cpu_addr);
                } else {
                        seq = lower_32_bits(atomic64_read(&drv->last_seq));
                }
        } else {
                seq = RREG32(drv->scratch_reg);
        }
        return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
        /*
         * Do not reset the timer here with mod_delayed_work,
         * this can livelock in an interaction with TTM delayed destroy.
         */
        queue_delayed_work(system_power_efficient_wq,
                           &rdev->fence_drv[ring].lockup_work,
                           RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
                      struct radeon_fence **fence,
                      int ring)
{
        u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];

        /* we are protected by the ring emission mutex */
        *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
        if ((*fence) == NULL) {
                return -ENOMEM;
        }
        (*fence)->rdev = rdev;
        (*fence)->seq = seq;
        (*fence)->ring = ring;
        (*fence)->is_vm_update = false;
        fence_init(&(*fence)->base, &radeon_fence_ops,
                   &rdev->fence_lock, rdev->fence_context + ring, seq);
        radeon_fence_ring_emit(rdev, ring, *fence);
        trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
        radeon_fence_schedule_check(rdev, ring);
        return 0;
}

/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also
 * used for the fence locking itself, so unlocked variants are used for
 * fence_signal and remove_wait_queue.
 */
#ifdef __NetBSD__
static int radeon_fence_check_signaled(struct radeon_fence *fence)
#else
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
#endif
{
#ifndef __NetBSD__
        struct radeon_fence *fence;
#endif
        u64 seq;

#ifndef __NetBSD__
        fence = container_of(wait, struct radeon_fence, fence_wake);
#endif
        BUG_ON(!spin_is_locked(&fence->rdev->fence_lock));

        /*
         * We cannot use radeon_fence_process here because we're already
         * in the waitqueue, in a call from wake_up_all.
         */
        seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
        if (seq >= fence->seq) {
                int ret = fence_signal_locked(&fence->base);

                if (!ret)
                        FENCE_TRACE(&fence->base, "signaled from irq context\n");
                else
                        FENCE_TRACE(&fence->base, "was already signaled\n");

                radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
#ifdef __NetBSD__
                TAILQ_REMOVE(&fence->rdev->fence_check, fence, fence_check);
#else
                __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
#endif
                fence_put(&fence->base);
        } else
                FENCE_TRACE(&fence->base, "pending\n");
        return 0;
}

#ifdef __NetBSD__
void
radeon_fence_wakeup_locked(struct radeon_device *rdev)
{
        struct radeon_fence *fence, *next;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));
        DRM_SPIN_WAKEUP_ALL(&rdev->fence_queue, &rdev->fence_lock);
        TAILQ_FOREACH_SAFE(fence, &rdev->fence_check, fence_check, next) {
                radeon_fence_check_signaled(fence);
        }
}
#endif

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring, and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
        uint64_t seq, last_seq, last_emitted;
        unsigned count_loop = 0;
        bool wake = false;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));

        /* Note there is a scenario here for an infinite loop but it's
         * very unlikely to happen. For it to happen, the current polling
         * process needs to be interrupted by another process, and that
         * other process needs to update last_seq between the atomic read
         * and the xchg of the current process.
         *
         * Moreover, for this to become an infinite loop there need to be
         * continuously new fences signaled, i.e. radeon_fence_read needs
         * to return a different value each time for both the currently
         * polling process and the other process that updates last_seq
         * between the atomic read and xchg of the current process. And the
         * value the other process sets as last_seq must be higher than
         * the seq value we just read, which means the current process
         * must be interrupted after radeon_fence_read and before the
         * atomic xchg.
         *
         * To be even safer we count the number of times we loop and
         * bail out after 10 iterations, accepting the fact that we might
         * have temporarily set last_seq not to the true last signaled
         * seq but to an older one.
         */
        last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
        do {
                last_emitted = rdev->fence_drv[ring].sync_seq[ring];
                seq = radeon_fence_read(rdev, ring);
                seq |= last_seq & 0xffffffff00000000LL;
                if (seq < last_seq) {
                        seq &= 0xffffffff;
                        seq |= last_emitted & 0xffffffff00000000LL;
                }

                if (seq <= last_seq || seq > last_emitted) {
                        break;
                }
                /* If we loop over, we don't want to return without
                 * checking if a fence is signaled, as it means that the
                 * seq we just read is different from the previous one.
                 */
                wake = true;
                last_seq = seq;
                if ((count_loop++) > 10) {
                        /* We looped over too many times; leave with the
                         * fact that we might have set an older fence
                         * seq than the current real last seq signaled
                         * by the hw.
                         */
                        break;
                }
        } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

        if (seq < last_emitted)
                radeon_fence_schedule_check(rdev, ring);

        return wake;
}
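
/*
 * Worked example of the sequence extension above, with made-up numbers
 * (not taken from any real trace): if last_seq is 0x00000001fffffff0,
 * last_emitted is 0x0000000200000010, and the hardware reports the
 * 32-bit value 0x00000005, the first OR gives 0x0000000100000005.
 * That is below last_seq, so the wrap branch rebuilds the value from
 * last_emitted's upper half, yielding 0x0000000200000005, i.e. the
 * counter correctly advances across the 32-bit boundary.
 */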

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see whether a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
        struct radeon_fence_driver *fence_drv;
        struct radeon_device *rdev;
        int ring;

        fence_drv = container_of(work, struct radeon_fence_driver,
                                 lockup_work.work);
        rdev = fence_drv->rdev;
        ring = fence_drv - &rdev->fence_drv[0];

#ifdef __NetBSD__
        spin_lock(&rdev->fence_lock);
#endif

        if (!down_read_trylock(&rdev->exclusive_lock)) {
                /* just reschedule the check if a reset is going on */
                radeon_fence_schedule_check(rdev, ring);
#ifdef __NetBSD__
                spin_unlock(&rdev->fence_lock);
#endif
                return;
        }

        if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
                unsigned long irqflags;

                fence_drv->delayed_irq = false;
                spin_lock_irqsave(&rdev->irq.lock, irqflags);
                radeon_irq_set(rdev);
                spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
        }

        if (radeon_fence_activity(rdev, ring))
#ifdef __NetBSD__
                radeon_fence_wakeup_locked(rdev);
#else
                wake_up_all(&rdev->fence_queue);
#endif

        else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {

                /* good news we believe it's a lockup */
                dev_warn(rdev->dev, "GPU lockup (current fence id "
                        "0x%016"PRIx64" last fence id 0x%016"PRIx64" on ring %d)\n",
                        (uint64_t)atomic64_read(&fence_drv->last_seq),
                        fence_drv->sync_seq[ring], ring);

                /* remember that we need a reset */
                rdev->needs_reset = true;
#ifdef __NetBSD__
                radeon_fence_wakeup_locked(rdev);
#else
                wake_up_all(&rdev->fence_queue);
#endif
        }
        up_read(&rdev->exclusive_lock);
#ifdef __NetBSD__
        spin_unlock(&rdev->fence_lock);
#endif
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
static void radeon_fence_process_locked(struct radeon_device *rdev, int ring)
{
        if (radeon_fence_activity(rdev, ring))
#ifdef __NetBSD__
                radeon_fence_wakeup_locked(rdev);
#else
                wake_up_all(&rdev->fence_queue);
#endif
}

void radeon_fence_process(struct radeon_device *rdev, int ring)
{

        spin_lock(&rdev->fence_lock);
        radeon_fence_process_locked(rdev, ring);
        spin_unlock(&rdev->fence_lock);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value). Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
                                      u64 seq, unsigned ring)
{
        BUG_ON(!spin_is_locked(&rdev->fence_lock));
        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
                return true;
        }
        /* poll new last sequence at least once */
        radeon_fence_process_locked(rdev, ring);
        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
                return true;
        }
        return false;
}

static bool radeon_fence_is_signaled(struct fence *f)
{
        struct radeon_fence *fence = to_radeon_fence(f);
        struct radeon_device *rdev = fence->rdev;
        unsigned ring = fence->ring;
        u64 seq = fence->seq;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));

        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
                return true;
        }

        if (down_read_trylock(&rdev->exclusive_lock)) {
                radeon_fence_process_locked(rdev, ring);
                up_read(&rdev->exclusive_lock);

                if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
                        return true;
                }
        }
        return false;
}

/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with the fence_queue lock held, and adds a
 * callback to fence_queue that checks if this fence is signaled, and if
 * so it signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
        struct radeon_fence *fence = to_radeon_fence(f);
        struct radeon_device *rdev = fence->rdev;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));

        if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
                return false;

        if (down_read_trylock(&rdev->exclusive_lock)) {
                radeon_irq_kms_sw_irq_get(rdev, fence->ring);

                if (radeon_fence_activity(rdev, fence->ring))
#ifdef __NetBSD__
                        radeon_fence_wakeup_locked(rdev);
#else
                        wake_up_all_locked(&rdev->fence_queue);
#endif

                /* did fence get signaled after we enabled the sw irq? */
                if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
                        radeon_irq_kms_sw_irq_put(rdev, fence->ring);
                        up_read(&rdev->exclusive_lock);
                        return false;
                }

                up_read(&rdev->exclusive_lock);
        } else {
                /* we're probably in a lockup, let's not fiddle too much */
                if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
                        rdev->fence_drv[fence->ring].delayed_irq = true;
                radeon_fence_schedule_check(rdev, fence->ring);
        }

#ifdef __NetBSD__
        TAILQ_INSERT_TAIL(&rdev->fence_check, fence, fence_check);
#else
        fence->fence_wake.flags = 0;
        fence->fence_wake.private = NULL;
        fence->fence_wake.func = radeon_fence_check_signaled;
        __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
#endif
        fence_get(f);

        FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
        return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
        if (!fence)
                return true;

        spin_lock(&fence->rdev->fence_lock);
        if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
                int ret;

                ret = fence_signal_locked(&fence->base);
                if (!ret)
                        FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
                spin_unlock(&fence->rdev->fence_lock);
                return true;
        }
        spin_unlock(&fence->rdev->fence_lock);
        return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
        unsigned i;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
                        return true;
        }
        return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics). The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number. Helper function
 * for radeon_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout_locked(struct radeon_device *rdev,
                                                 u64 *target_seq, bool intr,
                                                 long timeout)
{
        long r;
        int i;

        if (radeon_fence_any_seq_signaled(rdev, target_seq))
                return timeout;

        /* enable IRQs and tracing */
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                if (!target_seq[i])
                        continue;

                trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
                radeon_irq_kms_sw_irq_get(rdev, i);
        }

#ifdef __NetBSD__
        if (intr)
                DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
                    &rdev->fence_lock, timeout,
                    (radeon_fence_any_seq_signaled(rdev, target_seq)
                        || rdev->needs_reset));
        else
                DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
                    &rdev->fence_lock, timeout,
                    (radeon_fence_any_seq_signaled(rdev, target_seq)
                        || rdev->needs_reset));
#else
        if (intr) {
                r = wait_event_interruptible_timeout(rdev->fence_queue, (
                        radeon_fence_any_seq_signaled(rdev, target_seq)
                        || rdev->needs_reset), timeout);
        } else {
                r = wait_event_timeout(rdev->fence_queue, (
                        radeon_fence_any_seq_signaled(rdev, target_seq)
                        || rdev->needs_reset), timeout);
        }
#endif

        if (rdev->needs_reset)
                r = -EDEADLK;

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                if (!target_seq[i])
                        continue;

                radeon_irq_kms_sw_irq_put(rdev, i);
                trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
        }

        return r;
}

static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
                                          u64 *target_seq, bool intr, long timo)
{
        long r;

        spin_lock(&rdev->fence_lock);
        r = radeon_fence_wait_seq_timeout_locked(rdev, target_seq, intr, timo);
        spin_unlock(&rdev->fence_lock);

        return r;
}
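
/*
 * Sketch only, with made-up values: a caller waiting for fence 42 on the
 * GFX ring and nothing else would pass an array with every other slot
 * zero, since a zero entry means "do not wait on this ring":
 *
 *        uint64_t seq[RADEON_NUM_RINGS] = {};
 *
 *        seq[RADEON_RING_TYPE_GFX_INDEX] = 42;
 *        r = radeon_fence_wait_seq_timeout(rdev, seq, false,
 *            MAX_SCHEDULE_TIMEOUT);
 */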

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
        uint64_t seq[RADEON_NUM_RINGS] = {};
        long r;

        /*
         * This function should not be called on !radeon fences.
         * If this is the case, it would mean this function can
         * also be called on radeon fences belonging to another card.
         * exclusive_lock is not held in that case.
         */
        if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
                return fence_wait(&fence->base, intr);

        seq[fence->ring] = fence->seq;
        r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
        if (r < 0) {
                return r;
        }

        r = fence_signal(&fence->base);
        if (!r)
                FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
        return 0;
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics). The fence
 * array is indexed by ring id. @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
                          struct radeon_fence **fences,
                          bool intr)
{
        uint64_t seq[RADEON_NUM_RINGS];
        unsigned i, num_rings = 0;
        long r;

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                seq[i] = 0;

                if (!fences[i]) {
                        continue;
                }

                seq[i] = fences[i]->seq;
                ++num_rings;
        }

        /* nothing to wait for? */
        if (num_rings == 0)
                return -ENOENT;

        r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
        if (r < 0) {
                return r;
        }
        return 0;
}
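
/*
 * Illustrative caller pattern, not taken from the driver: a caller such
 * as the suballocator builds a per-ring array of candidate fences and
 * waits for the first one to pass; unused slots stay NULL and are
 * skipped above ("some_gfx_fence" and "some_dma_fence" are placeholder
 * names):
 *
 *        struct radeon_fence *fences[RADEON_NUM_RINGS] = {};
 *
 *        fences[RADEON_RING_TYPE_GFX_INDEX] = some_gfx_fence;
 *        fences[R600_RING_TYPE_DMA_INDEX] = some_dma_fence;
 *        r = radeon_fence_wait_any(rdev, fences, false);
 */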

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
        uint64_t seq[RADEON_NUM_RINGS] = {};
        long r;

        seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
        if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
                /* nothing to wait for, last_seq is
                   already the last emitted fence */
                return -ENOENT;
        }
        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
        if (r < 0)
                return r;
        return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
        uint64_t seq[RADEON_NUM_RINGS] = {};
        long r;

        seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
        if (!seq[ring])
                return 0;

        r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
        if (r < 0) {
                if (r == -EDEADLK)
                        return -EDEADLK;

                dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
                        ring, r);
        }
        return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
        fence_get(&fence->base);
        return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
        struct radeon_fence *tmp = *fence;

        *fence = NULL;
        if (tmp) {
                fence_put(&tmp->base);
        }
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring. Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
        uint64_t emitted;

        /* We are not protected by ring lock when reading the last sequence
         * but it's ok to report slightly wrong fence count here.
         */
        radeon_fence_process(rdev, ring);
        emitted = rdev->fence_drv[ring].sync_seq[ring]
                - atomic64_read(&rdev->fence_drv[ring].last_seq);
        /* to avoid 32-bit wraparound */
        if (emitted > 0x10000000) {
                emitted = 0x10000000;
        }
        return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics). If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
        struct radeon_fence_driver *fdrv;

        if (!fence) {
                return false;
        }

        if (fence->ring == dst_ring) {
                return false;
        }

        /* we are protected by the ring mutex */
        fdrv = &fence->rdev->fence_drv[dst_ring];
        if (fence->seq <= fdrv->sync_seq[fence->ring]) {
                return false;
        }

        return true;
}
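
/*
 * Sketch of how a caller pairs radeon_fence_need_sync() with
 * radeon_fence_note_sync() below (illustrative only, not driver code);
 * the semaphore emission step is summarized in words:
 *
 *        if (radeon_fence_need_sync(fence, dst_ring)) {
 *                ... emit a semaphore wait on dst_ring so it blocks
 *                    until fence->ring reaches fence->seq ...
 *                radeon_fence_note_sync(fence, dst_ring);
 *        }
 */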

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
        struct radeon_fence_driver *dst, *src;
        unsigned i;

        if (!fence) {
                return;
        }

        if (fence->ring == dst_ring) {
                return;
        }

        /* we are protected by the ring mutex */
        src = &fence->rdev->fence_drv[fence->ring];
        dst = &fence->rdev->fence_drv[dst_ring];
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                if (i == dst_ring) {
                        continue;
                }
                dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
        }
}

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
        uint64_t index;
        int r;

        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
        if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
                rdev->fence_drv[ring].scratch_reg = 0;
                if (ring != R600_RING_TYPE_UVD_INDEX) {
                        index = R600_WB_EVENT_OFFSET + ring * 4;
                        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
                        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
                                index;

                } else {
                        /* put fence directly behind firmware */
                        index = ALIGN(rdev->uvd_fw->size, 8);
                        rdev->fence_drv[ring].cpu_addr = (uint32_t *)((uint8_t *)rdev->uvd.cpu_addr + index);
                        rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
                }

        } else {
                r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
                if (r) {
                        dev_err(rdev->dev, "fence failed to get scratch register\n");
                        return r;
                }
                index = RADEON_WB_SCRATCH_OFFSET +
                        rdev->fence_drv[ring].scratch_reg -
                        rdev->scratch.reg_base;
                rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
                rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
        }
        radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
        rdev->fence_drv[ring].initialized = true;
        dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016"PRIx64" and cpu addr 0x%p\n",
                 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
        return 0;
}
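
/*
 * Offset arithmetic above, with an assumed example ring index (the
 * constant values come from radeon.h and are not restated here): for
 * ring 2 in the writeback case, index is R600_WB_EVENT_OFFSET + 8 bytes,
 * so the CPU reads the fence at wb.wb[(R600_WB_EVENT_OFFSET + 8) / 4]
 * while the GPU writes it at wb.gpu_addr plus the same byte offset.
 */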

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
        int i;

        rdev->fence_drv[ring].scratch_reg = -1;
        rdev->fence_drv[ring].cpu_addr = NULL;
        rdev->fence_drv[ring].gpu_addr = 0;
        for (i = 0; i < RADEON_NUM_RINGS; ++i)
                rdev->fence_drv[ring].sync_seq[i] = 0;
        atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
        rdev->fence_drv[ring].initialized = false;
        INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
                          radeon_fence_check_lockup);
        rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
        int ring;

#ifdef __NetBSD__
        spin_lock_init(&rdev->fence_lock);
        DRM_INIT_WAITQUEUE(&rdev->fence_queue, "radfence");
        TAILQ_INIT(&rdev->fence_check);
#else
        init_waitqueue_head(&rdev->fence_queue);
#endif
        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
                radeon_fence_driver_init_ring(rdev, ring);
        }
        if (radeon_debugfs_fence_init(rdev)) {
                dev_err(rdev->dev, "fence debugfs file creation failed\n");
        }
        return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
        int ring, r;

        mutex_lock(&rdev->ring_lock);
        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
                if (!rdev->fence_drv[ring].initialized)
                        continue;
                r = radeon_fence_wait_empty(rdev, ring);
                if (r) {
                        /* no need to trigger GPU reset as we are unloading */
                        radeon_fence_driver_force_completion(rdev, ring);
                }
                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
#ifdef __NetBSD__
                spin_lock(&rdev->fence_lock);
                radeon_fence_wakeup_locked(rdev);
                spin_unlock(&rdev->fence_lock);
#else
                wake_up_all(&rdev->fence_queue);
#endif
                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
                rdev->fence_drv[ring].initialized = false;
        }
        mutex_unlock(&rdev->ring_lock);

#ifdef __NetBSD__
        BUG_ON(!TAILQ_EMPTY(&rdev->fence_check));
        DRM_DESTROY_WAITQUEUE(&rdev->fence_queue);
        spin_lock_destroy(&rdev->fence_lock);
#endif
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure, make sure no process keeps waiting on a
 * fence that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
        if (rdev->fence_drv[ring].initialized) {
                radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
                cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
        }
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *)m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        int i, j;

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                if (!rdev->fence_drv[i].initialized)
                        continue;

                radeon_fence_process(rdev, i);

                seq_printf(m, "--- ring %d ---\n", i);
                seq_printf(m, "Last signaled fence 0x%016llx\n",
                           (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
                seq_printf(m, "Last emitted 0x%016"PRIx64"\n",
                           rdev->fence_drv[i].sync_seq[i]);

                for (j = 0; j < RADEON_NUM_RINGS; ++j) {
                        if (i != j && rdev->fence_drv[j].initialized)
                                seq_printf(m, "Last sync to ring %d 0x%016"PRIx64"\n",
                                           j, rdev->fence_drv[i].sync_seq[j]);
                }
        }
        return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;

        down_read(&rdev->exclusive_lock);
        seq_printf(m, "%d\n", rdev->needs_reset);
        rdev->needs_reset = true;
        wake_up_all(&rdev->fence_queue);
        up_read(&rdev->exclusive_lock);

        return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
        {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
        {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
        return 0;
#endif
}

static const char *radeon_fence_get_driver_name(struct fence *fence)
{
        return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct fence *f)
{
        struct radeon_fence *fence = to_radeon_fence(f);
        switch (fence->ring) {
        case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
        case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
        case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
        case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
        case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
        case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
        case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
        case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
        default: WARN_ON_ONCE(1); return "radeon.unk";
        }
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
        return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

#ifdef __NetBSD__

static void
radeon_fence_wakeup_cb(struct fence *fence, struct fence_cb *cb)
{
        struct radeon_fence *rfence = to_radeon_fence(fence);
        struct radeon_device *rdev = rfence->rdev;

        BUG_ON(!spin_is_locked(&rdev->fence_lock));
        cv_broadcast(&rdev->fence_queue);
}

static signed long
radeon_fence_default_wait(struct fence *f, bool intr, signed long timo)
{
        struct fence_cb fcb;
        struct radeon_fence *fence = to_radeon_fence(f);
        struct radeon_device *rdev = fence->rdev;
        int r;

        r = fence_add_callback(f, &fcb, radeon_fence_wakeup_cb);
        if (r) /* fence is done already */
                return timo;

        spin_lock(&rdev->fence_lock);
        if (intr) {
                DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
                    &rdev->fence_lock, timo,
                    radeon_test_signaled(fence));
        } else {
                DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
                    &rdev->fence_lock, timo,
                    radeon_test_signaled(fence));
        }
        spin_unlock(&rdev->fence_lock);

        (void)fence_remove_callback(f, &fcb);

        return r;
}

#else

struct radeon_wait_cb {
        struct fence_cb base;
        struct task_struct *task;
};

static void
radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
        struct radeon_wait_cb *wait =
                container_of(cb, struct radeon_wait_cb, base);

        wake_up_process(wait->task);
}

static signed long radeon_fence_default_wait(struct fence *f, bool intr,
                                             signed long t)
{
        struct radeon_fence *fence = to_radeon_fence(f);
        struct radeon_device *rdev = fence->rdev;
        struct radeon_wait_cb cb;

        cb.task = current;

        if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
                return t;

        while (t > 0) {
                if (intr)
                        set_current_state(TASK_INTERRUPTIBLE);
                else
                        set_current_state(TASK_UNINTERRUPTIBLE);

                /*
                 * radeon_test_signaled must be called after
                 * set_current_state to prevent a race with wake_up_process
                 */
                if (radeon_test_signaled(fence))
                        break;

                if (rdev->needs_reset) {
                        t = -EDEADLK;
                        break;
                }

                t = schedule_timeout(t);

                if (t > 0 && intr && signal_pending(current))
                        t = -ERESTARTSYS;
        }

        __set_current_state(TASK_RUNNING);
        fence_remove_callback(f, &cb.base);

        return t;
}

#endif

const struct fence_ops radeon_fence_ops = {
        .get_driver_name = radeon_fence_get_driver_name,
        .get_timeline_name = radeon_fence_get_timeline_name,
        .enable_signaling = radeon_fence_enable_signaling,
        .signaled = radeon_fence_is_signaled,
        .wait = radeon_fence_default_wait,
        .release = NULL,
};