/*	$NetBSD: radeon_fence.c,v 1.1.1.2 2018/08/27 01:34:58 riastradh Exp $	*/

/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse (at) freedesktop.org>
 *    Dave Airlie
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_fence.c,v 1.1.1.2 2018/08/27 01:34:58 riastradh Exp $");

#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization. When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed. Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &rdev->fence_drv[ring].lockup_work,
			   RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->rdev = rdev;
	(*fence)->seq = seq;
	(*fence)->ring = ring;
	(*fence)->is_vm_update = false;
	fence_init(&(*fence)->base, &radeon_fence_ops,
		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
	radeon_fence_schedule_check(rdev, ring);
	return 0;
}
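
/*
 * Example (editor's sketch, not part of the driver): a hypothetical caller
 * that already holds the ring for emission could create and track a fence
 * like this; error handling is trimmed and the GFX ring index is only an
 * illustration:
 *
 *	struct radeon_fence *fence = NULL;
 *	int r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *
 *	if (r)
 *		return r;	(-ENOMEM: no fence object was allocated)
 *	(... submit the work, then wait on or unref the fence later ...)
 */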

/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also used
 * for the fence locking itself, so unlocked variants are used for
 * fence_signal and remove_wait_queue.
 */
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct radeon_fence *fence;
	u64 seq;

	fence = container_of(wait, struct radeon_fence, fence_wake);

	/*
	 * We cannot use radeon_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
	if (seq >= fence->seq) {
		int ret = fence_signal_locked(&fence->base);

		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that other
	 * process needs to update last_seq between the atomic read and the
	 * xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop there would need
	 * to be continuously new fences signaled, i.e. radeon_fence_read
	 * needs to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process sets as last_seq must be higher than
	 * the seq value we just read, which means the current process
	 * needs to be interrupted after radeon_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * bail out after 10 loops, accepting the fact that we might
	 * have temporarily set last_seq not to the true last
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times; leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (seq < last_emitted)
		radeon_fence_schedule_check(rdev, ring);

	return wake;
}
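
/*
 * Worked example (editor's note, illustrative values only): the hardware
 * only reports the low 32 bits of the sequence, so the loop above splices
 * them onto the upper 32 bits of the last known value. Suppose
 * last_seq = 0x00000001fffffff0 and the low half has wrapped so that
 * radeon_fence_read() returns 0x00000005. Then:
 *
 *	seq = 0x0000000000000005 | (last_seq & 0xffffffff00000000)
 *	    = 0x0000000100000005
 *
 * which is below last_seq, so the wrap is detected and the upper half is
 * taken from last_emitted (say 0x0000000200000010) instead, giving
 * 0x0000000200000005, a value between last_seq and last_emitted as the
 * sanity check in the loop requires.
 */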

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see whether a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
	struct radeon_fence_driver *fence_drv;
	struct radeon_device *rdev;
	int ring;

	fence_drv = container_of(work, struct radeon_fence_driver,
				 lockup_work.work);
	rdev = fence_drv->rdev;
	ring = fence_drv - &rdev->fence_drv[0];

	if (!down_read_trylock(&rdev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		radeon_fence_schedule_check(rdev, ring);
		return;
	}

	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
		unsigned long irqflags;

		fence_drv->delayed_irq = false;
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}

	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);

	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {

		/* good news we believe it's a lockup */
		dev_warn(rdev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring], ring);

		/* remember that we need a reset */
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
	}
	up_read(&rdev->exclusive_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value). Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

static bool radeon_fence_is_signaled(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	unsigned ring = fence->ring;
	u64 seq = fence->seq;

	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_fence_process(rdev, ring);
		up_read(&rdev->exclusive_lock);

		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

		if (radeon_fence_activity(rdev, fence->ring))
			wake_up_all_locked(&rdev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
			up_read(&rdev->exclusive_lock);
			return false;
		}

		up_read(&rdev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
			rdev->fence_drv[fence->ring].delayed_irq = true;
		radeon_fence_schedule_check(rdev, fence->ring);
	}

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence)
		return true;

	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		int ret;

		ret = fence_signal(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
		return true;
	}
	return false;
}
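
/*
 * Example (editor's sketch): radeon_fence_signaled() is the non-blocking
 * check, so a hypothetical caller that cannot sleep could poll it instead
 * of calling radeon_fence_wait():
 *
 *	if (radeon_fence_signaled(fence)) {
 *		radeon_fence_unref(&fence);	(done, drop our reference)
 *	} else {
 *		(still busy, try again later or fall back to a blocking wait)
 *	}
 */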

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
			return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics). The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number. Helper function
 * for radeon_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
					  u64 *target_seq, bool intr,
					  long timeout)
{
	long r;
	int i;

	if (radeon_fence_any_seq_signaled(rdev, target_seq))
		return timeout;

	/* enable IRQs and tracing */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
		radeon_irq_kms_sw_irq_get(rdev, i);
	}

	if (intr) {
		r = wait_event_interruptible_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	} else {
		r = wait_event_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	}

	if (rdev->needs_reset)
		r = -EDEADLK;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		radeon_irq_kms_sw_irq_put(rdev, i);
		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
	}

	return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	/*
	 * This function should not be called on !radeon fences.
	 * If this is the case, it would mean this function can
	 * also be called on radeon fences belonging to another card.
	 * exclusive_lock is not held in that case.
	 */
	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
		return fence_wait(&fence->base, intr);

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}
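
/*
 * Example (editor's sketch): a hypothetical caller doing a blocking wait on
 * a fence it emitted earlier. The -EDEADLK return is the signal that a GPU
 * lockup was detected and the caller should back off so the reset path can
 * run:
 *
 *	r = radeon_fence_wait(fence, false);
 *	if (r == -EDEADLK) {
 *		(a GPU reset is pending; release locks and retry later)
 *	} else if (r) {
 *		(some other error from the wait)
 *	}
 *	radeon_fence_unref(&fence);
 */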

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics). The fence
 * array is indexed by ring id. @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for ? */
	if (num_rings == 0)
		return -ENOENT;

	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}
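
/*
 * Example (editor's sketch): the fence array must have RADEON_NUM_RINGS
 * entries and be indexed by ring id; unused slots stay NULL. A hypothetical
 * caller waiting for either a GFX or a DMA fence, whichever signals first:
 *
 *	struct radeon_fence *fences[RADEON_NUM_RINGS] = { NULL };
 *
 *	fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *	fences[R600_RING_TYPE_DMA_INDEX] = dma_fence;
 *	r = radeon_fence_wait_any(rdev, fences, false);
 */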

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
	if (!seq[ring])
		return 0;

	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		fence_put(&tmp->base);
	}
}
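
/*
 * Example (editor's sketch): radeon_fence_unref() also clears the caller's
 * pointer, so the usual pattern for keeping a fence beyond the emit call and
 * releasing it later is simply:
 *
 *	struct radeon_fence *keep = radeon_fence_ref(fence);
 *	(... use keep ...)
 *	radeon_fence_unref(&keep);	(keep is NULL afterwards)
 */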

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring. Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last sequence,
	 * but it's ok to report a slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid a 32-bit wrap around */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}
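
/*
 * Worked example (editor's note, illustrative numbers): if sync_seq[ring]
 * is 105 and last_seq is 100, five fences are outstanding and the function
 * returns 5. The clamp to 0x10000000 only matters if the 64-bit difference
 * were ever larger than that; it keeps the value well inside 32 bits when
 * it is truncated to unsigned on return.
 */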

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics). If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}
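
/*
 * Example (editor's sketch): the two helpers above are intended to be used
 * as a pair while holding the ring mutex. A hypothetical cross-ring
 * dependency check would look roughly like this, with the semaphore emission
 * itself left to the separate semaphore code:
 *
 *	if (radeon_fence_need_sync(fence, dst_ring)) {
 *		(... emit a semaphore wait for fence on dst_ring ...)
 *		radeon_fence_note_sync(fence, dst_ring);
 *	}
 */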

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;

		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}

	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}
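
/*
 * Worked example (editor's note, illustrative values): with writeback
 * enabled, each ring gets a 4-byte slot in the writeback page at
 * R600_WB_EVENT_OFFSET + ring * 4. For ring 2, and taking the offset as 0
 * purely to keep the arithmetic simple (the real constant lives in the
 * radeon headers):
 *
 *	index    = 0 + 2 * 4 = 8		(byte offset)
 *	cpu_addr = &rdev->wb.wb[8 / 4]		(third 32-bit word)
 *	gpu_addr = rdev->wb.gpu_addr + 8	(same slot, GPU view)
 *
 * so the CPU pointer and the GPU address refer to the same 32-bit fence slot.
 */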

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].initialized = false;
	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
			  radeon_fence_check_lockup);
	rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev, ring);
		}
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure, make sure no process keeps waiting on a
 * fence that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
	if (rdev->fence_drv[ring].initialized) {
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		radeon_fence_process(rdev, i);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	down_read(&rdev->exclusive_lock);
	seq_printf(m, "%d\n", rdev->needs_reset);
	rdev->needs_reset = true;
	wake_up_all(&rdev->fence_queue);
	up_read(&rdev->exclusive_lock);

	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
	return 0;
#endif
}

static const char *radeon_fence_get_driver_name(struct fence *fence)
{
	return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	switch (fence->ring) {
	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
	default: WARN_ON_ONCE(1); return "radeon.unk";
	}
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct radeon_wait_cb {
	struct fence_cb base;
	struct task_struct *task;
};

static void
radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
	struct radeon_wait_cb *wait =
		container_of(cb, struct radeon_wait_cb, base);

	wake_up_process(wait->task);
}

static signed long radeon_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	struct radeon_wait_cb cb;

	cb.task = current;

	if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
		return t;

	while (t > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * radeon_test_signaled must be called after
		 * set_current_state to prevent a race with wake_up_process
		 */
		if (radeon_test_signaled(fence))
			break;

		if (rdev->needs_reset) {
			t = -EDEADLK;
			break;
		}

		t = schedule_timeout(t);

		if (t > 0 && intr && signal_pending(current))
			t = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);
	fence_remove_callback(f, &cb.base);

	return t;
}
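
/*
 * Example (editor's sketch): this is the ->wait hook of radeon_fence_ops
 * below, so callers normally reach it through the generic fence API rather
 * than directly, e.g.:
 *
 *	long t = fence_wait_timeout(&fence->base, true, HZ);
 *	(t > 0: signaled with time left, t == 0: timed out,
 *	 t < 0: -ERESTARTSYS or -EDEADLK from the loop above)
 *
 * The callback added via fence_add_callback() wakes the sleeping task, and
 * the signaled check is deliberately made after set_current_state() to close
 * the race with wake_up_process().
 */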

const struct fence_ops radeon_fence_ops = {
	.get_driver_name = radeon_fence_get_driver_name,
	.get_timeline_name = radeon_fence_get_timeline_name,
	.enable_signaling = radeon_fence_enable_signaling,
	.signaled = radeon_fence_is_signaled,
	.wait = radeon_fence_default_wait,
	.release = NULL,
};