      1 /*	$NetBSD: radeon_fence.c,v 1.22 2021/12/19 11:52:38 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2009 Jerome Glisse.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * The above copyright notice and this permission notice (including the
     24  * next paragraph) shall be included in all copies or substantial portions
     25  * of the Software.
     26  *
     27  */
     28 /*
     29  * Authors:
     30  *    Jerome Glisse <glisse (at) freedesktop.org>
     31  *    Dave Airlie
     32  */
     33 
     34 #include <sys/cdefs.h>
     35 __KERNEL_RCSID(0, "$NetBSD: radeon_fence.c,v 1.22 2021/12/19 11:52:38 riastradh Exp $");
     36 
     37 #include <linux/atomic.h>
     38 #include <linux/firmware.h>
     39 #include <linux/kref.h>
     40 #include <linux/sched/signal.h>
     41 #include <linux/seq_file.h>
     42 #include <linux/slab.h>
     43 #include <linux/wait.h>
     44 
     45 #include <drm/drm_debugfs.h>
     46 #include <drm/drm_device.h>
     47 #include <drm/drm_file.h>
     48 
     49 #include "radeon.h"
     50 #include "radeon_reg.h"
     51 #include "radeon_trace.h"
     52 
     53 #include <linux/nbsd-namespace.h>
     54 
     55 /*
     56  * Fences
      57  * Fences mark an event in the GPU's pipeline and are used
     58  * for GPU/CPU synchronization.  When the fence is written,
     59  * it is expected that all buffers associated with that fence
     60  * are no longer in use by the associated ring on the GPU and
      61  * that the relevant GPU caches have been flushed.  Whether
     62  * we use a scratch register or memory location depends on the asic
     63  * and whether writeback is enabled.
     64  */
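/*
 * Illustrative sketch (not part of the driver): the typical lifecycle of
 * a fence as used by callers elsewhere in the driver -- emit a fence
 * after queueing work on a ring, wait for it, then drop the reference.
 * The example_* helper is hypothetical; error handling and the ring
 * locking real callers perform around radeon_fence_emit() are omitted.
 */
#if 0	/* example only */
static int
example_fence_lifecycle(struct radeon_device *rdev, int ring)
{
	struct radeon_fence *fence = NULL;
	int r;

	/* Emit a fence on the ring (callers hold the ring emission lock). */
	r = radeon_fence_emit(rdev, &fence, ring);
	if (r)
		return r;

	/* Block (interruptibly) until the GPU has passed the fence. */
	r = radeon_fence_wait(fence, true);

	/* Drop our reference; the fence is freed once fully released. */
	radeon_fence_unref(&fence);
	return r;
}
#endif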
     65 
     66 /**
     67  * radeon_fence_write - write a fence value
     68  *
     69  * @rdev: radeon_device pointer
     70  * @seq: sequence number to write
     71  * @ring: ring index the fence is associated with
     72  *
     73  * Writes a fence value to memory or a scratch register (all asics).
     74  */
     75 static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
     76 {
     77 	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
     78 	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
     79 		if (drv->cpu_addr) {
     80 			*drv->cpu_addr = cpu_to_le32(seq);
     81 		}
     82 	} else {
     83 		WREG32(drv->scratch_reg, seq);
     84 	}
     85 }
     86 
     87 /**
     88  * radeon_fence_read - read a fence value
     89  *
     90  * @rdev: radeon_device pointer
     91  * @ring: ring index the fence is associated with
     92  *
     93  * Reads a fence value from memory or a scratch register (all asics).
     94  * Returns the value of the fence read from memory or register.
     95  */
     96 static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
     97 {
     98 	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
     99 	u32 seq = 0;
    100 
    101 	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
    102 		if (drv->cpu_addr) {
    103 			seq = le32_to_cpu(*drv->cpu_addr);
    104 		} else {
    105 			seq = lower_32_bits(atomic64_read(&drv->last_seq));
    106 		}
    107 	} else {
    108 		seq = RREG32(drv->scratch_reg);
    109 	}
    110 	return seq;
    111 }
    112 
    113 /**
    114  * radeon_fence_schedule_check - schedule lockup check
    115  *
    116  * @rdev: radeon_device pointer
    117  * @ring: ring index we should work with
    118  *
    119  * Queues a delayed work item to check for lockups.
    120  */
    121 static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
    122 {
    123 	/*
    124 	 * Do not reset the timer here with mod_delayed_work,
    125 	 * this can livelock in an interaction with TTM delayed destroy.
    126 	 */
    127 	queue_delayed_work(system_power_efficient_wq,
    128 			   &rdev->fence_drv[ring].lockup_work,
    129 			   RADEON_FENCE_JIFFIES_TIMEOUT);
    130 }
    131 
    132 /**
    133  * radeon_fence_emit - emit a fence on the requested ring
    134  *
    135  * @rdev: radeon_device pointer
    136  * @fence: radeon fence object
    137  * @ring: ring index the fence is associated with
    138  *
    139  * Emits a fence command on the requested ring (all asics).
    140  * Returns 0 on success, -ENOMEM on failure.
    141  */
    142 int radeon_fence_emit(struct radeon_device *rdev,
    143 		      struct radeon_fence **fence,
    144 		      int ring)
    145 {
    146 	u64 seq;
    147 
    148 	/* we are protected by the ring emission mutex */
    149 	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
    150 	if ((*fence) == NULL) {
    151 		return -ENOMEM;
    152 	}
    153 	(*fence)->rdev = rdev;
    154 	(*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
    155 	(*fence)->ring = ring;
    156 	(*fence)->is_vm_update = false;
    157 	dma_fence_init(&(*fence)->base, &radeon_fence_ops,
    158 		       &rdev->fence_lock,
    159 		       rdev->fence_context + ring,
    160 		       seq);
    161 	radeon_fence_ring_emit(rdev, ring, *fence);
    162 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
    163 	radeon_fence_schedule_check(rdev, ring);
    164 	return 0;
    165 }
    166 
    167 /**
    168  * radeon_fence_check_signaled - callback from fence_queue
    169  *
     170  * This function is called with the fence lock held, which also serves
     171  * as the dma_fence lock, so the _locked variant of dma_fence_signal is
     172  * used and the fence is removed from the check list directly.
    173  */
    174 static int radeon_fence_check_signaled(struct radeon_fence *fence)
    175 {
    176 	u64 seq;
    177 
    178 	BUG_ON(!spin_is_locked(&fence->rdev->fence_lock));
    179 
    180 	/*
    181 	 * We cannot use radeon_fence_process here because we're already
    182 	 * in the waitqueue, in a call from wake_up_all.
    183 	 */
    184 	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
    185 	if (seq >= fence->seq) {
    186 		int ret = dma_fence_signal_locked(&fence->base);
    187 
    188 		if (!ret)
    189 			DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n");
    190 		else
    191 			DMA_FENCE_TRACE(&fence->base, "was already signaled\n");
    192 
    193 		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
    194 		TAILQ_REMOVE(&fence->rdev->fence_check, fence, fence_check);
    195 		dma_fence_put(&fence->base);
    196 	} else
    197 		DMA_FENCE_TRACE(&fence->base, "pending\n");
    198 	return 0;
    199 }
    200 
    201 void
    202 radeon_fence_wakeup_locked(struct radeon_device *rdev)
    203 {
    204 	struct radeon_fence *fence, *next;
    205 
    206 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    207 	DRM_SPIN_WAKEUP_ALL(&rdev->fence_queue, &rdev->fence_lock);
    208 	TAILQ_FOREACH_SAFE(fence, &rdev->fence_check, fence_check, next) {
    209 		radeon_fence_check_signaled(fence);
    210 	}
    211 }
    212 
    213 /**
    214  * radeon_fence_activity - check for fence activity
    215  *
    216  * @rdev: radeon_device pointer
    217  * @ring: ring index the fence is associated with
    218  *
    219  * Checks the current fence value and calculates the last
     220  * signaled fence value.  Returns true if activity occurred
     221  * on the ring and the fence_queue should be woken up.
    222  */
    223 static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
    224 {
    225 	uint64_t seq, last_seq, last_emitted;
    226 	unsigned count_loop = 0;
    227 	bool wake = false;
    228 
    229 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    230 
     231 	/* Note there is a scenario here for an infinite loop, but it is
     232 	 * very unlikely to happen.  For it to happen, the currently
     233 	 * polling process would have to be preempted by another process
     234 	 * that updates last_seq between our atomic read and our atomic
     235 	 * xchg.
     236 	 *
     237 	 * Moreover, for the loop to become infinite, new fences would
     238 	 * have to be signaled continuously, i.e. radeon_fence_read would
     239 	 * need to return a different value each time for both the
     240 	 * currently polling process and the other process that exchanges
     241 	 * last_seq between our atomic read and atomic xchg.  And the
     242 	 * value the other process stores as last_seq must be higher than
     243 	 * the seq value we just read, which means the current process
     244 	 * must be preempted after radeon_fence_read and before the
     245 	 * atomic xchg.
     246 	 *
     247 	 * To be extra safe we count the number of times we loop and
     248 	 * bail out after 10 iterations, accepting that we might have
     249 	 * temporarily set last_seq not to the true most recent value
     250 	 * but to an older one.
     251 	 */
    252 	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
    253 	do {
    254 		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
    255 		seq = radeon_fence_read(rdev, ring);
    256 		seq |= last_seq & 0xffffffff00000000LL;
    257 		if (seq < last_seq) {
    258 			seq &= 0xffffffff;
    259 			seq |= last_emitted & 0xffffffff00000000LL;
    260 		}
    261 
    262 		if (seq <= last_seq || seq > last_emitted) {
    263 			break;
    264 		}
     265 		/* If we loop again we don't want to return without
     266 		 * checking whether a fence signaled, since it means the
     267 		 * seq we just read is different from the previous one.
     268 		 */
    269 		wake = true;
    270 		last_seq = seq;
    271 		if ((count_loop++) > 10) {
     272 			/* We looped too many times; leave, accepting
     273 			 * that we might have set an older fence seq
     274 			 * than the real last seq signaled by the
     275 			 * hardware.
     276 			 */
    277 			break;
    278 		}
    279 	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
    280 
    281 	if (seq < last_emitted)
    282 		radeon_fence_schedule_check(rdev, ring);
    283 
    284 	return wake;
    285 }
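/*
 * Illustrative sketch (not part of the driver): how the 32-bit value the
 * hardware writes is extended into the 64-bit sequence space tracked in
 * software, mirroring the wraparound handling in radeon_fence_activity()
 * above.  For example, with last_seq = 0x1fffffff0 and a hardware read of
 * 0x00000005, the combined value 0x100000005 would be below last_seq, so
 * the upper 32 bits are taken from last_emitted instead.  The example_*
 * helper is hypothetical.
 */
#if 0	/* example only */
static uint64_t
example_extend_seq(uint32_t hw_seq, uint64_t last_seq, uint64_t last_emitted)
{
	uint64_t seq = (last_seq & 0xffffffff00000000ULL) | hw_seq;

	if (seq < last_seq) {
		/* The 32-bit counter wrapped; borrow the upper bits
		 * from the last emitted sequence number. */
		seq = (last_emitted & 0xffffffff00000000ULL) | hw_seq;
	}
	return seq;
}
#endif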
    286 
    287 /**
    288  * radeon_fence_check_lockup - check for hardware lockup
    289  *
    290  * @work: delayed work item
    291  *
     292  * Checks for fence activity and, if there is none, probes
     293  * the hardware to see whether a lockup occurred.
    294  */
    295 static void radeon_fence_check_lockup(struct work_struct *work)
    296 {
    297 	struct radeon_fence_driver *fence_drv;
    298 	struct radeon_device *rdev;
    299 	int ring;
    300 
    301 	fence_drv = container_of(work, struct radeon_fence_driver,
    302 				 lockup_work.work);
    303 	rdev = fence_drv->rdev;
    304 	ring = fence_drv - &rdev->fence_drv[0];
    305 
    306 	spin_lock(&rdev->fence_lock);
    307 
    308 	if (!down_read_trylock(&rdev->exclusive_lock)) {
    309 		/* just reschedule the check if a reset is going on */
    310 		radeon_fence_schedule_check(rdev, ring);
    311 		spin_unlock(&rdev->fence_lock);
    312 		return;
    313 	}
    314 
    315 	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
    316 		unsigned long irqflags;
    317 
    318 		fence_drv->delayed_irq = false;
    319 		spin_lock_irqsave(&rdev->irq.lock, irqflags);
    320 		radeon_irq_set(rdev);
    321 		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
    322 	}
    323 
    324 	if (radeon_fence_activity(rdev, ring))
    325 		radeon_fence_wakeup_locked(rdev);
    326 
    327 	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
    328 
    329 		/* good news we believe it's a lockup */
    330 		dev_warn(rdev->dev, "GPU lockup (current fence id "
    331 			 "0x%016"PRIx64" last fence id 0x%016"PRIx64" on ring %d)\n",
    332 			 (uint64_t)atomic64_read(&fence_drv->last_seq),
    333 			 fence_drv->sync_seq[ring], ring);
    334 
     335 		/* remember that we need a reset */
    336 		rdev->needs_reset = true;
    337 		radeon_fence_wakeup_locked(rdev);
    338 	}
    339 	up_read(&rdev->exclusive_lock);
    340 	spin_unlock(&rdev->fence_lock);
    341 }
    342 
    343 /**
    344  * radeon_fence_process - process a fence
    345  *
    346  * @rdev: radeon_device pointer
    347  * @ring: ring index the fence is associated with
    348  *
    349  * Checks the current fence value and wakes the fence queue
    350  * if the sequence number has increased (all asics).
    351  */
    352 static void radeon_fence_process_locked(struct radeon_device *rdev, int ring)
    353 {
    354 	if (radeon_fence_activity(rdev, ring))
    355 		radeon_fence_wakeup_locked(rdev);
    356 }
    357 
    358 void radeon_fence_process(struct radeon_device *rdev, int ring)
    359 {
    360 
    361 	spin_lock(&rdev->fence_lock);
    362 	radeon_fence_process_locked(rdev, ring);
    363 	spin_unlock(&rdev->fence_lock);
    364 }
    365 
    366 /**
    367  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
    368  *
    369  * @rdev: radeon device pointer
    370  * @seq: sequence number
    371  * @ring: ring index the fence is associated with
    372  *
     373  * Check if the last signaled fence sequence number is >= the requested
     374  * sequence number (all asics).
     375  * Returns true if the fence has signaled (current fence value
     376  * is >= requested value) or false if it has not (current fence
     377  * value is < the requested value).  Helper function for
    378  * radeon_fence_signaled().
    379  */
    380 static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
    381 				      u64 seq, unsigned ring)
    382 {
    383 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    384 	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
    385 		return true;
    386 	}
    387 	/* poll new last sequence at least once */
    388 	radeon_fence_process_locked(rdev, ring);
    389 	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
    390 		return true;
    391 	}
    392 	return false;
    393 }
    394 
    395 static bool radeon_fence_is_signaled(struct dma_fence *f)
    396 {
    397 	struct radeon_fence *fence = to_radeon_fence(f);
    398 	struct radeon_device *rdev = fence->rdev;
    399 	unsigned ring = fence->ring;
    400 	u64 seq = fence->seq;
    401 
    402 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    403 
    404 	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
    405 		return true;
    406 	}
    407 
    408 	if (down_read_trylock(&rdev->exclusive_lock)) {
    409 		radeon_fence_process_locked(rdev, ring);
    410 		up_read(&rdev->exclusive_lock);
    411 
    412 		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
    413 			return true;
    414 		}
    415 	}
    416 	return false;
    417 }
    418 
    419 /**
    420  * radeon_fence_enable_signaling - enable signalling on fence
    421  * @fence: fence
    422  *
     423  * This function is called with the fence lock held; it enables the sw
     424  * interrupt and puts the fence on the check list so that it is signaled
     425  * and removed once its sequence number has been reached.
    426  */
    427 static bool radeon_fence_enable_signaling(struct dma_fence *f)
    428 {
    429 	struct radeon_fence *fence = to_radeon_fence(f);
    430 	struct radeon_device *rdev = fence->rdev;
    431 
    432 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    433 
    434 	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
    435 		return false;
    436 
    437 	if (down_read_trylock(&rdev->exclusive_lock)) {
    438 		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
    439 
    440 		if (radeon_fence_activity(rdev, fence->ring))
    441 			radeon_fence_wakeup_locked(rdev);
    442 
    443 		/* did fence get signaled after we enabled the sw irq? */
    444 		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
    445 			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
    446 			up_read(&rdev->exclusive_lock);
    447 			return false;
    448 		}
    449 
    450 		up_read(&rdev->exclusive_lock);
    451 	} else {
     452 		/* we're probably in a lockup, let's not fiddle too much */
    453 		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
    454 			rdev->fence_drv[fence->ring].delayed_irq = true;
    455 		radeon_fence_schedule_check(rdev, fence->ring);
    456 	}
    457 
    458 	TAILQ_INSERT_TAIL(&rdev->fence_check, fence, fence_check);
    459 	dma_fence_get(f);
    460 
    461 	DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
    462 	return true;
    463 }
    464 
    465 /**
    466  * radeon_fence_signaled - check if a fence has signaled
    467  *
    468  * @fence: radeon fence object
    469  *
    470  * Check if the requested fence has signaled (all asics).
    471  * Returns true if the fence has signaled or false if it has not.
    472  */
    473 bool radeon_fence_signaled(struct radeon_fence *fence)
    474 {
    475 	if (!fence)
    476 		return true;
    477 
    478 	spin_lock(&fence->rdev->fence_lock);
    479 	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
    480 		int ret;
    481 
    482 		ret = dma_fence_signal_locked(&fence->base);
    483 		if (!ret)
    484 			DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
    485 		spin_unlock(&fence->rdev->fence_lock);
    486 		return true;
    487 	}
    488 	spin_unlock(&fence->rdev->fence_lock);
    489 	return false;
    490 }
    491 
    492 /**
    493  * radeon_fence_any_seq_signaled - check if any sequence number is signaled
    494  *
    495  * @rdev: radeon device pointer
    496  * @seq: sequence numbers
    497  *
     498  * Check if the last signaled fence sequence number is >= the requested
    499  * sequence number (all asics).
    500  * Returns true if any has signaled (current value is >= requested value)
    501  * or false if it has not. Helper function for radeon_fence_wait_seq.
    502  */
    503 static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
    504 {
    505 	unsigned i;
    506 
    507 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
    508 
    509 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
    510 		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
    511 			return true;
    512 	}
    513 	return false;
    514 }
    515 
    516 /**
    517  * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
    518  *
    519  * @rdev: radeon device pointer
    520  * @target_seq: sequence number(s) we want to wait for
     521  * @intr: use interruptible sleep
     522  * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
     523  *
     524  * Wait for the requested sequence number(s) to be written by any ring
     525  * (all asics).  Sequence number array is indexed by ring id.
     526  * @intr selects whether to use interruptible (true) or non-interruptible
     527  * (false) sleep when waiting for the sequence number.  Helper function
     528  * for radeon_fence_wait_*().
     529  * Returns the remaining time if the sequence number has passed, 0 if
     530  * the wait timed out, or an error for all other cases.
    531  * -EDEADLK is returned when a GPU lockup has been detected.
    532  */
    533 static long radeon_fence_wait_seq_timeout_locked(struct radeon_device *rdev,
    534 					  u64 *target_seq, bool intr,
    535 					  long timeout)
    536 {
    537 	long r;
    538 	int i;
    539 
    540 	if (radeon_fence_any_seq_signaled(rdev, target_seq))
    541 		return timeout;
    542 
    543 	/* enable IRQs and tracing */
    544 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
    545 		if (!target_seq[i])
    546 			continue;
    547 
    548 		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
    549 		radeon_irq_kms_sw_irq_get(rdev, i);
    550 	}
    551 
    552 	if (intr)
    553 		DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
    554 		    &rdev->fence_lock, timeout,
    555 		    (radeon_fence_any_seq_signaled(rdev, target_seq)
    556 			|| rdev->needs_reset));
    557 	else
    558 		DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
    559 		    &rdev->fence_lock, timeout,
    560 		    (radeon_fence_any_seq_signaled(rdev, target_seq)
    561 			|| rdev->needs_reset));
    562 
    563 	if (rdev->needs_reset)
    564 		r = -EDEADLK;
    565 
    566 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
    567 		if (!target_seq[i])
    568 			continue;
    569 
    570 		radeon_irq_kms_sw_irq_put(rdev, i);
    571 		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
    572 	}
    573 
    574 	return r;
    575 }
    576 
    577 static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
    578     u64 *target_seq, bool intr, long timo)
    579 {
    580 	long r;
    581 
    582 	spin_lock(&rdev->fence_lock);
    583 	r = radeon_fence_wait_seq_timeout_locked(rdev, target_seq, intr, timo);
    584 	spin_unlock(&rdev->fence_lock);
    585 
    586 	return r;
    587 }
    588 
    589 /**
    590  * radeon_fence_wait_timeout - wait for a fence to signal with timeout
    591  *
     592  * @fence: radeon fence object
     593  * @intr: use interruptible sleep
     594  * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
     595  *
     596  * Wait for the requested fence to signal (all asics).
     597  * @intr selects whether to use interruptible (true) or non-interruptible
     598  * (false) sleep when waiting for the fence.
     599  * Returns the remaining time if the fence has passed, 0 if
     600  * the wait timed out, or an error for all other cases.
    601  */
    602 long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
    603 {
    604 	uint64_t seq[RADEON_NUM_RINGS] = {};
    605 	long r;
    606 	int r_sig;
    607 
    608 	/*
    609 	 * This function should not be called on !radeon fences.
    610 	 * If this is the case, it would mean this function can
    611 	 * also be called on radeon fences belonging to another card.
    612 	 * exclusive_lock is not held in that case.
    613 	 */
    614 	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
    615 		return dma_fence_wait(&fence->base, intr);
    616 
    617 	seq[fence->ring] = fence->seq;
    618 	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
    619 	if (r <= 0) {
    620 		return r;
    621 	}
    622 
    623 	r_sig = dma_fence_signal(&fence->base);
    624 	if (!r_sig)
    625 		DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
    626 	return r;
    627 }
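/*
 * Illustrative sketch (not part of the driver): interpreting the return
 * value of a bounded wait.  The example_* helper is hypothetical, and
 * msecs_to_jiffies() is assumed to be available through the Linux
 * compatibility headers already included above.
 */
#if 0	/* example only */
static int
example_bounded_wait(struct radeon_fence *fence)
{
	long r = radeon_fence_wait_timeout(fence, true,
	    msecs_to_jiffies(100));

	if (r > 0)
		return 0;		/* signaled; r jiffies of budget remained */
	if (r == 0)
		return -ETIMEDOUT;	/* 100ms elapsed without signaling */
	return (int)r;			/* -ERESTARTSYS, -EDEADLK, ... */
}
#endif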
    628 
    629 /**
    630  * radeon_fence_wait - wait for a fence to signal
    631  *
    632  * @fence: radeon fence object
    633  * @intr: use interruptible sleep
    634  *
    635  * Wait for the requested fence to signal (all asics).
     636  * @intr selects whether to use interruptible (true) or non-interruptible
    637  * (false) sleep when waiting for the fence.
    638  * Returns 0 if the fence has passed, error for all other cases.
    639  */
    640 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
    641 {
    642 	long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
    643 	if (r > 0) {
    644 		return 0;
    645 	} else {
    646 		return r;
    647 	}
    648 }
    649 
    650 /**
    651  * radeon_fence_wait_any - wait for a fence to signal on any ring
    652  *
    653  * @rdev: radeon device pointer
    654  * @fences: radeon fence object(s)
     655  * @intr: use interruptible sleep
    656  *
    657  * Wait for any requested fence to signal (all asics).  Fence
    658  * array is indexed by ring id.  @intr selects whether to use
     659  * interruptible (true) or non-interruptible (false) sleep when
    660  * waiting for the fences. Used by the suballocator.
    661  * Returns 0 if any fence has passed, error for all other cases.
    662  */
    663 int radeon_fence_wait_any(struct radeon_device *rdev,
    664 			  struct radeon_fence **fences,
    665 			  bool intr)
    666 {
    667 	uint64_t seq[RADEON_NUM_RINGS];
    668 	unsigned i, num_rings = 0;
    669 	long r;
    670 
    671 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
    672 		seq[i] = 0;
    673 
    674 		if (!fences[i]) {
    675 			continue;
    676 		}
    677 
    678 		seq[i] = fences[i]->seq;
    679 		++num_rings;
    680 	}
    681 
     682 	/* nothing to wait for? */
    683 	if (num_rings == 0)
    684 		return -ENOENT;
    685 
    686 	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
    687 	if (r < 0) {
    688 		return r;
    689 	}
    690 	return 0;
    691 }
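/*
 * Illustrative sketch (not part of the driver): waiting for whichever of
 * two fences signals first, the way the suballocator uses
 * radeon_fence_wait_any() with a per-ring array.  The example_* helper
 * is hypothetical and assumes the two fences live on different rings.
 */
#if 0	/* example only */
static int
example_wait_either(struct radeon_device *rdev,
    struct radeon_fence *a, struct radeon_fence *b)
{
	struct radeon_fence *fences[RADEON_NUM_RINGS] = {};

	/* Index the array by ring id; unused slots stay NULL. */
	fences[a->ring] = a;
	fences[b->ring] = b;

	return radeon_fence_wait_any(rdev, fences, false);
}
#endif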
    692 
    693 /**
    694  * radeon_fence_wait_next - wait for the next fence to signal
    695  *
    696  * @rdev: radeon device pointer
    697  * @ring: ring index the fence is associated with
    698  *
    699  * Wait for the next fence on the requested ring to signal (all asics).
    700  * Returns 0 if the next fence has passed, error for all other cases.
    701  * Caller must hold ring lock.
    702  */
    703 int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
    704 {
    705 	uint64_t seq[RADEON_NUM_RINGS] = {};
    706 	long r;
    707 
    708 	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
    709 	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
    710 		/* nothing to wait for, last_seq is
     711 		   already the last emitted fence */
    712 		return -ENOENT;
    713 	}
    714 	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
    715 	if (r < 0)
    716 		return r;
    717 	return 0;
    718 }
    719 
    720 /**
    721  * radeon_fence_wait_empty - wait for all fences to signal
    722  *
    723  * @rdev: radeon device pointer
    724  * @ring: ring index the fence is associated with
    725  *
    726  * Wait for all fences on the requested ring to signal (all asics).
    727  * Returns 0 if the fences have passed, error for all other cases.
    728  * Caller must hold ring lock.
    729  */
    730 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
    731 {
    732 	uint64_t seq[RADEON_NUM_RINGS] = {};
    733 	long r;
    734 
    735 	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
    736 	if (!seq[ring])
    737 		return 0;
    738 
    739 	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
    740 	if (r < 0) {
    741 		if (r == -EDEADLK)
    742 			return -EDEADLK;
    743 
    744 		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
    745 			ring, r);
    746 	}
    747 	return 0;
    748 }
    749 
    750 /**
    751  * radeon_fence_ref - take a ref on a fence
    752  *
    753  * @fence: radeon fence object
    754  *
    755  * Take a reference on a fence (all asics).
    756  * Returns the fence.
    757  */
    758 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
    759 {
    760 	dma_fence_get(&fence->base);
    761 	return fence;
    762 }
    763 
    764 /**
    765  * radeon_fence_unref - remove a ref on a fence
    766  *
    767  * @fence: radeon fence object
    768  *
    769  * Remove a reference on a fence (all asics).
    770  */
    771 void radeon_fence_unref(struct radeon_fence **fence)
    772 {
    773 	struct radeon_fence *tmp = *fence;
    774 
    775 	*fence = NULL;
    776 	if (tmp) {
    777 		dma_fence_put(&tmp->base);
    778 	}
    779 }
    780 
    781 /**
    782  * radeon_fence_count_emitted - get the count of emitted fences
    783  *
    784  * @rdev: radeon device pointer
    785  * @ring: ring index the fence is associated with
    786  *
    787  * Get the number of fences emitted on the requested ring (all asics).
    788  * Returns the number of emitted fences on the ring.  Used by the
     789  * dynpm code to track ring activity.
    790  */
    791 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
    792 {
    793 	uint64_t emitted;
    794 
    795 	/* We are not protected by ring lock when reading the last sequence
    796 	 * but it's ok to report slightly wrong fence count here.
    797 	 */
    798 	radeon_fence_process(rdev, ring);
    799 	emitted = rdev->fence_drv[ring].sync_seq[ring]
    800 		- atomic64_read(&rdev->fence_drv[ring].last_seq);
     801 	/* to avoid 32-bit wrap around */
    802 	if (emitted > 0x10000000) {
    803 		emitted = 0x10000000;
    804 	}
    805 	return (unsigned)emitted;
    806 }
    807 
    808 /**
    809  * radeon_fence_need_sync - do we need a semaphore
    810  *
    811  * @fence: radeon fence object
    812  * @dst_ring: which ring to check against
    813  *
    814  * Check if the fence needs to be synced against another ring
    815  * (all asics).  If so, we need to emit a semaphore.
    816  * Returns true if we need to sync with another ring, false if
    817  * not.
    818  */
    819 bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
    820 {
    821 	struct radeon_fence_driver *fdrv;
    822 
    823 	if (!fence) {
    824 		return false;
    825 	}
    826 
    827 	if (fence->ring == dst_ring) {
    828 		return false;
    829 	}
    830 
    831 	/* we are protected by the ring mutex */
    832 	fdrv = &fence->rdev->fence_drv[dst_ring];
    833 	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
    834 		return false;
    835 	}
    836 
    837 	return true;
    838 }
    839 
    840 /**
    841  * radeon_fence_note_sync - record the sync point
    842  *
    843  * @fence: radeon fence object
    844  * @dst_ring: which ring to check against
    845  *
    846  * Note the sequence number at which point the fence will
    847  * be synced with the requested ring (all asics).
    848  */
    849 void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
    850 {
    851 	struct radeon_fence_driver *dst, *src;
    852 	unsigned i;
    853 
    854 	if (!fence) {
    855 		return;
    856 	}
    857 
    858 	if (fence->ring == dst_ring) {
    859 		return;
    860 	}
    861 
    862 	/* we are protected by the ring mutex */
    863 	src = &fence->rdev->fence_drv[fence->ring];
    864 	dst = &fence->rdev->fence_drv[dst_ring];
    865 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
    866 		if (i == dst_ring) {
    867 			continue;
    868 		}
    869 		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
    870 	}
    871 }
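/*
 * Illustrative sketch (not part of the driver): the pairing of
 * radeon_fence_need_sync() and radeon_fence_note_sync() during command
 * submission for dst_ring.  The example_* helper is hypothetical, and
 * the actual semaphore emission is driver machinery outside this file,
 * only hinted at here.
 */
#if 0	/* example only */
static void
example_sync_rings(struct radeon_fence *other_fence, int dst_ring)
{
	if (radeon_fence_need_sync(other_fence, dst_ring)) {
		/* ...emit a semaphore so dst_ring waits for
		 * other_fence's ring to reach other_fence->seq... */
		radeon_fence_note_sync(other_fence, dst_ring);
	}
}
#endif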
    872 
    873 /**
    874  * radeon_fence_driver_start_ring - make the fence driver
    875  * ready for use on the requested ring.
    876  *
    877  * @rdev: radeon device pointer
    878  * @ring: ring index to start the fence driver on
    879  *
    880  * Make the fence driver ready for processing (all asics).
    881  * Not all asics have all rings, so each asic will only
    882  * start the fence driver on the rings it has.
    883  * Returns 0 for success, errors for failure.
    884  */
    885 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
    886 {
    887 	uint64_t index;
    888 	int r;
    889 
    890 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
    891 	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
    892 		rdev->fence_drv[ring].scratch_reg = 0;
    893 		if (ring != R600_RING_TYPE_UVD_INDEX) {
    894 			index = R600_WB_EVENT_OFFSET + ring * 4;
    895 			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
    896 			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
    897 							 index;
    898 
    899 		} else {
    900 			/* put fence directly behind firmware */
    901 			index = ALIGN(rdev->uvd_fw->size, 8);
    902 			rdev->fence_drv[ring].cpu_addr = (uint32_t *)((uint8_t *)rdev->uvd.cpu_addr + index);
    903 			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
    904 		}
    905 
    906 	} else {
    907 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
    908 		if (r) {
    909 			dev_err(rdev->dev, "fence failed to get scratch register\n");
    910 			return r;
    911 		}
    912 		index = RADEON_WB_SCRATCH_OFFSET +
    913 			rdev->fence_drv[ring].scratch_reg -
    914 			rdev->scratch.reg_base;
    915 		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
    916 		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
    917 	}
    918 	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
    919 	rdev->fence_drv[ring].initialized = true;
    920 	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016"PRIx64" and cpu addr 0x%p\n",
    921 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
    922 	return 0;
    923 }
    924 
    925 /**
    926  * radeon_fence_driver_init_ring - init the fence driver
    927  * for the requested ring.
    928  *
    929  * @rdev: radeon device pointer
    930  * @ring: ring index to start the fence driver on
    931  *
    932  * Init the fence driver for the requested ring (all asics).
    933  * Helper function for radeon_fence_driver_init().
    934  */
    935 static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
    936 {
    937 	int i;
    938 
    939 	rdev->fence_drv[ring].scratch_reg = -1;
    940 	rdev->fence_drv[ring].cpu_addr = NULL;
    941 	rdev->fence_drv[ring].gpu_addr = 0;
    942 	for (i = 0; i < RADEON_NUM_RINGS; ++i)
    943 		rdev->fence_drv[ring].sync_seq[i] = 0;
    944 	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
    945 	rdev->fence_drv[ring].initialized = false;
    946 	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
    947 			  radeon_fence_check_lockup);
    948 	rdev->fence_drv[ring].rdev = rdev;
    949 }
    950 
    951 /**
    952  * radeon_fence_driver_init - init the fence driver
    953  * for all possible rings.
    954  *
    955  * @rdev: radeon device pointer
    956  *
    957  * Init the fence driver for all possible rings (all asics).
    958  * Not all asics have all rings, so each asic will only
    959  * start the fence driver on the rings it has using
    960  * radeon_fence_driver_start_ring().
    961  * Returns 0 for success.
    962  */
    963 int radeon_fence_driver_init(struct radeon_device *rdev)
    964 {
    965 	int ring;
    966 
    967 	spin_lock_init(&rdev->fence_lock);
    968 	DRM_INIT_WAITQUEUE(&rdev->fence_queue, "radfence");
    969 	TAILQ_INIT(&rdev->fence_check);
    970 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
    971 		radeon_fence_driver_init_ring(rdev, ring);
    972 	}
    973 	if (radeon_debugfs_fence_init(rdev)) {
    974 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
    975 	}
    976 	return 0;
    977 }
    978 
    979 /**
    980  * radeon_fence_driver_fini - tear down the fence driver
    981  * for all possible rings.
    982  *
    983  * @rdev: radeon device pointer
    984  *
    985  * Tear down the fence driver for all possible rings (all asics).
    986  */
    987 void radeon_fence_driver_fini(struct radeon_device *rdev)
    988 {
    989 	int ring, r;
    990 
    991 	mutex_lock(&rdev->ring_lock);
    992 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
    993 		if (!rdev->fence_drv[ring].initialized)
    994 			continue;
    995 		r = radeon_fence_wait_empty(rdev, ring);
    996 		if (r) {
    997 			/* no need to trigger GPU reset as we are unloading */
    998 			radeon_fence_driver_force_completion(rdev, ring);
    999 		}
   1000 		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
   1001 		spin_lock(&rdev->fence_lock);
   1002 		radeon_fence_wakeup_locked(rdev);
   1003 		spin_unlock(&rdev->fence_lock);
   1004 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
   1005 		rdev->fence_drv[ring].initialized = false;
   1006 	}
   1007 	mutex_unlock(&rdev->ring_lock);
   1008 
   1009 	BUG_ON(!TAILQ_EMPTY(&rdev->fence_check));
   1010 	DRM_DESTROY_WAITQUEUE(&rdev->fence_queue);
   1011 	spin_lock_destroy(&rdev->fence_lock);
   1012 }
   1013 
   1014 /**
    1015  * radeon_fence_driver_force_completion - force all fence waiters to complete
   1016  *
   1017  * @rdev: radeon device pointer
   1018  * @ring: the ring to complete
   1019  *
    1020  * In case of a GPU reset failure, make sure no process keeps waiting on a
    1021  * fence that will never complete.
   1022  */
   1023 void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
   1024 {
   1025 	if (rdev->fence_drv[ring].initialized) {
   1026 		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
   1027 		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
   1028 	}
   1029 }
   1030 
   1031 
   1032 /*
   1033  * Fence debugfs
   1034  */
   1035 #if defined(CONFIG_DEBUG_FS)
   1036 static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
   1037 {
   1038 	struct drm_info_node *node = (struct drm_info_node *)m->private;
   1039 	struct drm_device *dev = node->minor->dev;
   1040 	struct radeon_device *rdev = dev->dev_private;
   1041 	int i, j;
   1042 
   1043 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
   1044 		if (!rdev->fence_drv[i].initialized)
   1045 			continue;
   1046 
   1047 		radeon_fence_process(rdev, i);
   1048 
   1049 		seq_printf(m, "--- ring %d ---\n", i);
   1050 		seq_printf(m, "Last signaled fence 0x%016llx\n",
   1051 			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
   1052 		seq_printf(m, "Last emitted        0x%016"PRIx64"\n",
   1053 			   rdev->fence_drv[i].sync_seq[i]);
   1054 
   1055 		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
   1056 			if (i != j && rdev->fence_drv[j].initialized)
   1057 				seq_printf(m, "Last sync to ring %d 0x%016"PRIx64"\n",
   1058 					   j, rdev->fence_drv[i].sync_seq[j]);
   1059 		}
   1060 	}
   1061 	return 0;
   1062 }
   1063 
   1064 /**
   1065  * radeon_debugfs_gpu_reset - manually trigger a gpu reset
   1066  *
   1067  * Manually trigger a gpu reset at the next fence wait.
   1068  */
   1069 static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
   1070 {
   1071 	struct drm_info_node *node = (struct drm_info_node *) m->private;
   1072 	struct drm_device *dev = node->minor->dev;
   1073 	struct radeon_device *rdev = dev->dev_private;
   1074 
   1075 	down_read(&rdev->exclusive_lock);
   1076 	seq_printf(m, "%d\n", rdev->needs_reset);
   1077 	rdev->needs_reset = true;
   1078 	wake_up_all(&rdev->fence_queue);
   1079 	up_read(&rdev->exclusive_lock);
   1080 
   1081 	return 0;
   1082 }
   1083 
   1084 static struct drm_info_list radeon_debugfs_fence_list[] = {
   1085 	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
   1086 	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
   1087 };
   1088 #endif
   1089 
   1090 int radeon_debugfs_fence_init(struct radeon_device *rdev)
   1091 {
   1092 #if defined(CONFIG_DEBUG_FS)
   1093 	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
   1094 #else
   1095 	return 0;
   1096 #endif
   1097 }
   1098 
   1099 static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
   1100 {
   1101 	return "radeon";
   1102 }
   1103 
   1104 static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
   1105 {
   1106 	struct radeon_fence *fence = to_radeon_fence(f);
   1107 	switch (fence->ring) {
   1108 	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
   1109 	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
   1110 	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
   1111 	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
   1112 	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
   1113 	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
   1114 	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
   1115 	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
   1116 	default: WARN_ON_ONCE(1); return "radeon.unk";
   1117 	}
   1118 }
   1119 
   1120 static inline bool radeon_test_signaled(struct radeon_fence *fence)
   1121 {
   1122 	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
   1123 }
   1124 
   1125 struct radeon_wait_cb {
   1126 	struct dma_fence_cb base;
   1127 };
   1128 
   1129 static void
   1130 radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
   1131 {
   1132 	struct radeon_fence *rfence = to_radeon_fence(fence);
   1133 	struct radeon_device *rdev = rfence->rdev;
   1134 
   1135 	BUG_ON(!spin_is_locked(&rdev->fence_lock));
   1136 	DRM_SPIN_WAKEUP_ALL(&rdev->fence_queue, &rdev->fence_lock);
   1137 }
   1138 
   1139 static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
   1140 					     signed long t)
   1141 {
   1142 	struct radeon_fence *fence = to_radeon_fence(f);
   1143 	struct radeon_device *rdev = fence->rdev;
   1144 	struct radeon_wait_cb cb;
   1145 	int r;
   1146 
   1147 	if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
   1148 		return t;
   1149 
   1150 	spin_lock(&rdev->fence_lock);
   1151 	if (intr) {
   1152 		DRM_SPIN_TIMED_WAIT_UNTIL(r, &rdev->fence_queue,
   1153 		    &rdev->fence_lock, t,
   1154 		    radeon_test_signaled(fence));
   1155 	} else {
   1156 		DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(r, &rdev->fence_queue,
   1157 		    &rdev->fence_lock, t,
   1158 		    radeon_test_signaled(fence));
   1159 	}
   1160 	spin_unlock(&rdev->fence_lock);
   1161 
   1162 	dma_fence_remove_callback(f, &cb.base);
   1163 
   1164 	return r;
   1165 }
   1166 
   1167 const struct dma_fence_ops radeon_fence_ops = {
   1168 	.get_driver_name = radeon_fence_get_driver_name,
   1169 	.get_timeline_name = radeon_fence_get_timeline_name,
   1170 	.enable_signaling = radeon_fence_enable_signaling,
   1171 	.signaled = radeon_fence_is_signaled,
   1172 	.wait = radeon_fence_default_wait,
   1173 	.release = NULL,
   1174 };
   1175