/*	$NetBSD: radeon_fence.c,v 1.1.1.2 2018/08/27 01:34:58 riastradh Exp $	*/

/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_fence.c,v 1.1.1.2 2018/08/27 01:34:58 riastradh Exp $");

#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */
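
/*
 * Typical usage, as an illustrative sketch only (not code from this
 * file; it assumes the caller is on the normal emission path, i.e.
 * holds the ring and has already queued its commands):
 *
 *	struct radeon_fence *fence;
 *	int r;
 *
 *	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *	if (r)
 *		return r;
 *	r = radeon_fence_wait(fence, true);	(interruptible wait)
 *	radeon_fence_unref(&fence);
 */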

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &rdev->fence_drv[ring].lockup_work,
			   RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];

	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->rdev = rdev;
	(*fence)->seq = seq;
	(*fence)->ring = ring;
	(*fence)->is_vm_update = false;
	fence_init(&(*fence)->base, &radeon_fence_ops,
		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
	radeon_fence_schedule_check(rdev, ring);
	return 0;
}

/**
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also
 * used for the fence locking itself, so the variants of fence_signal and
 * remove_wait_queue that do not take the lock themselves are used.
 */
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct radeon_fence *fence;
	u64 seq;

	fence = container_of(wait, struct radeon_fence, fence_wake);

	/*
	 * We cannot use radeon_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
	if (seq >= fence->seq) {
		int ret = fence_signal_locked(&fence->base);

		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring, and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop, new fences
	 * must be signaled continuously, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between the atomic read and xchg of the current process. And
	 * the value the other process sets as last_seq must be higher
	 * than the seq value we just read, which means the current
	 * process must be interrupted after radeon_fence_read and before
	 * the atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * bail out after 10 iterations, accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
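	/*
	 * Worked example of the wrap handling below (illustrative
	 * values only): with last_seq = 0x00000001fffffff0 and a
	 * 32-bit hardware read of 0x00000010, splicing in the upper
	 * bits of last_seq yields 0x0000000100000010, which is below
	 * last_seq, so the lower 32 bits must have wrapped; taking
	 * the upper bits from last_emitted (e.g. 0x0000000200000020)
	 * instead gives the correct 64-bit value 0x0000000200000010.
	 */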
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over, we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times; leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (seq < last_emitted)
		radeon_fence_schedule_check(rdev, ring);

	return wake;
}

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see whether a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
	struct radeon_fence_driver *fence_drv;
	struct radeon_device *rdev;
	int ring;

	fence_drv = container_of(work, struct radeon_fence_driver,
				 lockup_work.work);
	rdev = fence_drv->rdev;
	ring = fence_drv - &rdev->fence_drv[0];

	if (!down_read_trylock(&rdev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		radeon_fence_schedule_check(rdev, ring);
		return;
	}

	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
		unsigned long irqflags;

		fence_drv->delayed_irq = false;
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}

	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);

	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {

		/* good news we believe it's a lockup */
		dev_warn(rdev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring], ring);

		/* remember that we need a reset */
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
	}
	up_read(&rdev->exclusive_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

static bool radeon_fence_is_signaled(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	unsigned ring = fence->ring;
	u64 seq = fence->seq;

	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_fence_process(rdev, ring);
		up_read(&rdev->exclusive_lock);

		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with the fence_queue lock held, and adds a
 * callback to fence_queue that checks if this fence is signaled, and if
 * so it signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

	if (down_read_trylock(&rdev->exclusive_lock)) {
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

		if (radeon_fence_activity(rdev, fence->ring))
			wake_up_all_locked(&rdev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
			up_read(&rdev->exclusive_lock);
			return false;
		}

		up_read(&rdev->exclusive_lock);
	} else {
		/* we're probably in a lockup, let's not fiddle too much */
		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
			rdev->fence_drv[fence->ring].delayed_irq = true;
		radeon_fence_schedule_check(rdev, fence->ring);
	}

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence)
		return true;

	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		int ret;

		ret = fence_signal(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
		return true;
	}
	return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
			return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 if the
 * wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
					  u64 *target_seq, bool intr,
					  long timeout)
{
	long r;
	int i;

	if (radeon_fence_any_seq_signaled(rdev, target_seq))
		return timeout;

	/* enable IRQs and tracing */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
		radeon_irq_kms_sw_irq_get(rdev, i);
	}

	if (intr) {
		r = wait_event_interruptible_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	} else {
		r = wait_event_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	}

	if (rdev->needs_reset)
		r = -EDEADLK;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		radeon_irq_kms_sw_irq_put(rdev, i);
		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
	}

	return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	/*
	 * This function should not be called on !radeon fences.
	 * If this is the case, it would mean this function can
	 * also be called on radeon fences belonging to another card.
	 * exclusive_lock is not held in that case.
	 */
	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
		return fence_wait(&fence->base, intr);

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  Fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for? */
	if (num_rings == 0)
		return -ENOENT;

	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
	if (!seq[ring])
		return 0;

	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		fence_put(&tmp->base);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last
	 * sequence, but it's ok to report a slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid 32-bit wraparound */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}
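
/*
 * Example with illustrative values: sync_seq[ring] = 105 and
 * last_seq = 100 means five fences are still outstanding.  The clamp
 * above presumably guards against the two counters being sampled
 * inconsistently (they are read without the ring lock), which could
 * otherwise make the subtraction yield a huge bogus count.
 */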

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}
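
/*
 * Usage sketch for the two helpers above (illustrative only;
 * emit_semaphore_wait() is a hypothetical stand-in for the driver's
 * real semaphore emission path):
 *
 *	if (radeon_fence_need_sync(fence, dst_ring)) {
 *		emit_semaphore_wait(rdev, fence, dst_ring);
 *		radeon_fence_note_sync(fence, dst_ring);
 *	}
 */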

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;

		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}

	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].initialized = false;
	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
			  radeon_fence_check_lockup);
	rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev, ring);
		}
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure, make sure no process keeps waiting on a
 * fence that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
	if (rdev->fence_drv[ring].initialized) {
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
	}
}

/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		radeon_fence_process(rdev, i);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted        0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	down_read(&rdev->exclusive_lock);
	seq_printf(m, "%d\n", rdev->needs_reset);
	rdev->needs_reset = true;
	wake_up_all(&rdev->fence_queue);
	up_read(&rdev->exclusive_lock);

	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
	return 0;
#endif
}

static const char *radeon_fence_get_driver_name(struct fence *fence)
{
	return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	switch (fence->ring) {
	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
	default: WARN_ON_ONCE(1); return "radeon.unk";
	}
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct radeon_wait_cb {
	struct fence_cb base;
	struct task_struct *task;
};

static void
radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
{
	struct radeon_wait_cb *wait =
		container_of(cb, struct radeon_wait_cb, base);

	wake_up_process(wait->task);
}

static signed long radeon_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	struct radeon_wait_cb cb;

	cb.task = current;

	if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
		return t;

	while (t > 0) {
		if (intr)
			set_current_state(TASK_INTERRUPTIBLE);
		else
			set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * radeon_test_signaled must be called after
		 * set_current_state to prevent a race with wake_up_process
		 */
		if (radeon_test_signaled(fence))
			break;

		if (rdev->needs_reset) {
			t = -EDEADLK;
			break;
		}

		t = schedule_timeout(t);

		if (t > 0 && intr && signal_pending(current))
			t = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);
	fence_remove_callback(f, &cb.base);

	return t;
}

const struct fence_ops radeon_fence_ops = {
	.get_driver_name = radeon_fence_get_driver_name,
	.get_timeline_name = radeon_fence_get_timeline_name,
	.enable_signaling = radeon_fence_enable_signaling,
	.signaled = radeon_fence_is_signaled,
	.wait = radeon_fence_default_wait,
	.release = NULL,
};