Home | History | Annotate | Line # | Download | only in amdgpu
      1 /*	$NetBSD: amdgpu_ring.c,v 1.7 2021/12/19 12:31:45 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2008 Advanced Micro Devices, Inc.
      5  * Copyright 2008 Red Hat Inc.
      6  * Copyright 2009 Jerome Glisse.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included in
     16  * all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     22  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     23  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     24  * OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  * Authors: Dave Airlie
     27  *          Alex Deucher
     28  *          Jerome Glisse
 *          Christian König
     30  */
     31 #include <sys/cdefs.h>
     32 __KERNEL_RCSID(0, "$NetBSD: amdgpu_ring.c,v 1.7 2021/12/19 12:31:45 riastradh Exp $");
     33 
     34 #include <linux/seq_file.h>
     35 #include <linux/slab.h>
     36 #include <linux/uaccess.h>
     37 #include <linux/debugfs.h>
     38 
     39 #include <drm/amdgpu_drm.h>
     40 #include "amdgpu.h"
     41 #include "atom.h"
     42 
     43 #include <linux/nbsd-namespace.h>
     44 
     45 /*
     46  * Rings
     47  * Most engines on the GPU are fed via ring buffers.  Ring
     48  * buffers are areas of GPU accessible memory that the host
     49  * writes commands into and the GPU reads commands out of.
     50  * There is a rptr (read pointer) that determines where the
     51  * GPU is currently reading, and a wptr (write pointer)
     52  * which determines where the host has written.  When the
     53  * pointers are equal, the ring is idle.  When the host
     54  * writes commands to the ring buffer, it increments the
     55  * wptr.  The GPU then starts fetching commands and executes
     56  * them until the pointers are equal again.
     57  */
     58 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
     59 				    struct amdgpu_ring *ring);
     60 static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
     61 
/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	/* Make sure we aren't trying to allocate more space
	 * than the maximum for one submission
	 */
	if (WARN_ON_ONCE(ndw > ring->max_dw))
		return -ENOMEM;

	/* Record the remaining room and the starting wptr so that
	 * amdgpu_ring_commit()/amdgpu_ring_undo() can pad or roll back. */
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;

	/* Optional per-ring hook run before the ring is written to. */
	if (ring->funcs->begin_use)
		ring->funcs->begin_use(ring);

	return 0;
}
     92 
     93 /** amdgpu_ring_insert_nop - insert NOP packets
     94  *
     95  * @ring: amdgpu_ring structure holding ring information
     96  * @count: the number of NOP packets to insert
     97  *
     98  * This is the generic insert_nop function for rings except SDMA
     99  */
    100 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
    101 {
    102 	int i;
    103 
    104 	for (i = 0; i < count; i++)
    105 		amdgpu_ring_write(ring, ring->funcs->nop);
    106 }
    107 
    108 /** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
    109  *
    110  * @ring: amdgpu_ring structure holding ring information
    111  * @ib: IB to add NOP packets to
    112  *
    113  * This is the generic pad_ib function for rings except SDMA
    114  */
    115 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
    116 {
    117 	while (ib->length_dw & ring->funcs->align_mask)
    118 		ib->ptr[ib->length_dw++] = ring->funcs->nop;
    119 }
    120 
/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->funcs->align_mask + 1 -
		(ring->wptr & ring->funcs->align_mask);
	count %= ring->funcs->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	/* Full barrier: make the ring-buffer writes above visible before
	 * the GPU sees the new write pointer. */
	mb();
	amdgpu_ring_set_wptr(ring);

	/* Optional per-ring hook paired with funcs->begin_use. */
	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}
    147 
/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	/* Discard everything written since amdgpu_ring_alloc() by
	 * restoring the saved write pointer; nothing is submitted. */
	ring->wptr = ring->wptr_old;

	/* Optional per-ring hook paired with funcs->begin_use. */
	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}
    162 
/**
 * amdgpu_ring_priority_put - restore a ring's priority
 *
 * @ring: amdgpu_ring structure holding the information
 * @priority: target priority
 *
 * Release a request for executing at @priority
 */
void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
			      enum drm_sched_priority priority)
{
	int i;

	/* Rings without hardware priority support: nothing to do. */
	if (!ring->funcs->set_priority)
		return;

	/* Drop one reference; only the last put at this level may decay
	 * the ring's priority. */
	if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
		return;

	/* no need to restore if the job is already at the lowest priority */
	if (priority == DRM_SCHED_PRIORITY_NORMAL)
		return;

	mutex_lock(&ring->priority_mutex);
	/* something higher prio is executing, no need to decay */
	if (ring->priority > priority)
		goto out_unlock;

	/* decay priority to the next level with a job available */
	for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		/* NORMAL is the floor even when it has no queued jobs. */
		if (i == DRM_SCHED_PRIORITY_NORMAL
				|| atomic_read(&ring->num_jobs[i])) {
			ring->priority = i;
			ring->funcs->set_priority(ring, i);
			break;
		}
	}

out_unlock:
	mutex_unlock(&ring->priority_mutex);
}
    204 
/**
 * amdgpu_ring_priority_get - change the ring's priority
 *
 * @ring: amdgpu_ring structure holding the information
 * @priority: target priority
 *
 * Request a ring's priority to be raised to @priority (refcounted).
 */
void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
			      enum drm_sched_priority priority)
{
	/* Rings without hardware priority support: nothing to do. */
	if (!ring->funcs->set_priority)
		return;

	/* NOTE(review): the increment result should always be >= 1 here;
	 * the "<= 0" guard only trips if the counter was underflowed by
	 * an unbalanced put — confirm that is the intent. */
	if (atomic_inc_return(&ring->num_jobs[priority]) <= 0)
		return;

	mutex_lock(&ring->priority_mutex);
	/* Only raise the ring's priority, never lower it here;
	 * lowering happens in amdgpu_ring_priority_put(). */
	if (priority <= ring->priority)
		goto out_unlock;

	ring->priority = priority;
	ring->funcs->set_priority(ring, priority);

out_unlock:
	mutex_unlock(&ring->priority_mutex);
}
    232 
/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @max_dw: maximum number of dw for ring alloc
 * @irq_src: interrupt source to use for fences on this ring
 * @irq_type: interrupt type to use for fences on this ring
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned max_dw, struct amdgpu_irq_src *irq_src,
		     unsigned irq_type)
{
	int r, i;
	int sched_hw_submission = amdgpu_sched_hw_submission;

	/* Set the hw submission limit higher for KIQ because
	 * it's used for a number of gfx/compute tasks by both
	 * KFD and KGD which may have outstanding fences and
	 * it doesn't really use the gpu scheduler anyway;
	 * KIQ tasks get submitted directly to the ring.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		sched_hw_submission = max(sched_hw_submission, 256);
	else if (ring == &adev->sdma.instance[0].page)
		sched_hw_submission = 256;

	/* First-time init: register the ring with the device and set up
	 * its fence driver.  ring->adev != NULL means re-init. */
	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->idx = adev->num_rings++;
		adev->rings[ring->idx] = ring;
		r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
		if (r)
			return r;
	}

	/* Allocate GPU-writeback slots for the rptr/wptr/fence shadows.
	 * NOTE(review): slots already allocated are not released on the
	 * later error returns below — callers appear to rely on
	 * amdgpu_ring_fini() for cleanup; confirm. */
	r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_device_wb_get(adev, &ring->fence_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
	if (r) {
		dev_err(adev->dev,
			"(%d) ring trail_fence_offs wb alloc failed\n", r);
		return r;
	}
	/* Writeback slots are 4 bytes wide; offs is a slot index. */
	ring->trail_fence_gpu_addr =
		adev->wb.gpu_addr + (ring->trail_fence_offs * 4);
	ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs];

	r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
		return r;
	}
	ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
	ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	/* Size the ring for sched_hw_submission in-flight submissions of
	 * max_dw dwords each, rounded to a power of two for masking. */
	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);

	ring->buf_mask = (ring->ring_size / 4) - 1;
	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
		0xffffffffffffffff : ring->buf_mask;
	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ring->ring_obj,
					    &ring->gpu_addr,
					    (void **)__UNVOLATILE(&ring->ring));
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		amdgpu_ring_clear_ring(ring);
	}

	ring->max_dw = max_dw;
	ring->priority = DRM_SCHED_PRIORITY_NORMAL;
	mutex_init(&ring->priority_mutex);

	for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
		atomic_set(&ring->num_jobs[i], 0);

	/* debugfs exposure is best-effort; failure is not fatal. */
	if (amdgpu_debugfs_ring_init(adev, ring)) {
		DRM_ERROR("Failed to register debugfs file for rings !\n");
	}

	return 0;
}
    350 
    351 /**
    352  * amdgpu_ring_fini - tear down the driver ring struct.
    353  *
    354  * @adev: amdgpu_device pointer
    355  * @ring: amdgpu_ring structure holding ring information
    356  *
    357  * Tear down the driver information for the selected ring (all asics).
    358  */
    359 void amdgpu_ring_fini(struct amdgpu_ring *ring)
    360 {
    361 	ring->sched.ready = false;
    362 
    363 	/* Not to finish a ring which is not initialized */
    364 	if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
    365 		return;
    366 
    367 	amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
    368 	amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
    369 
    370 	amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
    371 	amdgpu_device_wb_free(ring->adev, ring->fence_offs);
    372 
    373 	amdgpu_bo_free_kernel(&ring->ring_obj,
    374 			      &ring->gpu_addr,
    375 			      (void **)__UNVOLATILE(&ring->ring));
    376 
    377 	amdgpu_debugfs_ring_fini(ring);
    378 
    379 	dma_fence_put(ring->vmid_wait);
    380 	ring->vmid_wait = NULL;
    381 	ring->me = 0;
    382 
    383 	ring->adev->rings[ring->idx] = NULL;
    384 
    385 	mutex_destroy(&ring->priority_mutex);
    386 }
    387 
/**
 * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
 *
 * @ring: amdgpu_ring structure holding ring information
 * @reg0: register to write
 * @reg1: register to wait on
 * @ref: reference value to write/wait on
 * @mask: mask to wait on
 *
 * Helper for rings that don't support write and wait in a
 * single oneshot packet.
 */
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
						uint32_t reg0, uint32_t reg1,
						uint32_t ref, uint32_t mask)
{
	/* Emit the write, then poll reg1 until (reg1 & mask) == mask. */
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
    407 
/**
 * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
 *
 * @ring: ring to try the recovery on
 * @vmid: VMID we try to get going again
 * @fence: timedout fence
 *
 * Tries to get a ring proceeding again when it is stuck.
 */
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
			       struct dma_fence *fence)
{
	/* Give the soft-recovery loop at most 10 ms. */
	ktime_t deadline = ktime_add_us(ktime_get(), 10000);

	/* Not possible under SR-IOV, without a soft_recovery callback,
	 * or without a fence to wait on. */
	if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
		return false;

	atomic_inc(&ring->adev->gpu_reset_counter);
	/* Keep kicking the ring until the fence signals or we time out. */
	while (!dma_fence_is_signaled(fence) &&
	       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
		ring->funcs->soft_recovery(ring, vmid);

	/* Recovery succeeded iff the timed-out fence finally signaled. */
	return dma_fence_is_signaled(fence);
}
    432 
    433 /*
    434  * Debugfs info
    435  */
    436 #if defined(CONFIG_DEBUG_FS)
    437 
/* Layout of file is 12 bytes consisting of
 * - rptr
 * - wptr
 * - driver's copy of wptr
 *
 * followed by n-words of ring data
 */
static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	int r, i;
	uint32_t value, result, early[3];

	/* Only whole-dword, dword-aligned reads are supported. */
	if (*pos & 3 || size & 3)
		return -EINVAL;

	result = 0;

	/* First 12 bytes: snapshot the three pointers (masked to buffer
	 * offsets) and copy out whichever of them *pos falls on. */
	if (*pos < 12) {
		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
		early[2] = ring->wptr & ring->buf_mask;
		for (i = *pos / 4; i < 3 && size; i++) {
			r = put_user(early[i], (uint32_t *)buf);
			if (r)
				return r;
			buf += 4;
			result += 4;
			size -= 4;
			*pos += 4;
		}
	}

	/* Remainder: raw ring contents, offset by the 12-byte header. */
	while (size) {
		if (*pos >= (ring->ring_size + 12))
			return result;

		value = ring->ring[(*pos - 12)/4];
		r = put_user(value, (uint32_t*)buf);
		if (r)
			return r;
		buf += 4;
		result += 4;
		size -= 4;
		*pos += 4;
	}

	return result;
}
    488 
/* Read-only debugfs file ops for the per-ring dump file created in
 * amdgpu_debugfs_ring_init(). */
static const struct file_operations amdgpu_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_ring_read,
	.llseek = default_llseek
};
    494 
    495 #endif
    496 
/* Create the debugfs file "amdgpu_ring_<name>" exposing the ring
 * contents; no-op (returning 0) when debugfs is compiled out.
 * Returns 0 on success, -ENOMEM if the file cannot be created. */
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
				    struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;
	char name[32];

	/* Bounded copy: sprintf() would silently overflow name[] if
	 * ring->name were ever longer than the remaining space. */
	snprintf(name, sizeof(name), "amdgpu_ring_%s", ring->name);

	ent = debugfs_create_file(name,
				  S_IFREG | S_IRUGO, root,
				  ring, &amdgpu_debugfs_ring_fops);
	if (!ent)
		return -ENOMEM;

	/* File size = 12-byte pointer header + the ring contents,
	 * matching the layout read by amdgpu_debugfs_ring_read(). */
	i_size_write(ent->d_inode, ring->ring_size + 12);
	ring->ent = ent;
#endif
	return 0;
}
    518 
/* Remove the per-ring debugfs file created by amdgpu_debugfs_ring_init();
 * no-op when debugfs is compiled out. */
static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	debugfs_remove(ring->ent);
#endif
}
    525 
    526 /**
    527  * amdgpu_ring_test_helper - tests ring and set sched readiness status
    528  *
    529  * @ring: ring to try the recovery on
    530  *
    531  * Tests ring and set sched readiness status
    532  *
    533  * Returns 0 on success, error on failure.
    534  */
    535 int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
    536 {
    537 	struct amdgpu_device *adev = ring->adev;
    538 	int r;
    539 
    540 	r = amdgpu_ring_test_ring(ring);
    541 	if (r)
    542 		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
    543 			      ring->name, r);
    544 	else
    545 		DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
    546 			      ring->name);
    547 
    548 	ring->sched.ready = !r;
    549 	return r;
    550 }
    551