/*	$NetBSD: radeon_ring.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_ring.c,v 1.5 2021/12/18 23:45:43 riastradh Exp $");

#include <drm/drm_debugfs.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>

#include "radeon.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
static int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);

/**
 * radeon_ring_supports_scratch_reg - check if the ring supports
 * writing to scratch registers
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if a specific ring supports writing to scratch registers (all asics).
 * Returns true if the ring supports writing to scratch regs, false if not.
 */
bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
				      struct radeon_ring *ring)
{
	switch (ring->idx) {
	case RADEON_RING_TYPE_GFX_INDEX:
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
		return true;
	default:
		return false;
	}
}

/**
 * radeon_ring_free_size - update the free size
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Update the free dw slots in the ring buffer (all asics).
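 *
 * Worked example (illustrative numbers only): with a 64 KiB ring there
 * are 0x4000 dword slots and ptr_mask = 0x3fff.  If rptr = 0x0100 and
 * wptr = 0x3f00, then
 *
 *	free = (0x0100 + 0x4000 - 0x3f00) & 0x3fff = 0x0200 dwords,
 *
 * i.e. the power-of-two mask makes the subtraction wrap correctly even
 * when wptr sits ahead of rptr modulo the ring size.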
 */
void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);

	/* This works because ring_size is a power of 2 */
	ring->ring_free_dw = rptr + (ring->ring_size / 4);
	ring->ring_free_dw -= ring->wptr;
	ring->ring_free_dw &= ring->ptr_mask;
	if (!ring->ring_free_dw) {
		/* this is an empty ring */
		ring->ring_free_dw = ring->ring_size / 4;
		/* update lockup info to avoid false positive */
		radeon_ring_lockup_update(rdev, ring);
	}
}

/**
 * radeon_ring_alloc - allocate space on the ring buffer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
{
	int r;

	/* make sure we aren't trying to allocate more space than there is on the ring */
	if (ndw > (ring->ring_size / 4))
		return -ENOMEM;
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	radeon_ring_free_size(rdev, ring);
	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
	while (ndw > (ring->ring_free_dw - 1)) {
		radeon_ring_free_size(rdev, ring);
		if (ndw < ring->ring_free_dw) {
			break;
		}
		r = radeon_fence_wait_next(rdev, ring->idx);
		if (r)
			return r;
	}
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;
	return 0;
}

/**
 * radeon_ring_lock - lock the ring and allocate space on it
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Lock the ring and allocate @ndw dwords in the ring buffer
 * (all asics).
 * Returns 0 on success, error on failure.
 */
int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
{
	int r;

	mutex_lock(&rdev->ring_lock);
	r = radeon_ring_alloc(rdev, ring, ndw);
	if (r) {
		mutex_unlock(&rdev->ring_lock);
		return r;
	}
	return 0;
}

/**
 * radeon_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @hdp_flush: Whether or not to perform an HDP cache flush
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring,
			bool hdp_flush)
{
	/* If we are emitting the HDP flush via the ring buffer, we need to
	 * do it before padding.
	 */
	if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
	/* We pad to match fetch size */
	while (ring->wptr & ring->align_mask) {
		radeon_ring_write(ring, ring->nop);
	}
	mb();
	/* If we are emitting the HDP flush via MMIO, we need to do it after
	 * all CPU writes to VRAM finished.
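	 *
	 * For context, a typical producer sequence (an illustrative
	 * sketch, not code from this file; reg and val are placeholders)
	 * looks like:
	 *
	 *	r = radeon_ring_lock(rdev, ring, ndw);
	 *	if (r)
	 *		return r;
	 *	radeon_ring_write(ring, PACKET0(reg, 0));
	 *	radeon_ring_write(ring, val);
	 *	radeon_ring_unlock_commit(rdev, ring, false);
	 *
	 * so by the time we flush here, every command dword has already
	 * been written; only the wptr update below publishes them to the
	 * GPU.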
	 */
	if (hdp_flush && rdev->asic->mmio_hdp_flush)
		rdev->asic->mmio_hdp_flush(rdev);
	radeon_ring_set_wptr(rdev, ring);
}

/**
 * radeon_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @hdp_flush: Whether or not to perform an HDP cache flush
 *
 * Call radeon_ring_commit() then unlock the ring (all asics).
 */
void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring,
			       bool hdp_flush)
{
	radeon_ring_commit(rdev, ring, hdp_flush);
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_ring_undo - reset the wptr
 *
 * @ring: radeon_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void radeon_ring_undo(struct radeon_ring *ring)
{
	ring->wptr = ring->wptr_old;
}

/**
 * radeon_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Call radeon_ring_undo() then unlock the ring (all asics).
 */
void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring)
{
	radeon_ring_undo(ring);
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_ring_lockup_update - update lockup variables
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Update the last rptr value and timestamp (all asics).
 */
void radeon_ring_lockup_update(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring));
	atomic64_set(&ring->last_activity, jiffies_64);
}

/**
 * radeon_ring_test_lockup() - check if a ring is locked up
 *
 * @rdev: radeon device structure
 * @ring: radeon_ring structure holding ring information
 *
 * Check whether the ring has made progress since the last recorded
 * rptr and, if not, whether it has been stalled for longer than the
 * configured lockup timeout.  Returns true if the ring appears to be
 * locked up, false otherwise.
 */
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
	uint64_t last = atomic64_read(&ring->last_activity);
	uint64_t elapsed;

	if (rptr != atomic_read(&ring->last_rptr)) {
		/* ring is still working, no lockup */
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}

	elapsed = jiffies_to_msecs(jiffies_64 - last);
	if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) {
		dev_err(rdev->dev, "ring %d stalled for more than %"PRIu64"msec\n",
			ring->idx, elapsed);
		return true;
	}
	/* give a chance to the GPU ... */
	return false;
}

/**
 * radeon_ring_backup - Back up the content of a ring
 *
 * @rdev: radeon_device pointer
 * @ring: the ring we want to back up
 * @data: pointer that receives the buffer of saved command dwords
 *
 * Saves all unprocessed commands from a ring; returns the number of dwords saved.
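 *
 * Illustrative pairing with radeon_ring_restore() (a sketch of how a
 * GPU reset path might use this, assuming the ring is reinitialized in
 * between):
 *
 *	unsigned size;
 *	uint32_t *data;
 *
 *	size = radeon_ring_backup(rdev, ring, &data);
 *	...reset the asic and reinitialize the ring...
 *	radeon_ring_restore(rdev, ring, size, data);
 *
 * Note that radeon_ring_restore() releases @data with kvfree(), so the
 * caller must not free it again.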
 */
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
			    uint32_t **data)
{
	unsigned size, ptr, i;

	/* lock the ring, just in case */
	mutex_lock(&rdev->ring_lock);
	*data = NULL;

	if (ring->ring_obj == NULL) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* it doesn't make sense to save anything if all fences are signaled */
	if (!radeon_fence_count_emitted(rdev, ring->idx)) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* calculate the number of dw on the ring */
	if (ring->rptr_save_reg)
		ptr = RREG32(ring->rptr_save_reg);
	else if (rdev->wb.enabled)
		ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
	else {
		/* no way to read back the next rptr */
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	size = ring->wptr + (ring->ring_size / 4);
	size -= ptr;
	size &= ring->ptr_mask;
	if (size == 0) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}

	/* and then save the content of the ring */
	*data = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
	if (!*data) {
		mutex_unlock(&rdev->ring_lock);
		return 0;
	}
	for (i = 0; i < size; ++i) {
		(*data)[i] = ring->ring[ptr++];
		ptr &= ring->ptr_mask;
	}

	mutex_unlock(&rdev->ring_lock);
	return size;
}

/**
 * radeon_ring_restore - append saved commands to the ring again
 *
 * @rdev: radeon_device pointer
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = radeon_ring_lock(rdev, ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i) {
		radeon_ring_write(ring, data[i]);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	kvfree(data);
	return 0;
}

/**
 * radeon_ring_init - init driver ring struct.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @ring_size: size of the ring in bytes (must be a power of two)
 * @rptr_offs: offset of the rptr writeback location in the WB buffer
 * @nop: nop packet for this ring
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
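 *
 * A representative call (an illustrative sketch only; real sizes and
 * nop packets are per-asic, this roughly mirrors the r100 gfx path):
 *
 *	r = radeon_ring_init(rdev, ring, 1024 * 1024,
 *			     RADEON_WB_CP_RPTR_OFFSET, RADEON_CP_PACKET2);
 *
 * Since ptr_mask is derived below as (ring_size / 4) - 1, a
 * non-power-of-two size would break the wrap-around arithmetic.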
 */
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
		     unsigned rptr_offs, u32 nop)
{
	int r;

	ring->ring_size = ring_size;
	ring->rptr_offs = rptr_offs;
	ring->nop = nop;
	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, &ring->ring_obj);
		if (r) {
			dev_err(rdev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		r = radeon_bo_reserve(ring->ring_obj, false);
		if (unlikely(r != 0))
			return r;
		r = radeon_bo_pin(ring->ring_obj, RADEON_GEM_DOMAIN_GTT,
				  &ring->gpu_addr);
		if (r) {
			radeon_bo_unreserve(ring->ring_obj);
			dev_err(rdev->dev, "(%d) ring pin failed\n", r);
			return r;
		}
		r = radeon_bo_kmap(ring->ring_obj,
				   (void **)__UNVOLATILE(&ring->ring));
		radeon_bo_unreserve(ring->ring_obj);
		if (r) {
			dev_err(rdev->dev, "(%d) ring map failed\n", r);
			return r;
		}
	}
	ring->ptr_mask = (ring->ring_size / 4) - 1;
	ring->ring_free_dw = ring->ring_size / 4;
	if (rdev->wb.enabled) {
		u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4);
		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
	}
	if (radeon_debugfs_ring_init(rdev, ring)) {
		DRM_ERROR("Failed to register debugfs file for rings!\n");
	}
	radeon_ring_lockup_update(rdev, ring);
	return 0;
}

/**
 * radeon_ring_fini - tear down the driver ring struct.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;
	struct radeon_bo *ring_obj;

	mutex_lock(&rdev->ring_lock);
	ring_obj = ring->ring_obj;
	ring->ready = false;
	ring->ring = NULL;
	ring->ring_obj = NULL;
	mutex_unlock(&rdev->ring_lock);

	if (ring_obj) {
		r = radeon_bo_reserve(ring_obj, false);
		if (likely(r == 0)) {
			radeon_bo_kunmap(ring_obj);
			radeon_bo_unpin(ring_obj);
			radeon_bo_unreserve(ring_obj);
		}
		radeon_bo_unref(&ring_obj);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int ridx = *(int*)node->info_ent->data;
	struct radeon_ring *ring = &rdev->ring[ridx];

	uint32_t rptr, wptr, rptr_next;
	unsigned count, i, j;

	radeon_ring_free_size(rdev, ring);
	count = (ring->ring_size / 4) - ring->ring_free_dw;

	wptr = radeon_ring_get_wptr(rdev, ring);
	seq_printf(m, "wptr: 0x%08x [%5d]\n",
		   wptr, wptr);

	rptr = radeon_ring_get_rptr(rdev, ring);
	seq_printf(m, "rptr: 0x%08x [%5d]\n",
		   rptr, rptr);

	if (ring->rptr_save_reg) {
		rptr_next = RREG32(ring->rptr_save_reg);
		seq_printf(m, "rptr next(0x%04x): 0x%08x [%5d]\n",
			   ring->rptr_save_reg, rptr_next, rptr_next);
	} else
		rptr_next = ~0;

	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
		   ring->wptr, ring->wptr);
	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
		   ring->last_semaphore_signal_addr);
seq_printf(m, "last semaphore wait addr : 0x%016llx\n", 503 ring->last_semaphore_wait_addr); 504 seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); 505 seq_printf(m, "%u dwords in ring\n", count); 506 507 if (!ring->ring) 508 return 0; 509 510 /* print 8 dw before current rptr as often it's the last executed 511 * packet that is the root issue 512 */ 513 i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; 514 for (j = 0; j <= (count + 32); j++) { 515 seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); 516 if (rptr == i) 517 seq_puts(m, " *"); 518 if (rptr_next == i) 519 seq_puts(m, " #"); 520 seq_puts(m, "\n"); 521 i = (i + 1) & ring->ptr_mask; 522 } 523 return 0; 524 } 525 526 static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX; 527 static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; 528 static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; 529 static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; 530 static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; 531 static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; 532 static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; 533 static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX; 534 535 static struct drm_info_list radeon_debugfs_ring_info_list[] = { 536 {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, 537 {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index}, 538 {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index}, 539 {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, 540 {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, 541 {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, 542 {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, 543 {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, 544 }; 545 546 #endif 547 548 static int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring) 549 { 550 #if defined(CONFIG_DEBUG_FS) 551 unsigned i; 552 for (i = 0; i < ARRAY_SIZE(radeon_debugfs_ring_info_list); ++i) { 553 struct drm_info_list *info = &radeon_debugfs_ring_info_list[i]; 554 int ridx = *(int*)radeon_debugfs_ring_info_list[i].data; 555 unsigned r; 556 557 if (&rdev->ring[ridx] != ring) 558 continue; 559 560 r = radeon_debugfs_add_files(rdev, info, 1); 561 if (r) 562 return r; 563 } 564 #endif 565 return 0; 566 } 567