/*	$NetBSD: radeon_ni_dma.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_ni_dma.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $");

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 rptr, reg;

        if (rdev->wb.enabled) {
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        } else {
                if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                        reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
                else
                        reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

                rptr = RREG32(reg);
        }

        return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        return (RREG32(reg) & 0x3fffc) >> 2;
}
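
/*
 * Note on the pointer encoding used by the accessors above and below:
 * the DMA_RB_RPTR/DMA_RB_WPTR registers hold a byte offset into the ring
 * buffer, and the accessors mask with 0x3fffc and shift by 2 to convert
 * between that byte offset and the driver's dword-based ring index.
 * A minimal sketch with a hypothetical register value:
 *
 *        u32 reg_val  = 0x1234;                    // byte offset read from DMA_RB_RPTR
 *        u32 dw_index = (reg_val & 0x3fffc) >> 2;  // 0x48d, index into the dword ring
 *        u32 back     = (dw_index << 2) & 0x3fffc; // 0x1234 again, as set_wptr writes it
 */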

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl;

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        /* dma0 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

        /* dma1 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}
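
/*
 * Worked example for the padding logic in cayman_dma_ring_ib_execute()
 * above (a sketch with a hypothetical starting wptr): the INDIRECT_BUFFER
 * packet is 3 dwords, so padding the ring until (wptr & 7) == 5 makes the
 * packet end exactly on the 8-dword boundary the DMA engine requires.
 *
 *        u32 wptr = 10;        // hypothetical; 10 & 7 == 2
 *        // 3 NOP packets bring wptr to 13, and 13 & 7 == 5
 *        // the 3-dword INDIRECT_BUFFER packet then ends at wptr == 16, a multiple of 8
 */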

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = DMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = DMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = order_base_2(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(DMA_RB_RPTR + reg_offset, 0);
                WREG32(DMA_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
                WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

                WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

                /* enable DMA IBs */
                ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
                ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
                WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

                dma_cntl = RREG32(DMA_CNTL + reg_offset);
                dma_cntl &= ~CTXEMPTY_INT_ENABLE;
                WREG32(DMA_CNTL + reg_offset, dma_cntl);

                ring->wptr = 0;
                WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
        cayman_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
        radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
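
/*
 * Sizing note for cayman_dma_resume() above: ring->ring_size is in bytes,
 * so ring_size / 4 is the size in dwords, and DMA_RB_CNTL takes log2 of
 * that value shifted left by one into its ring-size field.  A minimal
 * sketch, assuming a hypothetical 64 KiB ring:
 *
 *        u32 ring_size = 64 * 1024;                    // bytes
 *        u32 rb_bufsz  = order_base_2(ring_size / 4);  // 16384 dwords -> 14
 *        u32 rb_cntl   = rb_bufsz << 1;                // size field of DMA_RB_CNTL
 */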

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
        u32 mask;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                mask = RADEON_RESET_DMA;
        else
                mask = RADEON_RESET_DMA1;

        if (!(reset_mask & mask)) {
                radeon_ring_lockup_update(rdev, ring);
                return false;
        }
        return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
                              struct radeon_ib *ib,
                              uint64_t pe, uint64_t src,
                              unsigned count)
{
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = lower_32_bits(pe);
                ib->ptr[ib->length_dw++] = lower_32_bits(src);
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

                pe += ndw * 4;
                src += ndw * 4;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                /* for non-physically contiguous pages (system) */
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = pe;
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                        if (flags & R600_PTE_SYSTEM) {
                                value = radeon_vm_map_gart(rdev, addr);
                        } else if (flags & R600_PTE_VALID) {
                                value = addr;
                        } else {
                                value = 0;
                        }
                        addr += incr;
                        value |= flags;
                        ib->ptr[ib->length_dw++] = value;
                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                }
        }
}
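
/*
 * The 0xFFFFE clamp in the page-table helpers above (and in
 * cayman_dma_vm_set_pages() below) bounds the payload of a single DMA
 * packet: each PTE takes two dwords, so one packet covers at most
 * 0xFFFFE / 2 = 0x7FFFF entries, and larger updates are split across
 * iterations of the while (count) loop.  A sketch with a hypothetical
 * count of 0x100000 entries:
 *
 *        ndw   = 0x100000 * 2;   // 0x200000, clamped to 0xFFFFE
 *        count -= 0xFFFFE / 2;   // 0x7FFFF entries done, 0x80001 remain
 */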

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
                             struct radeon_ib *ib,
                             uint64_t pe,
                             uint64_t addr, unsigned count,
                             uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                if (flags & R600_PTE_VALID)
                        value = addr;
                else
                        value = 0;

                /* for physically contiguous pages (vram) */
                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = flags; /* mask */
                ib->ptr[ib->length_dw++] = 0;
                ib->ptr[ib->length_dw++] = value; /* value */
                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                ib->ptr[ib->length_dw++] = incr; /* increment size */
                ib->ptr[ib->length_dw++] = 0;

                pe += ndw * 4;
                addr += (ndw / 2) * incr;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
        while (ib->length_dw & 0x7)
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
                         unsigned vm_id, uint64_t pd_addr)
{
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
        radeon_ring_write(ring, pd_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm_id);

        /* wait for invalidate to complete */
        radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
        radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 0); /* mask */
        radeon_ring_write(ring, 0); /* value */
}
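
/*
 * Note on cayman_dma_vm_flush() above: the SRBM_WRITE packets address
 * registers by dword offset (hence the ">> 2"), and the per-VM page
 * directory base registers are consecutive 32-bit registers, so
 * VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2) selects the register
 * for the given VM id.  The base address itself is programmed as a page
 * frame number.  A sketch with a hypothetical page directory address:
 *
 *        uint64_t pd_addr = 0x400000;       // 4 MiB, 4 KiB-aligned
 *        uint32_t value   = pd_addr >> 12;  // 0x400 written to the register
 */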