/*	$NetBSD: radeon_ni_dma.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_ni_dma.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $");

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
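/*
 * As a rough sketch of that packet format (see the DMA_PACKET() macro
 * in nid.h for the authoritative encoding): each DMA packet starts
 * with a 32-bit header carrying the opcode in the top nibble and a
 * dword count in the low bits, e.g.
 *
 *	DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)	- a no-op filler dword
 *	DMA_PACKET(DMA_PACKET_COPY, 0, 0, n)	- copy, n payload dwords
 */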
/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		else
			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

	return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
	else
		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}
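	/*
	 * An illustration of the arithmetic above (not driver logic):
	 * the rptr-writeback packet is 4 dwords and the IB packet below
	 * is 3, so next_rptr is bumped to 5 (mod 8) and then advanced
	 * past the IB packet to land on an 8-dword boundary.  E.g. with
	 * ring->wptr == 16: next_rptr starts at 20, is bumped to 21
	 * (21 & 7 == 5), and ends up as 24 -- exactly where the rptr
	 * will point once the hardware has consumed the padded IB
	 * packet written below.
	 */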
	/* The indirect buffer packet must end on an 8 DW boundary in the
	 * DMA ring.  Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl;

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	/* dma0 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

	/* dma1 */
	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}
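/*
 * A worked example of the ring-buffer size encoding used below (an
 * illustration, assuming a 64 KiB ring): ring_size / 4 == 16384 dwords,
 * order_base_2(16384) == 14, and the log2 size is shifted into the
 * control register's size field with rb_bufsz << 1, so rb_cntl starts
 * out as 0x1c.
 */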
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
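/*
 * A note on the lockup check below: cayman_gpu_check_soft_reset()
 * returns a bitmask of engines that currently look hung.  If this
 * ring's DMA bit is clear, the engine is still making progress, so the
 * saved lockup state is refreshed rather than a lockup being reported.
 */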
/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
			      struct radeon_ib *ib,
			      uint64_t pe, uint64_t src,
			      unsigned count)
{
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += ndw * 4;
		src += ndw * 4;
		count -= ndw / 2;
	}
}
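/*
 * The chunking arithmetic above recurs in the write/set variants below:
 * each 64-bit PTE occupies two dwords, so ndw = count * 2, capped at
 * 0xFFFFE (the largest even value that fits the packet's 20-bit count
 * field).  As a worked example, count == 1000000 PTEs splits into a
 * first packet covering 0xFFFFE / 2 == 524287 entries, after which the
 * loop advances pe and src and repeats for the remainder.
 */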
/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
			       struct radeon_ib *ib,
			       uint64_t pe,
			       uint64_t addr, unsigned count,
			       uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
						      0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}
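/*
 * Above, every PTE value is computed on the CPU (via a GART lookup for
 * system pages), so the payload carries one explicit 64-bit entry per
 * PTE.  The set variant below instead handles physically contiguous
 * VRAM: a single PTE_PDE packet gives the hardware a base value, mask,
 * and increment, and the DMA engine generates the consecutive entries
 * itself.
 */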
/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its length is a multiple of 8 dwords.
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}