/*	$NetBSD: amdgpu_vce_v3_0.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v3_0.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"


#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT	0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)	((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);
/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config
			== AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_RPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_RPTR2);
	else
		v = RREG32(mmVCE_RB_RPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_WPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_WPTR2);
	else
		v = RREG32(mmVCE_RB_WPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		/* Program the instance 0 register space when both instances
		   are present or only instance 0 is available; program the
		   instance 1 register space only when instance 1 alone is
		   available. */
		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
			ring = &adev->vce.ring[0];
			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

			ring = &adev->vce.ring[1];
			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

			ring = &adev->vce.ring[2];
			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
		}

		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear VCE STATUS */
		WREG32(mmVCE_STATUS, 0);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS	0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT		27
#define VCE_HARVEST_FUSE_MACRO__MASK		0x18000000

static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY))
		return AMDGPU_VCE_HARVEST_VCE1;

	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		if ((adev->asic_type == CHIP_POLARIS10) ||
		    (adev->asic_type == CHIP_POLARIS11) ||
		    (adev->asic_type == CHIP_POLARIS12) ||
		    (adev->asic_type == CHIP_VEGAM))
			return AMDGPU_VCE_HARVEST_VCE1;

		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
		(AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
		(AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
	if (r)
		return r;

	r =
	    amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);

	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	vce_v3_0_stop(adev);
	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE +
		    VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L	/* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L	/* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L	/* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}


static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (!enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	if (state == AMD_PG_STATE_GATE) {
		ret = vce_v3_0_stop(adev);
		if (ret)
			goto out;
	} else {
		ret = vce_v3_0_start(adev);
		if (ret)
			goto out;
	}

out:
	return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (adev->flags & AMD_IS_APU)
		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
	else
		data = RREG32_SMC(ixCURRENT_PG_STATUS);

	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring,
	    lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static
void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};