/*	$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $	*/

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vce_v4_0.c,v 1.3 2021/12/19 12:21:29 riastradh Exp $");

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#include <linux/nbsd-namespace.h>

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}
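
/*
 * Note on write-pointer handling: when a ring uses a doorbell (the SR-IOV
 * path below), the cached wptr lives in the writeback buffer (adev->wb.wb)
 * and is committed with WDOORBELL32 instead of a direct write to the
 * mmVCE_RB_WPTR* registers.
 */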

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
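
/*
 * Under SR-IOV the guest does not program the VCE registers directly.
 * vce_v4_0_sriov_start() below builds a table of direct-write,
 * read-modify-write and poll commands in the shared mm_table buffer,
 * and vce_v4_0_mmsch_start() hands that table to the MMSCH
 * microcontroller, which replays it on the hardware and reports
 * completion through mmVCE_MMSCH_VF_MAILBOX_RESP.
 */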

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					    mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					    adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					    mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					    (adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					    mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					    adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					    mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					    (adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		snprintf(ring->name, sizeof(ring->name), "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set an unused doorbell location for the other rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}


	r = amdgpu_vce_entity_init(adev);
	if (r)
		return r;

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].sched.ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
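
/*
 * vce_v4_0_mc_resume() maps three VCPU cache regions (firmware image,
 * stack, data) through the LMI 40-bit/64-bit BAR pairs and the
 * VCE_VCPU_CACHE_OFFSETn/SIZEn registers.  With PSP firmware loading the
 * firmware region points at the TMR address supplied by the PSP;
 * otherwise it lives at AMDGPU_VCE_FIRMWARE_OFFSET inside the VCE BO.
 * The value placed in bits 24-27 of each OFFSET register appears to
 * select which BAR pair the region is addressed through.
 */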

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L	/* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L	/* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L	/* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3-6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}


static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}
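
/*
 * The helpers below emit VCE "VM mode" ring commands (VCE_CMD_IB_VM,
 * VCE_CMD_FENCE/VCE_CMD_TRAP, VCE_CMD_REG_WRITE, VCE_CMD_REG_WAIT).
 * Their dword counts must stay in sync with .emit_ib_size and
 * .emit_frame_size in vce_v4_0_ring_vm_funcs at the end of this file.
 */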

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};