/*	$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $	*/

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $");

#include <linux/firmware.h>
#include <linux/pci.h>

#include <drm/drm_cache.h>

#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_gem.h"

#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
#include "gc/gc_9_0_sh_mask.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"

#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"

#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
#include "athub_v1_0.h"
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"

#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION				0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX			2
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT	0x0
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT	0x10
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK	0x00003FFFL
#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK	0x3FFF0000L

/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS			8

static const u32 golden_settings_vega10_hdp[] =
{
	0xf64, 0x0fffffff, 0x00000000,
	0xf65, 0x0fffffff, 0x00000000,
	0xf66, 0x0fffffff, 0x00000000,
	0xf67, 0x0fffffff, 0x00000000,
	0xf68, 0x0fffffff, 0x00000000,
	0xf6a, 0x0fffffff, 0x00000000,
	0xf6b, 0x0fffffff, 0x00000000,
	0xf6c, 0x0fffffff, 0x00000000,
	0xf6d, 0x0fffffff, 0x00000000,
	0xf6e, 0x0fffffff, 0x00000000,
};
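/*
 * The MMHUB and ATHUB golden settings below are programmed from
 * gmc_v9_0_init_golden_registers(): both lists on VEGA10 (bare metal) and
 * VEGA20, the ATHUB list alone on RAVEN.  The HDP list above is applied
 * later, from gmc_v9_0_hw_init().
 */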
static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
};

static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
{
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
};

static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
	(0x000143c0 + 0x00000000),
	(0x000143c0 + 0x00000800),
	(0x000143c0 + 0x00001000),
	(0x000143c0 + 0x00001800),
	(0x000543c0 + 0x00000000),
	(0x000543c0 + 0x00000800),
	(0x000543c0 + 0x00001000),
	(0x000543c0 + 0x00001800),
	(0x000943c0 + 0x00000000),
	(0x000943c0 + 0x00000800),
	(0x000943c0 + 0x00001000),
	(0x000943c0 + 0x00001800),
	(0x000d43c0 + 0x00000000),
	(0x000d43c0 + 0x00000800),
	(0x000d43c0 + 0x00001000),
	(0x000d43c0 + 0x00001800),
	(0x001143c0 + 0x00000000),
	(0x001143c0 + 0x00000800),
	(0x001143c0 + 0x00001000),
	(0x001143c0 + 0x00001800),
	(0x001543c0 + 0x00000000),
	(0x001543c0 + 0x00000800),
	(0x001543c0 + 0x00001000),
	(0x001543c0 + 0x00001800),
	(0x001943c0 + 0x00000000),
	(0x001943c0 + 0x00000800),
	(0x001943c0 + 0x00001000),
	(0x001943c0 + 0x00001800),
	(0x001d43c0 + 0x00000000),
	(0x001d43c0 + 0x00000800),
	(0x001d43c0 + 0x00001000),
	(0x001d43c0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
	(0x000143e0 + 0x00000000),
	(0x000143e0 + 0x00000800),
	(0x000143e0 + 0x00001000),
	(0x000143e0 + 0x00001800),
	(0x000543e0 + 0x00000000),
	(0x000543e0 + 0x00000800),
	(0x000543e0 + 0x00001000),
	(0x000543e0 + 0x00001800),
	(0x000943e0 + 0x00000000),
	(0x000943e0 + 0x00000800),
	(0x000943e0 + 0x00001000),
	(0x000943e0 + 0x00001800),
	(0x000d43e0 + 0x00000000),
	(0x000d43e0 + 0x00000800),
	(0x000d43e0 + 0x00001000),
	(0x000d43e0 + 0x00001800),
	(0x001143e0 + 0x00000000),
	(0x001143e0 + 0x00000800),
	(0x001143e0 + 0x00001000),
	(0x001143e0 + 0x00001800),
	(0x001543e0 + 0x00000000),
	(0x001543e0 + 0x00000800),
	(0x001543e0 + 0x00001000),
	(0x001543e0 + 0x00001800),
	(0x001943e0 + 0x00000000),
	(0x001943e0 + 0x00000800),
	(0x001943e0 + 0x00001000),
	(0x001943e0 + 0x00001800),
	(0x001d43e0 + 0x00000000),
	(0x001d43e0 + 0x00000800),
	(0x001d43e0 + 0x00001000),
	(0x001d43e0 + 0x00001800),
};

static const uint32_t ecc_umc_mcumc_status_addrs[] __unused = {
	(0x000143c2 + 0x00000000),
	(0x000143c2 + 0x00000800),
	(0x000143c2 + 0x00001000),
	(0x000143c2 + 0x00001800),
	(0x000543c2 + 0x00000000),
	(0x000543c2 + 0x00000800),
	(0x000543c2 + 0x00001000),
	(0x000543c2 + 0x00001800),
	(0x000943c2 + 0x00000000),
	(0x000943c2 + 0x00000800),
	(0x000943c2 + 0x00001000),
	(0x000943c2 + 0x00001800),
	(0x000d43c2 + 0x00000000),
	(0x000d43c2 + 0x00000800),
	(0x000d43c2 + 0x00001000),
	(0x000d43c2 + 0x00001800),
	(0x001143c2 + 0x00000000),
	(0x001143c2 + 0x00000800),
	(0x001143c2 + 0x00001000),
	(0x001143c2 + 0x00001800),
	(0x001543c2 + 0x00000000),
	(0x001543c2 + 0x00000800),
	(0x001543c2 + 0x00001000),
	(0x001543c2 + 0x00001800),
	(0x001943c2 + 0x00000000),
	(0x001943c2 + 0x00000800),
	(0x001943c2 + 0x00001000),
	(0x001943c2 + 0x00001800),
	(0x001d43c2 + 0x00000000),
	(0x001d43c2 + 0x00000800),
	(0x001d43c2 + 0x00001000),
	(0x001d43c2 + 0x00001800),
};
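/*
 * Enable or disable the ECC error interrupt by setting or clearing the low
 * seven bits (0x7f) of every per-channel UMC control and mask register
 * listed above.  On ASICs newer than VEGA10/12 this sequence is handled by
 * the PSP bootloader, so the function returns early there.
 */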
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
		struct amdgpu_irq_src *src,
		unsigned type,
		enum amdgpu_interrupt_state state)
{
	u32 bits, i, tmp, reg;

	/* Devices newer than VEGA10/12 shall have these programming
	   sequences performed by PSP BL */
	if (adev->asic_type >= CHIP_VEGA20)
		return 0;

	bits = 0x7f;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp &= ~bits;
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
			tmp = RREG32(reg);
			tmp |= bits;
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}

static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *src,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits, i, j;

	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp &= ~bits;
				WREG32(reg, tmp);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for (j = 0; j < adev->num_vmhubs; j++) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;
				tmp = RREG32(reg);
				tmp |= bits;
				WREG32(reg, tmp);
			}
		}
	default:
		break;
	}

	return 0;
}

static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct amdgpu_vmhub *hub;
	bool retry_fault = !!(entry->src_data[1] & 0x80);
	uint32_t status = 0;
	u64 addr;
	char hub_name[10];

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
						    entry->timestamp))
		return 1; /* This also prevents sending it to KFD */

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		snprintf(hub_name, sizeof(hub_name), "mmhub0");
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		snprintf(hub_name, sizeof(hub_name), "mmhub1");
		hub = &adev->vmhub[AMDGPU_MMHUB_1];
	} else {
		snprintf(hub_name, sizeof(hub_name), "gfxhub0");
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	}

	/* If it's the first fault for this address, process it normally */
	if (retry_fault && !in_interrupt() &&
	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1; /* This also prevents sending it to KFD */

	if (!amdgpu_sriov_vf(adev)) {
		/*
		 * Issue a dummy read to wait for the status register to
		 * be updated to avoid reading an incorrect value due to
		 * the new fast GRBM interface.
		 */
		if (entry->vmid_src == AMDGPU_GFXHUB_0)
			RREG32(hub->vm_l2_pro_fault_status);

		status = RREG32(hub->vm_l2_pro_fault_status);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
	}

	if (printk_ratelimit()) {
		struct amdgpu_task_info task_info;

		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

		dev_err(adev->dev,
			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
			hub_name, retry_fault ? "retry" : "no-retry",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid, task_info.process_name, task_info.tgid,
			task_info.task_name, task_info.pid);
		dev_err(adev->dev, " in page starting at address 0x%016"PRIx64" from client %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev)) {
			dev_err(adev->dev,
				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
				status);
			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
			dev_err(adev->dev, "\t RW: 0x%lx\n",
				REG_GET_FIELD(status,
				VM_L2_PROTECTION_FAULT_STATUS, RW));

		}
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
	.set = gmc_v9_0_vm_fault_interrupt_state,
	.process = gmc_v9_0_process_interrupt,
};


static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
	.set = gmc_v9_0_ecc_interrupt_state,
	.process = amdgpu_umc_process_ecc_irq,
};

static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;

	if (!amdgpu_sriov_vf(adev)) {
		adev->gmc.ecc_irq.num_types = 1;
		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
	}
}

static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
					    uint32_t flush_type)
{
	u32 req = 0;

	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}

/**
 * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
				       uint32_t vmhub)
{
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)) &&
		(!(adev->asic_type == CHIP_RAVEN &&
		   adev->rev_id < 0x8 &&
		   adev->pdev->device == 0x15d8)));
}

static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		     + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/**
 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table using certain type.
 */
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
	const unsigned eng = 17;
	u32 j, inv_req, tmp;
	struct amdgpu_vmhub *hub;

	BUG_ON(vmhub >= adev->num_vmhubs);

	hub = &adev->vmhub[vmhub];
	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);

	/* This is necessary for a HW workaround under SRIOV as well
	 * as GFXOFF under bare metal
	 */
	if (adev->gfx.kiq.ring.sched.ready &&
			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
			!adev->in_gpu_reset) {
		uint32_t req = hub->vm_inv_eng0_req + eng;
		uint32_t ack = hub->vm_inv_eng0_ack + eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
				1 << vmid);
		return;
	}

	spin_lock(&adev->gmc.invalidate_lock);

	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle; add a semaphore acquire before the
	 * invalidation and a semaphore release after it, to avoid entering
	 * the power-gated state and work around the issue.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (j = 0; j < adev->usec_timeout; j++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);

	/*
	 * Issue a dummy read to wait for the ACK register to be cleared
	 * to avoid a false ACK due to the new fast GRBM interface.
	 */
	if (vmhub == AMDGPU_GFXHUB_0)
		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);

	for (j = 0; j < adev->usec_timeout; j++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
		if (tmp & (1 << vmid))
			break;
		udelay(1);
	}

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (j < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					uint16_t pasid, uint32_t flush_type,
					bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (adev->in_gpu_reset)
		return -EIO;

	if (ring->sched.ready) {
		spin_lock(&adev->gfx.kiq.ring_lock);
		/* 2 dwords flush + 8 dwords fence */
		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					pasid, flush_type, all_hub);
		amdgpu_fence_emit_polling(ring, &seq);
		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
			return -ETIME;
		}

		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {

		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
				&queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v9_0_flush_gpu_tlb(adev, vmid,
							i, flush_type);
			} else {
				gmc_v9_0_flush_gpu_tlb(adev, vmid,
						AMDGPU_GFXHUB_0, flush_type);
			}
			break;
		}
	}

	return 0;

}

static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					    unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle; add a semaphore acquire before the
	 * invalidation and a semaphore release after it, to avoid entering
	 * the power-gated state and work around the issue.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
					    hub->vm_inv_eng0_ack + eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);

	return pd_addr;
}

static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	/* Do nothing because there's no lut register for mmhub1. */
	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
		return;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on VEGA 10:
 * 63:59 reserved
 * 58:57 mtype
 * 56 F
 * 55 L
 * 54 P
 * 53 SW
 * 52 T
 * 50:48 reserved
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on VEGA 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */

static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)

{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_RW:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
	}
}

static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
				uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
				struct amdgpu_bo_va_mapping *mapping,
				uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (adev->asic_type == CHIP_ARCTURUS &&
	    !(*flags & AMDGPU_PTE_SYSTEM) &&
	    mapping->bo_va->is_xgmi)
		*flags |= AMDGPU_PTE_SNOOPED;
}

static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
	.map_mtype = gmc_v9_0_map_mtype,
	.get_vm_pde = gmc_v9_0_get_vm_pde,
	.get_vm_pte = gmc_v9_0_get_vm_pte
};

static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}

static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		adev->umc.funcs = &umc_v6_0_funcs;
		break;
	case CHIP_VEGA20:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
		adev->umc.funcs = &umc_v6_1_funcs;
		break;
	default:
		break;
	}
}

static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA20:
		adev->mmhub.funcs = &mmhub_v1_0_funcs;
		break;
	case CHIP_ARCTURUS:
		adev->mmhub.funcs = &mmhub_v9_4_funcs;
		break;
	default:
		break;
	}
}

static int gmc_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v9_0_set_gmc_funcs(adev);
	gmc_v9_0_set_irq_funcs(adev);
	gmc_v9_0_set_umc_funcs(adev);
	gmc_v9_0_set_mmhub_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}

static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
{

	/*
	 * TODO:
	 * Currently there is a bug where some memory client outside
	 * of the driver writes to the first 8M of VRAM on S3 resume.
	 * This overrides GART, which by default gets placed in the first 8M,
	 * and causes VM_FAULTS once GTT is accessed.
	 * Keep the stolen memory reservation while this is not solved.
	 * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
	 */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		return true;
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	default:
		return false;
	}
}

static int gmc_v9_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (!gmc_v9_0_keep_stolen_memory(adev))
		amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;
	/* Check if ecc is available */
	if (!amdgpu_sriov_vf(adev)) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
			r = amdgpu_atomfirmware_mem_ecc_supported(adev);
			if (!r) {
				DRM_INFO("ECC is not present.\n");
				if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
					adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
			} else {
				DRM_INFO("ECC is active.\n");
			}

			r = amdgpu_atomfirmware_sram_ecc_supported(adev);
			if (!r) {
				DRM_INFO("SRAM ECC is not present.\n");
			} else {
				DRM_INFO("SRAM ECC is active.\n");
			}
			break;
		default:
			break;
		}
	}

	r = amdgpu_gmc_ras_late_init(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (adev->asic_type == CHIP_ARCTURUS)
		base = mmhub_v9_4_get_fb_location(adev);
	else if (!amdgpu_sriov_vf(adev))
		base = mmhub_v1_0_get_fb_location(adev);

	/* add the xgmi offset of the physical node */
	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
	amdgpu_gmc_vram_location(adev, mc, base);
	amdgpu_gmc_gart_location(adev, mc);
	amdgpu_gmc_agp_location(adev, mc);
	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);

	/* XXX: add the xgmi offset of the physical node? */
	adev->vm_manager.vram_base_offset +=
		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}

/**
 * gmc_v9_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
	int r;

	/* size in MB on si */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

	if (!(adev->flags & AMD_IS_APU)) {
		r = amdgpu_device_resize_fb_bar(adev);
		if (r)
			return r;
	}
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

#ifdef __NetBSD__
	adev->gmc.aper_tag = adev->pdev->pd_pa.pa_memt;
#endif

#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU) {
		adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
		adev->gmc.aper_size = adev->gmc.real_vram_size;
	}
#endif
	/* In case the PCI BAR is larger than the actual amount of vram */
	adev->gmc.visible_vram_size = adev->gmc.aper_size;
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:  /* all engines support GPUVM */
		case CHIP_VEGA12:  /* all engines support GPUVM */
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		case CHIP_RAVEN:   /* DCE SG support */
		case CHIP_RENOIR:
			adev->gmc.gart_size = 1024ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "VEGA10 PCIE GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;
	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
	return amdgpu_gart_table_vram_alloc(adev);
}

static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control;
	unsigned size;

	/*
	 * TODO Remove once GART corruption is resolved
	 * Check related code in gmc_v9_0_sw_fini
	 */
	if (gmc_v9_0_keep_stolen_memory(adev))
		return 9 * 1024 * 1024;

	d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = 9 * 1024 * 1024; /* reserve 8 MB for vga emulator and 1 MB for FB */
	} else {
		u32 viewport;

		switch (adev->asic_type) {
		case CHIP_RAVEN:
		case CHIP_RENOIR:
			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
			size = (REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport,
					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
				4);
			break;
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_VEGA20:
		default:
			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
				4);
			break;
		}
	}
	/* return 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
		return 0;

	return size;
}

static int gmc_v9_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfxhub_v1_0_init(adev);
	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_init(adev);
	else
		mmhub_v1_0_init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
		&vram_width, &vram_type, &vram_vendor);
	if (amdgpu_sriov_vf(adev))
		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as on
		 * RAVEN, and the DF-related registers are not readable;
		 * hardcoding seems to be the only way to set the correct
		 * vram_width.
		 */
		adev->gmc.vram_width = 2048;
	else if (amdgpu_emu_mode != 1)
		adev->gmc.vram_width = vram_width;

	if (!adev->gmc.vram_width) {
		int chansize, numchan;

		/* hbm memory channel size */
		if (adev->flags & AMD_IS_APU)
			chansize = 64;
		else
			chansize = 128;

		numchan = adev->df.funcs->get_hbm_channel_number(adev);
		adev->gmc.vram_width = numchan * chansize;
	}

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_RAVEN:
		adev->num_vmhubs = 2;

		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		} else {
			/* vm_size is 128TB + 512GB for legacy 3-level page support */
			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
			adev->gmc.translate_further =
				adev->vm_manager.num_level > 1;
		}
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RENOIR:
		adev->num_vmhubs = 2;

		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Vega10,
		 * block size 512 (9bit)
		 */
		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
		if (amdgpu_sriov_vf(adev))
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
		else
			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	case CHIP_ARCTURUS:
		adev->num_vmhubs = 3;

		/* Keep the vm size same with Vega20 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault. */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
				&adev->gmc.vm_fault);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS) {
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
					&adev->gmc.vm_fault);
		if (r)
			return r;
	}

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
				&adev->gmc.vm_fault);

	if (r)
		return r;

	if (!amdgpu_sriov_vf(adev)) {
		/* interrupt sent to DF. */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
				      &adev->gmc.ecc_irq);
		if (r)
			return r;
	}

	/* Set the internal MC address mask
	 * This is the max address of the GPU's
	 * internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

#ifdef __NetBSD__
	r = drm_limit_dma_space(adev->ddev, 0, DMA_BIT_MASK(44));
#else
	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
#endif
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}
	adev->need_swiotlb = drm_need_swiotlb(44);

	if (adev->gmc.xgmi.supported) {
		r = gfxhub_v1_1_get_xgmi_info(adev);
		if (r)
			return r;
	}

	r = gmc_v9_0_mc_init(adev);
	if (r)
		return r;

	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v9_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;

	amdgpu_vm_manager_init(adev);

	return 0;
}

static int gmc_v9_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	void *stolen_vga_buf;

	amdgpu_gmc_ras_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);

	if (gmc_v9_0_keep_stolen_memory(adev))
		amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);

	amdgpu_gart_table_vram_free(adev);
	amdgpu_bo_fini(adev);
	amdgpu_gart_fini(adev);

	spin_lock_destroy(&adev->gmc.invalidate_lock);

	return 0;
}

static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (amdgpu_sriov_vf(adev))
			break;
		/* fall through */
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_mmhub_1_0_0,
						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	case CHIP_VEGA12:
		break;
	case CHIP_RAVEN:
		/* TODO for renoir */
		soc15_program_register_sequence(adev,
						golden_settings_athub_1_0_0,
						ARRAY_SIZE(golden_settings_athub_1_0_0));
		break;
	default:
		break;
	}
}

/**
 * gmc_v9_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = gfxhub_v1_0_gart_enable(adev);
	if (r)
		return r;

	if (adev->asic_type == CHIP_ARCTURUS)
		r = mmhub_v9_4_gart_enable(adev);
	else
		r = mmhub_v1_0_gart_enable(adev);
	if (r)
		return r;

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
	adev->gart.ready = true;
	return 0;
}

static int gmc_v9_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool value;
	int r, i;
	u32 tmp;

	/* The sequence of these two function calls matters. */
	gmc_v9_0_init_golden_registers(adev);

	if (adev->mode_info.num_crtc) {
		if (adev->asic_type != CHIP_ARCTURUS) {
			/* Lockout access through VGA aperture */
			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);

			/* disable VGA render */
			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
		}
	}

	amdgpu_device_program_register_sequence(adev,
						golden_settings_vega10_hdp,
						ARRAY_SIZE(golden_settings_vega10_hdp));

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		/* TODO for renoir */
		mmhub_v1_0_update_power_gating(adev, true);
		break;
	case CHIP_ARCTURUS:
		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
		break;
	default:
		break;
	}

	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));

	/* After HDP is initialized, flush HDP. */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
		value = false;
	else
		value = true;

	if (!amdgpu_sriov_vf(adev)) {
		gfxhub_v1_0_set_fault_enable_default(adev, value);
		if (adev->asic_type == CHIP_ARCTURUS)
			mmhub_v9_4_set_fault_enable_default(adev, value);
		else
			mmhub_v1_0_set_fault_enable_default(adev, value);
	}
	for (i = 0; i < adev->num_vmhubs; ++i)
		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);

	if (adev->umc.funcs && adev->umc.funcs->init_registers)
		adev->umc.funcs->init_registers(adev);

	r = gmc_v9_0_gart_enable(adev);

	return r;
}

/**
 * gmc_v9_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
	gfxhub_v1_0_gart_disable(adev);
	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_gart_disable(adev);
	else
		mmhub_v1_0_gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v9_0_gart_disable(adev);

	return 0;
}

static int gmc_v9_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gmc_v9_0_hw_fini(adev);
}

static int gmc_v9_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v9_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v9_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v9. */
	return true;
}

static int gmc_v9_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v9. */
	return 0;
}

static int gmc_v9_0_soft_reset(void *handle)
{
	/* XXX for emulation. */
	return 0;
}

static int gmc_v9_0_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_set_clockgating(adev, state);
	else
		mmhub_v1_0_set_clockgating(adev, state);

	athub_v1_0_set_clockgating(adev, state);

	return 0;
}

static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		mmhub_v9_4_get_clockgating(adev, flags);
	else
		mmhub_v1_0_get_clockgating(adev, flags);

	athub_v1_0_get_clockgating(adev, flags);
}

static int gmc_v9_0_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
	.name = "gmc_v9_0",
	.early_init = gmc_v9_0_early_init,
	.late_init = gmc_v9_0_late_init,
	.sw_init = gmc_v9_0_sw_init,
	.sw_fini = gmc_v9_0_sw_fini,
	.hw_init = gmc_v9_0_hw_init,
	.hw_fini = gmc_v9_0_hw_fini,
	.suspend = gmc_v9_0_suspend,
	.resume = gmc_v9_0_resume,
	.is_idle = gmc_v9_0_is_idle,
	.wait_for_idle = gmc_v9_0_wait_for_idle,
	.soft_reset = gmc_v9_0_soft_reset,
	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
	.set_powergating_state = gmc_v9_0_set_powergating_state,
	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v9_0_ip_funcs,
};