      1 /*	$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2016 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  */
     25 
     26 #include <sys/cdefs.h>
     27 __KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc_v9_0.c,v 1.5 2021/12/19 12:31:45 riastradh Exp $");
     28 
     29 #include <linux/firmware.h>
     30 #include <linux/pci.h>
     31 
     32 #include <drm/drm_cache.h>
     33 
     34 #include "amdgpu.h"
     35 #include "gmc_v9_0.h"
     36 #include "amdgpu_atomfirmware.h"
     37 #include "amdgpu_gem.h"
     38 
     39 #include "hdp/hdp_4_0_offset.h"
     40 #include "hdp/hdp_4_0_sh_mask.h"
     41 #include "gc/gc_9_0_sh_mask.h"
     42 #include "dce/dce_12_0_offset.h"
     43 #include "dce/dce_12_0_sh_mask.h"
     44 #include "vega10_enum.h"
     45 #include "mmhub/mmhub_1_0_offset.h"
     46 #include "athub/athub_1_0_sh_mask.h"
     47 #include "athub/athub_1_0_offset.h"
     48 #include "oss/osssys_4_0_offset.h"
     49 
     50 #include "soc15.h"
     51 #include "soc15d.h"
     52 #include "soc15_common.h"
     53 #include "umc/umc_6_0_sh_mask.h"
     54 
     55 #include "gfxhub_v1_0.h"
     56 #include "mmhub_v1_0.h"
     57 #include "athub_v1_0.h"
     58 #include "gfxhub_v1_1.h"
     59 #include "mmhub_v9_4.h"
     60 #include "umc_v6_1.h"
     61 #include "umc_v6_0.h"
     62 
     63 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
     64 
     65 #include "amdgpu_ras.h"
     66 #include "amdgpu_xgmi.h"
     67 
     68 /* add these here since we already include dce12 headers and these are for DCN */
     69 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
     70 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
     71 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
     72 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
     73 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
     74 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
     75 
     76 /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
     77 #define AMDGPU_NUM_OF_VMIDS			8
     78 
     79 static const u32 golden_settings_vega10_hdp[] =
     80 {
     81 	0xf64, 0x0fffffff, 0x00000000,
     82 	0xf65, 0x0fffffff, 0x00000000,
     83 	0xf66, 0x0fffffff, 0x00000000,
     84 	0xf67, 0x0fffffff, 0x00000000,
     85 	0xf68, 0x0fffffff, 0x00000000,
     86 	0xf6a, 0x0fffffff, 0x00000000,
     87 	0xf6b, 0x0fffffff, 0x00000000,
     88 	0xf6c, 0x0fffffff, 0x00000000,
     89 	0xf6d, 0x0fffffff, 0x00000000,
     90 	0xf6e, 0x0fffffff, 0x00000000,
     91 };
     92 
     93 static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
     94 {
     95 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
     96 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
     97 };
     98 
     99 static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
    100 {
    101 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
    102 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
    103 };
    104 
    105 static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
    106 	(0x000143c0 + 0x00000000),
    107 	(0x000143c0 + 0x00000800),
    108 	(0x000143c0 + 0x00001000),
    109 	(0x000143c0 + 0x00001800),
    110 	(0x000543c0 + 0x00000000),
    111 	(0x000543c0 + 0x00000800),
    112 	(0x000543c0 + 0x00001000),
    113 	(0x000543c0 + 0x00001800),
    114 	(0x000943c0 + 0x00000000),
    115 	(0x000943c0 + 0x00000800),
    116 	(0x000943c0 + 0x00001000),
    117 	(0x000943c0 + 0x00001800),
    118 	(0x000d43c0 + 0x00000000),
    119 	(0x000d43c0 + 0x00000800),
    120 	(0x000d43c0 + 0x00001000),
    121 	(0x000d43c0 + 0x00001800),
    122 	(0x001143c0 + 0x00000000),
    123 	(0x001143c0 + 0x00000800),
    124 	(0x001143c0 + 0x00001000),
    125 	(0x001143c0 + 0x00001800),
    126 	(0x001543c0 + 0x00000000),
    127 	(0x001543c0 + 0x00000800),
    128 	(0x001543c0 + 0x00001000),
    129 	(0x001543c0 + 0x00001800),
    130 	(0x001943c0 + 0x00000000),
    131 	(0x001943c0 + 0x00000800),
    132 	(0x001943c0 + 0x00001000),
    133 	(0x001943c0 + 0x00001800),
    134 	(0x001d43c0 + 0x00000000),
    135 	(0x001d43c0 + 0x00000800),
    136 	(0x001d43c0 + 0x00001000),
    137 	(0x001d43c0 + 0x00001800),
    138 };
    139 
    140 static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
    141 	(0x000143e0 + 0x00000000),
    142 	(0x000143e0 + 0x00000800),
    143 	(0x000143e0 + 0x00001000),
    144 	(0x000143e0 + 0x00001800),
    145 	(0x000543e0 + 0x00000000),
    146 	(0x000543e0 + 0x00000800),
    147 	(0x000543e0 + 0x00001000),
    148 	(0x000543e0 + 0x00001800),
    149 	(0x000943e0 + 0x00000000),
    150 	(0x000943e0 + 0x00000800),
    151 	(0x000943e0 + 0x00001000),
    152 	(0x000943e0 + 0x00001800),
    153 	(0x000d43e0 + 0x00000000),
    154 	(0x000d43e0 + 0x00000800),
    155 	(0x000d43e0 + 0x00001000),
    156 	(0x000d43e0 + 0x00001800),
    157 	(0x001143e0 + 0x00000000),
    158 	(0x001143e0 + 0x00000800),
    159 	(0x001143e0 + 0x00001000),
    160 	(0x001143e0 + 0x00001800),
    161 	(0x001543e0 + 0x00000000),
    162 	(0x001543e0 + 0x00000800),
    163 	(0x001543e0 + 0x00001000),
    164 	(0x001543e0 + 0x00001800),
    165 	(0x001943e0 + 0x00000000),
    166 	(0x001943e0 + 0x00000800),
    167 	(0x001943e0 + 0x00001000),
    168 	(0x001943e0 + 0x00001800),
    169 	(0x001d43e0 + 0x00000000),
    170 	(0x001d43e0 + 0x00000800),
    171 	(0x001d43e0 + 0x00001000),
    172 	(0x001d43e0 + 0x00001800),
    173 };
    174 
    175 static const uint32_t ecc_umc_mcumc_status_addrs[] __unused = {
    176 	(0x000143c2 + 0x00000000),
    177 	(0x000143c2 + 0x00000800),
    178 	(0x000143c2 + 0x00001000),
    179 	(0x000143c2 + 0x00001800),
    180 	(0x000543c2 + 0x00000000),
    181 	(0x000543c2 + 0x00000800),
    182 	(0x000543c2 + 0x00001000),
    183 	(0x000543c2 + 0x00001800),
    184 	(0x000943c2 + 0x00000000),
    185 	(0x000943c2 + 0x00000800),
    186 	(0x000943c2 + 0x00001000),
    187 	(0x000943c2 + 0x00001800),
    188 	(0x000d43c2 + 0x00000000),
    189 	(0x000d43c2 + 0x00000800),
    190 	(0x000d43c2 + 0x00001000),
    191 	(0x000d43c2 + 0x00001800),
    192 	(0x001143c2 + 0x00000000),
    193 	(0x001143c2 + 0x00000800),
    194 	(0x001143c2 + 0x00001000),
    195 	(0x001143c2 + 0x00001800),
    196 	(0x001543c2 + 0x00000000),
    197 	(0x001543c2 + 0x00000800),
    198 	(0x001543c2 + 0x00001000),
    199 	(0x001543c2 + 0x00001800),
    200 	(0x001943c2 + 0x00000000),
    201 	(0x001943c2 + 0x00000800),
    202 	(0x001943c2 + 0x00001000),
    203 	(0x001943c2 + 0x00001800),
    204 	(0x001d43c2 + 0x00000000),
    205 	(0x001d43c2 + 0x00000800),
    206 	(0x001d43c2 + 0x00001000),
    207 	(0x001d43c2 + 0x00001800),
    208 };
    209 
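         /*
          * gmc_v9_0_ecc_interrupt_state - enable/disable UMC ECC error interrupts
          *
          * Sets or clears the interrupt enable bits in the MCUMC control and
          * mask registers listed above.  On ASICs newer than VEGA10/12 this is
          * done by the PSP bootloader instead, so the function returns early.
          */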
    210 static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
    211 		struct amdgpu_irq_src *src,
    212 		unsigned type,
    213 		enum amdgpu_interrupt_state state)
    214 {
    215 	u32 bits, i, tmp, reg;
    216 
     217 	/* Devices newer than VEGA10/12 have these programming
     218 	 * sequences performed by the PSP bootloader. */
    219 	if (adev->asic_type >= CHIP_VEGA20)
    220 		return 0;
    221 
    222 	bits = 0x7f;
    223 
    224 	switch (state) {
    225 	case AMDGPU_IRQ_STATE_DISABLE:
    226 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
    227 			reg = ecc_umc_mcumc_ctrl_addrs[i];
    228 			tmp = RREG32(reg);
    229 			tmp &= ~bits;
    230 			WREG32(reg, tmp);
    231 		}
    232 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
    233 			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
    234 			tmp = RREG32(reg);
    235 			tmp &= ~bits;
    236 			WREG32(reg, tmp);
    237 		}
    238 		break;
    239 	case AMDGPU_IRQ_STATE_ENABLE:
    240 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
    241 			reg = ecc_umc_mcumc_ctrl_addrs[i];
    242 			tmp = RREG32(reg);
    243 			tmp |= bits;
    244 			WREG32(reg, tmp);
    245 		}
    246 		for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
    247 			reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
    248 			tmp = RREG32(reg);
    249 			tmp |= bits;
    250 			WREG32(reg, tmp);
    251 		}
    252 		break;
    253 	default:
    254 		break;
    255 	}
    256 
    257 	return 0;
    258 }
    259 
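         /*
          * gmc_v9_0_vm_fault_interrupt_state - enable/disable VM fault interrupts
          *
          * Sets or clears the protection fault interrupt enable bits in
          * VM_CONTEXT0..15_CNTL on every vmhub, depending on the requested
          * interrupt state.
          */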
    260 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
    261 					struct amdgpu_irq_src *src,
    262 					unsigned type,
    263 					enum amdgpu_interrupt_state state)
    264 {
    265 	struct amdgpu_vmhub *hub;
    266 	u32 tmp, reg, bits, i, j;
    267 
    268 	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    269 		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    270 		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    271 		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    272 		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    273 		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
    274 		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
    275 
    276 	switch (state) {
    277 	case AMDGPU_IRQ_STATE_DISABLE:
    278 		for (j = 0; j < adev->num_vmhubs; j++) {
    279 			hub = &adev->vmhub[j];
    280 			for (i = 0; i < 16; i++) {
    281 				reg = hub->vm_context0_cntl + i;
    282 				tmp = RREG32(reg);
    283 				tmp &= ~bits;
    284 				WREG32(reg, tmp);
    285 			}
    286 		}
    287 		break;
    288 	case AMDGPU_IRQ_STATE_ENABLE:
    289 		for (j = 0; j < adev->num_vmhubs; j++) {
    290 			hub = &adev->vmhub[j];
    291 			for (i = 0; i < 16; i++) {
    292 				reg = hub->vm_context0_cntl + i;
    293 				tmp = RREG32(reg);
    294 				tmp |= bits;
    295 				WREG32(reg, tmp);
    296 			}
     297 		}
         		break;
     298 	default:
    299 		break;
    300 	}
    301 
    302 	return 0;
    303 }
    304 
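         /*
          * gmc_v9_0_process_interrupt - handle a VM protection fault interrupt
          *
          * Reconstructs the faulting GPU address from the IV entry, filters
          * duplicate retry faults, lets the VM code try to handle recoverable
          * retry faults, and otherwise logs the fault together with the decoded
          * VM_L2_PROTECTION_FAULT_STATUS fields.
          */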
    305 static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
    306 				struct amdgpu_irq_src *source,
    307 				struct amdgpu_iv_entry *entry)
    308 {
    309 	struct amdgpu_vmhub *hub;
    310 	bool retry_fault = !!(entry->src_data[1] & 0x80);
    311 	uint32_t status = 0;
    312 	u64 addr;
    313 	char hub_name[10];
    314 
    315 	addr = (u64)entry->src_data[0] << 12;
    316 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
    317 
    318 	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
    319 						    entry->timestamp))
    320 		return 1; /* This also prevents sending it to KFD */
    321 
    322 	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
    323 		snprintf(hub_name, sizeof(hub_name), "mmhub0");
    324 		hub = &adev->vmhub[AMDGPU_MMHUB_0];
    325 	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
    326 		snprintf(hub_name, sizeof(hub_name), "mmhub1");
    327 		hub = &adev->vmhub[AMDGPU_MMHUB_1];
    328 	} else {
    329 		snprintf(hub_name, sizeof(hub_name), "gfxhub0");
    330 		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
    331 	}
    332 
    333 	/* If it's the first fault for this address, process it normally */
    334 	if (retry_fault && !in_interrupt() &&
    335 	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
    336 		return 1; /* This also prevents sending it to KFD */
    337 
    338 	if (!amdgpu_sriov_vf(adev)) {
    339 		/*
    340 		 * Issue a dummy read to wait for the status register to
    341 		 * be updated to avoid reading an incorrect value due to
    342 		 * the new fast GRBM interface.
    343 		 */
    344 		if (entry->vmid_src == AMDGPU_GFXHUB_0)
    345 			RREG32(hub->vm_l2_pro_fault_status);
    346 
    347 		status = RREG32(hub->vm_l2_pro_fault_status);
    348 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
    349 	}
    350 
    351 	if (printk_ratelimit()) {
    352 		struct amdgpu_task_info task_info;
    353 
    354 		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
    355 		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
    356 
    357 		dev_err(adev->dev,
    358 			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
    359 			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
    360 			hub_name, retry_fault ? "retry" : "no-retry",
    361 			entry->src_id, entry->ring_id, entry->vmid,
    362 			entry->pasid, task_info.process_name, task_info.tgid,
    363 			task_info.task_name, task_info.pid);
    364 		dev_err(adev->dev, "  in page starting at address 0x%016"PRIx64" from client %d\n",
    365 			addr, entry->client_id);
    366 		if (!amdgpu_sriov_vf(adev)) {
    367 			dev_err(adev->dev,
    368 				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
    369 				status);
    370 			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
    371 				REG_GET_FIELD(status,
    372 				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
    373 			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
    374 				REG_GET_FIELD(status,
    375 				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
    376 			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
    377 				REG_GET_FIELD(status,
    378 				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
    379 			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
    380 				REG_GET_FIELD(status,
    381 				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
    382 			dev_err(adev->dev, "\t RW: 0x%lx\n",
    383 				REG_GET_FIELD(status,
    384 				VM_L2_PROTECTION_FAULT_STATUS, RW));
    385 
    386 		}
    387 	}
    388 
    389 	return 0;
    390 }
    391 
    392 static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
    393 	.set = gmc_v9_0_vm_fault_interrupt_state,
    394 	.process = gmc_v9_0_process_interrupt,
    395 };
    396 
    397 
    398 static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
    399 	.set = gmc_v9_0_ecc_interrupt_state,
    400 	.process = amdgpu_umc_process_ecc_irq,
    401 };
    402 
    403 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
    404 {
    405 	adev->gmc.vm_fault.num_types = 1;
    406 	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
    407 
    408 	if (!amdgpu_sriov_vf(adev)) {
    409 		adev->gmc.ecc_irq.num_types = 1;
    410 		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
    411 	}
    412 }
    413 
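         /*
          * gmc_v9_0_get_invalidate_req - build a VM_INVALIDATE_ENG*_REQ value
          *
          * The request invalidates the L1/L2 PTEs and all PDE levels for the
          * given VMID with the given flush type.
          */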
    414 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
    415 					uint32_t flush_type)
    416 {
    417 	u32 req = 0;
    418 
    419 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
    420 			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
    421 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
    422 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
    423 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
    424 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
    425 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
    426 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
    427 	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
    428 			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);
    429 
    430 	return req;
    431 }
    432 
    433 /**
    434  * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
    435  *
    436  * @adev: amdgpu_device pointer
    437  * @vmhub: vmhub type
    438  *
    439  */
    440 static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
    441 				       uint32_t vmhub)
    442 {
    443 	return ((vmhub == AMDGPU_MMHUB_0 ||
    444 		 vmhub == AMDGPU_MMHUB_1) &&
    445 		(!amdgpu_sriov_vf(adev)) &&
    446 		(!(adev->asic_type == CHIP_RAVEN &&
    447 		   adev->rev_id < 0x8 &&
    448 		   adev->pdev->device == 0x15d8)));
    449 }
    450 
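         /*
          * gmc_v9_0_get_atc_vmid_pasid_mapping_info - look up the PASID mapped
          * to a VMID
          *
          * Reads the ATC VMID/PASID mapping register, stores the PASID in
          * *p_pasid and returns true if the mapping is valid.
          */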
    451 static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
    452 					uint8_t vmid, uint16_t *p_pasid)
    453 {
    454 	uint32_t value;
    455 
    456 	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
    457 		     + vmid);
    458 	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
    459 
    460 	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
    461 }
    462 
    463 /*
    464  * GART
    465  * VMID 0 is the physical GPU addresses as used by the kernel.
    466  * VMIDs 1-15 are used for userspace clients and are handled
    467  * by the amdgpu vm/hsa code.
    468  */
    469 
    470 /**
    471  * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
    472  *
    473  * @adev: amdgpu_device pointer
     474  * @vmid: vm instance to flush
          * @vmhub: which hub to flush
     475  * @flush_type: the flush type
     476  *
     477  * Flush the TLB for the requested page table using the given flush type.
    478  */
    479 static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
    480 					uint32_t vmhub, uint32_t flush_type)
    481 {
    482 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
    483 	const unsigned eng = 17;
    484 	u32 j, inv_req, tmp;
    485 	struct amdgpu_vmhub *hub;
    486 
    487 	BUG_ON(vmhub >= adev->num_vmhubs);
    488 
    489 	hub = &adev->vmhub[vmhub];
    490 	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
    491 
    492 	/* This is necessary for a HW workaround under SRIOV as well
    493 	 * as GFXOFF under bare metal
    494 	 */
    495 	if (adev->gfx.kiq.ring.sched.ready &&
    496 			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
    497 			!adev->in_gpu_reset) {
    498 		uint32_t req = hub->vm_inv_eng0_req + eng;
    499 		uint32_t ack = hub->vm_inv_eng0_ack + eng;
    500 
    501 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
    502 				1 << vmid);
    503 		return;
    504 	}
    505 
    506 	spin_lock(&adev->gmc.invalidate_lock);
    507 
     508 	/*
     509 	 * The GPUVM invalidate acknowledge state may be lost across a
     510 	 * power-gating off cycle.  As a workaround, acquire the semaphore
     511 	 * before the invalidation and release it afterwards, so that the
     512 	 * power-gated state is not entered in between.
     513 	 */
    514 
     515 	/* TODO: semaphore handling for GFXHUB still needs debugging before it can be used there as well. */
    516 	if (use_semaphore) {
    517 		for (j = 0; j < adev->usec_timeout; j++) {
     518 			/* a read return value of 1 means the semaphore was acquired */
    519 			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
    520 			if (tmp & 0x1)
    521 				break;
    522 			udelay(1);
    523 		}
    524 
    525 		if (j >= adev->usec_timeout)
    526 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
    527 	}
    528 
    529 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
    530 
    531 	/*
    532 	 * Issue a dummy read to wait for the ACK register to be cleared
    533 	 * to avoid a false ACK due to the new fast GRBM interface.
    534 	 */
    535 	if (vmhub == AMDGPU_GFXHUB_0)
    536 		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
    537 
    538 	for (j = 0; j < adev->usec_timeout; j++) {
    539 		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
    540 		if (tmp & (1 << vmid))
    541 			break;
    542 		udelay(1);
    543 	}
    544 
     545 	/* TODO: semaphore handling for GFXHUB still needs debugging before it can be used there as well. */
    546 	if (use_semaphore)
    547 		/*
     548 		 * Release the semaphore after the invalidation;
     549 		 * writing 0 releases it.
    550 		 */
    551 		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
    552 
    553 	spin_unlock(&adev->gmc.invalidate_lock);
    554 
    555 	if (j < adev->usec_timeout)
    556 		return;
    557 
    558 	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
    559 }
    560 
    561 /**
    562  * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
    563  *
    564  * @adev: amdgpu_device pointer
     565  * @pasid: pasid to be flushed
          * @flush_type: the flush type
          * @all_hub: whether to flush all vmhubs or only the gfxhub
     566  *
     567  * Flush the TLB for the requested pasid.
    568  */
    569 static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
    570 					uint16_t pasid, uint32_t flush_type,
    571 					bool all_hub)
    572 {
    573 	int vmid, i;
    574 	signed long r;
    575 	uint32_t seq;
    576 	uint16_t queried_pasid;
    577 	bool ret;
    578 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
    579 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    580 
    581 	if (adev->in_gpu_reset)
    582 		return -EIO;
    583 
    584 	if (ring->sched.ready) {
    585 		spin_lock(&adev->gfx.kiq.ring_lock);
    586 		/* 2 dwords flush + 8 dwords fence */
    587 		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
    588 		kiq->pmf->kiq_invalidate_tlbs(ring,
    589 					pasid, flush_type, all_hub);
    590 		amdgpu_fence_emit_polling(ring, &seq);
    591 		amdgpu_ring_commit(ring);
    592 		spin_unlock(&adev->gfx.kiq.ring_lock);
    593 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
    594 		if (r < 1) {
    595 			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
    596 			return -ETIME;
    597 		}
    598 
    599 		return 0;
    600 	}
    601 
    602 	for (vmid = 1; vmid < 16; vmid++) {
    603 
    604 		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
    605 				&queried_pasid);
    606 		if (ret && queried_pasid == pasid) {
    607 			if (all_hub) {
    608 				for (i = 0; i < adev->num_vmhubs; i++)
    609 					gmc_v9_0_flush_gpu_tlb(adev, vmid,
    610 							i, flush_type);
    611 			} else {
    612 				gmc_v9_0_flush_gpu_tlb(adev, vmid,
    613 						AMDGPU_GFXHUB_0, flush_type);
    614 			}
    615 			break;
    616 		}
    617 	}
    618 
    619 	return 0;
    620 
    621 }
    622 
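         /*
          * gmc_v9_0_emit_flush_gpu_tlb - emit a TLB flush on a ring
          *
          * Emits the writes that update the VMID's page directory address and
          * trigger a VM invalidation on the ring's vmhub, wrapped in the
          * invalidation semaphore where it is used.
          */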
    623 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
    624 					    unsigned vmid, uint64_t pd_addr)
    625 {
    626 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
    627 	struct amdgpu_device *adev = ring->adev;
    628 	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
    629 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
    630 	unsigned eng = ring->vm_inv_eng;
    631 
     632 	/*
     633 	 * The GPUVM invalidate acknowledge state may be lost across a
     634 	 * power-gating off cycle.  As a workaround, acquire the semaphore
     635 	 * before the invalidation and release it afterwards, so that the
     636 	 * power-gated state is not entered in between.
     637 	 */
    638 
     639 	/* TODO: semaphore handling for GFXHUB still needs debugging before it can be used there as well. */
    640 	if (use_semaphore)
     641 		/* a read return value of 1 means the semaphore was acquired */
    642 		amdgpu_ring_emit_reg_wait(ring,
    643 					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
    644 
    645 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
    646 			      lower_32_bits(pd_addr));
    647 
    648 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
    649 			      upper_32_bits(pd_addr));
    650 
    651 	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
    652 					    hub->vm_inv_eng0_ack + eng,
    653 					    req, 1 << vmid);
    654 
     655 	/* TODO: semaphore handling for GFXHUB still needs debugging before it can be used there as well. */
    656 	if (use_semaphore)
    657 		/*
     658 		 * Release the semaphore after the invalidation;
     659 		 * writing 0 releases it.
    660 		 */
    661 		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
    662 
    663 	return pd_addr;
    664 }
    665 
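         /*
          * gmc_v9_0_emit_pasid_mapping - emit a VMID-to-PASID mapping update
          *
          * Writes the PASID into the IH VMID LUT register for the ring's hub;
          * mmhub1 has no LUT register, so it is skipped.
          */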
    666 static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
    667 					unsigned pasid)
    668 {
    669 	struct amdgpu_device *adev = ring->adev;
    670 	uint32_t reg;
    671 
    672 	/* Do nothing because there's no lut register for mmhub1. */
    673 	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
    674 		return;
    675 
    676 	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
    677 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
    678 	else
    679 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
    680 
    681 	amdgpu_ring_emit_wreg(ring, reg, pasid);
    682 }
    683 
    684 /*
    685  * PTE format on VEGA 10:
    686  * 63:59 reserved
    687  * 58:57 mtype
    688  * 56 F
    689  * 55 L
    690  * 54 P
    691  * 53 SW
    692  * 52 T
    693  * 50:48 reserved
    694  * 47:12 4k physical page base address
    695  * 11:7 fragment
    696  * 6 write
    697  * 5 read
    698  * 4 exe
    699  * 3 Z
    700  * 2 snooped
    701  * 1 system
    702  * 0 valid
    703  *
    704  * PDE format on VEGA 10:
    705  * 63:59 block fragment size
    706  * 58:55 reserved
    707  * 54 P
    708  * 53:48 reserved
    709  * 47:6 physical base address of PD or PTE
    710  * 5:3 reserved
    711  * 2 C
    712  * 1 system
    713  * 0 valid
    714  */
    715 
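         /*
          * gmc_v9_0_map_mtype - map generic AMDGPU_VM_MTYPE_* flags to the
          * VEGA10 PTE MTYPE encoding, defaulting to MTYPE_NC.
          */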
    716 static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
    718 {
    719 	switch (flags) {
    720 	case AMDGPU_VM_MTYPE_DEFAULT:
    721 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
    722 	case AMDGPU_VM_MTYPE_NC:
    723 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
    724 	case AMDGPU_VM_MTYPE_WC:
    725 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
    726 	case AMDGPU_VM_MTYPE_RW:
    727 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
    728 	case AMDGPU_VM_MTYPE_CC:
    729 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
    730 	case AMDGPU_VM_MTYPE_UC:
    731 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
    732 	default:
    733 		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
    734 	}
    735 }
    736 
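         /*
          * gmc_v9_0_get_vm_pde - adjust a PDE address and its flags
          *
          * Translates VRAM addresses into the MC address space and, when
          * translate_further is enabled, sets the block fragment size on PDB1
          * entries and adjusts PDB0 entries for further translation.
          */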
    737 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
    738 				uint64_t *addr, uint64_t *flags)
    739 {
    740 	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
    741 		*addr = adev->vm_manager.vram_base_offset + *addr -
    742 			adev->gmc.vram_start;
    743 	BUG_ON(*addr & 0xFFFF00000000003FULL);
    744 
    745 	if (!adev->gmc.translate_further)
    746 		return;
    747 
    748 	if (level == AMDGPU_VM_PDB1) {
    749 		/* Set the block fragment size */
    750 		if (!(*flags & AMDGPU_PDE_PTE))
    751 			*flags |= AMDGPU_PDE_BFS(0x9);
    752 
    753 	} else if (level == AMDGPU_VM_PDB0) {
    754 		if (*flags & AMDGPU_PDE_PTE)
    755 			*flags &= ~AMDGPU_PDE_PTE;
    756 		else
    757 			*flags |= AMDGPU_PTE_TF;
    758 	}
    759 }
    760 
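         /*
          * gmc_v9_0_get_vm_pte - compute the final PTE flags for a mapping
          *
          * Carries the executable and MTYPE bits over from the mapping flags,
          * handles PRT mappings, and forces snooping for XGMI buffers in VRAM
          * on Arcturus.
          */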
    761 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
    762 				struct amdgpu_bo_va_mapping *mapping,
    763 				uint64_t *flags)
    764 {
    765 	*flags &= ~AMDGPU_PTE_EXECUTABLE;
    766 	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
    767 
    768 	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
    769 	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
    770 
    771 	if (mapping->flags & AMDGPU_PTE_PRT) {
    772 		*flags |= AMDGPU_PTE_PRT;
    773 		*flags &= ~AMDGPU_PTE_VALID;
    774 	}
    775 
    776 	if (adev->asic_type == CHIP_ARCTURUS &&
    777 	    !(*flags & AMDGPU_PTE_SYSTEM) &&
    778 	    mapping->bo_va->is_xgmi)
    779 		*flags |= AMDGPU_PTE_SNOOPED;
    780 }
    781 
    782 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
    783 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
    784 	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
    785 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
    786 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
    787 	.map_mtype = gmc_v9_0_map_mtype,
    788 	.get_vm_pde = gmc_v9_0_get_vm_pde,
    789 	.get_vm_pte = gmc_v9_0_get_vm_pte
    790 };
    791 
    792 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
    793 {
    794 	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
    795 }
    796 
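         /* Hook up the per-ASIC UMC (memory controller) RAS callbacks and
          * channel layout parameters. */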
    797 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
    798 {
    799 	switch (adev->asic_type) {
    800 	case CHIP_VEGA10:
    801 		adev->umc.funcs = &umc_v6_0_funcs;
    802 		break;
    803 	case CHIP_VEGA20:
    804 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
    805 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
    806 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
    807 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
    808 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
    809 		adev->umc.funcs = &umc_v6_1_funcs;
    810 		break;
    811 	case CHIP_ARCTURUS:
    812 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
    813 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
    814 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
    815 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
    816 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
    817 		adev->umc.funcs = &umc_v6_1_funcs;
    818 		break;
    819 	default:
    820 		break;
    821 	}
    822 }
    823 
    824 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
    825 {
    826 	switch (adev->asic_type) {
    827 	case CHIP_VEGA20:
    828 		adev->mmhub.funcs = &mmhub_v1_0_funcs;
    829 		break;
    830 	case CHIP_ARCTURUS:
    831 		adev->mmhub.funcs = &mmhub_v9_4_funcs;
    832 		break;
    833 	default:
    834 		break;
    835 	}
    836 }
    837 
    838 static int gmc_v9_0_early_init(void *handle)
    839 {
    840 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    841 
    842 	gmc_v9_0_set_gmc_funcs(adev);
    843 	gmc_v9_0_set_irq_funcs(adev);
    844 	gmc_v9_0_set_umc_funcs(adev);
    845 	gmc_v9_0_set_mmhub_funcs(adev);
    846 
    847 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
    848 	adev->gmc.shared_aperture_end =
    849 		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
    850 	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
    851 	adev->gmc.private_aperture_end =
    852 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
    853 
    854 	return 0;
    855 }
    856 
    857 static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
    858 {
    859 
    860 	/*
    861 	 * TODO:
     862 	 * Currently there is a bug where some memory client outside
     863 	 * of the driver writes to the first 8M of VRAM on S3 resume.
     864 	 * This overwrites the GART, which by default gets placed in the first
     865 	 * 8M, and causes VM_FAULTS once the GTT is accessed.
     866 	 * Keep the stolen memory reservation until this is solved.
     867 	 * Also check the code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init.
    868 	 */
    869 	switch (adev->asic_type) {
    870 	case CHIP_VEGA10:
    871 	case CHIP_RAVEN:
    872 	case CHIP_ARCTURUS:
    873 	case CHIP_RENOIR:
    874 		return true;
    875 	case CHIP_VEGA12:
    876 	case CHIP_VEGA20:
    877 	default:
    878 		return false;
    879 	}
    880 }
    881 
    882 static int gmc_v9_0_late_init(void *handle)
    883 {
    884 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    885 	int r;
    886 
    887 	if (!gmc_v9_0_keep_stolen_memory(adev))
    888 		amdgpu_bo_late_init(adev);
    889 
    890 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
    891 	if (r)
    892 		return r;
    893 	/* Check if ecc is available */
    894 	if (!amdgpu_sriov_vf(adev)) {
    895 		switch (adev->asic_type) {
    896 		case CHIP_VEGA10:
    897 		case CHIP_VEGA20:
    898 		case CHIP_ARCTURUS:
    899 			r = amdgpu_atomfirmware_mem_ecc_supported(adev);
    900 			if (!r) {
    901 				DRM_INFO("ECC is not present.\n");
    902 				if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
    903 					adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
    904 			} else {
    905 				DRM_INFO("ECC is active.\n");
    906 			}
    907 
    908 			r = amdgpu_atomfirmware_sram_ecc_supported(adev);
    909 			if (!r) {
    910 				DRM_INFO("SRAM ECC is not present.\n");
    911 			} else {
    912 				DRM_INFO("SRAM ECC is active.\n");
    913 			}
    914 			break;
    915 		default:
    916 			break;
    917 		}
    918 	}
    919 
    920 	r = amdgpu_gmc_ras_late_init(adev);
    921 	if (r)
    922 		return r;
    923 
    924 	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
    925 }
    926 
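         /*
          * gmc_v9_0_vram_gtt_location - place the VRAM, GART and AGP apertures
          * in the GPU's MC address space, taking the XGMI node offset into
          * account.
          */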
    927 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
    928 					struct amdgpu_gmc *mc)
    929 {
    930 	u64 base = 0;
    931 
    932 	if (adev->asic_type == CHIP_ARCTURUS)
    933 		base = mmhub_v9_4_get_fb_location(adev);
    934 	else if (!amdgpu_sriov_vf(adev))
    935 		base = mmhub_v1_0_get_fb_location(adev);
    936 
    937 	/* add the xgmi offset of the physical node */
    938 	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
    939 	amdgpu_gmc_vram_location(adev, mc, base);
    940 	amdgpu_gmc_gart_location(adev, mc);
    941 	amdgpu_gmc_agp_location(adev, mc);
    942 	/* base offset of vram pages */
    943 	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
    944 
    945 	/* XXX: add the xgmi offset of the physical node? */
    946 	adev->vm_manager.vram_base_offset +=
    947 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
    948 }
    949 
    950 /**
    951  * gmc_v9_0_mc_init - initialize the memory controller driver params
    952  *
    953  * @adev: amdgpu_device pointer
    954  *
    955  * Look up the amount of vram, vram width, and decide how to place
    956  * vram and gart within the GPU's physical address space.
    957  * Returns 0 for success.
    958  */
    959 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
    960 {
    961 	int r;
    962 
     963 	/* get_memsize() reports the VRAM size in MB; convert to bytes */
    964 	adev->gmc.mc_vram_size =
    965 		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
    966 	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
    967 
    968 	if (!(adev->flags & AMD_IS_APU)) {
    969 		r = amdgpu_device_resize_fb_bar(adev);
    970 		if (r)
    971 			return r;
    972 	}
    973 	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
    974 	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
    975 
    976 #ifdef __NetBSD__
    977 	adev->gmc.aper_tag = adev->pdev->pd_pa.pa_memt;
    978 #endif
    979 
    980 #ifdef CONFIG_X86_64
    981 	if (adev->flags & AMD_IS_APU) {
    982 		adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
    983 		adev->gmc.aper_size = adev->gmc.real_vram_size;
    984 	}
    985 #endif
    986 	/* In case the PCI BAR is larger than the actual amount of vram */
    987 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
    988 	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
    989 		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
    990 
    991 	/* set the gart size */
    992 	if (amdgpu_gart_size == -1) {
    993 		switch (adev->asic_type) {
    994 		case CHIP_VEGA10:  /* all engines support GPUVM */
    995 		case CHIP_VEGA12:  /* all engines support GPUVM */
    996 		case CHIP_VEGA20:
    997 		case CHIP_ARCTURUS:
    998 		default:
    999 			adev->gmc.gart_size = 512ULL << 20;
   1000 			break;
   1001 		case CHIP_RAVEN:   /* DCE SG support */
   1002 		case CHIP_RENOIR:
   1003 			adev->gmc.gart_size = 1024ULL << 20;
   1004 			break;
   1005 		}
   1006 	} else {
   1007 		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
   1008 	}
   1009 
   1010 	gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
   1011 
   1012 	return 0;
   1013 }
   1014 
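         /*
          * gmc_v9_0_gart_init - allocate the GART page table in VRAM and set the
          * default PTE flags (uncached, executable) used for GART entries.
          */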
   1015 static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
   1016 {
   1017 	int r;
   1018 
   1019 	if (adev->gart.bo) {
   1020 		WARN(1, "VEGA10 PCIE GART already initialized\n");
   1021 		return 0;
   1022 	}
   1023 	/* Initialize common gart structure */
   1024 	r = amdgpu_gart_init(adev);
   1025 	if (r)
   1026 		return r;
   1027 	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
   1028 	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
   1029 				 AMDGPU_PTE_EXECUTABLE;
   1030 	return amdgpu_gart_table_vram_alloc(adev);
   1031 }
   1032 
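         
         /*
          * gmc_v9_0_get_vbios_fb_size - size of the framebuffer reserved by the
          * VBIOS/pre-OS console
          *
          * Returns a fixed 9 MB while the stolen-memory workaround is active,
          * otherwise derives the size from the VGA mode or the active display
          * viewport; returns 0 when the pre-OS buffer would use up most of VRAM.
          */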
   1033 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
   1034 {
   1035 	u32 d1vga_control;
   1036 	unsigned size;
   1037 
   1038 	/*
   1039 	 * TODO Remove once GART corruption is resolved
   1040 	 * Check related code in gmc_v9_0_sw_fini
    1041 	 */
   1042 	if (gmc_v9_0_keep_stolen_memory(adev))
   1043 		return 9 * 1024 * 1024;
   1044 
   1045 	d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
   1046 	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
   1047 		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
   1048 	} else {
   1049 		u32 viewport;
   1050 
   1051 		switch (adev->asic_type) {
   1052 		case CHIP_RAVEN:
   1053 		case CHIP_RENOIR:
   1054 			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
   1055 			size = (REG_GET_FIELD(viewport,
   1056 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
   1057 				REG_GET_FIELD(viewport,
   1058 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
   1059 				4);
   1060 			break;
   1061 		case CHIP_VEGA10:
   1062 		case CHIP_VEGA12:
   1063 		case CHIP_VEGA20:
   1064 		default:
   1065 			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
   1066 			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
   1067 				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
   1068 				4);
   1069 			break;
   1070 		}
   1071 	}
   1072 	/* return 0 if the pre-OS buffer uses up most of vram */
   1073 	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
   1074 		return 0;
   1075 
   1076 	return size;
   1077 }
   1078 
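         
         /*
          * gmc_v9_0_sw_init - software init
          *
          * Initializes the hubs, VRAM info and VM size for the ASIC, registers
          * the VM fault and ECC interrupt sources, sets the DMA mask, places
          * VRAM/GART, and brings up the buffer, GART and VM managers.
          */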
   1079 static int gmc_v9_0_sw_init(void *handle)
   1080 {
   1081 	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
   1082 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1083 
   1084 	gfxhub_v1_0_init(adev);
   1085 	if (adev->asic_type == CHIP_ARCTURUS)
   1086 		mmhub_v9_4_init(adev);
   1087 	else
   1088 		mmhub_v1_0_init(adev);
   1089 
   1090 	spin_lock_init(&adev->gmc.invalidate_lock);
   1091 
   1092 	r = amdgpu_atomfirmware_get_vram_info(adev,
   1093 		&vram_width, &vram_type, &vram_vendor);
   1094 	if (amdgpu_sriov_vf(adev))
    1095 		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as on RAVEN,
    1096 		 * and the DF-related registers are not readable; hardcoding seems
    1097 		 * to be the only way to set the correct vram_width.
    1098 		 */
   1099 		adev->gmc.vram_width = 2048;
   1100 	else if (amdgpu_emu_mode != 1)
   1101 		adev->gmc.vram_width = vram_width;
   1102 
   1103 	if (!adev->gmc.vram_width) {
   1104 		int chansize, numchan;
   1105 
   1106 		/* hbm memory channel size */
   1107 		if (adev->flags & AMD_IS_APU)
   1108 			chansize = 64;
   1109 		else
   1110 			chansize = 128;
   1111 
   1112 		numchan = adev->df.funcs->get_hbm_channel_number(adev);
   1113 		adev->gmc.vram_width = numchan * chansize;
   1114 	}
   1115 
   1116 	adev->gmc.vram_type = vram_type;
   1117 	adev->gmc.vram_vendor = vram_vendor;
   1118 	switch (adev->asic_type) {
   1119 	case CHIP_RAVEN:
   1120 		adev->num_vmhubs = 2;
   1121 
   1122 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
   1123 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
   1124 		} else {
   1125 			/* vm_size is 128TB + 512GB for legacy 3-level page support */
   1126 			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
   1127 			adev->gmc.translate_further =
   1128 				adev->vm_manager.num_level > 1;
   1129 		}
   1130 		break;
   1131 	case CHIP_VEGA10:
   1132 	case CHIP_VEGA12:
   1133 	case CHIP_VEGA20:
   1134 	case CHIP_RENOIR:
   1135 		adev->num_vmhubs = 2;
   1136 
   1137 
   1138 		/*
    1139 		 * For 4-level page table support the vm size is 256TB (48 bit),
    1140 		 * the maximum supported by Vega10, with a block size of
    1141 		 * 512 (9 bit).
    1142 		 */
    1143 		/* SR-IOV restricts max_pfn to below AMDGPU_GMC_HOLE */
   1144 		if (amdgpu_sriov_vf(adev))
   1145 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
   1146 		else
   1147 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
   1148 		break;
   1149 	case CHIP_ARCTURUS:
   1150 		adev->num_vmhubs = 3;
   1151 
   1152 		/* Keep the vm size same with Vega20 */
   1153 		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
   1154 		break;
   1155 	default:
   1156 		break;
   1157 	}
   1158 
   1159 	/* This interrupt is VMC page fault.*/
   1160 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
   1161 				&adev->gmc.vm_fault);
   1162 	if (r)
   1163 		return r;
   1164 
   1165 	if (adev->asic_type == CHIP_ARCTURUS) {
   1166 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
   1167 					&adev->gmc.vm_fault);
   1168 		if (r)
   1169 			return r;
   1170 	}
   1171 
   1172 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
   1173 				&adev->gmc.vm_fault);
   1174 
   1175 	if (r)
   1176 		return r;
   1177 
   1178 	if (!amdgpu_sriov_vf(adev)) {
   1179 		/* interrupt sent to DF. */
   1180 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
   1181 				      &adev->gmc.ecc_irq);
   1182 		if (r)
   1183 			return r;
   1184 	}
   1185 
   1186 	/* Set the internal MC address mask
   1187 	 * This is the max address of the GPU's
   1188 	 * internal address space.
   1189 	 */
   1190 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
   1191 
   1192 #ifdef __NetBSD__
   1193 	r = drm_limit_dma_space(adev->ddev, 0, DMA_BIT_MASK(44));
   1194 #else
   1195 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
   1196 #endif
   1197 	if (r) {
   1198 		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
   1199 		return r;
   1200 	}
   1201 	adev->need_swiotlb = drm_need_swiotlb(44);
   1202 
   1203 	if (adev->gmc.xgmi.supported) {
   1204 		r = gfxhub_v1_1_get_xgmi_info(adev);
   1205 		if (r)
   1206 			return r;
   1207 	}
   1208 
   1209 	r = gmc_v9_0_mc_init(adev);
   1210 	if (r)
   1211 		return r;
   1212 
   1213 	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
   1214 
   1215 	/* Memory manager */
   1216 	r = amdgpu_bo_init(adev);
   1217 	if (r)
   1218 		return r;
   1219 
   1220 	r = gmc_v9_0_gart_init(adev);
   1221 	if (r)
   1222 		return r;
   1223 
   1224 	/*
   1225 	 * number of VMs
   1226 	 * VMID 0 is reserved for System
   1227 	 * amdgpu graphics/compute will use VMIDs 1-7
   1228 	 * amdkfd will use VMIDs 8-15
   1229 	 */
   1230 	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
   1231 	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
   1232 	adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;
   1233 
   1234 	amdgpu_vm_manager_init(adev);
   1235 
   1236 	return 0;
   1237 }
   1238 
   1239 static int gmc_v9_0_sw_fini(void *handle)
   1240 {
   1241 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1242 	void *stolen_vga_buf;
   1243 
   1244 	amdgpu_gmc_ras_fini(adev);
   1245 	amdgpu_gem_force_release(adev);
   1246 	amdgpu_vm_manager_fini(adev);
   1247 
   1248 	if (gmc_v9_0_keep_stolen_memory(adev))
   1249 		amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
   1250 
   1251 	amdgpu_gart_table_vram_free(adev);
   1252 	amdgpu_bo_fini(adev);
   1253 	amdgpu_gart_fini(adev);
   1254 
   1255 	spin_lock_destroy(&adev->gmc.invalidate_lock);
   1256 
   1257 	return 0;
   1258 }
   1259 
   1260 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
   1261 {
   1262 
   1263 	switch (adev->asic_type) {
   1264 	case CHIP_VEGA10:
   1265 		if (amdgpu_sriov_vf(adev))
   1266 			break;
   1267 		/* fall through */
   1268 	case CHIP_VEGA20:
   1269 		soc15_program_register_sequence(adev,
   1270 						golden_settings_mmhub_1_0_0,
   1271 						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
   1272 		soc15_program_register_sequence(adev,
   1273 						golden_settings_athub_1_0_0,
   1274 						ARRAY_SIZE(golden_settings_athub_1_0_0));
   1275 		break;
   1276 	case CHIP_VEGA12:
   1277 		break;
   1278 	case CHIP_RAVEN:
   1279 		/* TODO for renoir */
   1280 		soc15_program_register_sequence(adev,
   1281 						golden_settings_athub_1_0_0,
   1282 						ARRAY_SIZE(golden_settings_athub_1_0_0));
   1283 		break;
   1284 	default:
   1285 		break;
   1286 	}
   1287 }
   1288 
   1289 /**
   1290  * gmc_v9_0_gart_enable - gart enable
   1291  *
   1292  * @adev: amdgpu_device pointer
   1293  */
   1294 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
   1295 {
   1296 	int r;
   1297 
   1298 	if (adev->gart.bo == NULL) {
   1299 		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
   1300 		return -EINVAL;
   1301 	}
   1302 	r = amdgpu_gart_table_vram_pin(adev);
   1303 	if (r)
   1304 		return r;
   1305 
   1306 	r = gfxhub_v1_0_gart_enable(adev);
   1307 	if (r)
   1308 		return r;
   1309 
   1310 	if (adev->asic_type == CHIP_ARCTURUS)
   1311 		r = mmhub_v9_4_gart_enable(adev);
   1312 	else
   1313 		r = mmhub_v1_0_gart_enable(adev);
   1314 	if (r)
   1315 		return r;
   1316 
   1317 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
   1318 		 (unsigned)(adev->gmc.gart_size >> 20),
   1319 		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
   1320 	adev->gart.ready = true;
   1321 	return 0;
   1322 }
   1323 
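         /*
          * gmc_v9_0_hw_init - hardware init
          *
          * Programs the golden registers, disables VGA access, initializes HDP,
          * sets the default fault behaviour, flushes all TLBs, initializes the
          * UMC registers and enables the GART.
          */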
   1324 static int gmc_v9_0_hw_init(void *handle)
   1325 {
   1326 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1327 	bool value;
   1328 	int r, i;
   1329 	u32 tmp;
   1330 
   1331 	/* The sequence of these two function calls matters.*/
   1332 	gmc_v9_0_init_golden_registers(adev);
   1333 
   1334 	if (adev->mode_info.num_crtc) {
   1335 		if (adev->asic_type != CHIP_ARCTURUS) {
   1336 			/* Lockout access through VGA aperture*/
   1337 			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
   1338 
   1339 			/* disable VGA render */
   1340 			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
   1341 		}
   1342 	}
   1343 
   1344 	amdgpu_device_program_register_sequence(adev,
   1345 						golden_settings_vega10_hdp,
   1346 						ARRAY_SIZE(golden_settings_vega10_hdp));
   1347 
   1348 	switch (adev->asic_type) {
   1349 	case CHIP_RAVEN:
   1350 		/* TODO for renoir */
   1351 		mmhub_v1_0_update_power_gating(adev, true);
   1352 		break;
   1353 	case CHIP_ARCTURUS:
   1354 		WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
   1355 		break;
   1356 	default:
   1357 		break;
   1358 	}
   1359 
   1360 	WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
   1361 
   1362 	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
   1363 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
   1364 
   1365 	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
   1366 	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
   1367 
   1368 	/* After HDP is initialized, flush HDP.*/
   1369 	adev->nbio.funcs->hdp_flush(adev, NULL);
   1370 
   1371 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
   1372 		value = false;
   1373 	else
   1374 		value = true;
   1375 
   1376 	if (!amdgpu_sriov_vf(adev)) {
   1377 		gfxhub_v1_0_set_fault_enable_default(adev, value);
   1378 		if (adev->asic_type == CHIP_ARCTURUS)
   1379 			mmhub_v9_4_set_fault_enable_default(adev, value);
   1380 		else
   1381 			mmhub_v1_0_set_fault_enable_default(adev, value);
   1382 	}
   1383 	for (i = 0; i < adev->num_vmhubs; ++i)
   1384 		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
   1385 
   1386 	if (adev->umc.funcs && adev->umc.funcs->init_registers)
   1387 		adev->umc.funcs->init_registers(adev);
   1388 
   1389 	r = gmc_v9_0_gart_enable(adev);
   1390 
   1391 	return r;
   1392 }
   1393 
   1394 /**
   1395  * gmc_v9_0_gart_disable - gart disable
   1396  *
   1397  * @adev: amdgpu_device pointer
   1398  *
    1399  * This disables all VM page tables.
   1400  */
   1401 static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
   1402 {
   1403 	gfxhub_v1_0_gart_disable(adev);
   1404 	if (adev->asic_type == CHIP_ARCTURUS)
   1405 		mmhub_v9_4_gart_disable(adev);
   1406 	else
   1407 		mmhub_v1_0_gart_disable(adev);
   1408 	amdgpu_gart_table_vram_unpin(adev);
   1409 }
   1410 
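         /*
          * gmc_v9_0_hw_fini - hardware teardown: disable the fault/ECC
          * interrupts and the GART.  Skipped entirely under SR-IOV, where the
          * GMC registers must not be touched.
          */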
   1411 static int gmc_v9_0_hw_fini(void *handle)
   1412 {
   1413 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1414 
   1415 	if (amdgpu_sriov_vf(adev)) {
   1416 		/* full access mode, so don't touch any GMC register */
   1417 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
   1418 		return 0;
   1419 	}
   1420 
   1421 	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
   1422 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
   1423 	gmc_v9_0_gart_disable(adev);
   1424 
   1425 	return 0;
   1426 }
   1427 
   1428 static int gmc_v9_0_suspend(void *handle)
   1429 {
   1430 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1431 
   1432 	return gmc_v9_0_hw_fini(adev);
   1433 }
   1434 
   1435 static int gmc_v9_0_resume(void *handle)
   1436 {
   1437 	int r;
   1438 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1439 
   1440 	r = gmc_v9_0_hw_init(adev);
   1441 	if (r)
   1442 		return r;
   1443 
   1444 	amdgpu_vmid_reset_all(adev);
   1445 
   1446 	return 0;
   1447 }
   1448 
   1449 static bool gmc_v9_0_is_idle(void *handle)
   1450 {
   1451 	/* MC is always ready in GMC v9.*/
   1452 	return true;
   1453 }
   1454 
   1455 static int gmc_v9_0_wait_for_idle(void *handle)
   1456 {
   1457 	/* There is no need to wait for MC idle in GMC v9.*/
   1458 	return 0;
   1459 }
   1460 
   1461 static int gmc_v9_0_soft_reset(void *handle)
   1462 {
   1463 	/* XXX for emulation.*/
   1464 	return 0;
   1465 }
   1466 
   1467 static int gmc_v9_0_set_clockgating_state(void *handle,
   1468 					enum amd_clockgating_state state)
   1469 {
   1470 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1471 
   1472 	if (adev->asic_type == CHIP_ARCTURUS)
   1473 		mmhub_v9_4_set_clockgating(adev, state);
   1474 	else
   1475 		mmhub_v1_0_set_clockgating(adev, state);
   1476 
   1477 	athub_v1_0_set_clockgating(adev, state);
   1478 
   1479 	return 0;
   1480 }
   1481 
   1482 static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
   1483 {
   1484 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1485 
   1486 	if (adev->asic_type == CHIP_ARCTURUS)
   1487 		mmhub_v9_4_get_clockgating(adev, flags);
   1488 	else
   1489 		mmhub_v1_0_get_clockgating(adev, flags);
   1490 
   1491 	athub_v1_0_get_clockgating(adev, flags);
   1492 }
   1493 
   1494 static int gmc_v9_0_set_powergating_state(void *handle,
   1495 					enum amd_powergating_state state)
   1496 {
   1497 	return 0;
   1498 }
   1499 
   1500 const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
   1501 	.name = "gmc_v9_0",
   1502 	.early_init = gmc_v9_0_early_init,
   1503 	.late_init = gmc_v9_0_late_init,
   1504 	.sw_init = gmc_v9_0_sw_init,
   1505 	.sw_fini = gmc_v9_0_sw_fini,
   1506 	.hw_init = gmc_v9_0_hw_init,
   1507 	.hw_fini = gmc_v9_0_hw_fini,
   1508 	.suspend = gmc_v9_0_suspend,
   1509 	.resume = gmc_v9_0_resume,
   1510 	.is_idle = gmc_v9_0_is_idle,
   1511 	.wait_for_idle = gmc_v9_0_wait_for_idle,
   1512 	.soft_reset = gmc_v9_0_soft_reset,
   1513 	.set_clockgating_state = gmc_v9_0_set_clockgating_state,
   1514 	.set_powergating_state = gmc_v9_0_set_powergating_state,
   1515 	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
   1516 };
   1517 
   1518 const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
   1519 {
   1520 	.type = AMD_IP_BLOCK_TYPE_GMC,
   1521 	.major = 9,
   1522 	.minor = 0,
   1523 	.rev = 0,
   1524 	.funcs = &gmc_v9_0_ip_funcs,
   1525 };
   1526