/*	$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_df_v3_6.c,v 1.5 2021/12/19 11:59:53 riastradh Exp $");

#include "amdgpu.h"
#include "df_v3_6.h"

#include "df/df_3_6_default.h"
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"

#define DF_3_6_SMN_REG_INST_DIST        0x8
#define DF_3_6_INST_CNT                 8

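/*
 * Map the IntLvNumChan encoding from DF_CS_UMC_AON0_DramBaseAddress0 to a
 * memory channel count; zero entries are reserved/unused encodings.
 */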
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
				       16, 32, 0, 0, 0, 2, 4, 8};

#ifndef __NetBSD__		/* XXX amdgpu sysfs */

/* init df format attrs */
AMDGPU_PMU_ATTR(event,		"config:0-7");
AMDGPU_PMU_ATTR(instance,	"config:8-15");
AMDGPU_PMU_ATTR(umask,		"config:16-23");

/* df format attributes */
static struct attribute *df_v3_6_format_attrs[] = {
	&pmu_attr_event.attr,
	&pmu_attr_instance.attr,
	&pmu_attr_umask.attr,
	NULL
};

/* df format attribute group */
static struct attribute_group df_v3_6_format_attr_group = {
	.name = "format",
	.attrs = df_v3_6_format_attrs,
};

/* df event attrs */
AMDGPU_PMU_ATTR(cake0_pcsout_txdata,
		      "event=0x7,instance=0x46,umask=0x2");
AMDGPU_PMU_ATTR(cake1_pcsout_txdata,
		      "event=0x7,instance=0x47,umask=0x2");
AMDGPU_PMU_ATTR(cake0_pcsout_txmeta,
		      "event=0x7,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_pcsout_txmeta,
		      "event=0x7,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc,
		      "event=0xb,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc,
		      "event=0xb,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc,
		      "event=0xb,instance=0x46,umask=0x8");
AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc,
		      "event=0xb,instance=0x47,umask=0x8");

/* df event attributes */
static struct attribute *df_v3_6_event_attrs[] = {
	&pmu_attr_cake0_pcsout_txdata.attr,
	&pmu_attr_cake1_pcsout_txdata.attr,
	&pmu_attr_cake0_pcsout_txmeta.attr,
	&pmu_attr_cake1_pcsout_txmeta.attr,
	&pmu_attr_cake0_ftiinstat_reqalloc.attr,
	&pmu_attr_cake1_ftiinstat_reqalloc.attr,
	&pmu_attr_cake0_ftiinstat_rspalloc.attr,
	&pmu_attr_cake1_ftiinstat_rspalloc.attr,
	NULL
};

/* df event attribute group */
static struct attribute_group df_v3_6_event_attr_group = {
	.name = "events",
	.attrs = df_v3_6_event_attrs
};

/* df event attr groups */
const struct attribute_group *df_v3_6_attr_groups[] = {
		&df_v3_6_format_attr_group,
		&df_v3_6_event_attr_group,
		NULL
};

#endif	/* __NetBSD__ */

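/*
 * df_v3_6_get_fica - read a 64-bit fabric indirect config access (FICA)
 * register: select the target via the FICAA3 address register, then read the
 * low and high data words through the PCIE index/data pair, all under
 * pcie_idx_lock.
 */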
static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
				 uint32_t ficaa_val)
{
	unsigned long flags, address, data;
	uint32_t ficadl_val, ficadh_val;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	ficadl_val = RREG32(data);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	ficadh_val = RREG32(data);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return ((uint64_t)ficadh_val << 32) | ficadl_val;
}

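/*
 * df_v3_6_set_fica - write a 64-bit fabric indirect config access (FICA)
 * register as separate low/high words, using the same FICAA3 selection
 * sequence as df_v3_6_get_fica.
 */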
static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
			     uint32_t ficadl_val, uint32_t ficadh_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
	WREG32(data, ficaa_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
	WREG32(data, ficadl_val);

	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
	WREG32(data, ficadh_val);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_rreg - read a perfmon register's lo and hi words
 *
 * Must be atomic: there is no MMIO method, so the lo and hi reads must go
 * through the same index/data sequence without interruption to preserve the
 * DF's finite state machine.
 */
static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
			    uint32_t lo_addr, uint32_t *lo_val,
			    uint32_t hi_addr, uint32_t *hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	*lo_val = RREG32(data);
	WREG32(address, hi_addr);
	*hi_val = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/*
 * df_v3_6_perfmon_wreg - write a perfmon register's lo and hi words
 *
 * Must be atomic: there is no MMIO method, and no reads may be interleaved
 * with the data writes, to preserve the data fabric's finite state machine.
 */
static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
			    uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/* same as perfmon_wreg, but read the values back and report whether the write stuck */
static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
					  uint32_t lo_addr, uint32_t lo_val,
					  uint32_t hi_addr, uint32_t hi_val)
{
	unsigned long flags, address, data;
	uint32_t lo_val_rb, hi_val_rb;

	address = adev->nbio.funcs->get_pcie_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, lo_addr);
	WREG32(data, lo_val);
	WREG32(address, hi_addr);
	WREG32(data, hi_val);

	WREG32(address, lo_addr);
	lo_val_rb = RREG32(data);
	WREG32(address, hi_addr);
	hi_val_rb = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
		return -EBUSY;

	return 0;
}

/*
 * Retry arming the counter every 100 usec, up to a 1 msec timeout.
 * If it still has not armed by then, return an error.
 */
#define ARM_RETRY_USEC_TIMEOUT	1000
#define ARM_RETRY_USEC_INTERVAL	100
static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
					  uint32_t lo_addr, uint32_t lo_val,
					  uint32_t hi_addr, uint32_t hi_val)
{
	int countdown = ARM_RETRY_USEC_TIMEOUT;

	while (countdown) {

		if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
						     hi_addr, hi_val))
			break;

		countdown -= ARM_RETRY_USEC_INTERVAL;
		udelay(ARM_RETRY_USEC_INTERVAL);
	}

	return countdown > 0 ? 0 : -ETIME;
}

#ifndef __NetBSD__		/* XXX amdgpu sysfs */

/* get the number of df counters available */
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct amdgpu_device *adev;
	struct drm_device *ddev;
	int i, count;

	ddev = dev_get_drvdata(dev);
	adev = ddev->dev_private;
	count = 0;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0)
			count++;
	}

	return snprintf(buf, PAGE_SIZE, "%i\n", count);
}

/* device attr for available perfmon counters */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);

#endif	/* __NetBSD__ */

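/*
 * df_v3_6_query_hashes - cache the DF address-hashing configuration.
 * Only Arcturus encodes hash enablement; on other ASICs all hash status
 * flags are left false.
 */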
static void df_v3_6_query_hashes(struct amdgpu_device *adev)
{
	u32 tmp;

	adev->df.hash_status.hash_64k = false;
	adev->df.hash_status.hash_2m = false;
	adev->df.hash_status.hash_1g = false;

	if (adev->asic_type != CHIP_ARCTURUS)
		return;

	/* encoding for hash-enabled on Arcturus */
	if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
		tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
		adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl64K);
		adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl2M);
		adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
						DF_CS_UMC_AON0_DfGlobalCtrl,
						GlbHashIntlvCtl1G);
	}
}

/* init perfmons */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
	int i, ret;

#ifdef __NetBSD__		/* XXX amdgpu sysfs */
	__USE(ret);
#else
	ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
	if (ret)
		DRM_ERROR("failed to create file for available df counters\n");
#endif

	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
		adev->df_perfmon_config_assign_mask[i] = 0;

	df_v3_6_query_hashes(adev);
}

static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{

#ifndef __NetBSD__		/* XXX amdgpu sysfs */
	device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
#endif

}

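/*
 * df_v3_6_enable_broadcast_mode - clearing CfgRegInstAccEn makes fabric
 * config register accesses broadcast to all DF instances; disabling restores
 * the register default (per-instance access).
 */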
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
					  bool enable)
{
	u32 tmp;

	if (enable) {
		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
	} else
		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
			     mmFabricConfigAccessControl_DEFAULT);
}

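/* read the IntLvNumChan encoding from DramBaseAddress0 */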
static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
{
	u32 tmp;

	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;

	return tmp;
}

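/* translate the IntLvNumChan encoding into an HBM channel count */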
static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
	int fb_channel_number;

	fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
		fb_channel_number = 0;

	return df_v3_6_channel_number[fb_channel_number];
}

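/*
 * df_v3_6_update_medium_grain_clock_gating - enable or disable DF medium
 * grain clock gating (MGCG).  The DfGlobalClkGater MGCG mode is programmed
 * in broadcast mode so every DF instance is updated.
 */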
static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						     bool enable)
{
	u32 tmp;

	if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
		/* Put DF on broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, true);

		if (enable) {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		} else {
			tmp = RREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater);
			tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
			tmp |= DF_V3_6_MGCG_DISABLE;
			WREG32_SOC15(DF, 0,
					mmDF_PIE_AON0_DfGlobalClkGater, tmp);
		}

		/* Exit broadcast mode */
		adev->df.funcs->enable_broadcast_mode(adev, false);
	}
}

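/* report AMD_CG_SUPPORT_DF_MGCG if the 15-cycle-delay MGCG mode is active */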
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
					  u32 *flags)
{
	u32 tmp;

	/* AMD_CG_SUPPORT_DF_MGCG */
	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
	if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
		*flags |= AMD_CG_SUPPORT_DF_MGCG;
}

/* get assigned df perfmon ctr as int */
static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
				      uint64_t config)
{
	int i;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if ((config & 0x0FFFFFFUL) ==
					adev->df_perfmon_config_assign_mask[i])
			return i;
	}

	return -EINVAL;
}

/* get address based on counter assignment */
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
				 uint64_t config,
				 int is_ctrl,
				 uint32_t *lo_base_addr,
				 uint32_t *hi_base_addr)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return;

	switch (target_cntr) {

	case 0:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
		break;
	case 1:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
		break;
	case 2:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
		break;
	case 3:
		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
		break;

	}

}

/* get read counter address */
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr)
{
	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
}

/* get control counter settings i.e. address and values to set */
static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr,
					  uint32_t *lo_val,
					  uint32_t *hi_val)
{

	uint32_t eventsel, instance, unitmask;
	uint32_t instance_10, instance_5432, instance_76;

	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);

	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
		DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
				*lo_base_addr, *hi_base_addr);
		return -ENXIO;
	}

	eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
	unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
	instance = DF_V3_6_GET_INSTANCE(config);

	instance_10 = instance & 0x3;
	instance_5432 = (instance >> 2) & 0xf;
	instance_76 = (instance >> 6) & 0x3;

	*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
	*hi_val = (instance_76 << 29) | instance_5432;

	DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
		config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);

	return 0;
}

/* add df performance counters for read */
static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
				   uint64_t config)
{
	int i, target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		return 0;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0U) {
			adev->df_perfmon_config_assign_mask[i] =
							config & 0x0FFFFFFUL;
			return 0;
		}
	}

	return -ENOSPC;
}

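/*
 * Bit 31 of the assign mask flags a counter whose arming failed; such
 * "deferred" counters are re-armed from pmc_get_count before their value
 * is trusted.
 */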
#define DEFERRED_ARM_MASK	(1 << 31)
static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
				    uint64_t config, bool is_deferred)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return -EINVAL;

	if (is_deferred)
		adev->df_perfmon_config_assign_mask[target_cntr] |=
							DEFERRED_ARM_MASK;
	else
		adev->df_perfmon_config_assign_mask[target_cntr] &=
							~DEFERRED_ARM_MASK;

	return 0;
}

static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
				    uint64_t config)
{
	int target_cntr;

	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	/*
	 * target_cntr should never be negative, since this function is
	 * currently only called from pmc_get_count, but check anyway.
	 */
	return (target_cntr >= 0 &&
			(adev->df_perfmon_config_assign_mask[target_cntr]
			& DEFERRED_ARM_MASK));
}

/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
				     uint64_t config)
{
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
}

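/* zero the counter's lo/hi value registers, if the counter is assigned */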
static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
					 uint64_t config)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0;

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
}
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int err = 0, ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		if (is_enable)
			return df_v3_6_pmc_add_cntr(adev, config);

		df_v3_6_reset_perfmon_cntr(adev, config);

		ret = df_v3_6_pmc_get_ctrl_settings(adev,
					config,
					&lo_base_addr,
					&hi_base_addr,
					&lo_val,
					&hi_val);

		if (ret)
			return ret;

		err = df_v3_6_perfmon_arm_with_retry(adev,
						     lo_base_addr,
						     lo_val,
						     hi_base_addr,
						     hi_val);

		if (err)
			ret = df_v3_6_pmc_set_deferred(adev, config, true);

		break;
	default:
		break;
	}

	return ret;
}

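/*
 * df_v3_6_pmc_stop - stop a perfmon counter by resetting its value registers
 * (Vega20 only); with is_disable set, also release the counter assignment.
 */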
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	int ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		ret = df_v3_6_pmc_get_ctrl_settings(adev,
			config,
			&lo_base_addr,
			&hi_base_addr,
			&lo_val,
			&hi_val);

		if (ret)
			return ret;

		df_v3_6_reset_perfmon_cntr(adev, config);

		if (is_disable)
			df_v3_6_pmc_release_cntr(adev, config);

		break;
	default:
		break;
	}

	return ret;
}

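/*
 * df_v3_6_pmc_get_count - read the current 64-bit count for config.  A
 * deferred counter is re-armed first; if re-arming fails, or the count has
 * overflowed, the count is reported as zero.
 */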
static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
				  uint64_t config,
				  uint64_t *count)
{
	uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
	*count = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

		if ((lo_base_addr == 0) || (hi_base_addr == 0))
			return;

		/* rearm the counter or throw away count value on failure */
		if (df_v3_6_pmc_is_deferred(adev, config)) {
			int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
							lo_base_addr, lo_val,
							hi_base_addr, hi_val);

			if (rearm_err)
				return;

			df_v3_6_pmc_set_deferred(adev, config, false);
		}

		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
				hi_base_addr, &hi_val);

		*count = ((uint64_t)hi_val << 32) | lo_val;

		if (*count >= DF_V3_6_PERFMON_OVERFLOW)
			*count = 0;

		DRM_DEBUG_DRIVER("config=%"PRIx64" addr=%08x:%08x val=%08x:%08x",
			 config, lo_base_addr, hi_base_addr, lo_val, hi_val);

		break;
	default:
		break;
	}
}

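/*
 * df_v3_6_get_dram_base_addr - return the DRAM base address in bytes for a
 * DF instance, or 0 if the instance's address range is not marked valid.
 * The DramBaseAddr field is in 256 MB units, hence the shift by 28.
 */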
static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
					   uint32_t df_inst)
{
	uint32_t base_addr_reg_val = 0;
	uint64_t base_addr = 0;

	base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
					df_inst * DF_3_6_SMN_REG_INST_DIST);

	if (REG_GET_FIELD(base_addr_reg_val,
			  DF_CS_UMC_AON0_DramBaseAddress0,
			  AddrRngVal) == 0) {
		DRM_WARN("address range not valid");
		return 0;
	}

	base_addr = REG_GET_FIELD(base_addr_reg_val,
				  DF_CS_UMC_AON0_DramBaseAddress0,
				  DramBaseAddr);

	return base_addr << 28;
}

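/*
 * df_v3_6_get_df_inst_id - find the DF instance whose DstFabricID matches
 * this GPU's XGMI physical node id; returns 0 if no instance matches.
 */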
static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
{
	uint32_t xgmi_node_id = 0;
	uint32_t df_inst_id = 0;

	/* Walk through DF dst nodes to find current XGMI node */
	for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {

		xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
					   df_inst_id * DF_3_6_SMN_REG_INST_DIST);
		xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
					     DF_CS_UMC_AON0_DramLimitAddress0,
					     DstFabricID);

		/* TODO: establish reason dest fabric id is offset by 7 */
		xgmi_node_id = xgmi_node_id >> 7;

		if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
			break;
	}

	if (df_inst_id == DF_3_6_INST_CNT) {
		DRM_WARN("can't match df dst id with gpu node");
		return 0;
	}

	return df_inst_id;
}

const struct amdgpu_df_funcs df_v3_6_funcs = {
	.sw_init = df_v3_6_sw_init,
	.sw_fini = df_v3_6_sw_fini,
	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
	.update_medium_grain_clock_gating =
			df_v3_6_update_medium_grain_clock_gating,
	.get_clockgating_state = df_v3_6_get_clockgating_state,
	.pmc_start = df_v3_6_pmc_start,
	.pmc_stop = df_v3_6_pmc_stop,
	.pmc_get_count = df_v3_6_pmc_get_count,
	.get_fica = df_v3_6_get_fica,
	.set_fica = df_v3_6_set_fica,
	.get_dram_base_addr = df_v3_6_get_dram_base_addr,
	.get_df_inst_id = df_v3_6_get_df_inst_id
};
    777