      1 /*	$NetBSD: intel_rps.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
      6  * Copyright © 2019 Intel Corporation
      7  */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: intel_rps.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");
     11 
     12 #include "i915_drv.h"
     13 #include "intel_gt.h"
     14 #include "intel_gt_irq.h"
     15 #include "intel_gt_pm_irq.h"
     16 #include "intel_rps.h"
     17 #include "intel_sideband.h"
     18 #include "../../../platform/x86/intel_ips.h"
     19 
     20 /*
     21  * Lock protecting IPS related data structures
     22  */
     23 #ifdef __NetBSD__
     24 spinlock_t mchdev_lock;
     25 #else
     26 static DEFINE_SPINLOCK(mchdev_lock);
     27 #endif
     28 
     29 static struct intel_gt *rps_to_gt(struct intel_rps *rps)
     30 {
     31 	return container_of(rps, struct intel_gt, rps);
     32 }
     33 
     34 static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
     35 {
     36 	return rps_to_gt(rps)->i915;
     37 }
     38 
     39 static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
     40 {
     41 	return rps_to_gt(rps)->uncore;
     42 }
     43 
     44 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
     45 {
     46 	return mask & ~rps->pm_intrmsk_mbz;
     47 }
     48 
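         /* Raw register write helper: the _fw variant bypasses forcewake bookkeeping. */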
     49 static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
     50 {
     51 	intel_uncore_write_fw(uncore, reg, val);
     52 }
     53 
     54 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
     55 {
     56 	u32 mask = 0;
     57 
     58 	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
     59 	if (val > rps->min_freq_softlimit)
     60 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
     61 			 GEN6_PM_RP_DOWN_THRESHOLD |
     62 			 GEN6_PM_RP_DOWN_TIMEOUT);
     63 
     64 	if (val < rps->max_freq_softlimit)
     65 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
     66 
     67 	mask &= rps->pm_events;
     68 
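         	/* GEN6_PMINTRMSK disables the interrupts whose bits are set, so invert */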
     69 	return rps_pm_sanitize_mask(rps, ~mask);
     70 }
     71 
     72 static void rps_reset_ei(struct intel_rps *rps)
     73 {
     74 	memset(&rps->ei, 0, sizeof(rps->ei));
     75 }
     76 
     77 static void rps_enable_interrupts(struct intel_rps *rps)
     78 {
     79 	struct intel_gt *gt = rps_to_gt(rps);
     80 
     81 	rps_reset_ei(rps);
     82 
     83 	if (IS_VALLEYVIEW(gt->i915))
     84 		/* WaGsvRC0ResidencyMethod:vlv */
     85 		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
     86 	else
     87 		rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
     88 				  GEN6_PM_RP_DOWN_THRESHOLD |
     89 				  GEN6_PM_RP_DOWN_TIMEOUT);
     90 
     91 	spin_lock_irq(&gt->irq_lock);
     92 	gen6_gt_pm_enable_irq(gt, rps->pm_events);
     93 	spin_unlock_irq(&gt->irq_lock);
     94 
     95 	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
     96 }
     97 
     98 static void gen6_rps_reset_interrupts(struct intel_rps *rps)
     99 {
    100 	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
    101 }
    102 
    103 static void gen11_rps_reset_interrupts(struct intel_rps *rps)
    104 {
    105 	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
    106 		;
    107 }
    108 
    109 static void rps_reset_interrupts(struct intel_rps *rps)
    110 {
    111 	struct intel_gt *gt = rps_to_gt(rps);
    112 
    113 	spin_lock_irq(&gt->irq_lock);
    114 	if (INTEL_GEN(gt->i915) >= 11)
    115 		gen11_rps_reset_interrupts(rps);
    116 	else
    117 		gen6_rps_reset_interrupts(rps);
    118 
    119 	rps->pm_iir = 0;
    120 	spin_unlock_irq(&gt->irq_lock);
    121 }
    122 
    123 static void rps_disable_interrupts(struct intel_rps *rps)
    124 {
    125 	struct intel_gt *gt = rps_to_gt(rps);
    126 
    127 	rps->pm_events = 0;
    128 
    129 	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
    130 
    131 	spin_lock_irq(&gt->irq_lock);
    132 	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
    133 	spin_unlock_irq(&gt->irq_lock);
    134 
    135 	intel_synchronize_irq(gt->i915);
    136 
    137 	/*
    138 	 * Now that we will not be generating any more work, flush any
    139 	 * outstanding tasks. As we are called on the RPS idle path,
    140 	 * we will reset the GPU to minimum frequencies, so the current
    141 	 * state of the worker can be discarded.
    142 	 */
    143 	cancel_work_sync(&rps->work);
    144 
    145 	rps_reset_interrupts(rps);
    146 }
    147 
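         /*
          * Coefficients for the IPS chipset power estimate: .i selects the
          * FSB-frequency band (c_m in gen5_rps_init), .t is the memory
          * frequency in MHz, and .m/.c are the slope and intercept used by
          * __ips_chipset_val().
          */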
    148 static const struct cparams {
    149 	u16 i;
    150 	u16 t;
    151 	u16 m;
    152 	u16 c;
    153 } cparams[] = {
    154 	{ 1, 1333, 301, 28664 },
    155 	{ 1, 1066, 294, 24460 },
    156 	{ 1, 800, 294, 25192 },
    157 	{ 0, 1333, 276, 27605 },
    158 	{ 0, 1066, 276, 27605 },
    159 	{ 0, 800, 231, 23784 },
    160 };
    161 
    162 static void gen5_rps_init(struct intel_rps *rps)
    163 {
    164 	struct drm_i915_private *i915 = rps_to_i915(rps);
    165 	struct intel_uncore *uncore = rps_to_uncore(rps);
    166 	u8 fmax, fmin, fstart;
    167 	u32 rgvmodectl;
    168 	int c_m, i;
    169 
    170 	if (i915->fsb_freq <= 3200)
    171 		c_m = 0;
    172 	else if (i915->fsb_freq <= 4800)
    173 		c_m = 1;
    174 	else
    175 		c_m = 2;
    176 
    177 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
    178 		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
    179 			rps->ips.m = cparams[i].m;
    180 			rps->ips.c = cparams[i].c;
    181 			break;
    182 		}
    183 	}
    184 
    185 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    186 
    187 	/* Set up min, max, and cur for interrupt handling */
    188 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
    189 	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
    190 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    191 		MEMMODE_FSTART_SHIFT;
    192 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
    193 			 fmax, fmin, fstart);
    194 
    195 	rps->min_freq = fmax;
    196 	rps->max_freq = fmin;
    197 
    198 	rps->idle_freq = rps->min_freq;
    199 	rps->cur_freq = rps->idle_freq;
    200 }
    201 
    202 static unsigned long
    203 __ips_chipset_val(struct intel_ips *ips)
    204 {
    205 	struct intel_uncore *uncore =
    206 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    207 	unsigned long now = jiffies_to_msecs(jiffies), dt;
    208 	unsigned long result;
    209 	u64 total, delta;
    210 
    211 	lockdep_assert_held(&mchdev_lock);
    212 
    213 	/*
    214 	 * Prevent division-by-zero if we are asking too fast.
    215 	 * Also, we don't get interesting results if we are polling
    216 	 * faster than once in 10ms, so just return the saved value
    217 	 * in such cases.
    218 	 */
    219 	dt = now - ips->last_time1;
    220 	if (dt <= 10)
    221 		return ips->chipset_power;
    222 
    223 	/* FIXME: handle per-counter overflow */
    224 	total = intel_uncore_read(uncore, DMIEC);
    225 	total += intel_uncore_read(uncore, DDREC);
    226 	total += intel_uncore_read(uncore, CSIEC);
    227 
    228 	delta = total - ips->last_count1;
    229 
    230 	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
    231 
    232 	ips->last_count1 = total;
    233 	ips->last_time1 = now;
    234 
    235 	ips->chipset_power = result;
    236 
    237 	return result;
    238 }
    239 
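         /*
          * Read the chipset thermal sensor (TR1 scaled by the TSFS slope and
          * intercept); __ips_gfx_val() uses the result as a temperature
          * estimate when picking its correction factor.
          */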
    240 static unsigned long ips_mch_val(struct intel_uncore *uncore)
    241 {
    242 	unsigned int m, x, b;
    243 	u32 tsfs;
    244 
    245 	tsfs = intel_uncore_read(uncore, TSFS);
    246 	x = intel_uncore_read8(uncore, TR1);
    247 
    248 	b = tsfs & TSFS_INTR_MASK;
    249 	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
    250 
    251 	return m * x / 127 - b;
    252 }
    253 
    254 static int _pxvid_to_vd(u8 pxvid)
    255 {
    256 	if (pxvid == 0)
    257 		return 0;
    258 
    259 	if (pxvid >= 8 && pxvid < 31)
    260 		pxvid = 31;
    261 
    262 	return (pxvid + 2) * 125;
    263 }
    264 
    265 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
    266 {
    267 	const int vd = _pxvid_to_vd(pxvid);
    268 
    269 	if (INTEL_INFO(i915)->is_mobile)
    270 		return max(vd - 1125, 0);
    271 
    272 	return vd;
    273 }
    274 
    275 static void __gen5_ips_update(struct intel_ips *ips)
    276 {
    277 	struct intel_uncore *uncore =
    278 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    279 	u64 now, delta, dt;
    280 	u32 count;
    281 
    282 	lockdep_assert_held(&mchdev_lock);
    283 
    284 	now = ktime_get_raw_ns();
    285 	dt = now - ips->last_time2;
    286 	do_div(dt, NSEC_PER_MSEC);
    287 
    288 	/* Don't divide by 0 */
    289 	if (dt <= 10)
    290 		return;
    291 
    292 	count = intel_uncore_read(uncore, GFXEC);
    293 	delta = count - ips->last_count2;
    294 
    295 	ips->last_count2 = count;
    296 	ips->last_time2 = now;
    297 
    298 	/* More magic constants... */
    299 	ips->gfx_power = div_u64(delta * 1181, dt * 10);
    300 }
    301 
    302 static void gen5_rps_update(struct intel_rps *rps)
    303 {
    304 	spin_lock_irq(&mchdev_lock);
    305 	__gen5_ips_update(&rps->ips);
    306 	spin_unlock_irq(&mchdev_lock);
    307 }
    308 
    309 static bool gen5_rps_set(struct intel_rps *rps, u8 val)
    310 {
    311 	struct intel_uncore *uncore = rps_to_uncore(rps);
    312 	u16 rgvswctl;
    313 
    314 	lockdep_assert_held(&mchdev_lock);
    315 
    316 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    317 	if (rgvswctl & MEMCTL_CMD_STS) {
    318 		DRM_DEBUG("gpu busy, RCS change rejected\n");
    319 		return false; /* still busy with another command */
    320 	}
    321 
    322 	/* Invert the frequency bin into an ips delay */
    323 	val = rps->max_freq - val;
    324 	val = rps->min_freq + val;
    325 
    326 	rgvswctl =
    327 		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
    328 		(val << MEMCTL_FREQ_SHIFT) |
    329 		MEMCTL_SFCAVM;
    330 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    331 	intel_uncore_posting_read16(uncore, MEMSWCTL);
    332 
    333 	rgvswctl |= MEMCTL_CMD_STS;
    334 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    335 
    336 	return true;
    337 }
    338 
    339 static unsigned long intel_pxfreq(u32 vidfreq)
    340 {
    341 	int div = (vidfreq & 0x3f0000) >> 16;
    342 	int post = (vidfreq & 0x3000) >> 12;
    343 	int pre = (vidfreq & 0x7);
    344 
    345 	if (!pre)
    346 		return 0;
    347 
    348 	return div * 133333 / (pre << post);
    349 }
    350 
    351 static unsigned int init_emon(struct intel_uncore *uncore)
    352 {
    353 	u8 pxw[16];
    354 	int i;
    355 
    356 	/* Disable to program */
    357 	intel_uncore_write(uncore, ECR, 0);
    358 	intel_uncore_posting_read(uncore, ECR);
    359 
    360 	/* Program energy weights for various events */
    361 	intel_uncore_write(uncore, SDEW, 0x15040d00);
    362 	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
    363 	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
    364 	intel_uncore_write(uncore, CSIEW2, 0x04000004);
    365 
    366 	for (i = 0; i < 5; i++)
    367 		intel_uncore_write(uncore, PEW(i), 0);
    368 	for (i = 0; i < 3; i++)
    369 		intel_uncore_write(uncore, DEW(i), 0);
    370 
    371 	/* Program P-state weights to account for frequency power adjustment */
    372 	for (i = 0; i < 16; i++) {
    373 		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
    374 		unsigned int freq = intel_pxfreq(pxvidfreq);
    375 		unsigned int vid =
    376 			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    377 		unsigned int val;
    378 
    379 		val = vid * vid * freq / 1000 * 255;
    380 		val /= 127 * 127 * 900;
    381 
    382 		pxw[i] = val;
    383 	}
    384 	/* Render standby states get 0 weight */
    385 	pxw[14] = 0;
    386 	pxw[15] = 0;
    387 
    388 	for (i = 0; i < 4; i++) {
    389 		intel_uncore_write(uncore, PXW(i),
    390 				   pxw[i * 4 + 0] << 24 |
    391 				   pxw[i * 4 + 1] << 16 |
    392 				   pxw[i * 4 + 2] <<  8 |
    393 				   pxw[i * 4 + 3] <<  0);
    394 	}
    395 
    396 	/* Adjust magic regs to magic values (more experimental results) */
    397 	intel_uncore_write(uncore, OGW0, 0);
    398 	intel_uncore_write(uncore, OGW1, 0);
    399 	intel_uncore_write(uncore, EG0, 0x00007f00);
    400 	intel_uncore_write(uncore, EG1, 0x0000000e);
    401 	intel_uncore_write(uncore, EG2, 0x000e0000);
    402 	intel_uncore_write(uncore, EG3, 0x68000300);
    403 	intel_uncore_write(uncore, EG4, 0x42000000);
    404 	intel_uncore_write(uncore, EG5, 0x00140031);
    405 	intel_uncore_write(uncore, EG6, 0);
    406 	intel_uncore_write(uncore, EG7, 0);
    407 
    408 	for (i = 0; i < 8; i++)
    409 		intel_uncore_write(uncore, PXWL(i), 0);
    410 
    411 	/* Enable PMON + select events */
    412 	intel_uncore_write(uncore, ECR, 0x80000019);
    413 
    414 	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
    415 }
    416 
    417 static bool gen5_rps_enable(struct intel_rps *rps)
    418 {
    419 	struct intel_uncore *uncore = rps_to_uncore(rps);
    420 	u8 fstart, vstart;
    421 	u32 rgvmodectl;
    422 
    423 	spin_lock_irq(&mchdev_lock);
    424 
    425 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    426 
    427 	/* Enable temp reporting */
    428 	intel_uncore_write16(uncore, PMMISC,
    429 			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
    430 	intel_uncore_write16(uncore, TSC1,
    431 			     intel_uncore_read16(uncore, TSC1) | TSE);
    432 
    433 	/* 100ms RC evaluation intervals */
    434 	intel_uncore_write(uncore, RCUPEI, 100000);
    435 	intel_uncore_write(uncore, RCDNEI, 100000);
    436 
    437 	/* Set max/min thresholds to 90ms and 80ms respectively */
    438 	intel_uncore_write(uncore, RCBMAXAVG, 90000);
    439 	intel_uncore_write(uncore, RCBMINAVG, 80000);
    440 
    441 	intel_uncore_write(uncore, MEMIHYST, 1);
    442 
    443 	/* Set up min, max, and cur for interrupt handling */
    444 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    445 		MEMMODE_FSTART_SHIFT;
    446 
    447 	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
    448 		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    449 
    450 	intel_uncore_write(uncore,
    451 			   MEMINTREN,
    452 			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
    453 
    454 	intel_uncore_write(uncore, VIDSTART, vstart);
    455 	intel_uncore_posting_read(uncore, VIDSTART);
    456 
    457 	rgvmodectl |= MEMMODE_SWMODE_EN;
    458 	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
    459 
    460 	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
    461 			     MEMCTL_CMD_STS) == 0, 10))
    462 		DRM_ERROR("stuck trying to change perf mode\n");
    463 	mdelay(1);
    464 
    465 	gen5_rps_set(rps, rps->cur_freq);
    466 
    467 	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
    468 	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
    469 	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
    470 	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
    471 
    472 	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
    473 	rps->ips.last_time2 = ktime_get_raw_ns();
    474 
    475 	spin_unlock_irq(&mchdev_lock);
    476 
    477 	rps->ips.corr = init_emon(uncore);
    478 
    479 	return true;
    480 }
    481 
    482 static void gen5_rps_disable(struct intel_rps *rps)
    483 {
    484 	struct intel_uncore *uncore = rps_to_uncore(rps);
    485 	u16 rgvswctl;
    486 
    487 	spin_lock_irq(&mchdev_lock);
    488 
    489 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    490 
    491 	/* Ack interrupts, disable EFC interrupt */
    492 	intel_uncore_write(uncore, MEMINTREN,
    493 			   intel_uncore_read(uncore, MEMINTREN) &
    494 			   ~MEMINT_EVAL_CHG_EN);
    495 	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
    496 	intel_uncore_write(uncore, DEIER,
    497 			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
    498 	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
    499 	intel_uncore_write(uncore, DEIMR,
    500 			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
    501 
    502 	/* Go back to the starting frequency */
    503 	gen5_rps_set(rps, rps->idle_freq);
    504 	mdelay(1);
    505 	rgvswctl |= MEMCTL_CMD_STS;
    506 	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
    507 	mdelay(1);
    508 
    509 	spin_unlock_irq(&mchdev_lock);
    510 }
    511 
    512 static u32 rps_limits(struct intel_rps *rps, u8 val)
    513 {
    514 	u32 limits;
    515 
    516 	/*
    517 	 * Only set the down limit when we've reached the lowest level to avoid
    518 	 * getting more interrupts, otherwise leave this clear. This prevents a
    519 	 * race in the hw when coming out of rc6: There's a tiny window where
    520 	 * the hw runs at the minimal clock before selecting the desired
    521 	 * frequency, if the down threshold expires in that window we will not
    522 	 * receive a down interrupt.
    523 	 */
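         	/*
         	 * Gen9+ frequencies are scaled by GEN9_FREQ_SCALER (see
         	 * gen6_rps_init), so the limit fields sit at different bit
         	 * offsets than on earlier gens.
         	 */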
    524 	if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
    525 		limits = rps->max_freq_softlimit << 23;
    526 		if (val <= rps->min_freq_softlimit)
    527 			limits |= rps->min_freq_softlimit << 14;
    528 	} else {
    529 		limits = rps->max_freq_softlimit << 24;
    530 		if (val <= rps->min_freq_softlimit)
    531 			limits |= rps->min_freq_softlimit << 16;
    532 	}
    533 
    534 	return limits;
    535 }
    536 
    537 static void rps_set_power(struct intel_rps *rps, int new_power)
    538 {
    539 	struct intel_uncore *uncore = rps_to_uncore(rps);
    540 	struct drm_i915_private *i915 = rps_to_i915(rps);
    541 	u32 threshold_up = 0, threshold_down = 0; /* in % */
    542 	u32 ei_up = 0, ei_down = 0;
    543 
    544 	lockdep_assert_held(&rps->power.mutex);
    545 
    546 	if (new_power == rps->power.mode)
    547 		return;
    548 
    549 	/* Note the units here are not exactly 1us, but 1280ns. */
    550 	switch (new_power) {
    551 	case LOW_POWER:
    552 		/* Upclock if more than 95% busy over 16ms */
    553 		ei_up = 16000;
    554 		threshold_up = 95;
    555 
    556 		/* Downclock if less than 85% busy over 32ms */
    557 		ei_down = 32000;
    558 		threshold_down = 85;
    559 		break;
    560 
    561 	case BETWEEN:
    562 		/* Upclock if more than 90% busy over 13ms */
    563 		ei_up = 13000;
    564 		threshold_up = 90;
    565 
    566 		/* Downclock if less than 75% busy over 32ms */
    567 		ei_down = 32000;
    568 		threshold_down = 75;
    569 		break;
    570 
    571 	case HIGH_POWER:
    572 		/* Upclock if more than 85% busy over 10ms */
    573 		ei_up = 10000;
    574 		threshold_up = 85;
    575 
    576 		/* Downclock if less than 60% busy over 32ms */
    577 		ei_down = 32000;
    578 		threshold_down = 60;
    579 		break;
    580 	}
    581 
     582 	/* When byt can survive dynamic sw freq adjustments without
     583 	 * hanging the system, this restriction can be lifted.
    584 	 */
    585 	if (IS_VALLEYVIEW(i915))
    586 		goto skip_hw_write;
    587 
    588 	set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
    589 	set(uncore, GEN6_RP_UP_THRESHOLD,
    590 	    GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
    591 
    592 	set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
    593 	set(uncore, GEN6_RP_DOWN_THRESHOLD,
    594 	    GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
    595 
    596 	set(uncore, GEN6_RP_CONTROL,
    597 	    (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
    598 	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
    599 	    GEN6_RP_MEDIA_IS_GFX |
    600 	    GEN6_RP_ENABLE |
    601 	    GEN6_RP_UP_BUSY_AVG |
    602 	    GEN6_RP_DOWN_IDLE_AVG);
    603 
    604 skip_hw_write:
    605 	rps->power.mode = new_power;
    606 	rps->power.up_threshold = threshold_up;
    607 	rps->power.down_threshold = threshold_down;
    608 }
    609 
    610 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
    611 {
    612 	int new_power;
    613 
    614 	new_power = rps->power.mode;
    615 	switch (rps->power.mode) {
    616 	case LOW_POWER:
    617 		if (val > rps->efficient_freq + 1 &&
    618 		    val > rps->cur_freq)
    619 			new_power = BETWEEN;
    620 		break;
    621 
    622 	case BETWEEN:
    623 		if (val <= rps->efficient_freq &&
    624 		    val < rps->cur_freq)
    625 			new_power = LOW_POWER;
    626 		else if (val >= rps->rp0_freq &&
    627 			 val > rps->cur_freq)
    628 			new_power = HIGH_POWER;
    629 		break;
    630 
    631 	case HIGH_POWER:
    632 		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
    633 		    val < rps->cur_freq)
    634 			new_power = BETWEEN;
    635 		break;
    636 	}
    637 	/* Max/min bins are special */
    638 	if (val <= rps->min_freq_softlimit)
    639 		new_power = LOW_POWER;
    640 	if (val >= rps->max_freq_softlimit)
    641 		new_power = HIGH_POWER;
    642 
    643 	mutex_lock(&rps->power.mutex);
    644 	if (rps->power.interactive)
    645 		new_power = HIGH_POWER;
    646 	rps_set_power(rps, new_power);
    647 	mutex_unlock(&rps->power.mutex);
    648 }
    649 
    650 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
    651 {
    652 	mutex_lock(&rps->power.mutex);
    653 	if (interactive) {
    654 		if (!rps->power.interactive++ && rps->active)
    655 			rps_set_power(rps, HIGH_POWER);
    656 	} else {
    657 		GEM_BUG_ON(!rps->power.interactive);
    658 		rps->power.interactive--;
    659 	}
    660 	mutex_unlock(&rps->power.mutex);
    661 }
    662 
    663 static int gen6_rps_set(struct intel_rps *rps, u8 val)
    664 {
    665 	struct intel_uncore *uncore = rps_to_uncore(rps);
    666 	struct drm_i915_private *i915 = rps_to_i915(rps);
    667 	u32 swreq;
    668 
    669 	if (INTEL_GEN(i915) >= 9)
    670 		swreq = GEN9_FREQUENCY(val);
    671 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
    672 		swreq = HSW_FREQUENCY(val);
    673 	else
    674 		swreq = (GEN6_FREQUENCY(val) |
    675 			 GEN6_OFFSET(0) |
    676 			 GEN6_AGGRESSIVE_TURBO);
    677 	set(uncore, GEN6_RPNSWREQ, swreq);
    678 
    679 	return 0;
    680 }
    681 
    682 static int vlv_rps_set(struct intel_rps *rps, u8 val)
    683 {
    684 	struct drm_i915_private *i915 = rps_to_i915(rps);
    685 	int err;
    686 
    687 	vlv_punit_get(i915);
    688 	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
    689 	vlv_punit_put(i915);
    690 
    691 	return err;
    692 }
    693 
    694 static int rps_set(struct intel_rps *rps, u8 val, bool update)
    695 {
    696 	struct drm_i915_private *i915 = rps_to_i915(rps);
    697 	int err;
    698 
    699 	if (INTEL_GEN(i915) < 6)
    700 		return 0;
    701 
    702 	if (val == rps->last_freq)
    703 		return 0;
    704 
    705 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
    706 		err = vlv_rps_set(rps, val);
    707 	else
    708 		err = gen6_rps_set(rps, val);
    709 	if (err)
    710 		return err;
    711 
    712 	if (update)
    713 		gen6_rps_set_thresholds(rps, val);
    714 	rps->last_freq = val;
    715 
    716 	return 0;
    717 }
    718 
    719 void intel_rps_unpark(struct intel_rps *rps)
    720 {
    721 	u8 freq;
    722 
    723 	if (!rps->enabled)
    724 		return;
    725 
    726 	/*
    727 	 * Use the user's desired frequency as a guide, but for better
    728 	 * performance, jump directly to RPe as our starting frequency.
    729 	 */
    730 	mutex_lock(&rps->lock);
    731 	rps->active = true;
     732 	freq = max(rps->cur_freq, rps->efficient_freq);
    733 	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
    734 	intel_rps_set(rps, freq);
    735 	rps->last_adj = 0;
    736 	mutex_unlock(&rps->lock);
    737 
    738 	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
    739 		rps_enable_interrupts(rps);
    740 
    741 	if (IS_GEN(rps_to_i915(rps), 5))
    742 		gen5_rps_update(rps);
    743 }
    744 
    745 void intel_rps_park(struct intel_rps *rps)
    746 {
    747 	struct drm_i915_private *i915 = rps_to_i915(rps);
    748 
    749 	if (!rps->enabled)
    750 		return;
    751 
    752 	if (INTEL_GEN(i915) >= 6)
    753 		rps_disable_interrupts(rps);
    754 
    755 	rps->active = false;
    756 	if (rps->last_freq <= rps->idle_freq)
    757 		return;
    758 
    759 	/*
    760 	 * The punit delays the write of the frequency and voltage until it
    761 	 * determines the GPU is awake. During normal usage we don't want to
    762 	 * waste power changing the frequency if the GPU is sleeping (rc6).
    763 	 * However, the GPU and driver is now idle and we do not want to delay
    764 	 * switching to minimum voltage (reducing power whilst idle) as we do
    765 	 * not expect to be woken in the near future and so must flush the
    766 	 * change by waking the device.
    767 	 *
    768 	 * We choose to take the media powerwell (either would do to trick the
    769 	 * punit into committing the voltage change) as that takes a lot less
    770 	 * power than the render powerwell.
    771 	 */
    772 	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    773 	rps_set(rps, rps->idle_freq, false);
    774 	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    775 }
    776 
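         /*
          * Flag a waiting request as boosted and kick the RPS worker;
          * rps_work() then bumps the GPU to boost_freq on the waiter's behalf.
          */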
    777 void intel_rps_boost(struct i915_request *rq)
    778 {
    779 	struct intel_rps *rps = &rq->engine->gt->rps;
    780 	unsigned long flags;
    781 
    782 	if (i915_request_signaled(rq) || !rps->active)
    783 		return;
    784 
    785 	/* Serializes with i915_request_retire() */
    786 	spin_lock_irqsave(&rq->lock, flags);
    787 	if (!i915_request_has_waitboost(rq) &&
    788 	    !dma_fence_is_signaled_locked(&rq->fence)) {
    789 		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
    790 
    791 		if (!atomic_fetch_inc(&rps->num_waiters) &&
    792 		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
    793 			schedule_work(&rps->work);
    794 
    795 		atomic_inc(&rps->boosts);
    796 	}
    797 	spin_unlock_irqrestore(&rq->lock, flags);
    798 }
    799 
    800 int intel_rps_set(struct intel_rps *rps, u8 val)
    801 {
    802 	int err;
    803 
    804 	lockdep_assert_held(&rps->lock);
    805 	GEM_BUG_ON(val > rps->max_freq);
    806 	GEM_BUG_ON(val < rps->min_freq);
    807 
    808 	if (rps->active) {
    809 		err = rps_set(rps, val, true);
    810 		if (err)
    811 			return err;
    812 
    813 		/*
    814 		 * Make sure we continue to get interrupts
    815 		 * until we hit the minimum or maximum frequencies.
    816 		 */
    817 		if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
    818 			struct intel_uncore *uncore = rps_to_uncore(rps);
    819 
    820 			set(uncore,
    821 			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
    822 
    823 			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
    824 		}
    825 	}
    826 
    827 	rps->cur_freq = val;
    828 	return 0;
    829 }
    830 
    831 static void gen6_rps_init(struct intel_rps *rps)
    832 {
    833 	struct drm_i915_private *i915 = rps_to_i915(rps);
    834 	struct intel_uncore *uncore = rps_to_uncore(rps);
    835 
    836 	/* All of these values are in units of 50MHz */
    837 
    838 	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
    839 	if (IS_GEN9_LP(i915)) {
    840 		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
    841 
    842 		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
    843 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
    844 		rps->min_freq = (rp_state_cap >>  0) & 0xff;
    845 	} else {
    846 		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
    847 
    848 		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
    849 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
    850 		rps->min_freq = (rp_state_cap >> 16) & 0xff;
    851 	}
    852 
    853 	/* hw_max = RP0 until we check for overclocking */
    854 	rps->max_freq = rps->rp0_freq;
    855 
    856 	rps->efficient_freq = rps->rp1_freq;
    857 	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
    858 	    IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
    859 		u32 ddcc_status = 0;
    860 
    861 		if (sandybridge_pcode_read(i915,
    862 					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
    863 					   &ddcc_status, NULL) == 0)
    864 			rps->efficient_freq =
    865 				clamp_t(u8,
    866 					(ddcc_status >> 8) & 0xff,
    867 					rps->min_freq,
    868 					rps->max_freq);
    869 	}
    870 
    871 	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
     872 		/* Store the frequency values in 16.66 MHz units, which is
    873 		 * the natural hardware unit for SKL
    874 		 */
    875 		rps->rp0_freq *= GEN9_FREQ_SCALER;
    876 		rps->rp1_freq *= GEN9_FREQ_SCALER;
    877 		rps->min_freq *= GEN9_FREQ_SCALER;
    878 		rps->max_freq *= GEN9_FREQ_SCALER;
    879 		rps->efficient_freq *= GEN9_FREQ_SCALER;
    880 	}
    881 }
    882 
    883 static bool rps_reset(struct intel_rps *rps)
    884 {
    885 	/* force a reset */
    886 	rps->power.mode = -1;
    887 	rps->last_freq = -1;
    888 
    889 	if (rps_set(rps, rps->min_freq, true)) {
    890 		DRM_ERROR("Failed to reset RPS to initial values\n");
    891 		return false;
    892 	}
    893 
    894 	rps->cur_freq = rps->min_freq;
    895 	return true;
    896 }
    897 
    898 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
    899 static bool gen9_rps_enable(struct intel_rps *rps)
    900 {
    901 	struct drm_i915_private *i915 = rps_to_i915(rps);
    902 	struct intel_uncore *uncore = rps_to_uncore(rps);
    903 
    904 	/* Program defaults and thresholds for RPS */
    905 	if (IS_GEN(i915, 9))
    906 		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
    907 				      GEN9_FREQUENCY(rps->rp1_freq));
    908 
    909 	/* 1 second timeout */
    910 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
    911 			      GT_INTERVAL_FROM_US(i915, 1000000));
    912 
    913 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
    914 
    915 	return rps_reset(rps);
    916 }
    917 
    918 static bool gen8_rps_enable(struct intel_rps *rps)
    919 {
    920 	struct intel_uncore *uncore = rps_to_uncore(rps);
    921 
    922 	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
    923 			      HSW_FREQUENCY(rps->rp1_freq));
    924 
    925 	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
    926 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
    927 			      100000000 / 128); /* 1 second timeout */
    928 
    929 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
    930 
    931 	return rps_reset(rps);
    932 }
    933 
    934 static bool gen6_rps_enable(struct intel_rps *rps)
    935 {
    936 	struct intel_uncore *uncore = rps_to_uncore(rps);
    937 
    938 	/* Power down if completely idle for over 50ms */
    939 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
    940 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
    941 
    942 	return rps_reset(rps);
    943 }
    944 
    945 static int chv_rps_max_freq(struct intel_rps *rps)
    946 {
    947 	struct drm_i915_private *i915 = rps_to_i915(rps);
    948 	u32 val;
    949 
    950 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
    951 
    952 	switch (RUNTIME_INFO(i915)->sseu.eu_total) {
    953 	case 8:
    954 		/* (2 * 4) config */
    955 		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
    956 		break;
    957 	case 12:
    958 		/* (2 * 6) config */
    959 		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
    960 		break;
    961 	case 16:
    962 		/* (2 * 8) config */
    963 	default:
    964 		/* Setting (2 * 8) Min RP0 for any other combination */
    965 		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
    966 		break;
    967 	}
    968 
    969 	return val & FB_GFX_FREQ_FUSE_MASK;
    970 }
    971 
    972 static int chv_rps_rpe_freq(struct intel_rps *rps)
    973 {
    974 	struct drm_i915_private *i915 = rps_to_i915(rps);
    975 	u32 val;
    976 
    977 	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
    978 	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
    979 
    980 	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
    981 }
    982 
    983 static int chv_rps_guar_freq(struct intel_rps *rps)
    984 {
    985 	struct drm_i915_private *i915 = rps_to_i915(rps);
    986 	u32 val;
    987 
    988 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
    989 
    990 	return val & FB_GFX_FREQ_FUSE_MASK;
    991 }
    992 
    993 static u32 chv_rps_min_freq(struct intel_rps *rps)
    994 {
    995 	struct drm_i915_private *i915 = rps_to_i915(rps);
    996 	u32 val;
    997 
    998 	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
    999 	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
   1000 
   1001 	return val & FB_GFX_FREQ_FUSE_MASK;
   1002 }
   1003 
   1004 static bool chv_rps_enable(struct intel_rps *rps)
   1005 {
   1006 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1007 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1008 	u32 val;
   1009 
    1010 	/* 1: Program defaults and thresholds for RPS */
   1011 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1012 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1013 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1014 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1015 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1016 
   1017 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1018 
   1019 	/* 2: Enable RPS */
   1020 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1021 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1022 			      GEN6_RP_MEDIA_IS_GFX |
   1023 			      GEN6_RP_ENABLE |
   1024 			      GEN6_RP_UP_BUSY_AVG |
   1025 			      GEN6_RP_DOWN_IDLE_AVG);
   1026 
   1027 	/* Setting Fixed Bias */
   1028 	vlv_punit_get(i915);
   1029 
   1030 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
   1031 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1032 
   1033 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1034 
   1035 	vlv_punit_put(i915);
   1036 
   1037 	/* RPS code assumes GPLL is used */
   1038 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
   1039 
   1040 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
   1041 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
   1042 
   1043 	return rps_reset(rps);
   1044 }
   1045 
   1046 static int vlv_rps_guar_freq(struct intel_rps *rps)
   1047 {
   1048 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1049 	u32 val, rp1;
   1050 
   1051 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1052 
   1053 	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
   1054 	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
   1055 
   1056 	return rp1;
   1057 }
   1058 
   1059 static int vlv_rps_max_freq(struct intel_rps *rps)
   1060 {
   1061 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1062 	u32 val, rp0;
   1063 
   1064 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1065 
   1066 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
   1067 	/* Clamp to max */
   1068 	rp0 = min_t(u32, rp0, 0xea);
   1069 
   1070 	return rp0;
   1071 }
   1072 
   1073 static int vlv_rps_rpe_freq(struct intel_rps *rps)
   1074 {
   1075 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1076 	u32 val, rpe;
   1077 
   1078 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
   1079 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
   1080 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
   1081 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
   1082 
   1083 	return rpe;
   1084 }
   1085 
   1086 static int vlv_rps_min_freq(struct intel_rps *rps)
   1087 {
   1088 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1089 	u32 val;
   1090 
   1091 	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
   1092 	/*
   1093 	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
   1094 	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
   1095 	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
    1096 	 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
   1097 	 * to make sure it matches what Punit accepts.
   1098 	 */
   1099 	return max_t(u32, val, 0xc0);
   1100 }
   1101 
   1102 static bool vlv_rps_enable(struct intel_rps *rps)
   1103 {
   1104 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1105 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1106 	u32 val;
   1107 
   1108 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1109 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1110 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1111 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1112 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1113 
   1114 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1115 
   1116 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1117 			      GEN6_RP_MEDIA_TURBO |
   1118 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1119 			      GEN6_RP_MEDIA_IS_GFX |
   1120 			      GEN6_RP_ENABLE |
   1121 			      GEN6_RP_UP_BUSY_AVG |
   1122 			      GEN6_RP_DOWN_IDLE_CONT);
   1123 
   1124 	vlv_punit_get(i915);
   1125 
   1126 	/* Setting Fixed Bias */
   1127 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
   1128 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1129 
   1130 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1131 
   1132 	vlv_punit_put(i915);
   1133 
   1134 	/* RPS code assumes GPLL is used */
   1135 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
   1136 
   1137 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
   1138 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
   1139 
   1140 	return rps_reset(rps);
   1141 }
   1142 
   1143 static unsigned long __ips_gfx_val(struct intel_ips *ips)
   1144 {
   1145 	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
   1146 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1147 	unsigned long t, corr, state1, corr2, state2;
   1148 	u32 pxvid, ext_v;
   1149 
   1150 	lockdep_assert_held(&mchdev_lock);
   1151 
   1152 	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
   1153 	pxvid = (pxvid >> 24) & 0x7f;
   1154 	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
   1155 
   1156 	state1 = ext_v;
   1157 
   1158 	/* Revel in the empirically derived constants */
   1159 
   1160 	/* Correction factor in 1/100000 units */
   1161 	t = ips_mch_val(uncore);
   1162 	if (t > 80)
   1163 		corr = t * 2349 + 135940;
   1164 	else if (t >= 50)
   1165 		corr = t * 964 + 29317;
   1166 	else /* < 50 */
   1167 		corr = t * 301 + 1004;
   1168 
   1169 	corr = corr * 150142 * state1 / 10000 - 78642;
   1170 	corr /= 100000;
   1171 	corr2 = corr * ips->corr;
   1172 
   1173 	state2 = corr2 * state1 / 10000;
   1174 	state2 /= 100; /* convert to mW */
   1175 
   1176 	__gen5_ips_update(ips);
   1177 
   1178 	return ips->gfx_power + state2;
   1179 }
   1180 
   1181 void intel_rps_enable(struct intel_rps *rps)
   1182 {
   1183 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1184 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1185 
   1186 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
   1187 	if (IS_CHERRYVIEW(i915))
   1188 		rps->enabled = chv_rps_enable(rps);
   1189 	else if (IS_VALLEYVIEW(i915))
   1190 		rps->enabled = vlv_rps_enable(rps);
   1191 	else if (INTEL_GEN(i915) >= 9)
   1192 		rps->enabled = gen9_rps_enable(rps);
   1193 	else if (INTEL_GEN(i915) >= 8)
   1194 		rps->enabled = gen8_rps_enable(rps);
   1195 	else if (INTEL_GEN(i915) >= 6)
   1196 		rps->enabled = gen6_rps_enable(rps);
   1197 	else if (IS_IRONLAKE_M(i915))
   1198 		rps->enabled = gen5_rps_enable(rps);
   1199 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
   1200 	if (!rps->enabled)
   1201 		return;
   1202 
   1203 	WARN_ON(rps->max_freq < rps->min_freq);
   1204 	WARN_ON(rps->idle_freq > rps->max_freq);
   1205 
   1206 	WARN_ON(rps->efficient_freq < rps->min_freq);
   1207 	WARN_ON(rps->efficient_freq > rps->max_freq);
   1208 }
   1209 
   1210 static void gen6_rps_disable(struct intel_rps *rps)
   1211 {
   1212 	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
   1213 }
   1214 
   1215 void intel_rps_disable(struct intel_rps *rps)
   1216 {
   1217 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1218 
   1219 	rps->enabled = false;
   1220 
   1221 	if (INTEL_GEN(i915) >= 6)
   1222 		gen6_rps_disable(rps);
   1223 	else if (IS_IRONLAKE_M(i915))
   1224 		gen5_rps_disable(rps);
   1225 }
   1226 
   1227 static int byt_gpu_freq(struct intel_rps *rps, int val)
   1228 {
   1229 	/*
   1230 	 * N = val - 0xb7
   1231 	 * Slow = Fast = GPLL ref * N
   1232 	 */
   1233 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
   1234 }
   1235 
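         /* Inverse of byt_gpu_freq(): convert MHz back into a Punit opcode. */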
   1236 static int byt_freq_opcode(struct intel_rps *rps, int val)
   1237 {
   1238 	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
   1239 }
   1240 
   1241 static int chv_gpu_freq(struct intel_rps *rps, int val)
   1242 {
   1243 	/*
   1244 	 * N = val / 2
   1245 	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
   1246 	 */
   1247 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
   1248 }
   1249 
   1250 static int chv_freq_opcode(struct intel_rps *rps, int val)
   1251 {
   1252 	/* CHV needs even values */
   1253 	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
   1254 }
   1255 
   1256 int intel_gpu_freq(struct intel_rps *rps, int val)
   1257 {
   1258 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1259 
   1260 	if (INTEL_GEN(i915) >= 9)
   1261 		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
   1262 					 GEN9_FREQ_SCALER);
   1263 	else if (IS_CHERRYVIEW(i915))
   1264 		return chv_gpu_freq(rps, val);
   1265 	else if (IS_VALLEYVIEW(i915))
   1266 		return byt_gpu_freq(rps, val);
   1267 	else
   1268 		return val * GT_FREQUENCY_MULTIPLIER;
   1269 }
   1270 
   1271 int intel_freq_opcode(struct intel_rps *rps, int val)
   1272 {
   1273 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1274 
   1275 	if (INTEL_GEN(i915) >= 9)
   1276 		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
   1277 					 GT_FREQUENCY_MULTIPLIER);
   1278 	else if (IS_CHERRYVIEW(i915))
   1279 		return chv_freq_opcode(rps, val);
   1280 	else if (IS_VALLEYVIEW(i915))
   1281 		return byt_freq_opcode(rps, val);
   1282 	else
   1283 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
   1284 }
   1285 
   1286 static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
   1287 {
   1288 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1289 
   1290 	rps->gpll_ref_freq =
   1291 		vlv_get_cck_clock(i915, "GPLL ref",
   1292 				  CCK_GPLL_CLOCK_CONTROL,
   1293 				  i915->czclk_freq);
   1294 
   1295 	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
   1296 }
   1297 
   1298 static void vlv_rps_init(struct intel_rps *rps)
   1299 {
   1300 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1301 	u32 val;
   1302 
   1303 	vlv_iosf_sb_get(i915,
   1304 			BIT(VLV_IOSF_SB_PUNIT) |
   1305 			BIT(VLV_IOSF_SB_NC) |
   1306 			BIT(VLV_IOSF_SB_CCK));
   1307 
   1308 	vlv_init_gpll_ref_freq(rps);
   1309 
   1310 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1311 	switch ((val >> 6) & 3) {
   1312 	case 0:
   1313 	case 1:
   1314 		i915->mem_freq = 800;
   1315 		break;
   1316 	case 2:
   1317 		i915->mem_freq = 1066;
   1318 		break;
   1319 	case 3:
   1320 		i915->mem_freq = 1333;
   1321 		break;
   1322 	}
   1323 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
   1324 
   1325 	rps->max_freq = vlv_rps_max_freq(rps);
   1326 	rps->rp0_freq = rps->max_freq;
   1327 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
   1328 			 intel_gpu_freq(rps, rps->max_freq),
   1329 			 rps->max_freq);
   1330 
   1331 	rps->efficient_freq = vlv_rps_rpe_freq(rps);
   1332 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
   1333 			 intel_gpu_freq(rps, rps->efficient_freq),
   1334 			 rps->efficient_freq);
   1335 
   1336 	rps->rp1_freq = vlv_rps_guar_freq(rps);
   1337 	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
   1338 			 intel_gpu_freq(rps, rps->rp1_freq),
   1339 			 rps->rp1_freq);
   1340 
   1341 	rps->min_freq = vlv_rps_min_freq(rps);
   1342 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
   1343 			 intel_gpu_freq(rps, rps->min_freq),
   1344 			 rps->min_freq);
   1345 
   1346 	vlv_iosf_sb_put(i915,
   1347 			BIT(VLV_IOSF_SB_PUNIT) |
   1348 			BIT(VLV_IOSF_SB_NC) |
   1349 			BIT(VLV_IOSF_SB_CCK));
   1350 }
   1351 
   1352 static void chv_rps_init(struct intel_rps *rps)
   1353 {
   1354 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1355 	u32 val;
   1356 
   1357 	vlv_iosf_sb_get(i915,
   1358 			BIT(VLV_IOSF_SB_PUNIT) |
   1359 			BIT(VLV_IOSF_SB_NC) |
   1360 			BIT(VLV_IOSF_SB_CCK));
   1361 
   1362 	vlv_init_gpll_ref_freq(rps);
   1363 
   1364 	val = vlv_cck_read(i915, CCK_FUSE_REG);
   1365 
   1366 	switch ((val >> 2) & 0x7) {
   1367 	case 3:
   1368 		i915->mem_freq = 2000;
   1369 		break;
   1370 	default:
   1371 		i915->mem_freq = 1600;
   1372 		break;
   1373 	}
   1374 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
   1375 
   1376 	rps->max_freq = chv_rps_max_freq(rps);
   1377 	rps->rp0_freq = rps->max_freq;
   1378 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
   1379 			 intel_gpu_freq(rps, rps->max_freq),
   1380 			 rps->max_freq);
   1381 
   1382 	rps->efficient_freq = chv_rps_rpe_freq(rps);
   1383 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
   1384 			 intel_gpu_freq(rps, rps->efficient_freq),
   1385 			 rps->efficient_freq);
   1386 
   1387 	rps->rp1_freq = chv_rps_guar_freq(rps);
   1388 	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
   1389 			 intel_gpu_freq(rps, rps->rp1_freq),
   1390 			 rps->rp1_freq);
   1391 
   1392 	rps->min_freq = chv_rps_min_freq(rps);
   1393 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
   1394 			 intel_gpu_freq(rps, rps->min_freq),
   1395 			 rps->min_freq);
   1396 
   1397 	vlv_iosf_sb_put(i915,
   1398 			BIT(VLV_IOSF_SB_PUNIT) |
   1399 			BIT(VLV_IOSF_SB_NC) |
   1400 			BIT(VLV_IOSF_SB_CCK));
   1401 
   1402 	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
   1403 		   rps->min_freq) & 1,
   1404 		  "Odd GPU freq values\n");
   1405 }
   1406 
   1407 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
   1408 {
   1409 	ei->ktime = ktime_get_raw();
   1410 	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
   1411 	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
   1412 }
   1413 
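         /*
          * WaGsvRC0ResidencyMethod:vlv
          * vlv only receives UP_EI_EXPIRED interrupts (see
          * rps_enable_interrupts), so synthesize the up/down threshold events
          * here from the render/media C0 residency counters.
          */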
   1414 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
   1415 {
   1416 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1417 	const struct intel_rps_ei *prev = &rps->ei;
   1418 	struct intel_rps_ei now;
   1419 	u32 events = 0;
   1420 
   1421 	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
   1422 		return 0;
   1423 
   1424 	vlv_c0_read(uncore, &now);
   1425 
   1426 	if (prev->ktime) {
   1427 		u64 time, c0;
   1428 		u32 render, media;
   1429 
   1430 		time = ktime_us_delta(now.ktime, prev->ktime);
   1431 
   1432 		time *= rps_to_i915(rps)->czclk_freq;
   1433 
   1434 		/* Workload can be split between render + media,
   1435 		 * e.g. SwapBuffers being blitted in X after being rendered in
   1436 		 * mesa. To account for this we need to combine both engines
   1437 		 * into our activity counter.
   1438 		 */
   1439 		render = now.render_c0 - prev->render_c0;
   1440 		media = now.media_c0 - prev->media_c0;
   1441 		c0 = max(render, media);
   1442 		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
   1443 
   1444 		if (c0 > time * rps->power.up_threshold)
   1445 			events = GEN6_PM_RP_UP_THRESHOLD;
   1446 		else if (c0 < time * rps->power.down_threshold)
   1447 			events = GEN6_PM_RP_DOWN_THRESHOLD;
   1448 	}
   1449 
   1450 	rps->ei = now;
   1451 	return events;
   1452 }
   1453 
   1454 static void rps_work(struct work_struct *work)
   1455 {
   1456 	struct intel_rps *rps = container_of(work, typeof(*rps), work);
   1457 	struct intel_gt *gt = rps_to_gt(rps);
   1458 	bool client_boost = false;
   1459 	int new_freq, adj, min, max;
   1460 	u32 pm_iir = 0;
   1461 
   1462 	spin_lock_irq(&gt->irq_lock);
   1463 	pm_iir = fetch_and_zero(&rps->pm_iir);
   1464 	client_boost = atomic_read(&rps->num_waiters);
   1465 	spin_unlock_irq(&gt->irq_lock);
   1466 
   1467 	/* Make sure we didn't queue anything we're not going to process. */
   1468 	if ((pm_iir & rps->pm_events) == 0 && !client_boost)
   1469 		goto out;
   1470 
   1471 	mutex_lock(&rps->lock);
   1472 
   1473 	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
   1474 
   1475 	adj = rps->last_adj;
   1476 	new_freq = rps->cur_freq;
   1477 	min = rps->min_freq_softlimit;
   1478 	max = rps->max_freq_softlimit;
   1479 	if (client_boost)
   1480 		max = rps->max_freq;
   1481 	if (client_boost && new_freq < rps->boost_freq) {
   1482 		new_freq = rps->boost_freq;
   1483 		adj = 0;
   1484 	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
   1485 		if (adj > 0)
   1486 			adj *= 2;
   1487 		else /* CHV needs even encode values */
   1488 			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
   1489 
   1490 		if (new_freq >= rps->max_freq_softlimit)
   1491 			adj = 0;
   1492 	} else if (client_boost) {
   1493 		adj = 0;
   1494 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
   1495 		if (rps->cur_freq > rps->efficient_freq)
   1496 			new_freq = rps->efficient_freq;
   1497 		else if (rps->cur_freq > rps->min_freq_softlimit)
   1498 			new_freq = rps->min_freq_softlimit;
   1499 		adj = 0;
   1500 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
   1501 		if (adj < 0)
   1502 			adj *= 2;
   1503 		else /* CHV needs even encode values */
   1504 			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
   1505 
   1506 		if (new_freq <= rps->min_freq_softlimit)
   1507 			adj = 0;
   1508 	} else { /* unknown event */
   1509 		adj = 0;
   1510 	}
   1511 
   1512 	rps->last_adj = adj;
   1513 
   1514 	/*
   1515 	 * Limit deboosting and boosting to keep ourselves at the extremes
   1516 	 * when in the respective power modes (i.e. slowly decrease frequencies
   1517 	 * while in the HIGH_POWER zone and slowly increase frequencies while
   1518 	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
   1519 	 * to the next level quickly, and conversely if busy we expect to
   1520 	 * hit a waitboost and rapidly switch into max power.
   1521 	 */
   1522 	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
   1523 	    (adj > 0 && rps->power.mode == LOW_POWER))
   1524 		rps->last_adj = 0;
   1525 
   1526 	/* sysfs frequency interfaces may have snuck in while servicing the
   1527 	 * interrupt
   1528 	 */
   1529 	new_freq += adj;
   1530 	new_freq = clamp_t(int, new_freq, min, max);
   1531 
   1532 	if (intel_rps_set(rps, new_freq)) {
   1533 		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
   1534 		rps->last_adj = 0;
   1535 	}
   1536 
   1537 	mutex_unlock(&rps->lock);
   1538 
   1539 out:
   1540 	spin_lock_irq(&gt->irq_lock);
   1541 	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
   1542 	spin_unlock_irq(&gt->irq_lock);
   1543 }
   1544 
   1545 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1546 {
   1547 	struct intel_gt *gt = rps_to_gt(rps);
   1548 	const u32 events = rps->pm_events & pm_iir;
   1549 
   1550 	lockdep_assert_held(&gt->irq_lock);
   1551 
   1552 	if (unlikely(!events))
   1553 		return;
   1554 
   1555 	gen6_gt_pm_mask_irq(gt, events);
   1556 
   1557 	rps->pm_iir |= events;
   1558 	schedule_work(&rps->work);
   1559 }
   1560 
   1561 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1562 {
   1563 	struct intel_gt *gt = rps_to_gt(rps);
   1564 
   1565 	if (pm_iir & rps->pm_events) {
   1566 		spin_lock(&gt->irq_lock);
   1567 		gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
   1568 		rps->pm_iir |= pm_iir & rps->pm_events;
   1569 		schedule_work(&rps->work);
   1570 		spin_unlock(&gt->irq_lock);
   1571 	}
   1572 
   1573 	if (INTEL_GEN(gt->i915) >= 8)
   1574 		return;
   1575 
   1576 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
   1577 		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
   1578 
   1579 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
   1580 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
   1581 }
   1582 
   1583 void gen5_rps_irq_handler(struct intel_rps *rps)
   1584 {
   1585 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1586 	u32 busy_up, busy_down, max_avg, min_avg;
   1587 	u8 new_freq;
   1588 
   1589 	spin_lock(&mchdev_lock);
   1590 
   1591 	intel_uncore_write16(uncore,
   1592 			     MEMINTRSTS,
   1593 			     intel_uncore_read(uncore, MEMINTRSTS));
   1594 
   1595 	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
   1596 	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
   1597 	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
   1598 	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
   1599 	min_avg = intel_uncore_read(uncore, RCBMINAVG);
   1600 
   1601 	/* Handle RCS change request from hw */
   1602 	new_freq = rps->cur_freq;
   1603 	if (busy_up > max_avg)
   1604 		new_freq++;
   1605 	else if (busy_down < min_avg)
   1606 		new_freq--;
   1607 	new_freq = clamp(new_freq,
   1608 			 rps->min_freq_softlimit,
   1609 			 rps->max_freq_softlimit);
   1610 
   1611 	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
   1612 		rps->cur_freq = new_freq;
   1613 
   1614 	spin_unlock(&mchdev_lock);
   1615 }
   1616 
   1617 void intel_rps_init_early(struct intel_rps *rps)
   1618 {
   1619 	mutex_init(&rps->lock);
   1620 	mutex_init(&rps->power.mutex);
   1621 
   1622 	INIT_WORK(&rps->work, rps_work);
   1623 
   1624 	atomic_set(&rps->num_waiters, 0);
   1625 }
   1626 
   1627 void intel_rps_init(struct intel_rps *rps)
   1628 {
   1629 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1630 
   1631 	if (IS_CHERRYVIEW(i915))
   1632 		chv_rps_init(rps);
   1633 	else if (IS_VALLEYVIEW(i915))
   1634 		vlv_rps_init(rps);
   1635 	else if (INTEL_GEN(i915) >= 6)
   1636 		gen6_rps_init(rps);
   1637 	else if (IS_IRONLAKE_M(i915))
   1638 		gen5_rps_init(rps);
   1639 
   1640 	/* Derive initial user preferences/limits from the hardware limits */
   1641 	rps->max_freq_softlimit = rps->max_freq;
   1642 	rps->min_freq_softlimit = rps->min_freq;
   1643 
   1644 	/* After setting max-softlimit, find the overclock max freq */
   1645 	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
   1646 		u32 params = 0;
   1647 
   1648 		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
   1649 				       &params, NULL);
   1650 		if (params & BIT(31)) { /* OC supported */
   1651 			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
   1652 					 (rps->max_freq & 0xff) * 50,
   1653 					 (params & 0xff) * 50);
   1654 			rps->max_freq = params & 0xff;
   1655 		}
   1656 	}
   1657 
   1658 	/* Finally allow us to boost to max by default */
   1659 	rps->boost_freq = rps->max_freq;
   1660 	rps->idle_freq = rps->min_freq;
   1661 	rps->cur_freq = rps->idle_freq;
   1662 
   1663 	rps->pm_intrmsk_mbz = 0;
   1664 
   1665 	/*
    1666 	 * SNB, IVB and HSW can hard hang (and VLV, CHV may) on a looping
    1667 	 * batchbuffer if GEN6_PM_RP_UP_EI_EXPIRED is masked.
   1668 	 *
   1669 	 * TODO: verify if this can be reproduced on VLV,CHV.
   1670 	 */
   1671 	if (INTEL_GEN(i915) <= 7)
   1672 		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
   1673 
   1674 	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
   1675 		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
   1676 }
   1677 
   1678 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
   1679 {
   1680 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1681 	u32 cagf;
   1682 
   1683 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
   1684 		cagf = (rpstat >> 8) & 0xff;
   1685 	else if (INTEL_GEN(i915) >= 9)
   1686 		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
   1687 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
   1688 		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
   1689 	else
   1690 		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
   1691 
   1692 	return cagf;
   1693 }
   1694 
   1695 static u32 read_cagf(struct intel_rps *rps)
   1696 {
   1697 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1698 	u32 freq;
   1699 
   1700 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
   1701 		vlv_punit_get(i915);
   1702 		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1703 		vlv_punit_put(i915);
   1704 	} else {
   1705 		freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
   1706 	}
   1707 
   1708 	return intel_rps_get_cagf(rps, freq);
   1709 }
   1710 
   1711 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
   1712 {
   1713 	struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
   1714 	intel_wakeref_t wakeref;
   1715 	u32 freq = 0;
   1716 
   1717 	with_intel_runtime_pm_if_in_use(rpm, wakeref)
   1718 		freq = intel_gpu_freq(rps, read_cagf(rps));
   1719 
   1720 	return freq;
   1721 }
   1722 
   1723 /* External interface for intel_ips.ko */
   1724 
   1725 static struct drm_i915_private __rcu *ips_mchdev;
   1726 
   1727 /**
   1728  * Tells the intel_ips driver that the i915 driver is now loaded, if
   1729  * IPS got loaded first.
   1730  *
   1731  * This awkward dance is so that neither module has to depend on the
   1732  * other in order for IPS to do the appropriate communication of
   1733  * GPU turbo limits to i915.
   1734  */
   1735 static void
   1736 ips_ping_for_i915_load(void)
   1737 {
   1738 #ifndef __NetBSD__		/* XXX IPS GPU turbo limits what?  */
   1739 	void (*link)(void);
   1740 
   1741 	link = symbol_get(ips_link_to_i915_driver);
   1742 	if (link) {
   1743 		link();
   1744 		symbol_put(ips_link_to_i915_driver);
   1745 	}
   1746 #endif
   1747 }
   1748 
   1749 void intel_rps_driver_register(struct intel_rps *rps)
   1750 {
   1751 	struct intel_gt *gt = rps_to_gt(rps);
   1752 
   1753 	/*
   1754 	 * We only register the i915 ips part with intel-ips once everything is
   1755 	 * set up, to avoid intel-ips sneaking in and reading bogus values.
   1756 	 */
   1757 	if (IS_GEN(gt->i915, 5)) {
   1758 		GEM_BUG_ON(ips_mchdev);
   1759 		rcu_assign_pointer(ips_mchdev, gt->i915);
   1760 		ips_ping_for_i915_load();
   1761 	}
   1762 }
   1763 
   1764 void intel_rps_driver_unregister(struct intel_rps *rps)
   1765 {
   1766 	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
   1767 		rcu_assign_pointer(ips_mchdev, NULL);
   1768 }
   1769 
   1770 static struct drm_i915_private *mchdev_get(void)
   1771 {
   1772 	struct drm_i915_private *i915;
   1773 
   1774 	rcu_read_lock();
   1775 	i915 = rcu_dereference(ips_mchdev);
   1776 	if (!kref_get_unless_zero(&i915->drm.ref))
   1777 		i915 = NULL;
   1778 	rcu_read_unlock();
   1779 
   1780 	return i915;
   1781 }
   1782 
   1783 /**
   1784  * i915_read_mch_val - return value for IPS use
   1785  *
   1786  * Calculate and return a value for the IPS driver to use when deciding whether
   1787  * we have thermal and power headroom to increase CPU or GPU power budget.
   1788  */
   1789 unsigned long i915_read_mch_val(void)
   1790 {
   1791 	struct drm_i915_private *i915;
   1792 	unsigned long chipset_val = 0;
   1793 	unsigned long graphics_val = 0;
   1794 	intel_wakeref_t wakeref;
   1795 
   1796 	i915 = mchdev_get();
   1797 	if (!i915)
   1798 		return 0;
   1799 
   1800 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
   1801 		struct intel_ips *ips = &i915->gt.rps.ips;
   1802 
   1803 		spin_lock_irq(&mchdev_lock);
   1804 		chipset_val = __ips_chipset_val(ips);
   1805 		graphics_val = __ips_gfx_val(ips);
   1806 		spin_unlock_irq(&mchdev_lock);
   1807 	}
   1808 
   1809 	drm_dev_put(&i915->drm);
   1810 	return chipset_val + graphics_val;
   1811 }
   1812 EXPORT_SYMBOL_GPL(i915_read_mch_val);
   1813 
   1814 /**
   1815  * i915_gpu_raise - raise GPU frequency limit
   1816  *
   1817  * Raise the limit; IPS indicates we have thermal headroom.
   1818  */
   1819 bool i915_gpu_raise(void)
   1820 {
   1821 	struct drm_i915_private *i915;
   1822 	struct intel_rps *rps;
   1823 
   1824 	i915 = mchdev_get();
   1825 	if (!i915)
   1826 		return false;
   1827 
   1828 	rps = &i915->gt.rps;
   1829 
   1830 	spin_lock_irq(&mchdev_lock);
   1831 	if (rps->max_freq_softlimit < rps->max_freq)
   1832 		rps->max_freq_softlimit++;
   1833 	spin_unlock_irq(&mchdev_lock);
   1834 
   1835 	drm_dev_put(&i915->drm);
   1836 	return true;
   1837 }
   1838 EXPORT_SYMBOL_GPL(i915_gpu_raise);
   1839 
   1840 /**
   1841  * i915_gpu_lower - lower GPU frequency limit
   1842  *
   1843  * IPS indicates we're close to a thermal limit, so throttle back the GPU
   1844  * frequency maximum.
   1845  */
   1846 bool i915_gpu_lower(void)
   1847 {
   1848 	struct drm_i915_private *i915;
   1849 	struct intel_rps *rps;
   1850 
   1851 	i915 = mchdev_get();
   1852 	if (!i915)
   1853 		return false;
   1854 
   1855 	rps = &i915->gt.rps;
   1856 
   1857 	spin_lock_irq(&mchdev_lock);
   1858 	if (rps->max_freq_softlimit > rps->min_freq)
   1859 		rps->max_freq_softlimit--;
   1860 	spin_unlock_irq(&mchdev_lock);
   1861 
   1862 	drm_dev_put(&i915->drm);
   1863 	return true;
   1864 }
   1865 EXPORT_SYMBOL_GPL(i915_gpu_lower);
   1866 
   1867 /**
    1868  * i915_gpu_busy - indicate GPU busyness to IPS
   1869  *
   1870  * Tell the IPS driver whether or not the GPU is busy.
   1871  */
   1872 bool i915_gpu_busy(void)
   1873 {
   1874 	struct drm_i915_private *i915;
   1875 	bool ret;
   1876 
   1877 	i915 = mchdev_get();
   1878 	if (!i915)
   1879 		return false;
   1880 
   1881 	ret = i915->gt.awake;
   1882 
   1883 	drm_dev_put(&i915->drm);
   1884 	return ret;
   1885 }
   1886 EXPORT_SYMBOL_GPL(i915_gpu_busy);
   1887 
   1888 /**
   1889  * i915_gpu_turbo_disable - disable graphics turbo
   1890  *
   1891  * Disable graphics turbo by resetting the max frequency and setting the
   1892  * current frequency to the default.
   1893  */
   1894 bool i915_gpu_turbo_disable(void)
   1895 {
   1896 	struct drm_i915_private *i915;
   1897 	struct intel_rps *rps;
   1898 	bool ret;
   1899 
   1900 	i915 = mchdev_get();
   1901 	if (!i915)
   1902 		return false;
   1903 
   1904 	rps = &i915->gt.rps;
   1905 
   1906 	spin_lock_irq(&mchdev_lock);
   1907 	rps->max_freq_softlimit = rps->min_freq;
   1908 	ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
   1909 	spin_unlock_irq(&mchdev_lock);
   1910 
   1911 	drm_dev_put(&i915->drm);
   1912 	return ret;
   1913 }
   1914 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
   1915