      1 /*	$NetBSD: intel_rps.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $	*/
      2 
      3 /*
      4  * SPDX-License-Identifier: MIT
      5  *
      6  * Copyright © 2019 Intel Corporation
      7  */
      8 
      9 #include <sys/cdefs.h>
     10 __KERNEL_RCSID(0, "$NetBSD: intel_rps.c,v 1.5 2021/12/19 12:32:15 riastradh Exp $");
     11 
     12 #include "i915_drv.h"
     13 #include "intel_gt.h"
     14 #include "intel_gt_irq.h"
     15 #include "intel_gt_pm_irq.h"
     16 #include "intel_rps.h"
     17 #include "intel_sideband.h"
     18 /* #include "../../../platform/x86/intel_ips.h" */
     19 
     20 #include <linux/nbsd-namespace.h>
     21 
     22 /*
     23  * Lock protecting IPS related data structures
     24  */
     25 #ifdef __NetBSD__
     26 spinlock_t mchdev_lock;
     27 #else
     28 static DEFINE_SPINLOCK(mchdev_lock);
     29 #endif
     30 
     31 static struct intel_gt *rps_to_gt(struct intel_rps *rps)
     32 {
     33 	return container_of(rps, struct intel_gt, rps);
     34 }
     35 
     36 static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
     37 {
     38 	return rps_to_gt(rps)->i915;
     39 }
     40 
     41 static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
     42 {
     43 	return rps_to_gt(rps)->uncore;
     44 }
     45 
     46 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
     47 {
     48 	return mask & ~rps->pm_intrmsk_mbz;
     49 }
     50 
     51 static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
     52 {
     53 	intel_uncore_write_fw(uncore, reg, val);
     54 }
     55 
     56 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
     57 {
     58 	u32 mask = 0;
     59 
     60 	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
     61 	if (val > rps->min_freq_softlimit)
     62 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
     63 			 GEN6_PM_RP_DOWN_THRESHOLD |
     64 			 GEN6_PM_RP_DOWN_TIMEOUT);
     65 
     66 	if (val < rps->max_freq_softlimit)
     67 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
     68 
     69 	mask &= rps->pm_events;
     70 
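         	/*
         	 * GEN6_PMINTRMSK is a mask register: a set bit disables the
         	 * corresponding interrupt, hence the wanted events are inverted
         	 * below; rps_pm_sanitize_mask() additionally clears the
         	 * pm_intrmsk_mbz ("must be zero") bits.
         	 */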
     71 	return rps_pm_sanitize_mask(rps, ~mask);
     72 }
     73 
     74 static void rps_reset_ei(struct intel_rps *rps)
     75 {
     76 	memset(&rps->ei, 0, sizeof(rps->ei));
     77 }
     78 
     79 static void rps_enable_interrupts(struct intel_rps *rps)
     80 {
     81 	struct intel_gt *gt = rps_to_gt(rps);
     82 
     83 	rps_reset_ei(rps);
     84 
     85 	if (IS_VALLEYVIEW(gt->i915))
     86 		/* WaGsvRC0ResidencyMethod:vlv */
     87 		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
     88 	else
     89 		rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
     90 				  GEN6_PM_RP_DOWN_THRESHOLD |
     91 				  GEN6_PM_RP_DOWN_TIMEOUT);
     92 
     93 	spin_lock_irq(&gt->irq_lock);
     94 	gen6_gt_pm_enable_irq(gt, rps->pm_events);
     95 	spin_unlock_irq(&gt->irq_lock);
     96 
     97 	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
     98 }
     99 
    100 static void gen6_rps_reset_interrupts(struct intel_rps *rps)
    101 {
    102 	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
    103 }
    104 
    105 static void gen11_rps_reset_interrupts(struct intel_rps *rps)
    106 {
    107 	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
    108 		;
    109 }
    110 
    111 static void rps_reset_interrupts(struct intel_rps *rps)
    112 {
    113 	struct intel_gt *gt = rps_to_gt(rps);
    114 
    115 	spin_lock_irq(&gt->irq_lock);
    116 	if (INTEL_GEN(gt->i915) >= 11)
    117 		gen11_rps_reset_interrupts(rps);
    118 	else
    119 		gen6_rps_reset_interrupts(rps);
    120 
    121 	rps->pm_iir = 0;
    122 	spin_unlock_irq(&gt->irq_lock);
    123 }
    124 
    125 static void rps_disable_interrupts(struct intel_rps *rps)
    126 {
    127 	struct intel_gt *gt = rps_to_gt(rps);
    128 
    129 	rps->pm_events = 0;
    130 
    131 	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
    132 
    133 	spin_lock_irq(&gt->irq_lock);
    134 	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
    135 	spin_unlock_irq(&gt->irq_lock);
    136 
    137 	intel_synchronize_irq(gt->i915);
    138 
    139 	/*
    140 	 * Now that we will not be generating any more work, flush any
    141 	 * outstanding tasks. As we are called on the RPS idle path,
    142 	 * we will reset the GPU to minimum frequencies, so the current
    143 	 * state of the worker can be discarded.
    144 	 */
    145 	cancel_work_sync(&rps->work);
    146 
    147 	rps_reset_interrupts(rps);
    148 }
    149 
    150 static const struct cparams {
    151 	u16 i;
    152 	u16 t;
    153 	u16 m;
    154 	u16 c;
    155 } cparams[] = {
    156 	{ 1, 1333, 301, 28664 },
    157 	{ 1, 1066, 294, 24460 },
    158 	{ 1, 800, 294, 25192 },
    159 	{ 0, 1333, 276, 27605 },
    160 	{ 0, 1066, 276, 27605 },
    161 	{ 0, 800, 231, 23784 },
    162 };
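         /*
          * Each row pairs an FSB-speed class (i, matched against c_m in
          * gen5_rps_init()) and a memory frequency in MHz (t) with the slope (m)
          * and intercept (c) used by __ips_chipset_val() to estimate chipset
          * power.
          */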
    163 
    164 static void gen5_rps_init(struct intel_rps *rps)
    165 {
    166 	struct drm_i915_private *i915 = rps_to_i915(rps);
    167 	struct intel_uncore *uncore = rps_to_uncore(rps);
    168 	u8 fmax, fmin, fstart;
    169 	u32 rgvmodectl;
    170 	int c_m, i;
    171 
    172 	if (i915->fsb_freq <= 3200)
    173 		c_m = 0;
    174 	else if (i915->fsb_freq <= 4800)
    175 		c_m = 1;
    176 	else
    177 		c_m = 2;
    178 
    179 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
    180 		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
    181 			rps->ips.m = cparams[i].m;
    182 			rps->ips.c = cparams[i].c;
    183 			break;
    184 		}
    185 	}
    186 
    187 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    188 
    189 	/* Set up min, max, and cur for interrupt handling */
    190 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
    191 	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
    192 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    193 		MEMMODE_FSTART_SHIFT;
    194 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
    195 			 fmax, fmin, fstart);
    196 
    197 	rps->min_freq = fmax;
    198 	rps->max_freq = fmin;
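         	/*
         	 * Note the deliberate swap: a numerically smaller hardware value
         	 * appears to correspond to a faster clock here, so fmax seeds
         	 * min_freq and fmin seeds max_freq; gen5_rps_set() performs the
         	 * matching inversion when writing MEMSWCTL.
         	 */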
    199 
    200 	rps->idle_freq = rps->min_freq;
    201 	rps->cur_freq = rps->idle_freq;
    202 }
    203 
    204 static unsigned long
    205 __ips_chipset_val(struct intel_ips *ips)
    206 {
    207 	struct intel_uncore *uncore =
    208 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    209 	unsigned long now = jiffies_to_msecs(jiffies), dt;
    210 	unsigned long result;
    211 	u64 total, delta;
    212 
    213 	lockdep_assert_held(&mchdev_lock);
    214 
    215 	/*
    216 	 * Prevent division-by-zero if we are asking too fast.
    217 	 * Also, we don't get interesting results if we are polling
    218 	 * faster than once in 10ms, so just return the saved value
    219 	 * in such cases.
    220 	 */
    221 	dt = now - ips->last_time1;
    222 	if (dt <= 10)
    223 		return ips->chipset_power;
    224 
    225 	/* FIXME: handle per-counter overflow */
    226 	total = intel_uncore_read(uncore, DMIEC);
    227 	total += intel_uncore_read(uncore, DDREC);
    228 	total += intel_uncore_read(uncore, CSIEC);
    229 
    230 	delta = total - ips->last_count1;
    231 
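         	/*
         	 * Chipset power is estimated from the energy counter delta as
         	 * roughly (m * delta / dt + c) / 10, with m and c taken from the
         	 * cparams table for this FSB/memory configuration.
         	 */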
    232 	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
    233 
    234 	ips->last_count1 = total;
    235 	ips->last_time1 = now;
    236 
    237 	ips->chipset_power = result;
    238 
    239 	return result;
    240 }
    241 
    242 static unsigned long ips_mch_val(struct intel_uncore *uncore)
    243 {
    244 	unsigned int m, x, b;
    245 	u32 tsfs;
    246 
    247 	tsfs = intel_uncore_read(uncore, TSFS);
    248 	x = intel_uncore_read8(uncore, TR1);
    249 
    250 	b = tsfs & TSFS_INTR_MASK;
    251 	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
    252 
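         	/*
         	 * Linearize the raw TR1 thermal reading using what appears to be a
         	 * slope/intercept pair fused into TSFS: value = m * x / 127 - b.
         	 */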
    253 	return m * x / 127 - b;
    254 }
    255 
    256 static int _pxvid_to_vd(u8 pxvid)
    257 {
    258 	if (pxvid == 0)
    259 		return 0;
    260 
    261 	if (pxvid >= 8 && pxvid < 31)
    262 		pxvid = 31;
    263 
    264 	return (pxvid + 2) * 125;
    265 }
    266 
    267 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
    268 {
    269 	const int vd = _pxvid_to_vd(pxvid);
    270 
    271 	if (INTEL_INFO(i915)->is_mobile)
    272 		return max(vd - 1125, 0);
    273 
    274 	return vd;
    275 }
    276 
    277 static void __gen5_ips_update(struct intel_ips *ips)
    278 {
    279 	struct intel_uncore *uncore =
    280 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
    281 	u64 now, delta, dt;
    282 	u32 count;
    283 
    284 	lockdep_assert_held(&mchdev_lock);
    285 
    286 	now = ktime_get_raw_ns();
    287 	dt = now - ips->last_time2;
    288 	do_div(dt, NSEC_PER_MSEC);
    289 
    290 	/* Don't divide by 0 */
    291 	if (dt <= 10)
    292 		return;
    293 
    294 	count = intel_uncore_read(uncore, GFXEC);
    295 	delta = count - ips->last_count2;
    296 
    297 	ips->last_count2 = count;
    298 	ips->last_time2 = now;
    299 
    300 	/* More magic constants... */
    301 	ips->gfx_power = div_u64(delta * 1181, dt * 10);
    302 }
    303 
    304 static void gen5_rps_update(struct intel_rps *rps)
    305 {
    306 	spin_lock_irq(&mchdev_lock);
    307 	__gen5_ips_update(&rps->ips);
    308 	spin_unlock_irq(&mchdev_lock);
    309 }
    310 
    311 static bool gen5_rps_set(struct intel_rps *rps, u8 val)
    312 {
    313 	struct intel_uncore *uncore = rps_to_uncore(rps);
    314 	u16 rgvswctl;
    315 
    316 	lockdep_assert_held(&mchdev_lock);
    317 
    318 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    319 	if (rgvswctl & MEMCTL_CMD_STS) {
    320 		DRM_DEBUG("gpu busy, RCS change rejected\n");
    321 		return false; /* still busy with another command */
    322 	}
    323 
    324 	/* Invert the frequency bin into an ips delay */
    325 	val = rps->max_freq - val;
    326 	val = rps->min_freq + val;
    327 
    328 	rgvswctl =
    329 		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
    330 		(val << MEMCTL_FREQ_SHIFT) |
    331 		MEMCTL_SFCAVM;
    332 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    333 	intel_uncore_posting_read16(uncore, MEMSWCTL);
    334 
    335 	rgvswctl |= MEMCTL_CMD_STS;
    336 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
    337 
    338 	return true;
    339 }
    340 
    341 static unsigned long intel_pxfreq(u32 vidfreq)
    342 {
    343 	int div = (vidfreq & 0x3f0000) >> 16;
    344 	int post = (vidfreq & 0x3000) >> 12;
    345 	int pre = (vidfreq & 0x7);
    346 
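         	/*
         	 * PXVFREQ is decoded into a divider (div, bits 16-21), post-divider
         	 * exponent (post, bits 12-13) and pre-divider (pre, bits 0-2); the
         	 * 133333 constant below suggests a 133.33 MHz reference with the
         	 * result expressed in kHz.
         	 */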
    347 	if (!pre)
    348 		return 0;
    349 
    350 	return div * 133333 / (pre << post);
    351 }
    352 
    353 static unsigned int init_emon(struct intel_uncore *uncore)
    354 {
    355 	u8 pxw[16];
    356 	int i;
    357 
    358 	/* Disable to program */
    359 	intel_uncore_write(uncore, ECR, 0);
    360 	intel_uncore_posting_read(uncore, ECR);
    361 
    362 	/* Program energy weights for various events */
    363 	intel_uncore_write(uncore, SDEW, 0x15040d00);
    364 	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
    365 	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
    366 	intel_uncore_write(uncore, CSIEW2, 0x04000004);
    367 
    368 	for (i = 0; i < 5; i++)
    369 		intel_uncore_write(uncore, PEW(i), 0);
    370 	for (i = 0; i < 3; i++)
    371 		intel_uncore_write(uncore, DEW(i), 0);
    372 
    373 	/* Program P-state weights to account for frequency power adjustment */
    374 	for (i = 0; i < 16; i++) {
    375 		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
    376 		unsigned int freq = intel_pxfreq(pxvidfreq);
    377 		unsigned int vid =
    378 			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    379 		unsigned int val;
    380 
    381 		val = vid * vid * freq / 1000 * 255;
    382 		val /= 127 * 127 * 900;
    383 
    384 		pxw[i] = val;
    385 	}
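         	/*
         	 * The weight computed above roughly follows V^2 * f power scaling,
         	 * apparently normalized against a reference VID of 127 and a 900
         	 * (MHz) reference frequency, then rescaled into an 8-bit value for
         	 * the PXW registers.
         	 */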
    386 	/* Render standby states get 0 weight */
    387 	pxw[14] = 0;
    388 	pxw[15] = 0;
    389 
    390 	for (i = 0; i < 4; i++) {
    391 		intel_uncore_write(uncore, PXW(i),
    392 				   pxw[i * 4 + 0] << 24 |
    393 				   pxw[i * 4 + 1] << 16 |
    394 				   pxw[i * 4 + 2] <<  8 |
    395 				   pxw[i * 4 + 3] <<  0);
    396 	}
    397 
    398 	/* Adjust magic regs to magic values (more experimental results) */
    399 	intel_uncore_write(uncore, OGW0, 0);
    400 	intel_uncore_write(uncore, OGW1, 0);
    401 	intel_uncore_write(uncore, EG0, 0x00007f00);
    402 	intel_uncore_write(uncore, EG1, 0x0000000e);
    403 	intel_uncore_write(uncore, EG2, 0x000e0000);
    404 	intel_uncore_write(uncore, EG3, 0x68000300);
    405 	intel_uncore_write(uncore, EG4, 0x42000000);
    406 	intel_uncore_write(uncore, EG5, 0x00140031);
    407 	intel_uncore_write(uncore, EG6, 0);
    408 	intel_uncore_write(uncore, EG7, 0);
    409 
    410 	for (i = 0; i < 8; i++)
    411 		intel_uncore_write(uncore, PXWL(i), 0);
    412 
    413 	/* Enable PMON + select events */
    414 	intel_uncore_write(uncore, ECR, 0x80000019);
    415 
    416 	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
    417 }
    418 
    419 static bool gen5_rps_enable(struct intel_rps *rps)
    420 {
    421 	struct intel_uncore *uncore = rps_to_uncore(rps);
    422 	u8 fstart, vstart;
    423 	u32 rgvmodectl;
    424 
    425 	spin_lock_irq(&mchdev_lock);
    426 
    427 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
    428 
    429 	/* Enable temp reporting */
    430 	intel_uncore_write16(uncore, PMMISC,
    431 			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
    432 	intel_uncore_write16(uncore, TSC1,
    433 			     intel_uncore_read16(uncore, TSC1) | TSE);
    434 
    435 	/* 100ms RC evaluation intervals */
    436 	intel_uncore_write(uncore, RCUPEI, 100000);
    437 	intel_uncore_write(uncore, RCDNEI, 100000);
    438 
    439 	/* Set max/min thresholds to 90ms and 80ms respectively */
    440 	intel_uncore_write(uncore, RCBMAXAVG, 90000);
    441 	intel_uncore_write(uncore, RCBMINAVG, 80000);
    442 
    443 	intel_uncore_write(uncore, MEMIHYST, 1);
    444 
    445 	/* Set up min, max, and cur for interrupt handling */
    446 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
    447 		MEMMODE_FSTART_SHIFT;
    448 
    449 	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
    450 		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
    451 
    452 	intel_uncore_write(uncore,
    453 			   MEMINTREN,
    454 			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
    455 
    456 	intel_uncore_write(uncore, VIDSTART, vstart);
    457 	intel_uncore_posting_read(uncore, VIDSTART);
    458 
    459 	rgvmodectl |= MEMMODE_SWMODE_EN;
    460 	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
    461 
    462 	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
    463 			     MEMCTL_CMD_STS) == 0, 10))
    464 		DRM_ERROR("stuck trying to change perf mode\n");
    465 	mdelay(1);
    466 
    467 	gen5_rps_set(rps, rps->cur_freq);
    468 
    469 	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
    470 	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
    471 	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
    472 	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
    473 
    474 	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
    475 	rps->ips.last_time2 = ktime_get_raw_ns();
    476 
    477 	spin_unlock_irq(&mchdev_lock);
    478 
    479 	rps->ips.corr = init_emon(uncore);
    480 
    481 	return true;
    482 }
    483 
    484 static void gen5_rps_disable(struct intel_rps *rps)
    485 {
    486 	struct intel_uncore *uncore = rps_to_uncore(rps);
    487 	u16 rgvswctl;
    488 
    489 	spin_lock_irq(&mchdev_lock);
    490 
    491 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
    492 
    493 	/* Ack interrupts, disable EFC interrupt */
    494 	intel_uncore_write(uncore, MEMINTREN,
    495 			   intel_uncore_read(uncore, MEMINTREN) &
    496 			   ~MEMINT_EVAL_CHG_EN);
    497 	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
    498 	intel_uncore_write(uncore, DEIER,
    499 			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
    500 	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
    501 	intel_uncore_write(uncore, DEIMR,
    502 			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
    503 
    504 	/* Go back to the starting frequency */
    505 	gen5_rps_set(rps, rps->idle_freq);
    506 	mdelay(1);
    507 	rgvswctl |= MEMCTL_CMD_STS;
    508 	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
    509 	mdelay(1);
    510 
    511 	spin_unlock_irq(&mchdev_lock);
    512 }
    513 
    514 static u32 rps_limits(struct intel_rps *rps, u8 val)
    515 {
    516 	u32 limits;
    517 
    518 	/*
    519 	 * Only set the down limit when we've reached the lowest level to avoid
    520 	 * getting more interrupts, otherwise leave this clear. This prevents a
    521 	 * race in the hw when coming out of rc6: There's a tiny window where
    522 	 * the hw runs at the minimal clock before selecting the desired
    523 	 * frequency, if the down threshold expires in that window we will not
    524 	 * receive a down interrupt.
    525 	 */
    526 	if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
    527 		limits = rps->max_freq_softlimit << 23;
    528 		if (val <= rps->min_freq_softlimit)
    529 			limits |= rps->min_freq_softlimit << 14;
    530 	} else {
    531 		limits = rps->max_freq_softlimit << 24;
    532 		if (val <= rps->min_freq_softlimit)
    533 			limits |= rps->min_freq_softlimit << 16;
    534 	}
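         	/*
         	 * Example layout written to GEN6_RP_INTERRUPT_LIMITS: on gen9+ the
         	 * max softlimit sits at bit 23 and the (optional) min at bit 14;
         	 * earlier gens use bits 24 and 16 respectively.
         	 */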
    535 
    536 	return limits;
    537 }
    538 
    539 static void rps_set_power(struct intel_rps *rps, int new_power)
    540 {
    541 	struct intel_uncore *uncore = rps_to_uncore(rps);
    542 	struct drm_i915_private *i915 = rps_to_i915(rps);
    543 	u32 threshold_up = 0, threshold_down = 0; /* in % */
    544 	u32 ei_up = 0, ei_down = 0;
    545 
    546 	lockdep_assert_held(&rps->power.mutex);
    547 
    548 	if (new_power == rps->power.mode)
    549 		return;
    550 
    551 	/* Note the units here are not exactly 1us, but 1280ns. */
    552 	switch (new_power) {
    553 	case LOW_POWER:
    554 		/* Upclock if more than 95% busy over 16ms */
    555 		ei_up = 16000;
    556 		threshold_up = 95;
    557 
    558 		/* Downclock if less than 85% busy over 32ms */
    559 		ei_down = 32000;
    560 		threshold_down = 85;
    561 		break;
    562 
    563 	case BETWEEN:
    564 		/* Upclock if more than 90% busy over 13ms */
    565 		ei_up = 13000;
    566 		threshold_up = 90;
    567 
    568 		/* Downclock if less than 75% busy over 32ms */
    569 		ei_down = 32000;
    570 		threshold_down = 75;
    571 		break;
    572 
    573 	case HIGH_POWER:
    574 		/* Upclock if more than 85% busy over 10ms */
    575 		ei_up = 10000;
    576 		threshold_up = 85;
    577 
    578 		/* Downclock if less than 60% busy over 32ms */
    579 		ei_down = 32000;
    580 		threshold_down = 60;
    581 		break;
    582 	}
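         	/*
         	 * Illustration for LOW_POWER: GT_INTERVAL_FROM_US() converts the
         	 * 16000 (us-based) up interval into GT clock units below, and the
         	 * up threshold is programmed as 95% of that interval, so an
         	 * up-clock is only requested when the GPU was busy for more than
         	 * ~95% of the evaluation window.
         	 */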
    583 
     584 	/* Once byt can survive dynamic sw freq adjustments without
     585 	 * hanging the system, this restriction can be lifted.
    586 	 */
    587 	if (IS_VALLEYVIEW(i915))
    588 		goto skip_hw_write;
    589 
    590 	set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
    591 	set(uncore, GEN6_RP_UP_THRESHOLD,
    592 	    GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
    593 
    594 	set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
    595 	set(uncore, GEN6_RP_DOWN_THRESHOLD,
    596 	    GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
    597 
    598 	set(uncore, GEN6_RP_CONTROL,
    599 	    (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
    600 	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
    601 	    GEN6_RP_MEDIA_IS_GFX |
    602 	    GEN6_RP_ENABLE |
    603 	    GEN6_RP_UP_BUSY_AVG |
    604 	    GEN6_RP_DOWN_IDLE_AVG);
    605 
    606 skip_hw_write:
    607 	rps->power.mode = new_power;
    608 	rps->power.up_threshold = threshold_up;
    609 	rps->power.down_threshold = threshold_down;
    610 }
    611 
    612 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
    613 {
    614 	int new_power;
    615 
    616 	new_power = rps->power.mode;
    617 	switch (rps->power.mode) {
    618 	case LOW_POWER:
    619 		if (val > rps->efficient_freq + 1 &&
    620 		    val > rps->cur_freq)
    621 			new_power = BETWEEN;
    622 		break;
    623 
    624 	case BETWEEN:
    625 		if (val <= rps->efficient_freq &&
    626 		    val < rps->cur_freq)
    627 			new_power = LOW_POWER;
    628 		else if (val >= rps->rp0_freq &&
    629 			 val > rps->cur_freq)
    630 			new_power = HIGH_POWER;
    631 		break;
    632 
    633 	case HIGH_POWER:
    634 		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
    635 		    val < rps->cur_freq)
    636 			new_power = BETWEEN;
    637 		break;
    638 	}
    639 	/* Max/min bins are special */
    640 	if (val <= rps->min_freq_softlimit)
    641 		new_power = LOW_POWER;
    642 	if (val >= rps->max_freq_softlimit)
    643 		new_power = HIGH_POWER;
    644 
    645 	mutex_lock(&rps->power.mutex);
    646 	if (rps->power.interactive)
    647 		new_power = HIGH_POWER;
    648 	rps_set_power(rps, new_power);
    649 	mutex_unlock(&rps->power.mutex);
    650 }
    651 
    652 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
    653 {
    654 	mutex_lock(&rps->power.mutex);
    655 	if (interactive) {
    656 		if (!rps->power.interactive++ && rps->active)
    657 			rps_set_power(rps, HIGH_POWER);
    658 	} else {
    659 		GEM_BUG_ON(!rps->power.interactive);
    660 		rps->power.interactive--;
    661 	}
    662 	mutex_unlock(&rps->power.mutex);
    663 }
    664 
    665 static int gen6_rps_set(struct intel_rps *rps, u8 val)
    666 {
    667 	struct intel_uncore *uncore = rps_to_uncore(rps);
    668 	struct drm_i915_private *i915 = rps_to_i915(rps);
    669 	u32 swreq;
    670 
    671 	if (INTEL_GEN(i915) >= 9)
    672 		swreq = GEN9_FREQUENCY(val);
    673 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
    674 		swreq = HSW_FREQUENCY(val);
    675 	else
    676 		swreq = (GEN6_FREQUENCY(val) |
    677 			 GEN6_OFFSET(0) |
    678 			 GEN6_AGGRESSIVE_TURBO);
    679 	set(uncore, GEN6_RPNSWREQ, swreq);
    680 
    681 	return 0;
    682 }
    683 
    684 static int vlv_rps_set(struct intel_rps *rps, u8 val)
    685 {
    686 	struct drm_i915_private *i915 = rps_to_i915(rps);
    687 	int err;
    688 
    689 	vlv_punit_get(i915);
    690 	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
    691 	vlv_punit_put(i915);
    692 
    693 	return err;
    694 }
    695 
    696 static int rps_set(struct intel_rps *rps, u8 val, bool update)
    697 {
    698 	struct drm_i915_private *i915 = rps_to_i915(rps);
    699 	int err;
    700 
    701 	if (INTEL_GEN(i915) < 6)
    702 		return 0;
    703 
    704 	if (val == rps->last_freq)
    705 		return 0;
    706 
    707 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
    708 		err = vlv_rps_set(rps, val);
    709 	else
    710 		err = gen6_rps_set(rps, val);
    711 	if (err)
    712 		return err;
    713 
    714 	if (update)
    715 		gen6_rps_set_thresholds(rps, val);
    716 	rps->last_freq = val;
    717 
    718 	return 0;
    719 }
    720 
    721 void intel_rps_unpark(struct intel_rps *rps)
    722 {
    723 	u8 freq;
    724 
    725 	if (!rps->enabled)
    726 		return;
    727 
    728 	/*
    729 	 * Use the user's desired frequency as a guide, but for better
    730 	 * performance, jump directly to RPe as our starting frequency.
    731 	 */
    732 	mutex_lock(&rps->lock);
    733 	rps->active = true;
     734 	freq = max(rps->cur_freq, rps->efficient_freq);
    735 	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
    736 	intel_rps_set(rps, freq);
    737 	rps->last_adj = 0;
    738 	mutex_unlock(&rps->lock);
    739 
    740 	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
    741 		rps_enable_interrupts(rps);
    742 
    743 	if (IS_GEN(rps_to_i915(rps), 5))
    744 		gen5_rps_update(rps);
    745 }
    746 
    747 void intel_rps_park(struct intel_rps *rps)
    748 {
    749 	struct drm_i915_private *i915 = rps_to_i915(rps);
    750 
    751 	if (!rps->enabled)
    752 		return;
    753 
    754 	if (INTEL_GEN(i915) >= 6)
    755 		rps_disable_interrupts(rps);
    756 
    757 	rps->active = false;
    758 	if (rps->last_freq <= rps->idle_freq)
    759 		return;
    760 
    761 	/*
    762 	 * The punit delays the write of the frequency and voltage until it
    763 	 * determines the GPU is awake. During normal usage we don't want to
    764 	 * waste power changing the frequency if the GPU is sleeping (rc6).
     765 	 * However, the GPU and driver are now idle and we do not want to delay
    766 	 * switching to minimum voltage (reducing power whilst idle) as we do
    767 	 * not expect to be woken in the near future and so must flush the
    768 	 * change by waking the device.
    769 	 *
    770 	 * We choose to take the media powerwell (either would do to trick the
    771 	 * punit into committing the voltage change) as that takes a lot less
    772 	 * power than the render powerwell.
    773 	 */
    774 	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    775 	rps_set(rps, rps->idle_freq, false);
    776 	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
    777 }
    778 
    779 void intel_rps_boost(struct i915_request *rq)
    780 {
    781 	struct intel_rps *rps = &rq->engine->gt->rps;
    782 	unsigned long flags;
    783 
    784 	if (i915_request_signaled(rq) || !rps->active)
    785 		return;
    786 
    787 	/* Serializes with i915_request_retire() */
    788 	spin_lock_irqsave(&rq->lock, flags);
    789 	if (!i915_request_has_waitboost(rq) &&
    790 	    !dma_fence_is_signaled_locked(&rq->fence)) {
    791 		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
    792 
    793 		if (!atomic_fetch_inc(&rps->num_waiters) &&
    794 		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
    795 			schedule_work(&rps->work);
    796 
    797 		atomic_inc(&rps->boosts);
    798 	}
    799 	spin_unlock_irqrestore(&rq->lock, flags);
    800 }
    801 
    802 int intel_rps_set(struct intel_rps *rps, u8 val)
    803 {
    804 	int err;
    805 
    806 	lockdep_assert_held(&rps->lock);
    807 	GEM_BUG_ON(val > rps->max_freq);
    808 	GEM_BUG_ON(val < rps->min_freq);
    809 
    810 	if (rps->active) {
    811 		err = rps_set(rps, val, true);
    812 		if (err)
    813 			return err;
    814 
    815 		/*
    816 		 * Make sure we continue to get interrupts
    817 		 * until we hit the minimum or maximum frequencies.
    818 		 */
    819 		if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
    820 			struct intel_uncore *uncore = rps_to_uncore(rps);
    821 
    822 			set(uncore,
    823 			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
    824 
    825 			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
    826 		}
    827 	}
    828 
    829 	rps->cur_freq = val;
    830 	return 0;
    831 }
    832 
    833 static void gen6_rps_init(struct intel_rps *rps)
    834 {
    835 	struct drm_i915_private *i915 = rps_to_i915(rps);
    836 	struct intel_uncore *uncore = rps_to_uncore(rps);
    837 
    838 	/* All of these values are in units of 50MHz */
    839 
    840 	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
    841 	if (IS_GEN9_LP(i915)) {
    842 		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
    843 
    844 		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
    845 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
    846 		rps->min_freq = (rp_state_cap >>  0) & 0xff;
    847 	} else {
    848 		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
    849 
    850 		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
    851 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
    852 		rps->min_freq = (rp_state_cap >> 16) & 0xff;
    853 	}
    854 
    855 	/* hw_max = RP0 until we check for overclocking */
    856 	rps->max_freq = rps->rp0_freq;
    857 
    858 	rps->efficient_freq = rps->rp1_freq;
    859 	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
    860 	    IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
    861 		u32 ddcc_status = 0;
    862 
    863 		if (sandybridge_pcode_read(i915,
    864 					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
    865 					   &ddcc_status, NULL) == 0)
    866 			rps->efficient_freq =
    867 				clamp_t(u8,
    868 					(ddcc_status >> 8) & 0xff,
    869 					rps->min_freq,
    870 					rps->max_freq);
    871 	}
    872 
    873 	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
     874 		/* Store the frequency values in 16.66 MHz units, which is
    875 		 * the natural hardware unit for SKL
    876 		 */
    877 		rps->rp0_freq *= GEN9_FREQ_SCALER;
    878 		rps->rp1_freq *= GEN9_FREQ_SCALER;
    879 		rps->min_freq *= GEN9_FREQ_SCALER;
    880 		rps->max_freq *= GEN9_FREQ_SCALER;
    881 		rps->efficient_freq *= GEN9_FREQ_SCALER;
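         		/*
         		 * For illustration, assuming GEN9_FREQ_SCALER is 3: a raw RP0
         		 * of 22 (22 * 50 MHz = 1100 MHz) is stored as 66, i.e. 66
         		 * units of ~16.66 MHz, still 1100 MHz.
         		 */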
    882 	}
    883 }
    884 
    885 static bool rps_reset(struct intel_rps *rps)
    886 {
    887 	/* force a reset */
    888 	rps->power.mode = -1;
    889 	rps->last_freq = -1;
    890 
    891 	if (rps_set(rps, rps->min_freq, true)) {
    892 		DRM_ERROR("Failed to reset RPS to initial values\n");
    893 		return false;
    894 	}
    895 
    896 	rps->cur_freq = rps->min_freq;
    897 	return true;
    898 }
    899 
    900 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
    901 static bool gen9_rps_enable(struct intel_rps *rps)
    902 {
    903 	struct drm_i915_private *i915 = rps_to_i915(rps);
    904 	struct intel_uncore *uncore = rps_to_uncore(rps);
    905 
    906 	/* Program defaults and thresholds for RPS */
    907 	if (IS_GEN(i915, 9))
    908 		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
    909 				      GEN9_FREQUENCY(rps->rp1_freq));
    910 
    911 	/* 1 second timeout */
    912 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
    913 			      GT_INTERVAL_FROM_US(i915, 1000000));
    914 
    915 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
    916 
    917 	return rps_reset(rps);
    918 }
    919 
    920 static bool gen8_rps_enable(struct intel_rps *rps)
    921 {
    922 	struct intel_uncore *uncore = rps_to_uncore(rps);
    923 
    924 	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
    925 			      HSW_FREQUENCY(rps->rp1_freq));
    926 
    927 	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
    928 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
    929 			      100000000 / 128); /* 1 second timeout */
    930 
    931 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
    932 
    933 	return rps_reset(rps);
    934 }
    935 
    936 static bool gen6_rps_enable(struct intel_rps *rps)
    937 {
    938 	struct intel_uncore *uncore = rps_to_uncore(rps);
    939 
    940 	/* Power down if completely idle for over 50ms */
    941 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
    942 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
    943 
    944 	return rps_reset(rps);
    945 }
    946 
    947 static int chv_rps_max_freq(struct intel_rps *rps)
    948 {
    949 	struct drm_i915_private *i915 = rps_to_i915(rps);
    950 	u32 val;
    951 
    952 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
    953 
    954 	switch (RUNTIME_INFO(i915)->sseu.eu_total) {
    955 	case 8:
    956 		/* (2 * 4) config */
    957 		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
    958 		break;
    959 	case 12:
    960 		/* (2 * 6) config */
    961 		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
    962 		break;
    963 	case 16:
    964 		/* (2 * 8) config */
    965 	default:
    966 		/* Setting (2 * 8) Min RP0 for any other combination */
    967 		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
    968 		break;
    969 	}
    970 
    971 	return val & FB_GFX_FREQ_FUSE_MASK;
    972 }
    973 
    974 static int chv_rps_rpe_freq(struct intel_rps *rps)
    975 {
    976 	struct drm_i915_private *i915 = rps_to_i915(rps);
    977 	u32 val;
    978 
    979 	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
    980 	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
    981 
    982 	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
    983 }
    984 
    985 static int chv_rps_guar_freq(struct intel_rps *rps)
    986 {
    987 	struct drm_i915_private *i915 = rps_to_i915(rps);
    988 	u32 val;
    989 
    990 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
    991 
    992 	return val & FB_GFX_FREQ_FUSE_MASK;
    993 }
    994 
    995 static u32 chv_rps_min_freq(struct intel_rps *rps)
    996 {
    997 	struct drm_i915_private *i915 = rps_to_i915(rps);
    998 	u32 val;
    999 
   1000 	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
   1001 	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
   1002 
   1003 	return val & FB_GFX_FREQ_FUSE_MASK;
   1004 }
   1005 
   1006 static bool chv_rps_enable(struct intel_rps *rps)
   1007 {
   1008 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1009 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1010 	u32 val;
   1011 
    1012 	/* 1: Program defaults and thresholds for RPS */
   1013 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1014 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1015 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1016 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1017 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1018 
   1019 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1020 
   1021 	/* 2: Enable RPS */
   1022 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1023 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1024 			      GEN6_RP_MEDIA_IS_GFX |
   1025 			      GEN6_RP_ENABLE |
   1026 			      GEN6_RP_UP_BUSY_AVG |
   1027 			      GEN6_RP_DOWN_IDLE_AVG);
   1028 
   1029 	/* Setting Fixed Bias */
   1030 	vlv_punit_get(i915);
   1031 
   1032 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
   1033 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1034 
   1035 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1036 
   1037 	vlv_punit_put(i915);
   1038 
   1039 	/* RPS code assumes GPLL is used */
   1040 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
   1041 
   1042 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
   1043 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
   1044 
   1045 	return rps_reset(rps);
   1046 }
   1047 
   1048 static int vlv_rps_guar_freq(struct intel_rps *rps)
   1049 {
   1050 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1051 	u32 val, rp1;
   1052 
   1053 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1054 
   1055 	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
   1056 	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
   1057 
   1058 	return rp1;
   1059 }
   1060 
   1061 static int vlv_rps_max_freq(struct intel_rps *rps)
   1062 {
   1063 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1064 	u32 val, rp0;
   1065 
   1066 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
   1067 
   1068 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
   1069 	/* Clamp to max */
   1070 	rp0 = min_t(u32, rp0, 0xea);
   1071 
   1072 	return rp0;
   1073 }
   1074 
   1075 static int vlv_rps_rpe_freq(struct intel_rps *rps)
   1076 {
   1077 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1078 	u32 val, rpe;
   1079 
   1080 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
   1081 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
   1082 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
   1083 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
   1084 
   1085 	return rpe;
   1086 }
   1087 
   1088 static int vlv_rps_min_freq(struct intel_rps *rps)
   1089 {
   1090 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1091 	u32 val;
   1092 
   1093 	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
   1094 	/*
   1095 	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
   1096 	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
   1097 	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
    1098 	 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
   1099 	 * to make sure it matches what Punit accepts.
   1100 	 */
   1101 	return max_t(u32, val, 0xc0);
   1102 }
   1103 
   1104 static bool vlv_rps_enable(struct intel_rps *rps)
   1105 {
   1106 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1107 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1108 	u32 val;
   1109 
   1110 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
   1111 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
   1112 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
   1113 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
   1114 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
   1115 
   1116 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
   1117 
   1118 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
   1119 			      GEN6_RP_MEDIA_TURBO |
   1120 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
   1121 			      GEN6_RP_MEDIA_IS_GFX |
   1122 			      GEN6_RP_ENABLE |
   1123 			      GEN6_RP_UP_BUSY_AVG |
   1124 			      GEN6_RP_DOWN_IDLE_CONT);
   1125 
   1126 	vlv_punit_get(i915);
   1127 
   1128 	/* Setting Fixed Bias */
   1129 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
   1130 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
   1131 
   1132 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1133 
   1134 	vlv_punit_put(i915);
   1135 
   1136 	/* RPS code assumes GPLL is used */
   1137 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
   1138 
   1139 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
   1140 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
   1141 
   1142 	return rps_reset(rps);
   1143 }
   1144 
   1145 static unsigned long __ips_gfx_val(struct intel_ips *ips)
   1146 {
   1147 	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
   1148 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1149 	unsigned long t, corr, state1, corr2, state2;
   1150 	u32 pxvid, ext_v;
   1151 
   1152 	lockdep_assert_held(&mchdev_lock);
   1153 
   1154 	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
   1155 	pxvid = (pxvid >> 24) & 0x7f;
   1156 	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
   1157 
   1158 	state1 = ext_v;
   1159 
   1160 	/* Revel in the empirically derived constants */
   1161 
   1162 	/* Correction factor in 1/100000 units */
   1163 	t = ips_mch_val(uncore);
   1164 	if (t > 80)
   1165 		corr = t * 2349 + 135940;
   1166 	else if (t >= 50)
   1167 		corr = t * 964 + 29317;
   1168 	else /* < 50 */
   1169 		corr = t * 301 + 1004;
   1170 
   1171 	corr = corr * 150142 * state1 / 10000 - 78642;
   1172 	corr /= 100000;
   1173 	corr2 = corr * ips->corr;
   1174 
   1175 	state2 = corr2 * state1 / 10000;
   1176 	state2 /= 100; /* convert to mW */
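         	/*
         	 * state2 combines the voltage-derived state1 with the
         	 * temperature-dependent correction and the EMON correction factor
         	 * (ips->corr), in mW; it is added to the EI-based gfx_power
         	 * refreshed by __gen5_ips_update() below.
         	 */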
   1177 
   1178 	__gen5_ips_update(ips);
   1179 
   1180 	return ips->gfx_power + state2;
   1181 }
   1182 
   1183 void intel_rps_enable(struct intel_rps *rps)
   1184 {
   1185 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1186 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1187 
   1188 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
   1189 	if (IS_CHERRYVIEW(i915))
   1190 		rps->enabled = chv_rps_enable(rps);
   1191 	else if (IS_VALLEYVIEW(i915))
   1192 		rps->enabled = vlv_rps_enable(rps);
   1193 	else if (INTEL_GEN(i915) >= 9)
   1194 		rps->enabled = gen9_rps_enable(rps);
   1195 	else if (INTEL_GEN(i915) >= 8)
   1196 		rps->enabled = gen8_rps_enable(rps);
   1197 	else if (INTEL_GEN(i915) >= 6)
   1198 		rps->enabled = gen6_rps_enable(rps);
   1199 	else if (IS_IRONLAKE_M(i915))
   1200 		rps->enabled = gen5_rps_enable(rps);
   1201 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
   1202 	if (!rps->enabled)
   1203 		return;
   1204 
   1205 	WARN_ON(rps->max_freq < rps->min_freq);
   1206 	WARN_ON(rps->idle_freq > rps->max_freq);
   1207 
   1208 	WARN_ON(rps->efficient_freq < rps->min_freq);
   1209 	WARN_ON(rps->efficient_freq > rps->max_freq);
   1210 }
   1211 
   1212 static void gen6_rps_disable(struct intel_rps *rps)
   1213 {
   1214 	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
   1215 }
   1216 
   1217 void intel_rps_disable(struct intel_rps *rps)
   1218 {
   1219 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1220 
   1221 	rps->enabled = false;
   1222 
   1223 	if (INTEL_GEN(i915) >= 6)
   1224 		gen6_rps_disable(rps);
   1225 	else if (IS_IRONLAKE_M(i915))
   1226 		gen5_rps_disable(rps);
   1227 }
   1228 
   1229 static int byt_gpu_freq(struct intel_rps *rps, int val)
   1230 {
   1231 	/*
   1232 	 * N = val - 0xb7
   1233 	 * Slow = Fast = GPLL ref * N
   1234 	 */
   1235 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
   1236 }
   1237 
   1238 static int byt_freq_opcode(struct intel_rps *rps, int val)
   1239 {
   1240 	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
   1241 }
   1242 
   1243 static int chv_gpu_freq(struct intel_rps *rps, int val)
   1244 {
   1245 	/*
   1246 	 * N = val / 2
   1247 	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
   1248 	 */
   1249 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
   1250 }
   1251 
   1252 static int chv_freq_opcode(struct intel_rps *rps, int val)
   1253 {
   1254 	/* CHV needs even values */
   1255 	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
   1256 }
   1257 
   1258 int intel_gpu_freq(struct intel_rps *rps, int val)
   1259 {
   1260 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1261 
   1262 	if (INTEL_GEN(i915) >= 9)
   1263 		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
   1264 					 GEN9_FREQ_SCALER);
   1265 	else if (IS_CHERRYVIEW(i915))
   1266 		return chv_gpu_freq(rps, val);
   1267 	else if (IS_VALLEYVIEW(i915))
   1268 		return byt_gpu_freq(rps, val);
   1269 	else
   1270 		return val * GT_FREQUENCY_MULTIPLIER;
   1271 }
   1272 
   1273 int intel_freq_opcode(struct intel_rps *rps, int val)
   1274 {
   1275 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1276 
   1277 	if (INTEL_GEN(i915) >= 9)
   1278 		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
   1279 					 GT_FREQUENCY_MULTIPLIER);
   1280 	else if (IS_CHERRYVIEW(i915))
   1281 		return chv_freq_opcode(rps, val);
   1282 	else if (IS_VALLEYVIEW(i915))
   1283 		return byt_freq_opcode(rps, val);
   1284 	else
   1285 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
   1286 }
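
         /*
          * Sanity example for the two helpers above, assuming
          * GT_FREQUENCY_MULTIPLIER is 50 and GEN9_FREQ_SCALER is 3: on gen9+ an
          * opcode of 66 converts to 66 * 50 / 3 = 1100 MHz, and
          * intel_freq_opcode(1100) maps back to 66; on older platforms each
          * opcode step is a flat 50 MHz.
          */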
   1287 
   1288 static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
   1289 {
   1290 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1291 
   1292 	rps->gpll_ref_freq =
   1293 		vlv_get_cck_clock(i915, "GPLL ref",
   1294 				  CCK_GPLL_CLOCK_CONTROL,
   1295 				  i915->czclk_freq);
   1296 
   1297 	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
   1298 }
   1299 
   1300 static void vlv_rps_init(struct intel_rps *rps)
   1301 {
   1302 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1303 	u32 val;
   1304 
   1305 	vlv_iosf_sb_get(i915,
   1306 			BIT(VLV_IOSF_SB_PUNIT) |
   1307 			BIT(VLV_IOSF_SB_NC) |
   1308 			BIT(VLV_IOSF_SB_CCK));
   1309 
   1310 	vlv_init_gpll_ref_freq(rps);
   1311 
   1312 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1313 	switch ((val >> 6) & 3) {
   1314 	case 0:
   1315 	case 1:
   1316 		i915->mem_freq = 800;
   1317 		break;
   1318 	case 2:
   1319 		i915->mem_freq = 1066;
   1320 		break;
   1321 	case 3:
   1322 		i915->mem_freq = 1333;
   1323 		break;
   1324 	}
   1325 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
   1326 
   1327 	rps->max_freq = vlv_rps_max_freq(rps);
   1328 	rps->rp0_freq = rps->max_freq;
   1329 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
   1330 			 intel_gpu_freq(rps, rps->max_freq),
   1331 			 rps->max_freq);
   1332 
   1333 	rps->efficient_freq = vlv_rps_rpe_freq(rps);
   1334 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
   1335 			 intel_gpu_freq(rps, rps->efficient_freq),
   1336 			 rps->efficient_freq);
   1337 
   1338 	rps->rp1_freq = vlv_rps_guar_freq(rps);
   1339 	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
   1340 			 intel_gpu_freq(rps, rps->rp1_freq),
   1341 			 rps->rp1_freq);
   1342 
   1343 	rps->min_freq = vlv_rps_min_freq(rps);
   1344 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
   1345 			 intel_gpu_freq(rps, rps->min_freq),
   1346 			 rps->min_freq);
   1347 
   1348 	vlv_iosf_sb_put(i915,
   1349 			BIT(VLV_IOSF_SB_PUNIT) |
   1350 			BIT(VLV_IOSF_SB_NC) |
   1351 			BIT(VLV_IOSF_SB_CCK));
   1352 }
   1353 
   1354 static void chv_rps_init(struct intel_rps *rps)
   1355 {
   1356 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1357 	u32 val;
   1358 
   1359 	vlv_iosf_sb_get(i915,
   1360 			BIT(VLV_IOSF_SB_PUNIT) |
   1361 			BIT(VLV_IOSF_SB_NC) |
   1362 			BIT(VLV_IOSF_SB_CCK));
   1363 
   1364 	vlv_init_gpll_ref_freq(rps);
   1365 
   1366 	val = vlv_cck_read(i915, CCK_FUSE_REG);
   1367 
   1368 	switch ((val >> 2) & 0x7) {
   1369 	case 3:
   1370 		i915->mem_freq = 2000;
   1371 		break;
   1372 	default:
   1373 		i915->mem_freq = 1600;
   1374 		break;
   1375 	}
   1376 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
   1377 
   1378 	rps->max_freq = chv_rps_max_freq(rps);
   1379 	rps->rp0_freq = rps->max_freq;
   1380 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
   1381 			 intel_gpu_freq(rps, rps->max_freq),
   1382 			 rps->max_freq);
   1383 
   1384 	rps->efficient_freq = chv_rps_rpe_freq(rps);
   1385 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
   1386 			 intel_gpu_freq(rps, rps->efficient_freq),
   1387 			 rps->efficient_freq);
   1388 
   1389 	rps->rp1_freq = chv_rps_guar_freq(rps);
   1390 	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
   1391 			 intel_gpu_freq(rps, rps->rp1_freq),
   1392 			 rps->rp1_freq);
   1393 
   1394 	rps->min_freq = chv_rps_min_freq(rps);
   1395 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
   1396 			 intel_gpu_freq(rps, rps->min_freq),
   1397 			 rps->min_freq);
   1398 
   1399 	vlv_iosf_sb_put(i915,
   1400 			BIT(VLV_IOSF_SB_PUNIT) |
   1401 			BIT(VLV_IOSF_SB_NC) |
   1402 			BIT(VLV_IOSF_SB_CCK));
   1403 
   1404 	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
   1405 		   rps->min_freq) & 1,
   1406 		  "Odd GPU freq values\n");
   1407 }
   1408 
   1409 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
   1410 {
   1411 	ei->ktime = ktime_get_raw();
   1412 	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
   1413 	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
   1414 }
   1415 
   1416 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
   1417 {
   1418 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1419 	const struct intel_rps_ei *prev = &rps->ei;
   1420 	struct intel_rps_ei now;
   1421 	u32 events = 0;
   1422 
   1423 	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
   1424 		return 0;
   1425 
   1426 	vlv_c0_read(uncore, &now);
   1427 
   1428 	if (prev->ktime) {
   1429 		u64 time, c0;
   1430 		u32 render, media;
   1431 
   1432 		time = ktime_us_delta(now.ktime, prev->ktime);
   1433 
   1434 		time *= rps_to_i915(rps)->czclk_freq;
   1435 
   1436 		/* Workload can be split between render + media,
   1437 		 * e.g. SwapBuffers being blitted in X after being rendered in
   1438 		 * mesa. To account for this we need to combine both engines
   1439 		 * into our activity counter.
   1440 		 */
   1441 		render = now.render_c0 - prev->render_c0;
   1442 		media = now.media_c0 - prev->media_c0;
   1443 		c0 = max(render, media);
   1444 		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
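         		/*
         		 * After this scaling, c0 and time * threshold (in %) are in
         		 * comparable units, so the comparisons below decide whether to
         		 * synthesize an up- or down-threshold event.
         		 */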
   1445 
   1446 		if (c0 > time * rps->power.up_threshold)
   1447 			events = GEN6_PM_RP_UP_THRESHOLD;
   1448 		else if (c0 < time * rps->power.down_threshold)
   1449 			events = GEN6_PM_RP_DOWN_THRESHOLD;
   1450 	}
   1451 
   1452 	rps->ei = now;
   1453 	return events;
   1454 }
   1455 
   1456 static void rps_work(struct work_struct *work)
   1457 {
   1458 	struct intel_rps *rps = container_of(work, typeof(*rps), work);
   1459 	struct intel_gt *gt = rps_to_gt(rps);
   1460 	bool client_boost = false;
   1461 	int new_freq, adj, min, max;
   1462 	u32 pm_iir = 0;
   1463 
   1464 	spin_lock_irq(&gt->irq_lock);
   1465 	pm_iir = fetch_and_zero(&rps->pm_iir);
   1466 	client_boost = atomic_read(&rps->num_waiters);
   1467 	spin_unlock_irq(&gt->irq_lock);
   1468 
   1469 	/* Make sure we didn't queue anything we're not going to process. */
   1470 	if ((pm_iir & rps->pm_events) == 0 && !client_boost)
   1471 		goto out;
   1472 
   1473 	mutex_lock(&rps->lock);
   1474 
   1475 	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
   1476 
   1477 	adj = rps->last_adj;
   1478 	new_freq = rps->cur_freq;
   1479 	min = rps->min_freq_softlimit;
   1480 	max = rps->max_freq_softlimit;
   1481 	if (client_boost)
   1482 		max = rps->max_freq;
   1483 	if (client_boost && new_freq < rps->boost_freq) {
   1484 		new_freq = rps->boost_freq;
   1485 		adj = 0;
   1486 	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
   1487 		if (adj > 0)
   1488 			adj *= 2;
   1489 		else /* CHV needs even encode values */
   1490 			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
   1491 
   1492 		if (new_freq >= rps->max_freq_softlimit)
   1493 			adj = 0;
   1494 	} else if (client_boost) {
   1495 		adj = 0;
   1496 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
   1497 		if (rps->cur_freq > rps->efficient_freq)
   1498 			new_freq = rps->efficient_freq;
   1499 		else if (rps->cur_freq > rps->min_freq_softlimit)
   1500 			new_freq = rps->min_freq_softlimit;
   1501 		adj = 0;
   1502 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
   1503 		if (adj < 0)
   1504 			adj *= 2;
   1505 		else /* CHV needs even encode values */
   1506 			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
   1507 
   1508 		if (new_freq <= rps->min_freq_softlimit)
   1509 			adj = 0;
   1510 	} else { /* unknown event */
   1511 		adj = 0;
   1512 	}
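         	/*
         	 * In effect, consecutive UP_THRESHOLD events step the request by
         	 * +1, +2, +4, ... (even steps on CHV) and consecutive
         	 * DOWN_THRESHOLD events mirror that downwards, while a client boost
         	 * jumps straight to boost_freq.
         	 */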
   1513 
   1514 	rps->last_adj = adj;
   1515 
   1516 	/*
   1517 	 * Limit deboosting and boosting to keep ourselves at the extremes
   1518 	 * when in the respective power modes (i.e. slowly decrease frequencies
   1519 	 * while in the HIGH_POWER zone and slowly increase frequencies while
   1520 	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
   1521 	 * to the next level quickly, and conversely if busy we expect to
   1522 	 * hit a waitboost and rapidly switch into max power.
   1523 	 */
   1524 	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
   1525 	    (adj > 0 && rps->power.mode == LOW_POWER))
   1526 		rps->last_adj = 0;
   1527 
   1528 	/* sysfs frequency interfaces may have snuck in while servicing the
   1529 	 * interrupt
   1530 	 */
   1531 	new_freq += adj;
   1532 	new_freq = clamp_t(int, new_freq, min, max);
   1533 
   1534 	if (intel_rps_set(rps, new_freq)) {
   1535 		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
   1536 		rps->last_adj = 0;
   1537 	}
   1538 
   1539 	mutex_unlock(&rps->lock);
   1540 
   1541 out:
   1542 	spin_lock_irq(&gt->irq_lock);
   1543 	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
   1544 	spin_unlock_irq(&gt->irq_lock);
   1545 }
   1546 
   1547 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1548 {
   1549 	struct intel_gt *gt = rps_to_gt(rps);
   1550 	const u32 events = rps->pm_events & pm_iir;
   1551 
   1552 	lockdep_assert_held(&gt->irq_lock);
   1553 
   1554 	if (unlikely(!events))
   1555 		return;
   1556 
   1557 	gen6_gt_pm_mask_irq(gt, events);
   1558 
   1559 	rps->pm_iir |= events;
   1560 	schedule_work(&rps->work);
   1561 }
   1562 
   1563 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
   1564 {
   1565 	struct intel_gt *gt = rps_to_gt(rps);
   1566 
   1567 	if (pm_iir & rps->pm_events) {
   1568 		spin_lock(&gt->irq_lock);
   1569 		gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
   1570 		rps->pm_iir |= pm_iir & rps->pm_events;
   1571 		schedule_work(&rps->work);
   1572 		spin_unlock(&gt->irq_lock);
   1573 	}
   1574 
   1575 	if (INTEL_GEN(gt->i915) >= 8)
   1576 		return;
   1577 
   1578 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
   1579 		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
   1580 
   1581 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
   1582 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
   1583 }
   1584 
   1585 void gen5_rps_irq_handler(struct intel_rps *rps)
   1586 {
   1587 	struct intel_uncore *uncore = rps_to_uncore(rps);
   1588 	u32 busy_up, busy_down, max_avg, min_avg;
   1589 	u8 new_freq;
   1590 
   1591 	spin_lock(&mchdev_lock);
   1592 
   1593 	intel_uncore_write16(uncore,
   1594 			     MEMINTRSTS,
   1595 			     intel_uncore_read(uncore, MEMINTRSTS));
   1596 
   1597 	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
   1598 	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
   1599 	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
   1600 	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
   1601 	min_avg = intel_uncore_read(uncore, RCBMINAVG);
   1602 
   1603 	/* Handle RCS change request from hw */
   1604 	new_freq = rps->cur_freq;
   1605 	if (busy_up > max_avg)
   1606 		new_freq++;
   1607 	else if (busy_down < min_avg)
   1608 		new_freq--;
   1609 	new_freq = clamp(new_freq,
   1610 			 rps->min_freq_softlimit,
   1611 			 rps->max_freq_softlimit);
   1612 
   1613 	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
   1614 		rps->cur_freq = new_freq;
   1615 
   1616 	spin_unlock(&mchdev_lock);
   1617 }
   1618 
   1619 void intel_rps_init_early(struct intel_rps *rps)
   1620 {
   1621 	mutex_init(&rps->lock);
   1622 	mutex_init(&rps->power.mutex);
   1623 
   1624 	INIT_WORK(&rps->work, rps_work);
   1625 
   1626 	atomic_set(&rps->num_waiters, 0);
   1627 }
   1628 
   1629 void intel_rps_init(struct intel_rps *rps)
   1630 {
   1631 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1632 
   1633 	if (IS_CHERRYVIEW(i915))
   1634 		chv_rps_init(rps);
   1635 	else if (IS_VALLEYVIEW(i915))
   1636 		vlv_rps_init(rps);
   1637 	else if (INTEL_GEN(i915) >= 6)
   1638 		gen6_rps_init(rps);
   1639 	else if (IS_IRONLAKE_M(i915))
   1640 		gen5_rps_init(rps);
   1641 
   1642 	/* Derive initial user preferences/limits from the hardware limits */
   1643 	rps->max_freq_softlimit = rps->max_freq;
   1644 	rps->min_freq_softlimit = rps->min_freq;
   1645 
   1646 	/* After setting max-softlimit, find the overclock max freq */
   1647 	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
   1648 		u32 params = 0;
   1649 
   1650 		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
   1651 				       &params, NULL);
   1652 		if (params & BIT(31)) { /* OC supported */
   1653 			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
   1654 					 (rps->max_freq & 0xff) * 50,
   1655 					 (params & 0xff) * 50);
   1656 			rps->max_freq = params & 0xff;
   1657 		}
   1658 	}
   1659 
   1660 	/* Finally allow us to boost to max by default */
   1661 	rps->boost_freq = rps->max_freq;
   1662 	rps->idle_freq = rps->min_freq;
   1663 	rps->cur_freq = rps->idle_freq;
   1664 
   1665 	rps->pm_intrmsk_mbz = 0;
   1666 
   1667 	/*
    1668 	 * SNB, IVB and HSW can hard hang (and VLV, CHV may) on a looping
    1669 	 * batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.
   1670 	 *
   1671 	 * TODO: verify if this can be reproduced on VLV,CHV.
   1672 	 */
   1673 	if (INTEL_GEN(i915) <= 7)
   1674 		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
   1675 
   1676 	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
   1677 		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
   1678 }
   1679 
   1680 void intel_rps_fini(struct intel_rps *rps)
   1681 {
   1683 	mutex_destroy(&rps->power.mutex);
   1684 	mutex_destroy(&rps->lock);
   1685 }
   1686 
   1687 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
   1688 {
   1689 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1690 	u32 cagf;
   1691 
   1692 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
   1693 		cagf = (rpstat >> 8) & 0xff;
   1694 	else if (INTEL_GEN(i915) >= 9)
   1695 		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
   1696 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
   1697 		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
   1698 	else
   1699 		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
   1700 
   1701 	return cagf;
   1702 }
   1703 
   1704 static u32 read_cagf(struct intel_rps *rps)
   1705 {
   1706 	struct drm_i915_private *i915 = rps_to_i915(rps);
   1707 	u32 freq;
   1708 
   1709 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
   1710 		vlv_punit_get(i915);
   1711 		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
   1712 		vlv_punit_put(i915);
   1713 	} else {
   1714 		freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
   1715 	}
   1716 
   1717 	return intel_rps_get_cagf(rps, freq);
   1718 }
   1719 
   1720 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
   1721 {
   1722 	struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
   1723 	intel_wakeref_t wakeref;
   1724 	u32 freq = 0;
   1725 
   1726 	with_intel_runtime_pm_if_in_use(rpm, wakeref)
   1727 		freq = intel_gpu_freq(rps, read_cagf(rps));
   1728 
   1729 	return freq;
   1730 }
   1731 
   1732 /* External interface for intel_ips.ko */
   1733 
   1734 static struct drm_i915_private __rcu *ips_mchdev;
   1735 
   1736 /**
   1737  * Tells the intel_ips driver that the i915 driver is now loaded, if
   1738  * IPS got loaded first.
   1739  *
   1740  * This awkward dance is so that neither module has to depend on the
   1741  * other in order for IPS to do the appropriate communication of
   1742  * GPU turbo limits to i915.
   1743  */
   1744 static void
   1745 ips_ping_for_i915_load(void)
   1746 {
   1747 #ifndef __NetBSD__		/* XXX IPS GPU turbo limits what?  */
   1748 	void (*link)(void);
   1749 
   1750 	link = symbol_get(ips_link_to_i915_driver);
   1751 	if (link) {
   1752 		link();
   1753 		symbol_put(ips_link_to_i915_driver);
   1754 	}
   1755 #endif
   1756 }
   1757 
   1758 void intel_rps_driver_register(struct intel_rps *rps)
   1759 {
   1760 	struct intel_gt *gt = rps_to_gt(rps);
   1761 
   1762 	/*
   1763 	 * We only register the i915 ips part with intel-ips once everything is
   1764 	 * set up, to avoid intel-ips sneaking in and reading bogus values.
   1765 	 */
   1766 	if (IS_GEN(gt->i915, 5)) {
   1767 		GEM_BUG_ON(ips_mchdev);
   1768 		rcu_assign_pointer(ips_mchdev, gt->i915);
   1769 		ips_ping_for_i915_load();
   1770 	}
   1771 }
   1772 
   1773 void intel_rps_driver_unregister(struct intel_rps *rps)
   1774 {
   1775 	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
   1776 		rcu_assign_pointer(ips_mchdev, NULL);
   1777 }
   1778 
   1779 static struct drm_i915_private *mchdev_get(void)
   1780 {
   1781 	struct drm_i915_private *i915;
   1782 
   1783 	rcu_read_lock();
   1784 	i915 = rcu_dereference(ips_mchdev);
   1785 	if (!kref_get_unless_zero(&i915->drm.ref))
   1786 		i915 = NULL;
   1787 	rcu_read_unlock();
   1788 
   1789 	return i915;
   1790 }
   1791 
   1792 /**
   1793  * i915_read_mch_val - return value for IPS use
   1794  *
   1795  * Calculate and return a value for the IPS driver to use when deciding whether
   1796  * we have thermal and power headroom to increase CPU or GPU power budget.
   1797  */
   1798 unsigned long i915_read_mch_val(void)
   1799 {
   1800 	struct drm_i915_private *i915;
   1801 	unsigned long chipset_val = 0;
   1802 	unsigned long graphics_val = 0;
   1803 	intel_wakeref_t wakeref;
   1804 
   1805 	i915 = mchdev_get();
   1806 	if (!i915)
   1807 		return 0;
   1808 
   1809 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
   1810 		struct intel_ips *ips = &i915->gt.rps.ips;
   1811 
   1812 		spin_lock_irq(&mchdev_lock);
   1813 		chipset_val = __ips_chipset_val(ips);
   1814 		graphics_val = __ips_gfx_val(ips);
   1815 		spin_unlock_irq(&mchdev_lock);
   1816 	}
   1817 
   1818 	drm_dev_put(&i915->drm);
   1819 	return chipset_val + graphics_val;
   1820 }
   1821 EXPORT_SYMBOL_GPL(i915_read_mch_val);
   1822 
   1823 /**
   1824  * i915_gpu_raise - raise GPU frequency limit
   1825  *
   1826  * Raise the limit; IPS indicates we have thermal headroom.
   1827  */
   1828 bool i915_gpu_raise(void)
   1829 {
   1830 	struct drm_i915_private *i915;
   1831 	struct intel_rps *rps;
   1832 
   1833 	i915 = mchdev_get();
   1834 	if (!i915)
   1835 		return false;
   1836 
   1837 	rps = &i915->gt.rps;
   1838 
   1839 	spin_lock_irq(&mchdev_lock);
   1840 	if (rps->max_freq_softlimit < rps->max_freq)
   1841 		rps->max_freq_softlimit++;
   1842 	spin_unlock_irq(&mchdev_lock);
   1843 
   1844 	drm_dev_put(&i915->drm);
   1845 	return true;
   1846 }
   1847 EXPORT_SYMBOL_GPL(i915_gpu_raise);
   1848 
   1849 /**
   1850  * i915_gpu_lower - lower GPU frequency limit
   1851  *
   1852  * IPS indicates we're close to a thermal limit, so throttle back the GPU
   1853  * frequency maximum.
   1854  */
   1855 bool i915_gpu_lower(void)
   1856 {
   1857 	struct drm_i915_private *i915;
   1858 	struct intel_rps *rps;
   1859 
   1860 	i915 = mchdev_get();
   1861 	if (!i915)
   1862 		return false;
   1863 
   1864 	rps = &i915->gt.rps;
   1865 
   1866 	spin_lock_irq(&mchdev_lock);
   1867 	if (rps->max_freq_softlimit > rps->min_freq)
   1868 		rps->max_freq_softlimit--;
   1869 	spin_unlock_irq(&mchdev_lock);
   1870 
   1871 	drm_dev_put(&i915->drm);
   1872 	return true;
   1873 }
   1874 EXPORT_SYMBOL_GPL(i915_gpu_lower);
   1875 
   1876 /**
    1877  * i915_gpu_busy - indicate GPU busyness to IPS
   1878  *
   1879  * Tell the IPS driver whether or not the GPU is busy.
   1880  */
   1881 bool i915_gpu_busy(void)
   1882 {
   1883 	struct drm_i915_private *i915;
   1884 	bool ret;
   1885 
   1886 	i915 = mchdev_get();
   1887 	if (!i915)
   1888 		return false;
   1889 
   1890 	ret = i915->gt.awake;
   1891 
   1892 	drm_dev_put(&i915->drm);
   1893 	return ret;
   1894 }
   1895 EXPORT_SYMBOL_GPL(i915_gpu_busy);
   1896 
   1897 /**
   1898  * i915_gpu_turbo_disable - disable graphics turbo
   1899  *
   1900  * Disable graphics turbo by resetting the max frequency and setting the
   1901  * current frequency to the default.
   1902  */
   1903 bool i915_gpu_turbo_disable(void)
   1904 {
   1905 	struct drm_i915_private *i915;
   1906 	struct intel_rps *rps;
   1907 	bool ret;
   1908 
   1909 	i915 = mchdev_get();
   1910 	if (!i915)
   1911 		return false;
   1912 
   1913 	rps = &i915->gt.rps;
   1914 
   1915 	spin_lock_irq(&mchdev_lock);
   1916 	rps->max_freq_softlimit = rps->min_freq;
   1917 	ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
   1918 	spin_unlock_irq(&mchdev_lock);
   1919 
   1920 	drm_dev_put(&i915->drm);
   1921 	return ret;
   1922 }
   1923 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
   1924