1 /* $NetBSD: intel_pm.c,v 1.18 2019/08/05 10:14:19 msaitoh Exp $ */
2
3 /*
4 * Copyright 2012 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 *
25 * Authors:
26 * Eugeni Dodonov <eugeni.dodonov (at) intel.com>
27 *
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: intel_pm.c,v 1.18 2019/08/05 10:14:19 msaitoh Exp $");
32
33 #include <linux/bitops.h>
34 #include <linux/cpufreq.h>
35 #include <linux/export.h>
36 #include "i915_drv.h"
37 #include "i915_trace.h"
38 #include "intel_drv.h"
39 #ifndef __NetBSD__
40 #include "../../../platform/x86/intel_ips.h"
41 #endif
42 #include <linux/module.h>
43 #include <linux/log2.h>
44 #include <linux/math64.h>
45 #include <linux/time.h>
46
47 /**
48 * RC6 is a special power stage which allows the GPU to enter a very
49 * low-voltage mode when idle, using down to 0V while at this stage. This
50 * stage is entered automatically when the GPU is idle and RC6 support is
51 * enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
52 *
53 * There are different RC6 modes available in Intel GPUs, which differ in
54 * the latency required to enter and leave RC6 and in the voltage consumed
55 * by the GPU in the different states.
56 *
57 * The combination of the following flags defines which states the GPU is
58 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
59 * RC6pp is deepest RC6. Their support by hardware varies according to the
60 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
61 * which brings the most power savings; deeper states save more power, but
62 * require higher latency to switch to and wake up.
63 */
64 #define INTEL_RC6_ENABLE (1<<0)
65 #define INTEL_RC6p_ENABLE (1<<1)
66 #define INTEL_RC6pp_ENABLE (1<<2)
67
68 static void bxt_init_clock_gating(struct drm_device *dev)
69 {
70 struct drm_i915_private *dev_priv = dev->dev_private;
71
72 /* WaDisableSDEUnitClockGating:bxt */
73 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
74 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
75
76 /*
77 * FIXME:
78 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
79 */
80 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
81 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
82 }
83
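/*
 * Derive the FSB and memory clock frequencies from the CLKCFG register and
 * record whether the board uses DDR3 (CSHRDDR3CTL); the results feed the
 * CxSR latency lookup in intel_get_cxsr_latency().
 */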
84 static void i915_pineview_get_mem_freq(struct drm_device *dev)
85 {
86 struct drm_i915_private *dev_priv = dev->dev_private;
87 u32 tmp;
88
89 tmp = I915_READ(CLKCFG);
90
91 switch (tmp & CLKCFG_FSB_MASK) {
92 case CLKCFG_FSB_533:
93 dev_priv->fsb_freq = 533; /* 133*4 */
94 break;
95 case CLKCFG_FSB_800:
96 dev_priv->fsb_freq = 800; /* 200*4 */
97 break;
98 case CLKCFG_FSB_667:
99 dev_priv->fsb_freq = 667; /* 167*4 */
100 break;
101 case CLKCFG_FSB_400:
102 dev_priv->fsb_freq = 400; /* 100*4 */
103 break;
104 }
105
106 switch (tmp & CLKCFG_MEM_MASK) {
107 case CLKCFG_MEM_533:
108 dev_priv->mem_freq = 533;
109 break;
110 case CLKCFG_MEM_667:
111 dev_priv->mem_freq = 667;
112 break;
113 case CLKCFG_MEM_800:
114 dev_priv->mem_freq = 800;
115 break;
116 }
117
118 /* detect pineview DDR3 setting */
119 tmp = I915_READ(CSHRDDR3CTL);
120 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
121 }
122
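/*
 * Decode the DDRMPLL1 and CSIPLL0 register settings into memory and FSB
 * frequencies, and derive the IPS constants (r_t, c_m) from them.
 */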
123 static void i915_ironlake_get_mem_freq(struct drm_device *dev)
124 {
125 struct drm_i915_private *dev_priv = dev->dev_private;
126 u16 ddrpll, csipll;
127
128 ddrpll = I915_READ16(DDRMPLL1);
129 csipll = I915_READ16(CSIPLL0);
130
131 switch (ddrpll & 0xff) {
132 case 0xc:
133 dev_priv->mem_freq = 800;
134 break;
135 case 0x10:
136 dev_priv->mem_freq = 1066;
137 break;
138 case 0x14:
139 dev_priv->mem_freq = 1333;
140 break;
141 case 0x18:
142 dev_priv->mem_freq = 1600;
143 break;
144 default:
145 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
146 ddrpll & 0xff);
147 dev_priv->mem_freq = 0;
148 break;
149 }
150
151 dev_priv->ips.r_t = dev_priv->mem_freq;
152
153 switch (csipll & 0x3ff) {
154 case 0x00c:
155 dev_priv->fsb_freq = 3200;
156 break;
157 case 0x00e:
158 dev_priv->fsb_freq = 3733;
159 break;
160 case 0x010:
161 dev_priv->fsb_freq = 4266;
162 break;
163 case 0x012:
164 dev_priv->fsb_freq = 4800;
165 break;
166 case 0x014:
167 dev_priv->fsb_freq = 5333;
168 break;
169 case 0x016:
170 dev_priv->fsb_freq = 5866;
171 break;
172 case 0x018:
173 dev_priv->fsb_freq = 6400;
174 break;
175 default:
176 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
177 csipll & 0x3ff);
178 dev_priv->fsb_freq = 0;
179 break;
180 }
181
182 if (dev_priv->fsb_freq == 3200) {
183 dev_priv->ips.c_m = 0;
184 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
185 dev_priv->ips.c_m = 1;
186 } else {
187 dev_priv->ips.c_m = 2;
188 }
189 }
190
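/*
 * CxSR (self-refresh) latency table: each entry maps a platform type
 * (desktop/mobile), memory type (DDR2/DDR3) and FSB/memory frequency pair
 * to self-refresh latencies in nanoseconds, which are fed into the
 * watermark calculations below.  Entries are matched by
 * intel_get_cxsr_latency().
 */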
191 static const struct cxsr_latency cxsr_latency_table[] = {
192 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
193 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
194 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
195 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
196 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
197
198 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
199 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
200 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
201 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
202 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
203
204 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
205 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
206 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
207 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
208 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
209
210 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
211 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
212 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
213 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
214 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
215
216 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
217 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
218 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
219 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
220 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
221
222 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
223 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
224 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
225 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
226 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
227 };
228
229 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
230 int is_ddr3,
231 int fsb,
232 int mem)
233 {
234 const struct cxsr_latency *latency;
235 int i;
236
237 if (fsb == 0 || mem == 0)
238 return NULL;
239
240 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
241 latency = &cxsr_latency_table[i];
242 if (is_desktop == latency->is_desktop &&
243 is_ddr3 == latency->is_ddr3 &&
244 fsb == latency->fsb_freq && mem == latency->mem_freq)
245 return latency;
246 }
247
248 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
249
250 return NULL;
251 }
252
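/*
 * Ask the Punit to allow DDR DVFS (enable) or to pin memory at the high
 * frequency (disable), then wait for the frequency request to be
 * acknowledged.
 */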
253 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
254 {
255 u32 val;
256
257 mutex_lock(&dev_priv->rps.hw_lock);
258
259 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
260 if (enable)
261 val &= ~FORCE_DDR_HIGH_FREQ;
262 else
263 val |= FORCE_DDR_HIGH_FREQ;
264 val &= ~FORCE_DDR_LOW_FREQ;
265 val |= FORCE_DDR_FREQ_REQ_ACK;
266 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
267
268 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
269 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
270 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
271
272 mutex_unlock(&dev_priv->rps.hw_lock);
273 }
274
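/* Enable or disable the DSP maxfifo PM5 mode via the Punit DSPFREQ register. */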
275 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
276 {
277 u32 val;
278
279 mutex_lock(&dev_priv->rps.hw_lock);
280
281 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
282 if (enable)
283 val |= DSP_MAXFIFO_PM5_ENABLE;
284 else
285 val &= ~DSP_MAXFIFO_PM5_ENABLE;
286 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
287
288 mutex_unlock(&dev_priv->rps.hw_lock);
289 }
290
291 #define FW_WM(value, plane) \
292 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
293
294 void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
295 {
296 struct drm_device *dev = dev_priv->dev;
297 u32 val;
298
299 if (IS_VALLEYVIEW(dev)) {
300 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
301 POSTING_READ(FW_BLC_SELF_VLV);
302 dev_priv->wm.vlv.cxsr = enable;
303 } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
304 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
305 POSTING_READ(FW_BLC_SELF);
306 } else if (IS_PINEVIEW(dev)) {
307 val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
308 val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
309 I915_WRITE(DSPFW3, val);
310 POSTING_READ(DSPFW3);
311 } else if (IS_I945G(dev) || IS_I945GM(dev)) {
312 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
313 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
314 I915_WRITE(FW_BLC_SELF, val);
315 POSTING_READ(FW_BLC_SELF);
316 } else if (IS_I915GM(dev)) {
317 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
318 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
319 I915_WRITE(INSTPM, val);
320 POSTING_READ(INSTPM);
321 } else {
322 return;
323 }
324
325 DRM_DEBUG_KMS("memory self-refresh is %s\n",
326 enable ? "enabled" : "disabled");
327 }
328
329
330 /*
331 * Latency for FIFO fetches is dependent on several factors:
332 * - memory configuration (speed, channels)
333 * - chipset
334 * - current MCH state
335 * It can be fairly high in some situations, so here we assume a fairly
336 * pessimal value. It's a tradeoff between extra memory fetches (if we
337 * set this value too high, the FIFO will fetch frequently to stay full)
338 * and power consumption (set it too low to save power and we might see
339 * FIFO underruns and display "flicker").
340 *
341 * A value of 5us seems to be a good balance; safe for very low end
342 * platforms but not overly aggressive on lower latency configs.
343 */
344 static const int pessimal_latency_ns = 5000;
345
346 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
347 ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
348
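/*
 * Read back the current FIFO split for a pipe from DSPARB/DSPARB2/DSPARB3
 * and return the number of entries allocated to the given plane
 * (0 = primary, 1 = sprite 0, 2 = sprite 1).
 */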
349 static int vlv_get_fifo_size(struct drm_device *dev,
350 enum i915_pipe pipe, int plane)
351 {
352 struct drm_i915_private *dev_priv = dev->dev_private;
353 int sprite0_start, sprite1_start, size;
354
355 switch (pipe) {
356 uint32_t dsparb, dsparb2, dsparb3;
357 case PIPE_A:
358 dsparb = I915_READ(DSPARB);
359 dsparb2 = I915_READ(DSPARB2);
360 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
361 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
362 break;
363 case PIPE_B:
364 dsparb = I915_READ(DSPARB);
365 dsparb2 = I915_READ(DSPARB2);
366 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
367 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
368 break;
369 case PIPE_C:
370 dsparb2 = I915_READ(DSPARB2);
371 dsparb3 = I915_READ(DSPARB3);
372 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
373 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
374 break;
375 default:
376 return 0;
377 }
378
379 switch (plane) {
380 case 0:
381 size = sprite0_start;
382 break;
383 case 1:
384 size = sprite1_start - sprite0_start;
385 break;
386 case 2:
387 size = 512 - 1 - sprite1_start;
388 break;
389 default:
390 return 0;
391 }
392
393 DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
394 pipe_name(pipe), plane == 0 ? "primary" : "sprite",
395 plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
396 size);
397
398 return size;
399 }
400
401 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
402 {
403 struct drm_i915_private *dev_priv = dev->dev_private;
404 uint32_t dsparb = I915_READ(DSPARB);
405 int size;
406
407 size = dsparb & 0x7f;
408 if (plane)
409 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
410
411 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
412 plane ? "B" : "A", size);
413
414 return size;
415 }
416
417 static int i830_get_fifo_size(struct drm_device *dev, int plane)
418 {
419 struct drm_i915_private *dev_priv = dev->dev_private;
420 uint32_t dsparb = I915_READ(DSPARB);
421 int size;
422
423 size = dsparb & 0x1ff;
424 if (plane)
425 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
426 size >>= 1; /* Convert to cachelines */
427
428 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
429 plane ? "B" : "A", size);
430
431 return size;
432 }
433
434 static int i845_get_fifo_size(struct drm_device *dev, int plane)
435 {
436 struct drm_i915_private *dev_priv = dev->dev_private;
437 uint32_t dsparb = I915_READ(DSPARB);
438 int size;
439
440 size = dsparb & 0x7f;
441 size >>= 2; /* Convert to cachelines */
442
443 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
444 plane ? "B" : "A",
445 size);
446
447 return size;
448 }
449
450 /* Pineview has different values for various configs */
451 static const struct intel_watermark_params pineview_display_wm = {
452 .fifo_size = PINEVIEW_DISPLAY_FIFO,
453 .max_wm = PINEVIEW_MAX_WM,
454 .default_wm = PINEVIEW_DFT_WM,
455 .guard_size = PINEVIEW_GUARD_WM,
456 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
457 };
458 static const struct intel_watermark_params pineview_display_hplloff_wm = {
459 .fifo_size = PINEVIEW_DISPLAY_FIFO,
460 .max_wm = PINEVIEW_MAX_WM,
461 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
462 .guard_size = PINEVIEW_GUARD_WM,
463 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
464 };
465 static const struct intel_watermark_params pineview_cursor_wm = {
466 .fifo_size = PINEVIEW_CURSOR_FIFO,
467 .max_wm = PINEVIEW_CURSOR_MAX_WM,
468 .default_wm = PINEVIEW_CURSOR_DFT_WM,
469 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
470 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
471 };
472 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
473 .fifo_size = PINEVIEW_CURSOR_FIFO,
474 .max_wm = PINEVIEW_CURSOR_MAX_WM,
475 .default_wm = PINEVIEW_CURSOR_DFT_WM,
476 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
477 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
478 };
479 static const struct intel_watermark_params g4x_wm_info = {
480 .fifo_size = G4X_FIFO_SIZE,
481 .max_wm = G4X_MAX_WM,
482 .default_wm = G4X_MAX_WM,
483 .guard_size = 2,
484 .cacheline_size = G4X_FIFO_LINE_SIZE,
485 };
486 static const struct intel_watermark_params g4x_cursor_wm_info = {
487 .fifo_size = I965_CURSOR_FIFO,
488 .max_wm = I965_CURSOR_MAX_WM,
489 .default_wm = I965_CURSOR_DFT_WM,
490 .guard_size = 2,
491 .cacheline_size = G4X_FIFO_LINE_SIZE,
492 };
493 static const struct intel_watermark_params valleyview_wm_info __unused = {
494 .fifo_size = VALLEYVIEW_FIFO_SIZE,
495 .max_wm = VALLEYVIEW_MAX_WM,
496 .default_wm = VALLEYVIEW_MAX_WM,
497 .guard_size = 2,
498 .cacheline_size = G4X_FIFO_LINE_SIZE,
499 };
500 static const struct intel_watermark_params valleyview_cursor_wm_info __unused = {
501 .fifo_size = I965_CURSOR_FIFO,
502 .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
503 .default_wm = I965_CURSOR_DFT_WM,
504 .guard_size = 2,
505 .cacheline_size = G4X_FIFO_LINE_SIZE,
506 };
507 static const struct intel_watermark_params i965_cursor_wm_info = {
508 .fifo_size = I965_CURSOR_FIFO,
509 .max_wm = I965_CURSOR_MAX_WM,
510 .default_wm = I965_CURSOR_DFT_WM,
511 .guard_size = 2,
512 .cacheline_size = I915_FIFO_LINE_SIZE,
513 };
514 static const struct intel_watermark_params i945_wm_info = {
515 .fifo_size = I945_FIFO_SIZE,
516 .max_wm = I915_MAX_WM,
517 .default_wm = 1,
518 .guard_size = 2,
519 .cacheline_size = I915_FIFO_LINE_SIZE,
520 };
521 static const struct intel_watermark_params i915_wm_info = {
522 .fifo_size = I915_FIFO_SIZE,
523 .max_wm = I915_MAX_WM,
524 .default_wm = 1,
525 .guard_size = 2,
526 .cacheline_size = I915_FIFO_LINE_SIZE,
527 };
528 static const struct intel_watermark_params i830_a_wm_info = {
529 .fifo_size = I855GM_FIFO_SIZE,
530 .max_wm = I915_MAX_WM,
531 .default_wm = 1,
532 .guard_size = 2,
533 .cacheline_size = I830_FIFO_LINE_SIZE,
534 };
535 static const struct intel_watermark_params i830_bc_wm_info = {
536 .fifo_size = I855GM_FIFO_SIZE,
537 .max_wm = I915_MAX_WM/2,
538 .default_wm = 1,
539 .guard_size = 2,
540 .cacheline_size = I830_FIFO_LINE_SIZE,
541 };
542 static const struct intel_watermark_params i845_wm_info = {
543 .fifo_size = I830_FIFO_SIZE,
544 .max_wm = I915_MAX_WM,
545 .default_wm = 1,
546 .guard_size = 2,
547 .cacheline_size = I830_FIFO_LINE_SIZE,
548 };
549
550 /**
551 * intel_calculate_wm - calculate watermark level
552 * @clock_in_khz: pixel clock
553 * @wm: chip FIFO params
554 * @pixel_size: display pixel size
555 * @latency_ns: memory latency for the platform
556 *
557 * Calculate the watermark level (the level at which the display plane will
558 * start fetching from memory again). Each chip has a different display
559 * FIFO size and allocation, so the caller needs to figure that out and pass
560 * in the correct intel_watermark_params structure.
561 *
562 * As the pixel clock runs, the FIFO will be drained at a rate that depends
563 * on the pixel size. When it reaches the watermark level, it'll start
564 * fetching FIFO line sized chunks from memory until the FIFO fills
565 * past the watermark point. If the FIFO drains completely, a FIFO underrun
566 * will occur, and a display engine hang could result.
567 */
568 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
569 const struct intel_watermark_params *wm,
570 int fifo_size,
571 int pixel_size,
572 unsigned long latency_ns)
573 {
574 long entries_required, wm_size;
575
576 /*
577 * Note: we need to make sure we don't overflow for various clock &
578 * latency values.
579 * clocks go from a few thousand to several hundred thousand.
580 * latency is usually a few thousand
581 */
582 entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
583 1000;
584 entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
585
586 DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
587
588 wm_size = fifo_size - (entries_required + wm->guard_size);
589
590 DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
591
592 /* Don't promote wm_size to unsigned... */
593 if (wm_size > (long)wm->max_wm)
594 wm_size = wm->max_wm;
595 if (wm_size <= 0)
596 wm_size = wm->default_wm;
597
598 /*
599 * Bspec seems to indicate that the value shouldn't be lower than
600 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
601 * Let's go for 8 which is the burst size since certain platforms
602 * already use a hardcoded 8 (which is what the spec says should be
603 * done).
604 */
605 if (wm_size <= 8)
606 wm_size = 8;
607
608 return wm_size;
609 }
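/*
 * Illustrative worked example for intel_calculate_wm() above (numbers
 * assumed, not taken from the platform definitions): with a 148500 kHz
 * pixel clock, 4 bytes per pixel and the 5000 ns pessimal latency,
 * entries_required = (148 * 4 * 5000) / 1000 = 2960 bytes, which rounds up
 * to 47 FIFO lines of 64 bytes; with a 512 entry FIFO and a guard size of
 * 2 the watermark would come out as 512 - (47 + 2) = 463.
 */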
610
611 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
612 {
613 struct drm_crtc *crtc, *enabled = NULL;
614
615 for_each_crtc(dev, crtc) {
616 if (intel_crtc_active(crtc)) {
617 if (enabled)
618 return NULL;
619 enabled = crtc;
620 }
621 }
622
623 return enabled;
624 }
625
626 static void pineview_update_wm(struct drm_crtc *unused_crtc)
627 {
628 struct drm_device *dev = unused_crtc->dev;
629 struct drm_i915_private *dev_priv = dev->dev_private;
630 struct drm_crtc *crtc;
631 const struct cxsr_latency *latency;
632 u32 reg;
633 unsigned long wm;
634
635 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
636 dev_priv->fsb_freq, dev_priv->mem_freq);
637 if (!latency) {
638 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
639 intel_set_memory_cxsr(dev_priv, false);
640 return;
641 }
642
643 crtc = single_enabled_crtc(dev);
644 if (crtc) {
645 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
646 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
647 int clock = adjusted_mode->crtc_clock;
648
649 /* Display SR */
650 wm = intel_calculate_wm(clock, &pineview_display_wm,
651 pineview_display_wm.fifo_size,
652 pixel_size, latency->display_sr);
653 reg = I915_READ(DSPFW1);
654 reg &= ~DSPFW_SR_MASK;
655 reg |= FW_WM(wm, SR);
656 I915_WRITE(DSPFW1, reg);
657 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
658
659 /* cursor SR */
660 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
661 pineview_display_wm.fifo_size,
662 pixel_size, latency->cursor_sr);
663 reg = I915_READ(DSPFW3);
664 reg &= ~DSPFW_CURSOR_SR_MASK;
665 reg |= FW_WM(wm, CURSOR_SR);
666 I915_WRITE(DSPFW3, reg);
667
668 /* Display HPLL off SR */
669 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
670 pineview_display_hplloff_wm.fifo_size,
671 pixel_size, latency->display_hpll_disable);
672 reg = I915_READ(DSPFW3);
673 reg &= ~DSPFW_HPLL_SR_MASK;
674 reg |= FW_WM(wm, HPLL_SR);
675 I915_WRITE(DSPFW3, reg);
676
677 /* cursor HPLL off SR */
678 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
679 pineview_display_hplloff_wm.fifo_size,
680 pixel_size, latency->cursor_hpll_disable);
681 reg = I915_READ(DSPFW3);
682 reg &= ~DSPFW_HPLL_CURSOR_MASK;
683 reg |= FW_WM(wm, HPLL_CURSOR);
684 I915_WRITE(DSPFW3, reg);
685 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
686
687 intel_set_memory_cxsr(dev_priv, true);
688 } else {
689 intel_set_memory_cxsr(dev_priv, false);
690 }
691 }
692
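/*
 * Compute the level-0 plane and cursor watermarks for a pipe: the plane
 * uses the small buffer method (pixel rate * latency), the cursor the
 * large buffer method (whole lines), both padded with a TLB miss
 * allowance.  Returns false when the pipe is not active.
 */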
693 static bool g4x_compute_wm0(struct drm_device *dev,
694 int plane,
695 const struct intel_watermark_params *display,
696 int display_latency_ns,
697 const struct intel_watermark_params *cursor,
698 int cursor_latency_ns,
699 int *plane_wm,
700 int *cursor_wm)
701 {
702 struct drm_crtc *crtc;
703 const struct drm_display_mode *adjusted_mode;
704 int htotal, hdisplay, clock, pixel_size;
705 int line_time_us, line_count;
706 int entries, tlb_miss;
707
708 crtc = intel_get_crtc_for_plane(dev, plane);
709 if (!intel_crtc_active(crtc)) {
710 *cursor_wm = cursor->guard_size;
711 *plane_wm = display->guard_size;
712 return false;
713 }
714
715 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
716 clock = adjusted_mode->crtc_clock;
717 htotal = adjusted_mode->crtc_htotal;
718 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
719 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
720
721 /* Use the small buffer method to calculate plane watermark */
722 entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
723 tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
724 if (tlb_miss > 0)
725 entries += tlb_miss;
726 entries = DIV_ROUND_UP(entries, display->cacheline_size);
727 *plane_wm = entries + display->guard_size;
728 if (*plane_wm > (int)display->max_wm)
729 *plane_wm = display->max_wm;
730
731 /* Use the large buffer method to calculate cursor watermark */
732 line_time_us = max(htotal * 1000 / clock, 1);
733 line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
734 entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
735 tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
736 if (tlb_miss > 0)
737 entries += tlb_miss;
738 entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
739 *cursor_wm = entries + cursor->guard_size;
740 if (*cursor_wm > (int)cursor->max_wm)
741 *cursor_wm = (int)cursor->max_wm;
742
743 return true;
744 }
745
746 /*
747 * Check the wm result.
748 *
749 * If any calculated watermark value is larger than the maximum value that
750 * can be programmed into the associated watermark register, that watermark
751 * must be disabled.
752 */
753 static bool g4x_check_srwm(struct drm_device *dev,
754 int display_wm, int cursor_wm,
755 const struct intel_watermark_params *display,
756 const struct intel_watermark_params *cursor)
757 {
758 DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
759 display_wm, cursor_wm);
760
761 if (display_wm > display->max_wm) {
762 DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
763 display_wm, display->max_wm);
764 return false;
765 }
766
767 if (cursor_wm > cursor->max_wm) {
768 DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
769 cursor_wm, cursor->max_wm);
770 return false;
771 }
772
773 if (!(display_wm || cursor_wm)) {
774 DRM_DEBUG_KMS("SR latency is 0, disabling\n");
775 return false;
776 }
777
778 return true;
779 }
780
781 static bool g4x_compute_srwm(struct drm_device *dev,
782 int plane,
783 int latency_ns,
784 const struct intel_watermark_params *display,
785 const struct intel_watermark_params *cursor,
786 int *display_wm, int *cursor_wm)
787 {
788 struct drm_crtc *crtc;
789 const struct drm_display_mode *adjusted_mode;
790 int hdisplay, htotal, pixel_size, clock;
791 unsigned long line_time_us;
792 int line_count, line_size;
793 int small, large;
794 int entries;
795
796 if (!latency_ns) {
797 *display_wm = *cursor_wm = 0;
798 return false;
799 }
800
801 crtc = intel_get_crtc_for_plane(dev, plane);
802 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
803 clock = adjusted_mode->crtc_clock;
804 htotal = adjusted_mode->crtc_htotal;
805 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
806 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
807
808 line_time_us = max(htotal * 1000 / clock, 1);
809 line_count = (latency_ns / line_time_us + 1000) / 1000;
810 line_size = hdisplay * pixel_size;
811
812 /* Use the minimum of the small and large buffer method for primary */
813 small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
814 large = line_count * line_size;
815
816 entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
817 *display_wm = entries + display->guard_size;
818
819 /* calculate the self-refresh watermark for display cursor */
820 entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
821 entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
822 *cursor_wm = entries + cursor->guard_size;
823
824 return g4x_check_srwm(dev,
825 *display_wm, *cursor_wm,
826 display, cursor);
827 }
828
829 #define FW_WM_VLV(value, plane) \
830 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
831
832 static void vlv_write_wm_values(struct intel_crtc *crtc,
833 const struct vlv_wm_values *wm)
834 {
835 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
836 enum i915_pipe pipe = crtc->pipe;
837
838 I915_WRITE(VLV_DDL(pipe),
839 (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
840 (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
841 (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
842 (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));
843
844 I915_WRITE(DSPFW1,
845 FW_WM(wm->sr.plane, SR) |
846 FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
847 FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
848 FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
849 I915_WRITE(DSPFW2,
850 FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
851 FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
852 FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
853 I915_WRITE(DSPFW3,
854 FW_WM(wm->sr.cursor, CURSOR_SR));
855
856 if (IS_CHERRYVIEW(dev_priv)) {
857 I915_WRITE(DSPFW7_CHV,
858 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
859 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
860 I915_WRITE(DSPFW8_CHV,
861 FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
862 FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
863 I915_WRITE(DSPFW9_CHV,
864 FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
865 FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
866 I915_WRITE(DSPHOWM,
867 FW_WM(wm->sr.plane >> 9, SR_HI) |
868 FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
869 FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
870 FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
871 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
872 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
873 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
874 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
875 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
876 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
877 } else {
878 I915_WRITE(DSPFW7,
879 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
880 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
881 I915_WRITE(DSPHOWM,
882 FW_WM(wm->sr.plane >> 9, SR_HI) |
883 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
884 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
885 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
886 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
887 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
888 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
889 }
890
891 /* zero (unused) WM1 watermarks */
892 I915_WRITE(DSPFW4, 0);
893 I915_WRITE(DSPFW5, 0);
894 I915_WRITE(DSPFW6, 0);
895 I915_WRITE(DSPHOWM1, 0);
896
897 POSTING_READ(DSPFW1);
898 }
899
900 #undef FW_WM_VLV
901
902 enum vlv_wm_level {
903 VLV_WM_LEVEL_PM2,
904 VLV_WM_LEVEL_PM5,
905 VLV_WM_LEVEL_DDR_DVFS,
906 };
907
908 /* latency must be in 0.1us units. */
909 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
910 unsigned int pipe_htotal,
911 unsigned int horiz_pixels,
912 unsigned int bytes_per_pixel,
913 unsigned int latency)
914 {
915 unsigned int ret;
916
917 ret = (latency * pixel_rate) / (pipe_htotal * 10000);
918 ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
919 ret = DIV_ROUND_UP(ret, 64);
920
921 return ret;
922 }
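/*
 * Illustrative example for vlv_wm_method2() above (assumed numbers): with
 * a 200000 kHz pixel rate, an htotal of 2200, a latency of 330 (33 us in
 * 0.1 us units), a 1920 pixel wide plane and 4 bytes per pixel, the
 * latency spans (330 * 200000) / (2200 * 10000) = 3 lines, so
 * (3 + 1) * 1920 * 4 = 30720 bytes, or 480 64-byte FIFO lines.
 */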
923
924 static void vlv_setup_wm_latency(struct drm_device *dev)
925 {
926 struct drm_i915_private *dev_priv = dev->dev_private;
927
928 /* all latencies in usec */
929 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
930
931 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
932
933 if (IS_CHERRYVIEW(dev_priv)) {
934 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
935 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
936
937 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
938 }
939 }
940
941 static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
942 struct intel_crtc *crtc,
943 const struct intel_plane_state *state,
944 int level)
945 {
946 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
947 int clock, htotal, pixel_size, width, wm;
948
949 if (dev_priv->wm.pri_latency[level] == 0)
950 return USHRT_MAX;
951
952 if (!state->visible)
953 return 0;
954
955 pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
956 clock = crtc->config->base.adjusted_mode.crtc_clock;
957 htotal = crtc->config->base.adjusted_mode.crtc_htotal;
958 width = crtc->config->pipe_src_w;
959 if (WARN_ON(htotal == 0))
960 htotal = 1;
961
962 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
963 /*
964 * FIXME the formula gives values that are
965 * too big for the cursor FIFO, and hence we
966 * would never be able to use cursors. For
967 * now just hardcode the watermark.
968 */
969 wm = 63;
970 } else {
971 wm = vlv_wm_method2(clock, htotal, width, pixel_size,
972 dev_priv->wm.pri_latency[level] * 10);
973 }
974
975 return min_t(int, wm, USHRT_MAX);
976 }
977
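/*
 * Split the pipe's 511 FIFO entries between the visible non-cursor planes
 * in proportion to their bytes per pixel; the cursor always gets a fixed
 * 63 entries, invisible planes get none, and whatever is left over is
 * spread evenly across the remaining planes.
 */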
978 static void vlv_compute_fifo(struct intel_crtc *crtc)
979 {
980 struct drm_device *dev = crtc->base.dev;
981 struct vlv_wm_state *wm_state = &crtc->wm_state;
982 struct intel_plane *plane;
983 unsigned int total_rate = 0;
984 const int fifo_size = 512 - 1;
985 int fifo_extra, fifo_left = fifo_size;
986
987 for_each_intel_plane_on_crtc(dev, crtc, plane) {
988 struct intel_plane_state *state =
989 to_intel_plane_state(plane->base.state);
990
991 if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
992 continue;
993
994 if (state->visible) {
995 wm_state->num_active_planes++;
996 total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
997 }
998 }
999
1000 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1001 struct intel_plane_state *state =
1002 to_intel_plane_state(plane->base.state);
1003 unsigned int rate;
1004
1005 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1006 plane->wm.fifo_size = 63;
1007 continue;
1008 }
1009
1010 if (!state->visible) {
1011 plane->wm.fifo_size = 0;
1012 continue;
1013 }
1014
1015 rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
1016 plane->wm.fifo_size = fifo_size * rate / total_rate;
1017 fifo_left -= plane->wm.fifo_size;
1018 }
1019
1020 fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);
1021
1022 /* spread the remainder evenly */
1023 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1024 int plane_extra;
1025
1026 if (fifo_left == 0)
1027 break;
1028
1029 if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
1030 continue;
1031
1032 /* give it all to the first plane if none are active */
1033 if (plane->wm.fifo_size == 0 &&
1034 wm_state->num_active_planes)
1035 continue;
1036
1037 plane_extra = min(fifo_extra, fifo_left);
1038 plane->wm.fifo_size += plane_extra;
1039 fifo_left -= plane_extra;
1040 }
1041
1042 WARN_ON(fifo_left != 0);
1043 }
1044
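/*
 * The levels computed above count FIFO entries consumed to cover the
 * latency; invert them against the corresponding FIFO sizes
 * (fifo_size - wm), which is the form written out to the DSPFW registers
 * by vlv_write_wm_values().
 */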
1045 static void vlv_invert_wms(struct intel_crtc *crtc)
1046 {
1047 struct vlv_wm_state *wm_state = &crtc->wm_state;
1048 int level;
1049
1050 for (level = 0; level < wm_state->num_levels; level++) {
1051 struct drm_device *dev = crtc->base.dev;
1052 const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1053 struct intel_plane *plane;
1054
1055 wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
1056 wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
1057
1058 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1059 switch (plane->base.type) {
1060 int sprite;
1061 case DRM_PLANE_TYPE_CURSOR:
1062 wm_state->wm[level].cursor = plane->wm.fifo_size -
1063 wm_state->wm[level].cursor;
1064 break;
1065 case DRM_PLANE_TYPE_PRIMARY:
1066 wm_state->wm[level].primary = plane->wm.fifo_size -
1067 wm_state->wm[level].primary;
1068 break;
1069 case DRM_PLANE_TYPE_OVERLAY:
1070 sprite = plane->plane;
1071 wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
1072 wm_state->wm[level].sprite[sprite];
1073 break;
1074 }
1075 }
1076 }
1077 }
1078
1079 static void vlv_compute_wm(struct intel_crtc *crtc)
1080 {
1081 struct drm_device *dev = crtc->base.dev;
1082 struct vlv_wm_state *wm_state = &crtc->wm_state;
1083 struct intel_plane *plane;
1084 int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1085 int level;
1086
1087 memset(wm_state, 0, sizeof(*wm_state));
1088
1089 wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
1090 wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
1091
1092 wm_state->num_active_planes = 0;
1093
1094 vlv_compute_fifo(crtc);
1095
1096 if (wm_state->num_active_planes != 1)
1097 wm_state->cxsr = false;
1098
1099 if (wm_state->cxsr) {
1100 for (level = 0; level < wm_state->num_levels; level++) {
1101 wm_state->sr[level].plane = sr_fifo_size;
1102 wm_state->sr[level].cursor = 63;
1103 }
1104 }
1105
1106 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1107 struct intel_plane_state *state =
1108 to_intel_plane_state(plane->base.state);
1109
1110 if (!state->visible)
1111 continue;
1112
1113 /* normal watermarks */
1114 for (level = 0; level < wm_state->num_levels; level++) {
1115 int wm = vlv_compute_wm_level(plane, crtc, state, level);
1116 int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
1117
1118 /* hack */
1119 if (WARN_ON(level == 0 && wm > max_wm))
1120 wm = max_wm;
1121
1122 if (wm > plane->wm.fifo_size)
1123 break;
1124
1125 switch (plane->base.type) {
1126 int sprite;
1127 case DRM_PLANE_TYPE_CURSOR:
1128 wm_state->wm[level].cursor = wm;
1129 break;
1130 case DRM_PLANE_TYPE_PRIMARY:
1131 wm_state->wm[level].primary = wm;
1132 break;
1133 case DRM_PLANE_TYPE_OVERLAY:
1134 sprite = plane->plane;
1135 wm_state->wm[level].sprite[sprite] = wm;
1136 break;
1137 }
1138 }
1139
1140 wm_state->num_levels = level;
1141
1142 if (!wm_state->cxsr)
1143 continue;
1144
1145 /* maxfifo watermarks */
1146 switch (plane->base.type) {
1147 int sprite, level;
1148 case DRM_PLANE_TYPE_CURSOR:
1149 for (level = 0; level < wm_state->num_levels; level++)
1150 wm_state->sr[level].cursor =
1151 wm_state->wm[level].cursor;
1152 break;
1153 case DRM_PLANE_TYPE_PRIMARY:
1154 for (level = 0; level < wm_state->num_levels; level++)
1155 wm_state->sr[level].plane =
1156 min(wm_state->sr[level].plane,
1157 wm_state->wm[level].primary);
1158 break;
1159 case DRM_PLANE_TYPE_OVERLAY:
1160 sprite = plane->plane;
1161 for (level = 0; level < wm_state->num_levels; level++)
1162 wm_state->sr[level].plane =
1163 min(wm_state->sr[level].plane,
1164 wm_state->wm[level].sprite[sprite]);
1165 break;
1166 }
1167 }
1168
1169 /* clear any (partially) filled invalid levels */
1170 for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1171 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1172 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1173 }
1174
1175 vlv_invert_wms(crtc);
1176 }
1177
1178 #define VLV_FIFO(plane, value) \
1179 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1180
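/*
 * Program the FIFO split computed by vlv_compute_fifo() into the
 * DSPARB/DSPARB2/DSPARB3 registers for this pipe's sprite planes.
 */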
1181 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1182 {
1183 struct drm_device *dev = crtc->base.dev;
1184 struct drm_i915_private *dev_priv = to_i915(dev);
1185 struct intel_plane *plane;
1186 int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1187
1188 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1189 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1190 WARN_ON(plane->wm.fifo_size != 63);
1191 continue;
1192 }
1193
1194 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1195 sprite0_start = plane->wm.fifo_size;
1196 else if (plane->plane == 0)
1197 sprite1_start = sprite0_start + plane->wm.fifo_size;
1198 else
1199 fifo_size = sprite1_start + plane->wm.fifo_size;
1200 }
1201
1202 WARN_ON(fifo_size != 512 - 1);
1203
1204 DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1205 pipe_name(crtc->pipe), sprite0_start,
1206 sprite1_start, fifo_size);
1207
1208 switch (crtc->pipe) {
1209 uint32_t dsparb, dsparb2, dsparb3;
1210 case PIPE_A:
1211 dsparb = I915_READ(DSPARB);
1212 dsparb2 = I915_READ(DSPARB2);
1213
1214 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1215 VLV_FIFO(SPRITEB, 0xff));
1216 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1217 VLV_FIFO(SPRITEB, sprite1_start));
1218
1219 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1220 VLV_FIFO(SPRITEB_HI, 0x1));
1221 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1222 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1223
1224 I915_WRITE(DSPARB, dsparb);
1225 I915_WRITE(DSPARB2, dsparb2);
1226 break;
1227 case PIPE_B:
1228 dsparb = I915_READ(DSPARB);
1229 dsparb2 = I915_READ(DSPARB2);
1230
1231 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1232 VLV_FIFO(SPRITED, 0xff));
1233 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1234 VLV_FIFO(SPRITED, sprite1_start));
1235
1236 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1237 VLV_FIFO(SPRITED_HI, 0xff));
1238 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1239 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1240
1241 I915_WRITE(DSPARB, dsparb);
1242 I915_WRITE(DSPARB2, dsparb2);
1243 break;
1244 case PIPE_C:
1245 dsparb3 = I915_READ(DSPARB3);
1246 dsparb2 = I915_READ(DSPARB2);
1247
1248 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1249 VLV_FIFO(SPRITEF, 0xff));
1250 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1251 VLV_FIFO(SPRITEF, sprite1_start));
1252
1253 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1254 VLV_FIFO(SPRITEF_HI, 0xff));
1255 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1256 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1257
1258 I915_WRITE(DSPARB3, dsparb3);
1259 I915_WRITE(DSPARB2, dsparb2);
1260 break;
1261 default:
1262 break;
1263 }
1264 }
1265
1266 #undef VLV_FIFO
1267
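/*
 * Merge the per-CRTC watermark state into a single device-wide set of
 * values: use the highest level supported by every active CRTC (clamped to
 * PM2 with more than one active CRTC), and only keep CxSR/maxfifo enabled
 * when exactly one CRTC is active and allows it.
 */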
1268 static void vlv_merge_wm(struct drm_device *dev,
1269 struct vlv_wm_values *wm)
1270 {
1271 struct intel_crtc *crtc;
1272 int num_active_crtcs = 0;
1273
1274 wm->level = to_i915(dev)->wm.max_level;
1275 wm->cxsr = true;
1276
1277 for_each_intel_crtc(dev, crtc) {
1278 const struct vlv_wm_state *wm_state = &crtc->wm_state;
1279
1280 if (!crtc->active)
1281 continue;
1282
1283 if (!wm_state->cxsr)
1284 wm->cxsr = false;
1285
1286 num_active_crtcs++;
1287 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1288 }
1289
1290 if (num_active_crtcs != 1)
1291 wm->cxsr = false;
1292
1293 if (num_active_crtcs > 1)
1294 wm->level = VLV_WM_LEVEL_PM2;
1295
1296 for_each_intel_crtc(dev, crtc) {
1297 struct vlv_wm_state *wm_state = &crtc->wm_state;
1298 enum i915_pipe pipe = crtc->pipe;
1299
1300 if (!crtc->active)
1301 continue;
1302
1303 wm->pipe[pipe] = wm_state->wm[wm->level];
1304 if (wm->cxsr)
1305 wm->sr = wm_state->sr[wm->level];
1306
1307 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1308 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1309 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1310 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1311 }
1312 }
1313
1314 static void vlv_update_wm(struct drm_crtc *crtc)
1315 {
1316 struct drm_device *dev = crtc->dev;
1317 struct drm_i915_private *dev_priv = dev->dev_private;
1318 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1319 enum i915_pipe pipe = intel_crtc->pipe;
1320 struct vlv_wm_values wm = {};
1321
1322 vlv_compute_wm(intel_crtc);
1323 vlv_merge_wm(dev, &wm);
1324
1325 if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1326 /* FIXME should be part of crtc atomic commit */
1327 vlv_pipe_set_fifo_size(intel_crtc);
1328 return;
1329 }
1330
1331 if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1332 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1333 chv_set_memory_dvfs(dev_priv, false);
1334
1335 if (wm.level < VLV_WM_LEVEL_PM5 &&
1336 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1337 chv_set_memory_pm5(dev_priv, false);
1338
1339 if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1340 intel_set_memory_cxsr(dev_priv, false);
1341
1342 /* FIXME should be part of crtc atomic commit */
1343 vlv_pipe_set_fifo_size(intel_crtc);
1344
1345 vlv_write_wm_values(intel_crtc, &wm);
1346
1347 DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1348 "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1349 pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1350 wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1351 wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1352
1353 if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1354 intel_set_memory_cxsr(dev_priv, true);
1355
1356 if (wm.level >= VLV_WM_LEVEL_PM5 &&
1357 dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1358 chv_set_memory_pm5(dev_priv, true);
1359
1360 if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1361 dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1362 chv_set_memory_dvfs(dev_priv, true);
1363
1364 dev_priv->wm.vlv = wm;
1365 }
1366
1367 #define single_plane_enabled(mask) is_power_of_2(mask)
1368
1369 static void g4x_update_wm(struct drm_crtc *crtc)
1370 {
1371 struct drm_device *dev = crtc->dev;
1372 static const int sr_latency_ns = 12000;
1373 struct drm_i915_private *dev_priv = dev->dev_private;
1374 int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1375 int plane_sr, cursor_sr;
1376 unsigned int enabled = 0;
1377 bool cxsr_enabled;
1378
1379 if (g4x_compute_wm0(dev, PIPE_A,
1380 &g4x_wm_info, pessimal_latency_ns,
1381 &g4x_cursor_wm_info, pessimal_latency_ns,
1382 &planea_wm, &cursora_wm))
1383 enabled |= 1 << PIPE_A;
1384
1385 if (g4x_compute_wm0(dev, PIPE_B,
1386 &g4x_wm_info, pessimal_latency_ns,
1387 &g4x_cursor_wm_info, pessimal_latency_ns,
1388 &planeb_wm, &cursorb_wm))
1389 enabled |= 1 << PIPE_B;
1390
1391 if (single_plane_enabled(enabled) &&
1392 g4x_compute_srwm(dev, ffs(enabled) - 1,
1393 sr_latency_ns,
1394 &g4x_wm_info,
1395 &g4x_cursor_wm_info,
1396 &plane_sr, &cursor_sr)) {
1397 cxsr_enabled = true;
1398 } else {
1399 cxsr_enabled = false;
1400 intel_set_memory_cxsr(dev_priv, false);
1401 plane_sr = cursor_sr = 0;
1402 }
1403
1404 DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1405 "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1406 planea_wm, cursora_wm,
1407 planeb_wm, cursorb_wm,
1408 plane_sr, cursor_sr);
1409
1410 I915_WRITE(DSPFW1,
1411 FW_WM(plane_sr, SR) |
1412 FW_WM(cursorb_wm, CURSORB) |
1413 FW_WM(planeb_wm, PLANEB) |
1414 FW_WM(planea_wm, PLANEA));
1415 I915_WRITE(DSPFW2,
1416 (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1417 FW_WM(cursora_wm, CURSORA));
1418 /* HPLL off in SR has some issues on G4x... disable it */
1419 I915_WRITE(DSPFW3,
1420 (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1421 FW_WM(cursor_sr, CURSOR_SR));
1422
1423 if (cxsr_enabled)
1424 intel_set_memory_cxsr(dev_priv, true);
1425 }
1426
1427 static void i965_update_wm(struct drm_crtc *unused_crtc)
1428 {
1429 struct drm_device *dev = unused_crtc->dev;
1430 struct drm_i915_private *dev_priv = dev->dev_private;
1431 struct drm_crtc *crtc;
1432 int srwm = 1;
1433 int cursor_sr = 16;
1434 bool cxsr_enabled;
1435
1436 	/* Calc SR entries for single plane configs */
1437 crtc = single_enabled_crtc(dev);
1438 if (crtc) {
1439 /* self-refresh has much higher latency */
1440 static const int sr_latency_ns = 12000;
1441 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1442 int clock = adjusted_mode->crtc_clock;
1443 int htotal = adjusted_mode->crtc_htotal;
1444 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1445 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
1446 unsigned long line_time_us;
1447 int entries;
1448
1449 line_time_us = max(htotal * 1000 / clock, 1);
1450
1451 /* Use ns/us then divide to preserve precision */
1452 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1453 pixel_size * hdisplay;
1454 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1455 srwm = I965_FIFO_SIZE - entries;
1456 if (srwm < 0)
1457 srwm = 1;
1458 srwm &= 0x1ff;
1459 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1460 entries, srwm);
1461
1462 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1463 pixel_size * crtc->cursor->state->crtc_w;
1464 entries = DIV_ROUND_UP(entries,
1465 i965_cursor_wm_info.cacheline_size);
1466 cursor_sr = i965_cursor_wm_info.fifo_size -
1467 (entries + i965_cursor_wm_info.guard_size);
1468
1469 if (cursor_sr > i965_cursor_wm_info.max_wm)
1470 cursor_sr = i965_cursor_wm_info.max_wm;
1471
1472 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1473 "cursor %d\n", srwm, cursor_sr);
1474
1475 cxsr_enabled = true;
1476 } else {
1477 cxsr_enabled = false;
1478 /* Turn off self refresh if both pipes are enabled */
1479 intel_set_memory_cxsr(dev_priv, false);
1480 }
1481
1482 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1483 srwm);
1484
1485 /* 965 has limitations... */
1486 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1487 FW_WM(8, CURSORB) |
1488 FW_WM(8, PLANEB) |
1489 FW_WM(8, PLANEA));
1490 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1491 FW_WM(8, PLANEC_OLD));
1492 /* update cursor SR watermark */
1493 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1494
1495 if (cxsr_enabled)
1496 intel_set_memory_cxsr(dev_priv, true);
1497 }
1498
1499 #undef FW_WM
1500
1501 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1502 {
1503 struct drm_device *dev = unused_crtc->dev;
1504 struct drm_i915_private *dev_priv = dev->dev_private;
1505 const struct intel_watermark_params *wm_info;
1506 uint32_t fwater_lo;
1507 uint32_t fwater_hi;
1508 int cwm, srwm = 1;
1509 int fifo_size;
1510 int planea_wm, planeb_wm;
1511 struct drm_crtc *crtc, *enabled = NULL;
1512
1513 if (IS_I945GM(dev))
1514 wm_info = &i945_wm_info;
1515 else if (!IS_GEN2(dev))
1516 wm_info = &i915_wm_info;
1517 else
1518 wm_info = &i830_a_wm_info;
1519
1520 fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1521 crtc = intel_get_crtc_for_plane(dev, 0);
1522 if (intel_crtc_active(crtc)) {
1523 const struct drm_display_mode *adjusted_mode;
1524 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1525 if (IS_GEN2(dev))
1526 cpp = 4;
1527
1528 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1529 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1530 wm_info, fifo_size, cpp,
1531 pessimal_latency_ns);
1532 enabled = crtc;
1533 } else {
1534 planea_wm = fifo_size - wm_info->guard_size;
1535 if (planea_wm > (long)wm_info->max_wm)
1536 planea_wm = wm_info->max_wm;
1537 }
1538
1539 if (IS_GEN2(dev))
1540 wm_info = &i830_bc_wm_info;
1541
1542 fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1543 crtc = intel_get_crtc_for_plane(dev, 1);
1544 if (intel_crtc_active(crtc)) {
1545 const struct drm_display_mode *adjusted_mode;
1546 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1547 if (IS_GEN2(dev))
1548 cpp = 4;
1549
1550 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1551 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1552 wm_info, fifo_size, cpp,
1553 pessimal_latency_ns);
1554 if (enabled == NULL)
1555 enabled = crtc;
1556 else
1557 enabled = NULL;
1558 } else {
1559 planeb_wm = fifo_size - wm_info->guard_size;
1560 if (planeb_wm > (long)wm_info->max_wm)
1561 planeb_wm = wm_info->max_wm;
1562 }
1563
1564 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1565
1566 if (IS_I915GM(dev) && enabled) {
1567 struct drm_i915_gem_object *obj;
1568
1569 obj = intel_fb_obj(enabled->primary->state->fb);
1570
1571 /* self-refresh seems busted with untiled */
1572 if (obj->tiling_mode == I915_TILING_NONE)
1573 enabled = NULL;
1574 }
1575
1576 /*
1577 * Overlay gets an aggressive default since video jitter is bad.
1578 */
1579 cwm = 2;
1580
1581 /* Play safe and disable self-refresh before adjusting watermarks. */
1582 intel_set_memory_cxsr(dev_priv, false);
1583
1584 	/* Calc SR entries for single plane configs */
1585 if (HAS_FW_BLC(dev) && enabled) {
1586 /* self-refresh has much higher latency */
1587 static const int sr_latency_ns = 6000;
1588 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
1589 int clock = adjusted_mode->crtc_clock;
1590 int htotal = adjusted_mode->crtc_htotal;
1591 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1592 int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
1593 unsigned long line_time_us;
1594 int entries;
1595
1596 line_time_us = max(htotal * 1000 / clock, 1);
1597
1598 /* Use ns/us then divide to preserve precision */
1599 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1600 pixel_size * hdisplay;
1601 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1602 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1603 srwm = wm_info->fifo_size - entries;
1604 if (srwm < 0)
1605 srwm = 1;
1606
1607 if (IS_I945G(dev) || IS_I945GM(dev))
1608 I915_WRITE(FW_BLC_SELF,
1609 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1610 else if (IS_I915GM(dev))
1611 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1612 }
1613
1614 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1615 planea_wm, planeb_wm, cwm, srwm);
1616
1617 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1618 fwater_hi = (cwm & 0x1f);
1619
1620 /* Set request length to 8 cachelines per fetch */
1621 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1622 fwater_hi = fwater_hi | (1 << 8);
1623
1624 I915_WRITE(FW_BLC, fwater_lo);
1625 I915_WRITE(FW_BLC2, fwater_hi);
1626
1627 if (enabled)
1628 intel_set_memory_cxsr(dev_priv, true);
1629 }
1630
1631 static void i845_update_wm(struct drm_crtc *unused_crtc)
1632 {
1633 struct drm_device *dev = unused_crtc->dev;
1634 struct drm_i915_private *dev_priv = dev->dev_private;
1635 struct drm_crtc *crtc;
1636 const struct drm_display_mode *adjusted_mode;
1637 uint32_t fwater_lo;
1638 int planea_wm;
1639
1640 crtc = single_enabled_crtc(dev);
1641 if (crtc == NULL)
1642 return;
1643
1644 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1645 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1646 &i845_wm_info,
1647 dev_priv->display.get_fifo_size(dev, 0),
1648 4, pessimal_latency_ns);
1649 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1650 fwater_lo |= (3<<8) | planea_wm;
1651
1652 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1653
1654 I915_WRITE(FW_BLC, fwater_lo);
1655 }
1656
1657 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1658 {
1659 uint32_t pixel_rate;
1660
1661 pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1662
1663 /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1664 * adjust the pixel_rate here. */
1665
1666 if (pipe_config->pch_pfit.enabled) {
1667 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1668 uint32_t pfit_size = pipe_config->pch_pfit.size;
1669
1670 pipe_w = pipe_config->pipe_src_w;
1671 pipe_h = pipe_config->pipe_src_h;
1672
1673 pfit_w = (pfit_size >> 16) & 0xFFFF;
1674 pfit_h = pfit_size & 0xFFFF;
1675 if (pipe_w < pfit_w)
1676 pipe_w = pfit_w;
1677 if (pipe_h < pfit_h)
1678 pipe_h = pfit_h;
1679
1680 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1681 pfit_w * pfit_h);
1682 }
1683
1684 return pixel_rate;
1685 }
1686
1687 /* latency must be in 0.1us units. */
1688 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1689 uint32_t latency)
1690 {
1691 uint64_t ret;
1692
1693 if (WARN(latency == 0, "Latency value missing\n"))
1694 return UINT_MAX;
1695
1696 ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1697 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1698
1699 return ret;
1700 }
1701
1702 /* latency must be in 0.1us units. */
1703 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1704 uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1705 uint32_t latency)
1706 {
1707 uint32_t ret;
1708
1709 if (WARN(latency == 0, "Latency value missing\n"))
1710 return UINT_MAX;
1711
1712 ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1713 ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1714 ret = DIV_ROUND_UP(ret, 64) + 2;
1715 return ret;
1716 }
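/*
 * Illustrative example for ilk_wm_method1()/ilk_wm_method2() above
 * (assumed numbers): method 1 with a 200000 kHz pixel rate, 4 bytes per
 * pixel and a latency of 120 (12 us in 0.1 us units) gives
 * 200000 * 4 * 120 / (64 * 10000) + 2 = 150 + 2 = 152 FIFO lines; method 2
 * additionally factors in htotal and the plane width, as in
 * vlv_wm_method2() above.
 */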
1717
1718 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1719 uint8_t bytes_per_pixel)
1720 {
1721 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1722 }
1723
1724 struct skl_pipe_wm_parameters {
1725 bool active;
1726 uint32_t pipe_htotal;
1727 uint32_t pixel_rate; /* in KHz */
1728 struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1729 };
1730
1731 struct ilk_wm_maximums {
1732 uint16_t pri;
1733 uint16_t spr;
1734 uint16_t cur;
1735 uint16_t fbc;
1736 };
1737
1738 /* used in computing the new watermarks state */
1739 struct intel_wm_config {
1740 unsigned int num_pipes_active;
1741 bool sprites_enabled;
1742 bool sprites_scaled;
1743 };
1744
1745 /*
1746 * For both WM_PIPE and WM_LP.
1747 * mem_value must be in 0.1us units.
1748 */
1749 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
1750 const struct intel_plane_state *pstate,
1751 uint32_t mem_value,
1752 bool is_lp)
1753 {
1754 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1755 uint32_t method1, method2;
1756
1757 if (!cstate->base.active || !pstate->visible)
1758 return 0;
1759
1760 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1761
1762 if (!is_lp)
1763 return method1;
1764
1765 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1766 cstate->base.adjusted_mode.crtc_htotal,
1767 drm_rect_width(&pstate->dst),
1768 bpp,
1769 mem_value);
1770
1771 return min(method1, method2);
1772 }
1773
1774 /*
1775 * For both WM_PIPE and WM_LP.
1776 * mem_value must be in 0.1us units.
1777 */
1778 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
1779 const struct intel_plane_state *pstate,
1780 uint32_t mem_value)
1781 {
1782 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1783 uint32_t method1, method2;
1784
1785 if (!cstate->base.active || !pstate->visible)
1786 return 0;
1787
1788 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1789 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1790 cstate->base.adjusted_mode.crtc_htotal,
1791 drm_rect_width(&pstate->dst),
1792 bpp,
1793 mem_value);
1794 return min(method1, method2);
1795 }
1796
1797 /*
1798 * For both WM_PIPE and WM_LP.
1799 * mem_value must be in 0.1us units.
1800 */
1801 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
1802 const struct intel_plane_state *pstate,
1803 uint32_t mem_value)
1804 {
1805 /*
1806 * We treat the cursor plane as always-on for the purposes of watermark
1807 * calculation. Until we have two-stage watermark programming merged,
1808 * this is necessary to avoid flickering.
1809 */
1810 int cpp = 4;
1811 int width = pstate->visible ? pstate->base.crtc_w : 64;
1812
1813 if (!cstate->base.active)
1814 return 0;
1815
1816 return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1817 cstate->base.adjusted_mode.crtc_htotal,
1818 width, cpp, mem_value);
1819 }
1820
1821 /* Only for WM_LP. */
1822 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1823 const struct intel_plane_state *pstate,
1824 uint32_t pri_val)
1825 {
1826 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1827
1828 if (!cstate->base.active || !pstate->visible)
1829 return 0;
1830
1831 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
1832 }
1833
1834 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1835 {
1836 if (INTEL_INFO(dev)->gen >= 8)
1837 return 3072;
1838 else if (INTEL_INFO(dev)->gen >= 7)
1839 return 768;
1840 else
1841 return 512;
1842 }
1843
1844 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1845 int level, bool is_sprite)
1846 {
1847 if (INTEL_INFO(dev)->gen >= 8)
1848 /* BDW primary/sprite plane watermarks */
1849 return level == 0 ? 255 : 2047;
1850 else if (INTEL_INFO(dev)->gen >= 7)
1851 /* IVB/HSW primary/sprite plane watermarks */
1852 return level == 0 ? 127 : 1023;
1853 else if (!is_sprite)
1854 /* ILK/SNB primary plane watermarks */
1855 return level == 0 ? 127 : 511;
1856 else
1857 /* ILK/SNB sprite plane watermarks */
1858 return level == 0 ? 63 : 255;
1859 }
1860
1861 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1862 int level)
1863 {
1864 if (INTEL_INFO(dev)->gen >= 7)
1865 return level == 0 ? 63 : 255;
1866 else
1867 return level == 0 ? 31 : 63;
1868 }
1869
1870 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1871 {
1872 if (INTEL_INFO(dev)->gen >= 8)
1873 return 31;
1874 else
1875 return 15;
1876 }
1877
1878 /* Calculate the maximum primary/sprite plane watermark */
1879 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1880 int level,
1881 const struct intel_wm_config *config,
1882 enum intel_ddb_partitioning ddb_partitioning,
1883 bool is_sprite)
1884 {
1885 unsigned int fifo_size = ilk_display_fifo_size(dev);
1886
1887 /* if sprites aren't enabled, sprites get nothing */
1888 if (is_sprite && !config->sprites_enabled)
1889 return 0;
1890
1891 /* HSW allows LP1+ watermarks even with multiple pipes */
1892 if (level == 0 || config->num_pipes_active > 1) {
1893 fifo_size /= INTEL_INFO(dev)->num_pipes;
1894
1895 /*
1896 * For some reason the non self refresh
1897 * FIFO size is only half of the self
1898 * refresh FIFO size on ILK/SNB.
1899 */
1900 if (INTEL_INFO(dev)->gen <= 6)
1901 fifo_size /= 2;
1902 }
1903
1904 if (config->sprites_enabled) {
1905 /* level 0 is always calculated with 1:1 split */
1906 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1907 if (is_sprite)
1908 fifo_size *= 5;
1909 fifo_size /= 6;
1910 } else {
1911 fifo_size /= 2;
1912 }
1913 }
1914
1915 /* clamp to max that the registers can hold */
1916 return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1917 }
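
/*
 * Worked example for ilk_plane_wm_max(): on IVB (768 entry FIFO) with a
 * single active pipe, an LP level and sprites enabled, the 5/6 DDB split
 * gives the sprite 768 * 5 / 6 = 640 entries and the primary 768 / 6 = 128,
 * while the default 1:1 split gives each 384.  The result is then clamped
 * to the register maximums above.
 */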
1918
1919 /* Calculate the maximum cursor plane watermark */
1920 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1921 int level,
1922 const struct intel_wm_config *config)
1923 {
1924 /* HSW LP1+ watermarks w/ multiple pipes */
1925 if (level > 0 && config->num_pipes_active > 1)
1926 return 64;
1927
1928 /* otherwise just report max that registers can hold */
1929 return ilk_cursor_wm_reg_max(dev, level);
1930 }
1931
1932 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1933 int level,
1934 const struct intel_wm_config *config,
1935 enum intel_ddb_partitioning ddb_partitioning,
1936 struct ilk_wm_maximums *max)
1937 {
1938 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1939 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1940 max->cur = ilk_cursor_wm_max(dev, level, config);
1941 max->fbc = ilk_fbc_wm_reg_max(dev);
1942 }
1943
1944 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1945 int level,
1946 struct ilk_wm_maximums *max)
1947 {
1948 max->pri = ilk_plane_wm_reg_max(dev, level, false);
1949 max->spr = ilk_plane_wm_reg_max(dev, level, true);
1950 max->cur = ilk_cursor_wm_reg_max(dev, level);
1951 max->fbc = ilk_fbc_wm_reg_max(dev);
1952 }
1953
1954 static bool ilk_validate_wm_level(int level,
1955 const struct ilk_wm_maximums *max,
1956 struct intel_wm_level *result)
1957 {
1958 bool ret;
1959
1960 /* already determined to be invalid? */
1961 if (!result->enable)
1962 return false;
1963
1964 result->enable = result->pri_val <= max->pri &&
1965 result->spr_val <= max->spr &&
1966 result->cur_val <= max->cur;
1967
1968 ret = result->enable;
1969
1970 /*
1971 * HACK until we can pre-compute everything,
1972 * and thus fail gracefully if LP0 watermarks
1973 * are exceeded...
1974 */
1975 if (level == 0 && !result->enable) {
1976 if (result->pri_val > max->pri)
1977 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1978 level, result->pri_val, max->pri);
1979 if (result->spr_val > max->spr)
1980 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1981 level, result->spr_val, max->spr);
1982 if (result->cur_val > max->cur)
1983 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1984 level, result->cur_val, max->cur);
1985
1986 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1987 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1988 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1989 result->enable = true;
1990 }
1991
1992 return ret;
1993 }
1994
1995 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1996 const struct intel_crtc *intel_crtc,
1997 int level,
1998 struct intel_crtc_state *cstate,
1999 struct intel_wm_level *result)
2000 {
2001 struct intel_plane *intel_plane;
2002 uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2003 uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2004 uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2005
2006 /* WM1+ latency values stored in 0.5us units */
2007 if (level > 0) {
2008 pri_latency *= 5;
2009 spr_latency *= 5;
2010 cur_latency *= 5;
2011 }
2012
2013 for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
2014 struct intel_plane_state *pstate =
2015 to_intel_plane_state(intel_plane->base.state);
2016
2017 switch (intel_plane->base.type) {
2018 case DRM_PLANE_TYPE_PRIMARY:
2019 result->pri_val = ilk_compute_pri_wm(cstate, pstate,
2020 pri_latency,
2021 level);
2022 result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
2023 result->pri_val);
2024 break;
2025 case DRM_PLANE_TYPE_OVERLAY:
2026 result->spr_val = ilk_compute_spr_wm(cstate, pstate,
2027 spr_latency);
2028 break;
2029 case DRM_PLANE_TYPE_CURSOR:
2030 result->cur_val = ilk_compute_cur_wm(cstate, pstate,
2031 cur_latency);
2032 break;
2033 }
2034 }
2035
2036 result->enable = true;
2037 }
2038
2039 static uint32_t
2040 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2041 {
2042 struct drm_i915_private *dev_priv = dev->dev_private;
2043 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2044 const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
2045 u32 linetime, ips_linetime;
2046
2047 if (!intel_crtc->active)
2048 return 0;
2049
2050 	/* The watermarks are computed based on how long it takes to fill a
2051 	 * single row at the given clock rate, multiplied by 8.
2052 	 */
2053 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2054 adjusted_mode->crtc_clock);
2055 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2056 dev_priv->cdclk_freq);
2057
2058 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2059 PIPE_WM_LINETIME_TIME(linetime);
2060 }
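
/*
 * Worked example for hsw_compute_linetime_wm(): crtc_clock is in kHz, so
 * crtc_htotal * 1000 * 8 / crtc_clock is the line time in 0.125us units.
 * For a 1920x1080@60 mode (htotal 2200, clock 148500 kHz) a line takes
 * about 14.8us, giving a register value of roughly 119; the IPS variant is
 * the same calculation against the current cdclk instead of the pixel clock.
 */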
2061
2062 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2063 {
2064 struct drm_i915_private *dev_priv = dev->dev_private;
2065
2066 if (IS_GEN9(dev)) {
2067 uint32_t val;
2068 int ret, i;
2069 int level, max_level = ilk_wm_max_level(dev);
2070
2071 /* read the first set of memory latencies[0:3] */
2072 val = 0; /* data0 to be programmed to 0 for first set */
2073 mutex_lock(&dev_priv->rps.hw_lock);
2074 ret = sandybridge_pcode_read(dev_priv,
2075 GEN9_PCODE_READ_MEM_LATENCY,
2076 &val);
2077 mutex_unlock(&dev_priv->rps.hw_lock);
2078
2079 if (ret) {
2080 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2081 return;
2082 }
2083
2084 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2085 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2086 GEN9_MEM_LATENCY_LEVEL_MASK;
2087 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2088 GEN9_MEM_LATENCY_LEVEL_MASK;
2089 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2090 GEN9_MEM_LATENCY_LEVEL_MASK;
2091
2092 /* read the second set of memory latencies[4:7] */
2093 val = 1; /* data0 to be programmed to 1 for second set */
2094 mutex_lock(&dev_priv->rps.hw_lock);
2095 ret = sandybridge_pcode_read(dev_priv,
2096 GEN9_PCODE_READ_MEM_LATENCY,
2097 &val);
2098 mutex_unlock(&dev_priv->rps.hw_lock);
2099 if (ret) {
2100 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2101 return;
2102 }
2103
2104 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2105 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2106 GEN9_MEM_LATENCY_LEVEL_MASK;
2107 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2108 GEN9_MEM_LATENCY_LEVEL_MASK;
2109 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2110 GEN9_MEM_LATENCY_LEVEL_MASK;
2111
2112 /*
2113 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2114 * need to be disabled. We make sure to sanitize the values out
2115 * of the punit to satisfy this requirement.
2116 */
2117 for (level = 1; level <= max_level; level++) {
2118 if (wm[level] == 0) {
2119 for (i = level + 1; i <= max_level; i++)
2120 wm[i] = 0;
2121 break;
2122 }
2123 }
2124
2125 /*
2126 * WaWmMemoryReadLatency:skl
2127 *
2128 * punit doesn't take into account the read latency so we need
2129 * to add 2us to the various latency levels we retrieve from the
2130 	 * punit when level 0 response data is 0us.
2131 */
2132 if (wm[0] == 0) {
2133 wm[0] += 2;
2134 for (level = 1; level <= max_level; level++) {
2135 if (wm[level] == 0)
2136 break;
2137 wm[level] += 2;
2138 }
2139 }
2140
2141 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2142 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2143
2144 wm[0] = (sskpd >> 56) & 0xFF;
2145 if (wm[0] == 0)
2146 wm[0] = sskpd & 0xF;
2147 wm[1] = (sskpd >> 4) & 0xFF;
2148 wm[2] = (sskpd >> 12) & 0xFF;
2149 wm[3] = (sskpd >> 20) & 0x1FF;
2150 wm[4] = (sskpd >> 32) & 0x1FF;
2151 } else if (INTEL_INFO(dev)->gen >= 6) {
2152 uint32_t sskpd = I915_READ(MCH_SSKPD);
2153
2154 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2155 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2156 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2157 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2158 } else if (INTEL_INFO(dev)->gen >= 5) {
2159 uint32_t mltr = I915_READ(MLTR_ILK);
2160
2161 /* ILK primary LP0 latency is 700 ns */
2162 wm[0] = 7;
2163 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2164 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2165 }
2166 }
2167
2168 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2169 {
2170 /* ILK sprite LP0 latency is 1300 ns */
2171 if (INTEL_INFO(dev)->gen == 5)
2172 wm[0] = 13;
2173 }
2174
2175 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2176 {
2177 /* ILK cursor LP0 latency is 1300 ns */
2178 if (INTEL_INFO(dev)->gen == 5)
2179 wm[0] = 13;
2180
2181 /* WaDoubleCursorLP3Latency:ivb */
2182 if (IS_IVYBRIDGE(dev))
2183 wm[3] *= 2;
2184 }
2185
2186 int ilk_wm_max_level(const struct drm_device *dev)
2187 {
2188 /* how many WM levels are we expecting */
2189 if (INTEL_INFO(dev)->gen >= 9)
2190 return 7;
2191 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2192 return 4;
2193 else if (INTEL_INFO(dev)->gen >= 6)
2194 return 3;
2195 else
2196 return 2;
2197 }
2198
2199 static void intel_print_wm_latency(struct drm_device *dev,
2200 const char *name,
2201 const uint16_t wm[8])
2202 {
2203 int level, max_level = ilk_wm_max_level(dev);
2204
2205 for (level = 0; level <= max_level; level++) {
2206 unsigned int latency = wm[level];
2207
2208 if (latency == 0) {
2209 DRM_ERROR("%s WM%d latency not provided\n",
2210 name, level);
2211 continue;
2212 }
2213
2214 /*
2215 * - latencies are in us on gen9.
2216 * - before then, WM1+ latency values are in 0.5us units
2217 */
2218 if (IS_GEN9(dev))
2219 latency *= 10;
2220 else if (level > 0)
2221 latency *= 5;
2222
2223 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2224 name, level, wm[level],
2225 latency / 10, latency % 10);
2226 }
2227 }
2228
2229 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2230 uint16_t wm[5], uint16_t min)
2231 {
2232 int level, max_level = ilk_wm_max_level(dev_priv->dev);
2233
2234 if (wm[0] >= min)
2235 return false;
2236
2237 wm[0] = max(wm[0], min);
2238 for (level = 1; level <= max_level; level++)
2239 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2240
2241 return true;
2242 }
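
/*
 * The min argument is in 0.1us units, matching wm[0]; WM1+ values are
 * stored in 0.5us units, hence the DIV_ROUND_UP(min, 5) conversion.  With
 * min = 12 (1.2us), for instance, wm[0] is raised to at least 12 and every
 * higher level to at least 3 (1.5us).
 */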
2243
2244 static void snb_wm_latency_quirk(struct drm_device *dev)
2245 {
2246 struct drm_i915_private *dev_priv = dev->dev_private;
2247 bool changed;
2248
2249 /*
2250 * The BIOS provided WM memory latency values are often
2251 * inadequate for high resolution displays. Adjust them.
2252 */
2253 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2254 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2255 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2256
2257 if (!changed)
2258 return;
2259
2260 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2261 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2262 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2263 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2264 }
2265
2266 static void ilk_setup_wm_latency(struct drm_device *dev)
2267 {
2268 struct drm_i915_private *dev_priv = dev->dev_private;
2269
2270 intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2271
2272 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2273 sizeof(dev_priv->wm.pri_latency));
2274 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2275 sizeof(dev_priv->wm.pri_latency));
2276
2277 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2278 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2279
2280 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2281 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2282 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2283
2284 if (IS_GEN6(dev))
2285 snb_wm_latency_quirk(dev);
2286 }
2287
2288 static void skl_setup_wm_latency(struct drm_device *dev)
2289 {
2290 struct drm_i915_private *dev_priv = dev->dev_private;
2291
2292 intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2293 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2294 }
2295
2296 static void ilk_compute_wm_config(struct drm_device *dev,
2297 struct intel_wm_config *config)
2298 {
2299 struct intel_crtc *intel_crtc;
2300
2301 /* Compute the currently _active_ config */
2302 for_each_intel_crtc(dev, intel_crtc) {
2303 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
2304
2305 if (!wm->pipe_enabled)
2306 continue;
2307
2308 config->sprites_enabled |= wm->sprites_enabled;
2309 config->sprites_scaled |= wm->sprites_scaled;
2310 config->num_pipes_active++;
2311 }
2312 }
2313
2314 /* Compute new watermarks for the pipe */
2315 static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
2316 struct intel_pipe_wm *pipe_wm)
2317 {
2318 struct drm_crtc *crtc = cstate->base.crtc;
2319 struct drm_device *dev = crtc->dev;
2320 const struct drm_i915_private *dev_priv = dev->dev_private;
2321 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2322 struct intel_plane *intel_plane;
2323 struct intel_plane_state *sprstate = NULL;
2324 int level, max_level = ilk_wm_max_level(dev);
2325 /* LP0 watermark maximums depend on this pipe alone */
2326 struct intel_wm_config config = {
2327 .num_pipes_active = 1,
2328 };
2329 struct ilk_wm_maximums max;
2330
2331 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2332 if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
2333 sprstate = to_intel_plane_state(intel_plane->base.state);
2334 break;
2335 }
2336 }
2337
2338 config.sprites_enabled = sprstate->visible;
2339 config.sprites_scaled = sprstate->visible &&
2340 (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
2341 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
2342
2343 pipe_wm->pipe_enabled = cstate->base.active;
2344 pipe_wm->sprites_enabled = sprstate->visible;
2345 pipe_wm->sprites_scaled = config.sprites_scaled;
2346
2347 /* ILK/SNB: LP2+ watermarks only w/o sprites */
2348 if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible)
2349 max_level = 1;
2350
2351 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2352 if (config.sprites_scaled)
2353 max_level = 0;
2354
2355 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
2356
2357 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2358 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2359
2360 /* LP0 watermarks always use 1/2 DDB partitioning */
2361 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2362
2363 /* At least LP0 must be valid */
2364 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
2365 return false;
2366
2367 ilk_compute_wm_reg_maximums(dev, 1, &max);
2368
2369 for (level = 1; level <= max_level; level++) {
2370 struct intel_wm_level wm = {};
2371
2372 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
2373
2374 /*
2375 * Disable any watermark level that exceeds the
2376 * register maximums since such watermarks are
2377 * always invalid.
2378 */
2379 if (!ilk_validate_wm_level(level, &max, &wm))
2380 break;
2381
2382 pipe_wm->wm[level] = wm;
2383 }
2384
2385 return true;
2386 }
2387
2388 /*
2389 * Merge the watermarks from all active pipes for a specific level.
2390 */
2391 static void ilk_merge_wm_level(struct drm_device *dev,
2392 int level,
2393 struct intel_wm_level *ret_wm)
2394 {
2395 const struct intel_crtc *intel_crtc;
2396
2397 ret_wm->enable = true;
2398
2399 for_each_intel_crtc(dev, intel_crtc) {
2400 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2401 const struct intel_wm_level *wm = &active->wm[level];
2402
2403 if (!active->pipe_enabled)
2404 continue;
2405
2406 /*
2407 * The watermark values may have been used in the past,
2408 * so we must maintain them in the registers for some
2409 * time even if the level is now disabled.
2410 */
2411 if (!wm->enable)
2412 ret_wm->enable = false;
2413
2414 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2415 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2416 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2417 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2418 }
2419 }
2420
2421 /*
2422 * Merge all low power watermarks for all active pipes.
2423 */
2424 static void ilk_wm_merge(struct drm_device *dev,
2425 const struct intel_wm_config *config,
2426 const struct ilk_wm_maximums *max,
2427 struct intel_pipe_wm *merged)
2428 {
2429 struct drm_i915_private *dev_priv = dev->dev_private;
2430 int level, max_level = ilk_wm_max_level(dev);
2431 int last_enabled_level = max_level;
2432
2433 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2434 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2435 config->num_pipes_active > 1)
2436 return;
2437
2438 /* ILK: FBC WM must be disabled always */
2439 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2440
2441 /* merge each WM1+ level */
2442 for (level = 1; level <= max_level; level++) {
2443 struct intel_wm_level *wm = &merged->wm[level];
2444
2445 ilk_merge_wm_level(dev, level, wm);
2446
2447 if (level > last_enabled_level)
2448 wm->enable = false;
2449 else if (!ilk_validate_wm_level(level, max, wm))
2450 /* make sure all following levels get disabled */
2451 last_enabled_level = level - 1;
2452
2453 /*
2454 * The spec says it is preferred to disable
2455 * FBC WMs instead of disabling a WM level.
2456 */
2457 if (wm->fbc_val > max->fbc) {
2458 if (wm->enable)
2459 merged->fbc_wm_enabled = false;
2460 wm->fbc_val = 0;
2461 }
2462 }
2463
2464 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2465 /*
2466 * FIXME this is racy. FBC might get enabled later.
2467 * What we should check here is whether FBC can be
2468 * enabled sometime later.
2469 */
2470 if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2471 intel_fbc_enabled(dev_priv)) {
2472 for (level = 2; level <= max_level; level++) {
2473 struct intel_wm_level *wm = &merged->wm[level];
2474
2475 wm->enable = false;
2476 }
2477 }
2478 }
2479
2480 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2481 {
2482 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2483 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2484 }
2485
2486 /* The value we need to program into the WM_LPx latency field */
2487 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2488 {
2489 struct drm_i915_private *dev_priv = dev->dev_private;
2490
2491 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2492 return 2 * level;
2493 else
2494 return dev_priv->wm.pri_latency[level];
2495 }
2496
2497 static void ilk_compute_wm_results(struct drm_device *dev,
2498 const struct intel_pipe_wm *merged,
2499 enum intel_ddb_partitioning partitioning,
2500 struct ilk_wm_values *results)
2501 {
2502 struct intel_crtc *intel_crtc;
2503 int level, wm_lp;
2504
2505 results->enable_fbc_wm = merged->fbc_wm_enabled;
2506 results->partitioning = partitioning;
2507
2508 /* LP1+ register values */
2509 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2510 const struct intel_wm_level *r;
2511
2512 level = ilk_wm_lp_to_level(wm_lp, merged);
2513
2514 r = &merged->wm[level];
2515
2516 /*
2517 * Maintain the watermark values even if the level is
2518 * disabled. Doing otherwise could cause underruns.
2519 */
2520 results->wm_lp[wm_lp - 1] =
2521 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2522 (r->pri_val << WM1_LP_SR_SHIFT) |
2523 r->cur_val;
2524
2525 if (r->enable)
2526 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2527
2528 if (INTEL_INFO(dev)->gen >= 8)
2529 results->wm_lp[wm_lp - 1] |=
2530 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2531 else
2532 results->wm_lp[wm_lp - 1] |=
2533 r->fbc_val << WM1_LP_FBC_SHIFT;
2534
2535 /*
2536 * Always set WM1S_LP_EN when spr_val != 0, even if the
2537 * level is disabled. Doing otherwise could cause underruns.
2538 */
2539 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2540 WARN_ON(wm_lp != 1);
2541 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2542 } else
2543 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2544 }
2545
2546 /* LP0 register values */
2547 for_each_intel_crtc(dev, intel_crtc) {
2548 enum i915_pipe pipe = intel_crtc->pipe;
2549 const struct intel_wm_level *r =
2550 &intel_crtc->wm.active.wm[0];
2551
2552 if (WARN_ON(!r->enable))
2553 continue;
2554
2555 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2556
2557 results->wm_pipe[pipe] =
2558 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2559 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2560 r->cur_val;
2561 }
2562 }
2563
2564 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
2565 * case both are at the same level. Prefer r1 in case they're the same. */
2566 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2567 struct intel_pipe_wm *r1,
2568 struct intel_pipe_wm *r2)
2569 {
2570 int level, max_level = ilk_wm_max_level(dev);
2571 int level1 = 0, level2 = 0;
2572
2573 for (level = 1; level <= max_level; level++) {
2574 if (r1->wm[level].enable)
2575 level1 = level;
2576 if (r2->wm[level].enable)
2577 level2 = level;
2578 }
2579
2580 if (level1 == level2) {
2581 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2582 return r2;
2583 else
2584 return r1;
2585 } else if (level1 > level2) {
2586 return r1;
2587 } else {
2588 return r2;
2589 }
2590 }
2591
2592 /* dirty bits used to track which watermarks need changes */
2593 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2594 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2595 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2596 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2597 #define WM_DIRTY_FBC (1 << 24)
2598 #define WM_DIRTY_DDB (1 << 25)
2599
2600 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2601 const struct ilk_wm_values *old,
2602 const struct ilk_wm_values *new)
2603 {
2604 unsigned int dirty = 0;
2605 enum i915_pipe pipe;
2606 int wm_lp;
2607
2608 for_each_pipe(dev_priv, pipe) {
2609 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2610 dirty |= WM_DIRTY_LINETIME(pipe);
2611 /* Must disable LP1+ watermarks too */
2612 dirty |= WM_DIRTY_LP_ALL;
2613 }
2614
2615 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2616 dirty |= WM_DIRTY_PIPE(pipe);
2617 /* Must disable LP1+ watermarks too */
2618 dirty |= WM_DIRTY_LP_ALL;
2619 }
2620 }
2621
2622 if (old->enable_fbc_wm != new->enable_fbc_wm) {
2623 dirty |= WM_DIRTY_FBC;
2624 /* Must disable LP1+ watermarks too */
2625 dirty |= WM_DIRTY_LP_ALL;
2626 }
2627
2628 if (old->partitioning != new->partitioning) {
2629 dirty |= WM_DIRTY_DDB;
2630 /* Must disable LP1+ watermarks too */
2631 dirty |= WM_DIRTY_LP_ALL;
2632 }
2633
2634 /* LP1+ watermarks already deemed dirty, no need to continue */
2635 if (dirty & WM_DIRTY_LP_ALL)
2636 return dirty;
2637
2638 /* Find the lowest numbered LP1+ watermark in need of an update... */
2639 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2640 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2641 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2642 break;
2643 }
2644
2645 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2646 for (; wm_lp <= 3; wm_lp++)
2647 dirty |= WM_DIRTY_LP(wm_lp);
2648
2649 return dirty;
2650 }
2651
2652 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2653 unsigned int dirty)
2654 {
2655 struct ilk_wm_values *previous = &dev_priv->wm.hw;
2656 bool changed = false;
2657
2658 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2659 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2660 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2661 changed = true;
2662 }
2663 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2664 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2665 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2666 changed = true;
2667 }
2668 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2669 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2670 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2671 changed = true;
2672 }
2673
2674 /*
2675 * Don't touch WM1S_LP_EN here.
2676 * Doing so could cause underruns.
2677 */
2678
2679 return changed;
2680 }
2681
2682 /*
2683  * The spec says we shouldn't write when we don't need to, because every write
2684 * causes WMs to be re-evaluated, expending some power.
2685 */
2686 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2687 struct ilk_wm_values *results)
2688 {
2689 struct drm_device *dev = dev_priv->dev;
2690 struct ilk_wm_values *previous = &dev_priv->wm.hw;
2691 unsigned int dirty;
2692 uint32_t val;
2693
2694 dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2695 if (!dirty)
2696 return;
2697
2698 _ilk_disable_lp_wm(dev_priv, dirty);
2699
2700 if (dirty & WM_DIRTY_PIPE(PIPE_A))
2701 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2702 if (dirty & WM_DIRTY_PIPE(PIPE_B))
2703 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2704 if (dirty & WM_DIRTY_PIPE(PIPE_C))
2705 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2706
2707 if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2708 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2709 if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2710 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2711 if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2712 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2713
2714 if (dirty & WM_DIRTY_DDB) {
2715 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2716 val = I915_READ(WM_MISC);
2717 if (results->partitioning == INTEL_DDB_PART_1_2)
2718 val &= ~WM_MISC_DATA_PARTITION_5_6;
2719 else
2720 val |= WM_MISC_DATA_PARTITION_5_6;
2721 I915_WRITE(WM_MISC, val);
2722 } else {
2723 val = I915_READ(DISP_ARB_CTL2);
2724 if (results->partitioning == INTEL_DDB_PART_1_2)
2725 val &= ~DISP_DATA_PARTITION_5_6;
2726 else
2727 val |= DISP_DATA_PARTITION_5_6;
2728 I915_WRITE(DISP_ARB_CTL2, val);
2729 }
2730 }
2731
2732 if (dirty & WM_DIRTY_FBC) {
2733 val = I915_READ(DISP_ARB_CTL);
2734 if (results->enable_fbc_wm)
2735 val &= ~DISP_FBC_WM_DIS;
2736 else
2737 val |= DISP_FBC_WM_DIS;
2738 I915_WRITE(DISP_ARB_CTL, val);
2739 }
2740
2741 if (dirty & WM_DIRTY_LP(1) &&
2742 previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2743 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2744
2745 if (INTEL_INFO(dev)->gen >= 7) {
2746 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2747 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2748 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2749 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2750 }
2751
2752 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2753 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2754 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2755 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2756 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2757 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2758
2759 dev_priv->wm.hw = *results;
2760 }
2761
2762 static bool ilk_disable_lp_wm(struct drm_device *dev)
2763 {
2764 struct drm_i915_private *dev_priv = dev->dev_private;
2765
2766 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2767 }
2768
2769 /*
2770 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2771 * different active planes.
2772 */
2773
2774 #define SKL_DDB_SIZE 896 /* in blocks */
2775 #define BXT_DDB_SIZE 512
2776
2777 static void
2778 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2779 struct drm_crtc *for_crtc,
2780 const struct intel_wm_config *config,
2781 const struct skl_pipe_wm_parameters *params,
2782 struct skl_ddb_entry *alloc /* out */)
2783 {
2784 struct drm_crtc *crtc;
2785 unsigned int pipe_size, ddb_size;
2786 int nth_active_pipe;
2787
2788 if (!params->active) {
2789 alloc->start = 0;
2790 alloc->end = 0;
2791 return;
2792 }
2793
2794 if (IS_BROXTON(dev))
2795 ddb_size = BXT_DDB_SIZE;
2796 else
2797 ddb_size = SKL_DDB_SIZE;
2798
2799 ddb_size -= 4; /* 4 blocks for bypass path allocation */
2800
2801 nth_active_pipe = 0;
2802 for_each_crtc(dev, crtc) {
2803 if (!to_intel_crtc(crtc)->active)
2804 continue;
2805
2806 if (crtc == for_crtc)
2807 break;
2808
2809 nth_active_pipe++;
2810 }
2811
2812 pipe_size = ddb_size / config->num_pipes_active;
2813 alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2814 alloc->end = alloc->start + pipe_size;
2815 }
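
/*
 * Worked example: on SKL with two active pipes, 896 - 4 = 892 blocks remain
 * after the bypass allocation, so each pipe gets 892 / 2 = 446 blocks; the
 * first active pipe is assigned [0, 446) and the second [446, 892).
 */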
2816
2817 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2818 {
2819 if (config->num_pipes_active == 1)
2820 return 32;
2821
2822 return 8;
2823 }
2824
2825 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2826 {
2827 entry->start = reg & 0x3ff;
2828 entry->end = (reg >> 16) & 0x3ff;
2829 if (entry->end)
2830 entry->end += 1;
2831 }
2832
2833 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2834 struct skl_ddb_allocation *ddb /* out */)
2835 {
2836 enum i915_pipe pipe;
2837 int plane;
2838 u32 val;
2839
2840 memset(ddb, 0, sizeof(*ddb));
2841
2842 for_each_pipe(dev_priv, pipe) {
2843 if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
2844 continue;
2845
2846 for_each_plane(dev_priv, pipe, plane) {
2847 val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2848 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2849 val);
2850 }
2851
2852 val = I915_READ(CUR_BUF_CFG(pipe));
2853 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
2854 val);
2855 }
2856 }
2857
2858 static unsigned int
2859 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
2860 {
2861
2862 /* for planar format */
2863 if (p->y_bytes_per_pixel) {
2864 if (y) /* y-plane data rate */
2865 return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
2866 else /* uv-plane data rate */
2867 return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
2868 }
2869
2870 /* for packed formats */
2871 return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2872 }
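
/*
 * For instance, an NV12 3840x2160 plane (y_bytes_per_pixel = 1,
 * bytes_per_pixel = 2 for the subsampled UV plane) yields
 * 3840 * 2160 * 1 = 8294400 for the Y plane and 1920 * 1080 * 2 = 4147200
 * for the UV plane; packed formats use the single width * height * cpp
 * product.
 */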
2873
2874 /*
2875 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2876  * an 8192x4096@32bpp framebuffer:
2877 * 3 * 4096 * 8192 * 4 < 2^32
2878 */
2879 static unsigned int
2880 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2881 const struct skl_pipe_wm_parameters *params)
2882 {
2883 unsigned int total_data_rate = 0;
2884 int plane;
2885
2886 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2887 const struct intel_plane_wm_parameters *p;
2888
2889 		p = &params->plane[plane];
2890 if (!p->enabled)
2891 continue;
2892
2893 total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
2894 if (p->y_bytes_per_pixel) {
2895 total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
2896 }
2897 }
2898
2899 return total_data_rate;
2900 }
2901
2902 static void
2903 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2904 const struct intel_wm_config *config,
2905 const struct skl_pipe_wm_parameters *params,
2906 struct skl_ddb_allocation *ddb /* out */)
2907 {
2908 struct drm_device *dev = crtc->dev;
2909 struct drm_i915_private *dev_priv = dev->dev_private;
2910 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2911 enum i915_pipe pipe = intel_crtc->pipe;
2912 struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2913 uint16_t alloc_size, start, cursor_blocks;
2914 uint16_t minimum[I915_MAX_PLANES];
2915 uint16_t y_minimum[I915_MAX_PLANES];
2916 unsigned int total_data_rate;
2917 int plane;
2918
2919 skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2920 alloc_size = skl_ddb_entry_size(alloc);
2921 if (alloc_size == 0) {
2922 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2923 memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
2924 sizeof(ddb->plane[pipe][PLANE_CURSOR]));
2925 return;
2926 }
2927
2928 cursor_blocks = skl_cursor_allocation(config);
2929 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
2930 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
2931
2932 alloc_size -= cursor_blocks;
2933 alloc->end -= cursor_blocks;
2934
2935 	/* 1. Allocate the minimum required blocks for each active plane */
2936 for_each_plane(dev_priv, pipe, plane) {
2937 const struct intel_plane_wm_parameters *p;
2938
2939 		p = &params->plane[plane];
2940 if (!p->enabled)
2941 continue;
2942
2943 minimum[plane] = 8;
2944 alloc_size -= minimum[plane];
2945 y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
2946 alloc_size -= y_minimum[plane];
2947 }
2948
2949 /*
2950 * 2. Distribute the remaining space in proportion to the amount of
2951 * data each plane needs to fetch from memory.
2952 *
2953 * FIXME: we may not allocate every single block here.
2954 */
2955 total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2956
2957 start = alloc->start;
2958 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2959 const struct intel_plane_wm_parameters *p;
2960 unsigned int data_rate, y_data_rate;
2961 uint16_t plane_blocks, y_plane_blocks = 0;
2962
2963 		p = &params->plane[plane];
2964 if (!p->enabled)
2965 continue;
2966
2967 data_rate = skl_plane_relative_data_rate(p, 0);
2968
2969 /*
2970 * allocation for (packed formats) or (uv-plane part of planar format):
2971 		 * promote the expression to 64 bits to avoid overflow; the
2972 		 * result stays below the available space since data_rate / total_data_rate < 1
2973 */
2974 plane_blocks = minimum[plane];
2975 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
2976 total_data_rate);
2977
2978 ddb->plane[pipe][plane].start = start;
2979 ddb->plane[pipe][plane].end = start + plane_blocks;
2980
2981 start += plane_blocks;
2982
2983 /*
2984 * allocation for y_plane part of planar format:
2985 */
2986 if (p->y_bytes_per_pixel) {
2987 y_data_rate = skl_plane_relative_data_rate(p, 1);
2988 y_plane_blocks = y_minimum[plane];
2989 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
2990 total_data_rate);
2991
2992 ddb->y_plane[pipe][plane].start = start;
2993 ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
2994
2995 start += y_plane_blocks;
2996 }
2997
2998 }
2999
3000 }
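
/*
 * Worked example: with a single SKL pipe the allocation starts at
 * 896 - 4 = 892 blocks; the cursor takes the top 32 ([860, 892)), and a
 * lone packed-format primary plane gets its 8-block minimum plus all of
 * the remaining 852 blocks, i.e. [0, 860).
 */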
3001
3002 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
3003 {
3004 /* TODO: Take into account the scalers once we support them */
3005 return config->base.adjusted_mode.crtc_clock;
3006 }
3007
3008 /*
3009 * The max latency should be 257 (max the punit can code is 255 and we add 2us
3010 * for the read latency) and bytes_per_pixel should always be <= 8, so that
3011 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
3012 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
3013 */
3014 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
3015 uint32_t latency)
3016 {
3017 uint32_t wm_intermediate_val, ret;
3018
3019 if (latency == 0)
3020 return UINT_MAX;
3021
3022 wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
3023 ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3024
3025 return ret;
3026 }
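
/*
 * Note on skl_wm_method1(): with pixel_rate in kHz and latency in us
 * (gen9 latencies are microseconds, see intel_read_wm_latency()), the two
 * divisions reduce to bytes-fetched-during-latency / 512, i.e. the result
 * is in 512-byte blocks.  For example, 594000 kHz at 4 bytes/pixel with a
 * 4us latency gives 594000 * 4 * 4 / 512 = 18562, and
 * DIV_ROUND_UP(18562, 1000) = 19 blocks.
 */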
3027
3028 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3029 uint32_t horiz_pixels, uint8_t bytes_per_pixel,
3030 uint64_t tiling, uint32_t latency)
3031 {
3032 uint32_t ret;
3033 uint32_t plane_bytes_per_line, plane_blocks_per_line;
3034 uint32_t wm_intermediate_val;
3035
3036 if (latency == 0)
3037 return UINT_MAX;
3038
3039 plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
3040
3041 if (tiling == I915_FORMAT_MOD_Y_TILED ||
3042 tiling == I915_FORMAT_MOD_Yf_TILED) {
3043 plane_bytes_per_line *= 4;
3044 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3045 plane_blocks_per_line /= 4;
3046 } else {
3047 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3048 }
3049
3050 wm_intermediate_val = latency * pixel_rate;
3051 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3052 plane_blocks_per_line;
3053
3054 return ret;
3055 }
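
/*
 * Note on skl_wm_method2(): plane_blocks_per_line is the plane's line
 * footprint in 512-byte blocks; for Y/Yf-tiled surfaces it is computed
 * over a 4-line span and divided back down, so partial blocks are
 * accounted for at tile-row granularity.  The result is lines elapsed
 * during the latency window (rounded up) times blocks per line, i.e. the
 * line-granular counterpart of skl_wm_method1().
 */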
3056
3057 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3058 const struct intel_crtc *intel_crtc)
3059 {
3060 struct drm_device *dev = intel_crtc->base.dev;
3061 struct drm_i915_private *dev_priv = dev->dev_private;
3062 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3063 enum i915_pipe pipe = intel_crtc->pipe;
3064
3065 if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
3066 sizeof(new_ddb->plane[pipe])))
3067 return true;
3068
3069 if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
3070 sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
3071 return true;
3072
3073 return false;
3074 }
3075
3076 static void skl_compute_wm_global_parameters(struct drm_device *dev,
3077 struct intel_wm_config *config)
3078 {
3079 struct drm_crtc *crtc;
3080 struct drm_plane *plane;
3081
3082 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3083 config->num_pipes_active += to_intel_crtc(crtc)->active;
3084
3085 /* FIXME: I don't think we need those two global parameters on SKL */
3086 list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3087 struct intel_plane *intel_plane = to_intel_plane(plane);
3088
3089 config->sprites_enabled |= intel_plane->wm.enabled;
3090 config->sprites_scaled |= intel_plane->wm.scaled;
3091 }
3092 }
3093
3094 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
3095 struct skl_pipe_wm_parameters *p)
3096 {
3097 struct drm_device *dev = crtc->dev;
3098 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3099 enum i915_pipe pipe = intel_crtc->pipe;
3100 struct drm_plane *plane;
3101 struct drm_framebuffer *fb;
3102 int i = 1; /* Index for sprite planes start */
3103
3104 p->active = intel_crtc->active;
3105 if (p->active) {
3106 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
3107 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
3108
3109 fb = crtc->primary->state->fb;
3110 /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
3111 if (fb) {
3112 p->plane[0].enabled = true;
3113 p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3114 drm_format_plane_cpp(fb->pixel_format, 1) :
3115 drm_format_plane_cpp(fb->pixel_format, 0);
3116 p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3117 drm_format_plane_cpp(fb->pixel_format, 0) : 0;
3118 p->plane[0].tiling = fb->modifier[0];
3119 } else {
3120 p->plane[0].enabled = false;
3121 p->plane[0].bytes_per_pixel = 0;
3122 p->plane[0].y_bytes_per_pixel = 0;
3123 p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
3124 }
3125 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
3126 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
3127 p->plane[0].rotation = crtc->primary->state->rotation;
3128
3129 fb = crtc->cursor->state->fb;
3130 p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
3131 if (fb) {
3132 p->plane[PLANE_CURSOR].enabled = true;
3133 p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
3134 p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
3135 p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
3136 } else {
3137 p->plane[PLANE_CURSOR].enabled = false;
3138 p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
3139 p->plane[PLANE_CURSOR].horiz_pixels = 64;
3140 p->plane[PLANE_CURSOR].vert_pixels = 64;
3141 }
3142 }
3143
3144 list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3145 struct intel_plane *intel_plane = to_intel_plane(plane);
3146
3147 if (intel_plane->pipe == pipe &&
3148 plane->type == DRM_PLANE_TYPE_OVERLAY)
3149 p->plane[i++] = intel_plane->wm;
3150 }
3151 }
3152
3153 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3154 struct skl_pipe_wm_parameters *p,
3155 struct intel_plane_wm_parameters *p_params,
3156 uint16_t ddb_allocation,
3157 int level,
3158 uint16_t *out_blocks, /* out */
3159 uint8_t *out_lines /* out */)
3160 {
3161 uint32_t latency = dev_priv->wm.skl_latency[level];
3162 uint32_t method1, method2;
3163 uint32_t plane_bytes_per_line, plane_blocks_per_line;
3164 uint32_t res_blocks, res_lines;
3165 uint32_t selected_result;
3166 uint8_t bytes_per_pixel;
3167
3168 if (latency == 0 || !p->active || !p_params->enabled)
3169 return false;
3170
3171 bytes_per_pixel = p_params->y_bytes_per_pixel ?
3172 p_params->y_bytes_per_pixel :
3173 p_params->bytes_per_pixel;
3174 method1 = skl_wm_method1(p->pixel_rate,
3175 bytes_per_pixel,
3176 latency);
3177 method2 = skl_wm_method2(p->pixel_rate,
3178 p->pipe_htotal,
3179 p_params->horiz_pixels,
3180 bytes_per_pixel,
3181 p_params->tiling,
3182 latency);
3183
3184 plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
3185 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3186
3187 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3188 p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
3189 uint32_t min_scanlines = 4;
3190 uint32_t y_tile_minimum;
3191 if (intel_rotation_90_or_270(p_params->rotation)) {
3192 switch (p_params->bytes_per_pixel) {
3193 case 1:
3194 min_scanlines = 16;
3195 break;
3196 case 2:
3197 min_scanlines = 8;
3198 break;
3199 case 8:
3200 WARN(1, "Unsupported pixel depth for rotation");
3201 }
3202 }
3203 y_tile_minimum = plane_blocks_per_line * min_scanlines;
3204 selected_result = max(method2, y_tile_minimum);
3205 } else {
3206 if ((ddb_allocation / plane_blocks_per_line) >= 1)
3207 selected_result = min(method1, method2);
3208 else
3209 selected_result = method1;
3210 }
3211
3212 res_blocks = selected_result + 1;
3213 res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3214
3215 if (level >= 1 && level <= 7) {
3216 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3217 p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
3218 res_lines += 4;
3219 else
3220 res_blocks++;
3221 }
3222
3223 if (res_blocks >= ddb_allocation || res_lines > 31)
3224 return false;
3225
3226 *out_blocks = res_blocks;
3227 *out_lines = res_lines;
3228
3229 return true;
3230 }
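
/*
 * In short: Y/Yf-tiled planes must cover at least min_scanlines worth of
 * blocks (more when rotated 90/270 at small pixel sizes), so the larger of
 * method 2 and that floor is used; linear/X-tiled planes use the smaller
 * of the two methods when the DDB allocation spans at least one full line,
 * and method 1 otherwise.  The level is reported as unusable if the blocks
 * or lines exceed what the DDB allocation or the 31-line field can hold.
 */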
3231
3232 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3233 struct skl_ddb_allocation *ddb,
3234 struct skl_pipe_wm_parameters *p,
3235 enum i915_pipe pipe,
3236 int level,
3237 int num_planes,
3238 struct skl_wm_level *result)
3239 {
3240 uint16_t ddb_blocks;
3241 int i;
3242
3243 for (i = 0; i < num_planes; i++) {
3244 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3245
3246 result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3247 p, &p->plane[i],
3248 ddb_blocks,
3249 level,
3250 &result->plane_res_b[i],
3251 &result->plane_res_l[i]);
3252 }
3253
3254 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
3255 result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
3256 &p->plane[PLANE_CURSOR],
3257 ddb_blocks, level,
3258 &result->plane_res_b[PLANE_CURSOR],
3259 &result->plane_res_l[PLANE_CURSOR]);
3260 }
3261
3262 static uint32_t
3263 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
3264 {
3265 if (!to_intel_crtc(crtc)->active)
3266 return 0;
3267
3268 if (WARN_ON(p->pixel_rate == 0))
3269 return 0;
3270
3271 return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
3272 }
3273
3274 static void skl_compute_transition_wm(struct drm_crtc *crtc,
3275 struct skl_pipe_wm_parameters *params,
3276 struct skl_wm_level *trans_wm /* out */)
3277 {
3278 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3279 int i;
3280
3281 if (!params->active)
3282 return;
3283
3284 /* Until we know more, just disable transition WMs */
3285 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3286 trans_wm->plane_en[i] = false;
3287 trans_wm->plane_en[PLANE_CURSOR] = false;
3288 }
3289
3290 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
3291 struct skl_ddb_allocation *ddb,
3292 struct skl_pipe_wm_parameters *params,
3293 struct skl_pipe_wm *pipe_wm)
3294 {
3295 struct drm_device *dev = crtc->dev;
3296 const struct drm_i915_private *dev_priv = dev->dev_private;
3297 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3298 int level, max_level = ilk_wm_max_level(dev);
3299
3300 for (level = 0; level <= max_level; level++) {
3301 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
3302 level, intel_num_planes(intel_crtc),
3303 &pipe_wm->wm[level]);
3304 }
3305 pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
3306
3307 skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
3308 }
3309
3310 static void skl_compute_wm_results(struct drm_device *dev,
3311 struct skl_pipe_wm_parameters *p,
3312 struct skl_pipe_wm *p_wm,
3313 struct skl_wm_values *r,
3314 struct intel_crtc *intel_crtc)
3315 {
3316 int level, max_level = ilk_wm_max_level(dev);
3317 enum i915_pipe pipe = intel_crtc->pipe;
3318 uint32_t temp;
3319 int i;
3320
3321 for (level = 0; level <= max_level; level++) {
3322 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3323 temp = 0;
3324
3325 temp |= p_wm->wm[level].plane_res_l[i] <<
3326 PLANE_WM_LINES_SHIFT;
3327 temp |= p_wm->wm[level].plane_res_b[i];
3328 if (p_wm->wm[level].plane_en[i])
3329 temp |= PLANE_WM_EN;
3330
3331 r->plane[pipe][i][level] = temp;
3332 }
3333
3334 temp = 0;
3335
3336 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3337 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
3338
3339 if (p_wm->wm[level].plane_en[PLANE_CURSOR])
3340 temp |= PLANE_WM_EN;
3341
3342 r->plane[pipe][PLANE_CURSOR][level] = temp;
3343
3344 }
3345
3346 /* transition WMs */
3347 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3348 temp = 0;
3349 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3350 temp |= p_wm->trans_wm.plane_res_b[i];
3351 if (p_wm->trans_wm.plane_en[i])
3352 temp |= PLANE_WM_EN;
3353
3354 r->plane_trans[pipe][i] = temp;
3355 }
3356
3357 temp = 0;
3358 temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3359 temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
3360 if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
3361 temp |= PLANE_WM_EN;
3362
3363 r->plane_trans[pipe][PLANE_CURSOR] = temp;
3364
3365 r->wm_linetime[pipe] = p_wm->linetime;
3366 }
3367
3368 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
3369 const struct skl_ddb_entry *entry)
3370 {
3371 if (entry->end)
3372 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3373 else
3374 I915_WRITE(reg, 0);
3375 }
3376
3377 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3378 const struct skl_wm_values *new)
3379 {
3380 struct drm_device *dev = dev_priv->dev;
3381 struct intel_crtc *crtc;
3382
3383 list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
3384 int i, level, max_level = ilk_wm_max_level(dev);
3385 enum i915_pipe pipe = crtc->pipe;
3386
3387 if (!new->dirty[pipe])
3388 continue;
3389
3390 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3391
3392 for (level = 0; level <= max_level; level++) {
3393 for (i = 0; i < intel_num_planes(crtc); i++)
3394 I915_WRITE(PLANE_WM(pipe, i, level),
3395 new->plane[pipe][i][level]);
3396 I915_WRITE(CUR_WM(pipe, level),
3397 new->plane[pipe][PLANE_CURSOR][level]);
3398 }
3399 for (i = 0; i < intel_num_planes(crtc); i++)
3400 I915_WRITE(PLANE_WM_TRANS(pipe, i),
3401 new->plane_trans[pipe][i]);
3402 I915_WRITE(CUR_WM_TRANS(pipe),
3403 new->plane_trans[pipe][PLANE_CURSOR]);
3404
3405 for (i = 0; i < intel_num_planes(crtc); i++) {
3406 skl_ddb_entry_write(dev_priv,
3407 PLANE_BUF_CFG(pipe, i),
3408 &new->ddb.plane[pipe][i]);
3409 skl_ddb_entry_write(dev_priv,
3410 PLANE_NV12_BUF_CFG(pipe, i),
3411 &new->ddb.y_plane[pipe][i]);
3412 }
3413
3414 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3415 &new->ddb.plane[pipe][PLANE_CURSOR]);
3416 }
3417 }
3418
3419 /*
3420 * When setting up a new DDB allocation arrangement, we need to correctly
3421 * sequence the times at which the new allocations for the pipes are taken into
3422 * account or we'll have pipes fetching from space previously allocated to
3423 * another pipe.
3424 *
3425 * Roughly the sequence looks like:
3426 * 1. re-allocate the pipe(s) with the allocation being reduced and not
3427 * overlapping with a previous light-up pipe (another way to put it is:
3428  *    pipes with their new allocation strictly contained within their old ones).
3429 * 2. re-allocate the other pipes that get their allocation reduced
3430 * 3. allocate the pipes having their allocation increased
3431 *
3432 * Steps 1. and 2. are here to take care of the following case:
3433 * - Initially DDB looks like this:
3434 * | B | C |
3435 * - enable pipe A.
3436 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3437 * allocation
3438 * | A | B | C |
3439 *
3440 * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3441 */
3442
3443 static void
3444 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum i915_pipe pipe, int pass)
3445 {
3446 int plane;
3447
3448 DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3449
3450 for_each_plane(dev_priv, pipe, plane) {
3451 I915_WRITE(PLANE_SURF(pipe, plane),
3452 I915_READ(PLANE_SURF(pipe, plane)));
3453 }
3454 I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3455 }
3456
3457 static bool
3458 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3459 const struct skl_ddb_allocation *new,
3460 enum i915_pipe pipe)
3461 {
3462 uint16_t old_size, new_size;
3463
3464 old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3465 new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3466
3467 return old_size != new_size &&
3468 new->pipe[pipe].start >= old->pipe[pipe].start &&
3469 new->pipe[pipe].end <= old->pipe[pipe].end;
3470 }
3471
3472 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3473 struct skl_wm_values *new_values)
3474 {
3475 struct drm_device *dev = dev_priv->dev;
3476 struct skl_ddb_allocation *cur_ddb, *new_ddb;
3477 bool reallocated[I915_MAX_PIPES] = {};
3478 struct intel_crtc *crtc;
3479 enum i915_pipe pipe;
3480
3481 new_ddb = &new_values->ddb;
3482 cur_ddb = &dev_priv->wm.skl_hw.ddb;
3483
3484 /*
3485 * First pass: flush the pipes with the new allocation contained into
3486 * the old space.
3487 *
3488 * We'll wait for the vblank on those pipes to ensure we can safely
3489 * re-allocate the freed space without this pipe fetching from it.
3490 */
3491 for_each_intel_crtc(dev, crtc) {
3492 if (!crtc->active)
3493 continue;
3494
3495 pipe = crtc->pipe;
3496
3497 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3498 continue;
3499
3500 skl_wm_flush_pipe(dev_priv, pipe, 1);
3501 intel_wait_for_vblank(dev, pipe);
3502
3503 reallocated[pipe] = true;
3504 }
3505
3506
3507 /*
3508 * Second pass: flush the pipes that are having their allocation
3509 * reduced, but overlapping with a previous allocation.
3510 *
3511 * Here as well we need to wait for the vblank to make sure the freed
3512 * space is not used anymore.
3513 */
3514 for_each_intel_crtc(dev, crtc) {
3515 if (!crtc->active)
3516 continue;
3517
3518 pipe = crtc->pipe;
3519
3520 if (reallocated[pipe])
3521 continue;
3522
3523 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3524 skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3525 skl_wm_flush_pipe(dev_priv, pipe, 2);
3526 intel_wait_for_vblank(dev, pipe);
3527 reallocated[pipe] = true;
3528 }
3529 }
3530
3531 /*
3532 * Third pass: flush the pipes that got more space allocated.
3533 *
3534 * We don't need to actively wait for the update here, next vblank
3535 * will just get more DDB space with the correct WM values.
3536 */
3537 for_each_intel_crtc(dev, crtc) {
3538 if (!crtc->active)
3539 continue;
3540
3541 pipe = crtc->pipe;
3542
3543 /*
3544 		 * At this point, only the pipes that got more space than
3545 		 * before are left to re-allocate.
3546 */
3547 if (reallocated[pipe])
3548 continue;
3549
3550 skl_wm_flush_pipe(dev_priv, pipe, 3);
3551 }
3552 }
3553
3554 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3555 struct skl_pipe_wm_parameters *params,
3556 struct intel_wm_config *config,
3557 struct skl_ddb_allocation *ddb, /* out */
3558 struct skl_pipe_wm *pipe_wm /* out */)
3559 {
3560 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3561
3562 skl_compute_wm_pipe_parameters(crtc, params);
3563 skl_allocate_pipe_ddb(crtc, config, params, ddb);
3564 skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3565
3566 if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3567 return false;
3568
3569 intel_crtc->wm.skl_active = *pipe_wm;
3570
3571 return true;
3572 }
3573
3574 static void skl_update_other_pipe_wm(struct drm_device *dev,
3575 struct drm_crtc *crtc,
3576 struct intel_wm_config *config,
3577 struct skl_wm_values *r)
3578 {
3579 struct intel_crtc *intel_crtc;
3580 struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3581
3582 /*
3583 * If the WM update hasn't changed the allocation for this_crtc (the
3584 * crtc we are currently computing the new WM values for), other
3585 * enabled crtcs will keep the same allocation and we don't need to
3586 * recompute anything for them.
3587 */
3588 if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3589 return;
3590
3591 /*
3592 * Otherwise, because of this_crtc being freshly enabled/disabled, the
3593 * other active pipes need new DDB allocation and WM values.
3594 */
3595 list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3596 base.head) {
3597 struct skl_pipe_wm_parameters params = {};
3598 struct skl_pipe_wm pipe_wm = {};
3599 bool wm_changed;
3600
3601 if (this_crtc->pipe == intel_crtc->pipe)
3602 continue;
3603
3604 if (!intel_crtc->active)
3605 continue;
3606
3607 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3608 						&params, config,
3609 &r->ddb, &pipe_wm);
3610
3611 /*
3612 * If we end up re-computing the other pipe WM values, it's
3613 * because it was really needed, so we expect the WM values to
3614 * be different.
3615 */
3616 WARN_ON(!wm_changed);
3617
3618 		skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3619 r->dirty[intel_crtc->pipe] = true;
3620 }
3621 }
3622
3623 static void skl_clear_wm(struct skl_wm_values *watermarks, enum i915_pipe pipe)
3624 {
3625 watermarks->wm_linetime[pipe] = 0;
3626 memset(watermarks->plane[pipe], 0,
3627 sizeof(uint32_t) * 8 * I915_MAX_PLANES);
3628 memset(watermarks->plane_trans[pipe],
3629 0, sizeof(uint32_t) * I915_MAX_PLANES);
3630 watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
3631
3632 /* Clear ddb entries for pipe */
3633 memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
3634 memset(&watermarks->ddb.plane[pipe], 0,
3635 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3636 memset(&watermarks->ddb.y_plane[pipe], 0,
3637 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3638 memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
3639 sizeof(struct skl_ddb_entry));
3640
3641 }
3642
3643 static void skl_update_wm(struct drm_crtc *crtc)
3644 {
3645 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3646 struct drm_device *dev = crtc->dev;
3647 struct drm_i915_private *dev_priv = dev->dev_private;
3648 struct skl_pipe_wm_parameters params = {};
3649 struct skl_wm_values *results = &dev_priv->wm.skl_results;
3650 struct skl_pipe_wm pipe_wm = {};
3651 struct intel_wm_config config = {};
3652
3653
3654 /* Clear all dirty flags */
3655 memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
3656
3657 skl_clear_wm(results, intel_crtc->pipe);
3658
3659 skl_compute_wm_global_parameters(dev, &config);
3660
3661 	if (!skl_update_pipe_wm(crtc, &params, &config,
3662 &results->ddb, &pipe_wm))
3663 return;
3664
3665 	skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3666 results->dirty[intel_crtc->pipe] = true;
3667
3668 skl_update_other_pipe_wm(dev, crtc, &config, results);
3669 skl_write_wm_values(dev_priv, results);
3670 skl_flush_wm_values(dev_priv, results);
3671
3672 /* store the new configuration */
3673 dev_priv->wm.skl_hw = *results;
3674 }
3675
3676 static void
3677 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3678 uint32_t sprite_width, uint32_t sprite_height,
3679 int pixel_size, bool enabled, bool scaled)
3680 {
3681 struct intel_plane *intel_plane = to_intel_plane(plane);
3682 struct drm_framebuffer *fb = plane->state->fb;
3683
3684 intel_plane->wm.enabled = enabled;
3685 intel_plane->wm.scaled = scaled;
3686 intel_plane->wm.horiz_pixels = sprite_width;
3687 intel_plane->wm.vert_pixels = sprite_height;
3688 intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
3689
3690 /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
3691 intel_plane->wm.bytes_per_pixel =
3692 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3693 drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
3694 intel_plane->wm.y_bytes_per_pixel =
3695 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3696 drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
3697
3698 /*
3699 * Framebuffer can be NULL on plane disable, but it does not
3700 * matter for watermarks if we assume no tiling in that case.
3701 */
3702 if (fb)
3703 intel_plane->wm.tiling = fb->modifier[0];
3704 intel_plane->wm.rotation = plane->state->rotation;
3705
3706 skl_update_wm(crtc);
3707 }
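
/*
 * Editor's note: for the NV12 branch above, drm_format_plane_cpp() should
 * return 1 for plane 0 (the Y plane) and 2 for plane 1 (the interleaved
 * CbCr plane), so wm.bytes_per_pixel ends up as 2 and wm.y_bytes_per_pixel
 * as 1.  For any other format y_bytes_per_pixel stays 0 and the caller's
 * pixel_size is used unchanged.
 */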
3708
3709 static void ilk_update_wm(struct drm_crtc *crtc)
3710 {
3711 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3712 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3713 struct drm_device *dev = crtc->dev;
3714 struct drm_i915_private *dev_priv = dev->dev_private;
3715 struct ilk_wm_maximums max;
3716 static const struct ilk_wm_values zero_values;
3717 struct ilk_wm_values results = zero_values;
3718 enum intel_ddb_partitioning partitioning;
3719 static const struct intel_pipe_wm zero_wm;
3720 struct intel_pipe_wm pipe_wm = zero_wm;
3721 struct intel_pipe_wm lp_wm_1_2 = zero_wm, lp_wm_5_6 = zero_wm,
3722 *best_lp_wm;
3723 static const struct intel_wm_config zero_config;
3724 struct intel_wm_config config = zero_config;
3725
3726 WARN_ON(cstate->base.active != intel_crtc->active);
3727
3728 intel_compute_pipe_wm(cstate, &pipe_wm);
3729
3730 if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3731 return;
3732
3733 intel_crtc->wm.active = pipe_wm;
3734
3735 ilk_compute_wm_config(dev, &config);
3736
3737 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3738 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3739
3740 /* 5/6 split only in single pipe config on IVB+ */
3741 if (INTEL_INFO(dev)->gen >= 7 &&
3742 config.num_pipes_active == 1 && config.sprites_enabled) {
3743 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3744 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3745
3746 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3747 } else {
3748 best_lp_wm = &lp_wm_1_2;
3749 }
3750
3751 partitioning = (best_lp_wm == &lp_wm_1_2) ?
3752 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3753
3754 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3755
3756 ilk_write_wm_values(dev_priv, &results);
3757 }
3758
3759 static void
3760 ilk_update_sprite_wm(struct drm_plane *plane,
3761 struct drm_crtc *crtc,
3762 uint32_t sprite_width, uint32_t sprite_height,
3763 int pixel_size, bool enabled, bool scaled)
3764 {
3765 struct drm_device *dev = plane->dev;
3766 struct intel_plane *intel_plane = to_intel_plane(plane);
3767
3768 /*
3769 * IVB workaround: must disable low power watermarks for at least
3770 * one frame before enabling scaling. LP watermarks can be re-enabled
3771 * when scaling is disabled.
3772 *
3773 * WaCxSRDisabledForSpriteScaling:ivb
3774 */
3775 if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3776 intel_wait_for_vblank(dev, intel_plane->pipe);
3777
3778 ilk_update_wm(crtc);
3779 }
3780
3781 static void skl_pipe_wm_active_state(uint32_t val,
3782 struct skl_pipe_wm *active,
3783 bool is_transwm,
3784 bool is_cursor,
3785 int i,
3786 int level)
3787 {
3788 bool is_enabled = (val & PLANE_WM_EN) != 0;
3789
3790 if (!is_transwm) {
3791 if (!is_cursor) {
3792 active->wm[level].plane_en[i] = is_enabled;
3793 active->wm[level].plane_res_b[i] =
3794 val & PLANE_WM_BLOCKS_MASK;
3795 active->wm[level].plane_res_l[i] =
3796 (val >> PLANE_WM_LINES_SHIFT) &
3797 PLANE_WM_LINES_MASK;
3798 } else {
3799 active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
3800 active->wm[level].plane_res_b[PLANE_CURSOR] =
3801 val & PLANE_WM_BLOCKS_MASK;
3802 active->wm[level].plane_res_l[PLANE_CURSOR] =
3803 (val >> PLANE_WM_LINES_SHIFT) &
3804 PLANE_WM_LINES_MASK;
3805 }
3806 } else {
3807 if (!is_cursor) {
3808 active->trans_wm.plane_en[i] = is_enabled;
3809 active->trans_wm.plane_res_b[i] =
3810 val & PLANE_WM_BLOCKS_MASK;
3811 active->trans_wm.plane_res_l[i] =
3812 (val >> PLANE_WM_LINES_SHIFT) &
3813 PLANE_WM_LINES_MASK;
3814 } else {
3815 active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
3816 active->trans_wm.plane_res_b[PLANE_CURSOR] =
3817 val & PLANE_WM_BLOCKS_MASK;
3818 active->trans_wm.plane_res_l[PLANE_CURSOR] =
3819 (val >> PLANE_WM_LINES_SHIFT) &
3820 PLANE_WM_LINES_MASK;
3821 }
3822 }
3823 }
3824
3825 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3826 {
3827 struct drm_device *dev = crtc->dev;
3828 struct drm_i915_private *dev_priv = dev->dev_private;
3829 struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3830 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3831 struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3832 enum i915_pipe pipe = intel_crtc->pipe;
3833 int level, i, max_level;
3834 uint32_t temp;
3835
3836 max_level = ilk_wm_max_level(dev);
3837
3838 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3839
3840 for (level = 0; level <= max_level; level++) {
3841 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3842 hw->plane[pipe][i][level] =
3843 I915_READ(PLANE_WM(pipe, i, level));
3844 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
3845 }
3846
3847 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3848 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3849 hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
3850
3851 if (!intel_crtc->active)
3852 return;
3853
3854 hw->dirty[pipe] = true;
3855
3856 active->linetime = hw->wm_linetime[pipe];
3857
3858 for (level = 0; level <= max_level; level++) {
3859 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3860 temp = hw->plane[pipe][i][level];
3861 skl_pipe_wm_active_state(temp, active, false,
3862 false, i, level);
3863 }
3864 temp = hw->plane[pipe][PLANE_CURSOR][level];
3865 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3866 }
3867
3868 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3869 temp = hw->plane_trans[pipe][i];
3870 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3871 }
3872
3873 temp = hw->plane_trans[pipe][PLANE_CURSOR];
3874 skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3875 }
3876
3877 void skl_wm_get_hw_state(struct drm_device *dev)
3878 {
3879 struct drm_i915_private *dev_priv = dev->dev_private;
3880 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3881 struct drm_crtc *crtc;
3882
3883 skl_ddb_get_hw_state(dev_priv, ddb);
3884 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3885 skl_pipe_wm_get_hw_state(crtc);
3886 }
3887
3888 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3889 {
3890 struct drm_device *dev = crtc->dev;
3891 struct drm_i915_private *dev_priv = dev->dev_private;
3892 struct ilk_wm_values *hw = &dev_priv->wm.hw;
3893 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3894 struct intel_pipe_wm *active = &intel_crtc->wm.active;
3895 enum i915_pipe pipe = intel_crtc->pipe;
3896 static const unsigned int wm0_pipe_reg[] = {
3897 [PIPE_A] = WM0_PIPEA_ILK,
3898 [PIPE_B] = WM0_PIPEB_ILK,
3899 [PIPE_C] = WM0_PIPEC_IVB,
3900 };
3901
3902 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3903 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3904 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3905
3906 memset(active, 0, sizeof(*active));
3907
3908 active->pipe_enabled = intel_crtc->active;
3909
3910 if (active->pipe_enabled) {
3911 u32 tmp = hw->wm_pipe[pipe];
3912
3913 /*
3914 * For active pipes LP0 watermark is marked as
3915 		 * enabled, and LP1+ watermarks as disabled since
3916 * we can't really reverse compute them in case
3917 * multiple pipes are active.
3918 */
3919 active->wm[0].enable = true;
3920 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3921 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3922 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3923 active->linetime = hw->wm_linetime[pipe];
3924 } else {
3925 int level, max_level = ilk_wm_max_level(dev);
3926
3927 /*
3928 * For inactive pipes, all watermark levels
3929 * should be marked as enabled but zeroed,
3930 * which is what we'd compute them to.
3931 */
3932 for (level = 0; level <= max_level; level++)
3933 active->wm[level].enable = true;
3934 }
3935 }
3936
3937 #define _FW_WM(value, plane) \
3938 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3939 #define _FW_WM_VLV(value, plane) \
3940 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
3941
3942 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3943 struct vlv_wm_values *wm)
3944 {
3945 enum i915_pipe pipe;
3946 uint32_t tmp;
3947
3948 for_each_pipe(dev_priv, pipe) {
3949 tmp = I915_READ(VLV_DDL(pipe));
3950
3951 wm->ddl[pipe].primary =
3952 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3953 wm->ddl[pipe].cursor =
3954 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3955 wm->ddl[pipe].sprite[0] =
3956 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3957 wm->ddl[pipe].sprite[1] =
3958 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3959 }
3960
3961 tmp = I915_READ(DSPFW1);
3962 wm->sr.plane = _FW_WM(tmp, SR);
3963 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3964 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3965 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3966
3967 tmp = I915_READ(DSPFW2);
3968 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
3969 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
3970 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
3971
3972 tmp = I915_READ(DSPFW3);
3973 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
3974
3975 if (IS_CHERRYVIEW(dev_priv)) {
3976 tmp = I915_READ(DSPFW7_CHV);
3977 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3978 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3979
3980 tmp = I915_READ(DSPFW8_CHV);
3981 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
3982 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
3983
3984 tmp = I915_READ(DSPFW9_CHV);
3985 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
3986 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
3987
3988 tmp = I915_READ(DSPHOWM);
3989 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3990 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
3991 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
3992 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
3993 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3994 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3995 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3996 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3997 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
3998 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
3999 } else {
4000 tmp = I915_READ(DSPFW7);
4001 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4002 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4003
4004 tmp = I915_READ(DSPHOWM);
4005 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4006 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4007 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4008 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4009 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4010 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4011 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4012 }
4013 }
4014
4015 #undef _FW_WM
4016 #undef _FW_WM_VLV
4017
4018 void vlv_wm_get_hw_state(struct drm_device *dev)
4019 {
4020 struct drm_i915_private *dev_priv = to_i915(dev);
4021 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4022 struct intel_plane *plane;
4023 enum i915_pipe pipe;
4024 u32 val;
4025
4026 vlv_read_wm_values(dev_priv, wm);
4027
4028 for_each_intel_plane(dev, plane) {
4029 switch (plane->base.type) {
4030 int sprite;
4031 case DRM_PLANE_TYPE_CURSOR:
4032 plane->wm.fifo_size = 63;
4033 break;
4034 case DRM_PLANE_TYPE_PRIMARY:
4035 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4036 break;
4037 case DRM_PLANE_TYPE_OVERLAY:
4038 sprite = plane->plane;
4039 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4040 break;
4041 }
4042 }
4043
4044 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4045 wm->level = VLV_WM_LEVEL_PM2;
4046
4047 if (IS_CHERRYVIEW(dev_priv)) {
4048 mutex_lock(&dev_priv->rps.hw_lock);
4049
4050 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4051 if (val & DSP_MAXFIFO_PM5_ENABLE)
4052 wm->level = VLV_WM_LEVEL_PM5;
4053
4054 /*
4055 * If DDR DVFS is disabled in the BIOS, Punit
4056 		 * will never ack the request. If that happens,
4057 		 * assume we don't have to enable/disable DDR DVFS
4058 * dynamically. To test that just set the REQ_ACK
4059 * bit to poke the Punit, but don't change the
4060 * HIGH/LOW bits so that we don't actually change
4061 * the current state.
4062 */
4063 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4064 val |= FORCE_DDR_FREQ_REQ_ACK;
4065 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4066
4067 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4068 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4069 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4070 "assuming DDR DVFS is disabled\n");
4071 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4072 } else {
4073 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4074 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4075 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4076 }
4077
4078 mutex_unlock(&dev_priv->rps.hw_lock);
4079 }
4080
4081 for_each_pipe(dev_priv, pipe)
4082 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4083 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4084 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4085
4086 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4087 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4088 }
4089
4090 void ilk_wm_get_hw_state(struct drm_device *dev)
4091 {
4092 struct drm_i915_private *dev_priv = dev->dev_private;
4093 struct ilk_wm_values *hw = &dev_priv->wm.hw;
4094 struct drm_crtc *crtc;
4095
4096 for_each_crtc(dev, crtc)
4097 ilk_pipe_wm_get_hw_state(crtc);
4098
4099 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4100 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4101 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4102
4103 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4104 if (INTEL_INFO(dev)->gen >= 7) {
4105 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4106 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4107 }
4108
4109 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4110 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4111 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4112 else if (IS_IVYBRIDGE(dev))
4113 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4114 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4115
4116 hw->enable_fbc_wm =
4117 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4118 }
4119
4120 /**
4121 * intel_update_watermarks - update FIFO watermark values based on current modes
4122 *
4123 * Calculate watermark values for the various WM regs based on current mode
4124 * and plane configuration.
4125 *
4126 * There are several cases to deal with here:
4127 * - normal (i.e. non-self-refresh)
4128 * - self-refresh (SR) mode
4129 * - lines are large relative to FIFO size (buffer can hold up to 2)
4130 * - lines are small relative to FIFO size (buffer can hold more than 2
4131 * lines), so need to account for TLB latency
4132 *
4133 * The normal calculation is:
4134 * watermark = dotclock * bytes per pixel * latency
4135 * where latency is platform & configuration dependent (we assume pessimal
4136 * values here).
4137 *
4138 * The SR calculation is:
4139 * watermark = (trunc(latency/line time)+1) * surface width *
4140 * bytes per pixel
4141 * where
4142 * line time = htotal / dotclock
4143 * surface width = hdisplay for normal plane and 64 for cursor
4144 * and latency is assumed to be high, as above.
4145 *
4146 * The final value programmed to the register should always be rounded up,
4147 * and include an extra 2 entries to account for clock crossings.
4148 *
4149 * We don't use the sprite, so we can ignore that. And on Crestline we have
4150 * to set the non-SR watermarks to 8.
4151 */
4152 void intel_update_watermarks(struct drm_crtc *crtc)
4153 {
4154 struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4155
4156 if (dev_priv->display.update_wm)
4157 dev_priv->display.update_wm(crtc);
4158 }
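
/*
 * Editor's note: a worked example of the formulas documented above, with
 * assumed (purely illustrative) numbers.  For a 100 MHz dot clock, 4 bytes
 * per pixel and a 10 us latency:
 *
 *	normal watermark = dotclock * bytes per pixel * latency
 *	                 = 100e6 pixel/s * 4 B/pixel * 10e-6 s = 4000 bytes
 *
 * For the SR case with htotal = 2200 and hdisplay = 1920, line time =
 * htotal / dotclock = 22 us, so trunc(latency / line time) + 1 = 1 line and
 * the watermark is 1 * 1920 * 4 = 7680 bytes, before the rounding up and
 * the two extra entries for clock crossings mentioned above.
 */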
4159
4160 void intel_update_sprite_watermarks(struct drm_plane *plane,
4161 struct drm_crtc *crtc,
4162 uint32_t sprite_width,
4163 uint32_t sprite_height,
4164 int pixel_size,
4165 bool enabled, bool scaled)
4166 {
4167 struct drm_i915_private *dev_priv = plane->dev->dev_private;
4168
4169 if (dev_priv->display.update_sprite_wm)
4170 dev_priv->display.update_sprite_wm(plane, crtc,
4171 sprite_width, sprite_height,
4172 pixel_size, enabled, scaled);
4173 }
4174
4175 /**
4176  * Lock protecting IPS-related data structures
4177 */
4178 #ifdef __NetBSD__
4179 spinlock_t mchdev_lock;
4180 #else
4181 DEFINE_SPINLOCK(mchdev_lock);
4182 #endif
4183
4184 /* Global for IPS driver to get at the current i915 device. Protected by
4185 * mchdev_lock. */
4186 static struct drm_i915_private *i915_mch_dev;
4187
4188 bool ironlake_set_drps(struct drm_device *dev, u8 val)
4189 {
4190 struct drm_i915_private *dev_priv = dev->dev_private;
4191 u16 rgvswctl;
4192
4193 assert_spin_locked(&mchdev_lock);
4194
4195 rgvswctl = I915_READ16(MEMSWCTL);
4196 if (rgvswctl & MEMCTL_CMD_STS) {
4197 DRM_DEBUG("gpu busy, RCS change rejected\n");
4198 return false; /* still busy with another command */
4199 }
4200
4201 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4202 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4203 I915_WRITE16(MEMSWCTL, rgvswctl);
4204 POSTING_READ16(MEMSWCTL);
4205
4206 rgvswctl |= MEMCTL_CMD_STS;
4207 I915_WRITE16(MEMSWCTL, rgvswctl);
4208
4209 return true;
4210 }
4211
4212 static void ironlake_enable_drps(struct drm_device *dev)
4213 {
4214 struct drm_i915_private *dev_priv = dev->dev_private;
4215 u32 rgvmodectl = I915_READ(MEMMODECTL);
4216 u8 fmax, fmin, fstart, vstart;
4217
4218 spin_lock_irq(&mchdev_lock);
4219
4220 /* Enable temp reporting */
4221 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4222 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4223
4224 /* 100ms RC evaluation intervals */
4225 I915_WRITE(RCUPEI, 100000);
4226 I915_WRITE(RCDNEI, 100000);
4227
4228 /* Set max/min thresholds to 90ms and 80ms respectively */
4229 I915_WRITE(RCBMAXAVG, 90000);
4230 I915_WRITE(RCBMINAVG, 80000);
4231
4232 I915_WRITE(MEMIHYST, 1);
4233
4234 /* Set up min, max, and cur for interrupt handling */
4235 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4236 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4237 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4238 MEMMODE_FSTART_SHIFT;
4239
4240 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
4241 PXVFREQ_PX_SHIFT;
4242
4243 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4244 dev_priv->ips.fstart = fstart;
4245
4246 dev_priv->ips.max_delay = fstart;
4247 dev_priv->ips.min_delay = fmin;
4248 dev_priv->ips.cur_delay = fstart;
4249
4250 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4251 fmax, fmin, fstart);
4252
4253 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4254
4255 /*
4256 * Interrupts will be enabled in ironlake_irq_postinstall
4257 */
4258
4259 I915_WRITE(VIDSTART, vstart);
4260 POSTING_READ(VIDSTART);
4261
4262 rgvmodectl |= MEMMODE_SWMODE_EN;
4263 I915_WRITE(MEMMODECTL, rgvmodectl);
4264
4265 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4266 DRM_ERROR("stuck trying to change perf mode\n");
4267 mdelay(1);
4268
4269 ironlake_set_drps(dev, fstart);
4270
4271 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
4272 I915_READ(DDREC) + I915_READ(CSIEC);
4273 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4274 dev_priv->ips.last_count2 = I915_READ(GFXEC);
4275 dev_priv->ips.last_time2 = ktime_get_raw_ns();
4276
4277 spin_unlock_irq(&mchdev_lock);
4278 }
4279
4280 static void ironlake_disable_drps(struct drm_device *dev)
4281 {
4282 struct drm_i915_private *dev_priv = dev->dev_private;
4283 u16 rgvswctl;
4284
4285 spin_lock_irq(&mchdev_lock);
4286
4287 rgvswctl = I915_READ16(MEMSWCTL);
4288
4289 /* Ack interrupts, disable EFC interrupt */
4290 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4291 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4292 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4293 I915_WRITE(DEIIR, DE_PCU_EVENT);
4294 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4295
4296 /* Go back to the starting frequency */
4297 ironlake_set_drps(dev, dev_priv->ips.fstart);
4298 mdelay(1);
4299 rgvswctl |= MEMCTL_CMD_STS;
4300 I915_WRITE(MEMSWCTL, rgvswctl);
4301 mdelay(1);
4302
4303 spin_unlock_irq(&mchdev_lock);
4304 }
4305
4306 /* There's a funny hw issue where the hw returns all 0 when reading from
4307 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4308  * ourselves, instead of doing an rmw cycle (which might result in us clearing
4309  * all limits and the gpu getting stuck at whatever frequency it is at right now).
4310 */
4311 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4312 {
4313 u32 limits;
4314
4315 /* Only set the down limit when we've reached the lowest level to avoid
4316 * getting more interrupts, otherwise leave this clear. This prevents a
4317 * race in the hw when coming out of rc6: There's a tiny window where
4318 * the hw runs at the minimal clock before selecting the desired
4319 * frequency, if the down threshold expires in that window we will not
4320 * receive a down interrupt. */
4321 if (IS_GEN9(dev_priv->dev)) {
4322 limits = (dev_priv->rps.max_freq_softlimit) << 23;
4323 if (val <= dev_priv->rps.min_freq_softlimit)
4324 limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4325 } else {
4326 limits = dev_priv->rps.max_freq_softlimit << 24;
4327 if (val <= dev_priv->rps.min_freq_softlimit)
4328 limits |= dev_priv->rps.min_freq_softlimit << 16;
4329 }
4330
4331 return limits;
4332 }
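
/*
 * Editor's note: a worked example of the limits layout above (assumed
 * softlimit values, purely illustrative).  On pre-gen9 hardware with
 * max_freq_softlimit = 0x20 and min_freq_softlimit = 0x08, a request of
 * val <= 0x08 produces
 *
 *	limits = 0x20 << 24 | 0x08 << 16 = 0x20080000
 *
 * while any higher val sets only the upper (max) field.  Gen9 packs the
 * same two fields at bits 23 and 14 instead.
 */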
4333
4334 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4335 {
4336 int new_power;
4337 u32 threshold_up = 0, threshold_down = 0; /* in % */
4338 u32 ei_up = 0, ei_down = 0;
4339
4340 new_power = dev_priv->rps.power;
4341 switch (dev_priv->rps.power) {
4342 case LOW_POWER:
4343 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4344 new_power = BETWEEN;
4345 break;
4346
4347 case BETWEEN:
4348 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4349 new_power = LOW_POWER;
4350 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4351 new_power = HIGH_POWER;
4352 break;
4353
4354 case HIGH_POWER:
4355 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4356 new_power = BETWEEN;
4357 break;
4358 }
4359 /* Max/min bins are special */
4360 if (val <= dev_priv->rps.min_freq_softlimit)
4361 new_power = LOW_POWER;
4362 if (val >= dev_priv->rps.max_freq_softlimit)
4363 new_power = HIGH_POWER;
4364 if (new_power == dev_priv->rps.power)
4365 return;
4366
4367 /* Note the units here are not exactly 1us, but 1280ns. */
4368 switch (new_power) {
4369 case LOW_POWER:
4370 /* Upclock if more than 95% busy over 16ms */
4371 ei_up = 16000;
4372 threshold_up = 95;
4373
4374 /* Downclock if less than 85% busy over 32ms */
4375 ei_down = 32000;
4376 threshold_down = 85;
4377 break;
4378
4379 case BETWEEN:
4380 /* Upclock if more than 90% busy over 13ms */
4381 ei_up = 13000;
4382 threshold_up = 90;
4383
4384 /* Downclock if less than 75% busy over 32ms */
4385 ei_down = 32000;
4386 threshold_down = 75;
4387 break;
4388
4389 case HIGH_POWER:
4390 /* Upclock if more than 85% busy over 10ms */
4391 ei_up = 10000;
4392 threshold_up = 85;
4393
4394 /* Downclock if less than 60% busy over 32ms */
4395 ei_down = 32000;
4396 threshold_down = 60;
4397 break;
4398 }
4399
4400 	/* When byt can survive dynamic sw freq adjustments without a
4401 	 * system hang, this restriction can be lifted.
4402 */
4403 if (IS_VALLEYVIEW(dev_priv))
4404 goto skip_hw_write;
4405
4406 I915_WRITE(GEN6_RP_UP_EI,
4407 GT_INTERVAL_FROM_US(dev_priv, ei_up));
4408 I915_WRITE(GEN6_RP_UP_THRESHOLD,
4409 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4410
4411 I915_WRITE(GEN6_RP_DOWN_EI,
4412 GT_INTERVAL_FROM_US(dev_priv, ei_down));
4413 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4414 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4415
4416 I915_WRITE(GEN6_RP_CONTROL,
4417 GEN6_RP_MEDIA_TURBO |
4418 GEN6_RP_MEDIA_HW_NORMAL_MODE |
4419 GEN6_RP_MEDIA_IS_GFX |
4420 GEN6_RP_ENABLE |
4421 GEN6_RP_UP_BUSY_AVG |
4422 GEN6_RP_DOWN_IDLE_AVG);
4423
4424 skip_hw_write:
4425 dev_priv->rps.power = new_power;
4426 dev_priv->rps.up_threshold = threshold_up;
4427 dev_priv->rps.down_threshold = threshold_down;
4428 dev_priv->rps.last_adj = 0;
4429 }
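
/*
 * Editor's note: the "1280ns" remark above means the EI/threshold registers
 * count in 1.28 us units on these parts, so GT_INTERVAL_FROM_US() is
 * roughly us * 100 / 128 here (the exact conversion is generation
 * dependent).  For the LOW_POWER case, ei_up = 16000 us therefore programs
 * about 16000 * 100 / 128 = 12500 units, and the 95% up threshold about
 * 11875 units.
 */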
4430
4431 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4432 {
4433 u32 mask = 0;
4434
4435 	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
4436 if (val > dev_priv->rps.min_freq_softlimit)
4437 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4438 if (val < dev_priv->rps.max_freq_softlimit)
4439 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4440
4441 mask &= dev_priv->pm_rps_events;
4442
4443 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4444 }
4445
4446 /* gen6_set_rps is called to update the frequency request, but should also be
4447 * called when the range (min_delay and max_delay) is modified so that we can
4448 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4449 static void gen6_set_rps(struct drm_device *dev, u8 val)
4450 {
4451 struct drm_i915_private *dev_priv = dev->dev_private;
4452
4453 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4454 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
4455 return;
4456
4457 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4458 WARN_ON(val > dev_priv->rps.max_freq);
4459 WARN_ON(val < dev_priv->rps.min_freq);
4460
4461 /* min/max delay may still have been modified so be sure to
4462 * write the limits value.
4463 */
4464 if (val != dev_priv->rps.cur_freq) {
4465 gen6_set_rps_thresholds(dev_priv, val);
4466
4467 if (IS_GEN9(dev))
4468 I915_WRITE(GEN6_RPNSWREQ,
4469 GEN9_FREQUENCY(val));
4470 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4471 I915_WRITE(GEN6_RPNSWREQ,
4472 HSW_FREQUENCY(val));
4473 else
4474 I915_WRITE(GEN6_RPNSWREQ,
4475 GEN6_FREQUENCY(val) |
4476 GEN6_OFFSET(0) |
4477 GEN6_AGGRESSIVE_TURBO);
4478 }
4479
4480 /* Make sure we continue to get interrupts
4481 * until we hit the minimum or maximum frequencies.
4482 */
4483 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4484 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4485
4486 POSTING_READ(GEN6_RPNSWREQ);
4487
4488 dev_priv->rps.cur_freq = val;
4489 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4490 }
4491
4492 static void valleyview_set_rps(struct drm_device *dev, u8 val)
4493 {
4494 struct drm_i915_private *dev_priv = dev->dev_private;
4495
4496 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4497 WARN_ON(val > dev_priv->rps.max_freq);
4498 WARN_ON(val < dev_priv->rps.min_freq);
4499
4500 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4501 "Odd GPU freq value\n"))
4502 val &= ~1;
4503
4504 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4505
4506 if (val != dev_priv->rps.cur_freq) {
4507 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4508 if (!IS_CHERRYVIEW(dev_priv))
4509 gen6_set_rps_thresholds(dev_priv, val);
4510 }
4511
4512 dev_priv->rps.cur_freq = val;
4513 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4514 }
4515
4516 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4517 *
4518 * * If Gfx is Idle, then
4519 * 1. Forcewake Media well.
4520 * 2. Request idle freq.
4521 * 3. Release Forcewake of Media well.
4522 */
4523 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4524 {
4525 u32 val = dev_priv->rps.idle_freq;
4526
4527 if (dev_priv->rps.cur_freq <= val)
4528 return;
4529
4530 /* Wake up the media well, as that takes a lot less
4531 * power than the Render well. */
4532 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4533 valleyview_set_rps(dev_priv->dev, val);
4534 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4535 }
4536
4537 void gen6_rps_busy(struct drm_i915_private *dev_priv)
4538 {
4539 mutex_lock(&dev_priv->rps.hw_lock);
4540 if (dev_priv->rps.enabled) {
4541 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
4542 gen6_rps_reset_ei(dev_priv);
4543 I915_WRITE(GEN6_PMINTRMSK,
4544 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4545 }
4546 mutex_unlock(&dev_priv->rps.hw_lock);
4547 }
4548
4549 void gen6_rps_idle(struct drm_i915_private *dev_priv)
4550 {
4551 struct drm_device *dev = dev_priv->dev;
4552
4553 mutex_lock(&dev_priv->rps.hw_lock);
4554 if (dev_priv->rps.enabled) {
4555 if (IS_VALLEYVIEW(dev))
4556 vlv_set_rps_idle(dev_priv);
4557 else
4558 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4559 dev_priv->rps.last_adj = 0;
4560 I915_WRITE(GEN6_PMINTRMSK,
4561 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
4562 }
4563 mutex_unlock(&dev_priv->rps.hw_lock);
4564
4565 spin_lock(&dev_priv->rps.client_lock);
4566 while (!list_empty(&dev_priv->rps.clients))
4567 list_del_init(dev_priv->rps.clients.next);
4568 spin_unlock(&dev_priv->rps.client_lock);
4569 }
4570
4571 void gen6_rps_boost(struct drm_i915_private *dev_priv,
4572 struct intel_rps_client *rps,
4573 unsigned long submitted)
4574 {
4575 /* This is intentionally racy! We peek at the state here, then
4576 * validate inside the RPS worker.
4577 */
4578 if (!(dev_priv->mm.busy &&
4579 dev_priv->rps.enabled &&
4580 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4581 return;
4582
4583 	/* Force an RPS boost (and don't count it against the client) if
4584 * the GPU is severely congested.
4585 */
4586 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4587 rps = NULL;
4588
4589 spin_lock(&dev_priv->rps.client_lock);
4590 if (rps == NULL || list_empty(&rps->link)) {
4591 spin_lock_irq(&dev_priv->irq_lock);
4592 if (dev_priv->rps.interrupts_enabled) {
4593 dev_priv->rps.client_boost = true;
4594 queue_work(dev_priv->wq, &dev_priv->rps.work);
4595 }
4596 spin_unlock_irq(&dev_priv->irq_lock);
4597
4598 if (rps != NULL) {
4599 list_add(&rps->link, &dev_priv->rps.clients);
4600 rps->boosts++;
4601 } else
4602 dev_priv->rps.boosts++;
4603 }
4604 spin_unlock(&dev_priv->rps.client_lock);
4605 }
4606
4607 void intel_set_rps(struct drm_device *dev, u8 val)
4608 {
4609 if (IS_VALLEYVIEW(dev))
4610 valleyview_set_rps(dev, val);
4611 else
4612 gen6_set_rps(dev, val);
4613 }
4614
4615 static void gen9_disable_rps(struct drm_device *dev)
4616 {
4617 struct drm_i915_private *dev_priv = dev->dev_private;
4618
4619 I915_WRITE(GEN6_RC_CONTROL, 0);
4620 I915_WRITE(GEN9_PG_ENABLE, 0);
4621 }
4622
4623 static void gen6_disable_rps(struct drm_device *dev)
4624 {
4625 struct drm_i915_private *dev_priv = dev->dev_private;
4626
4627 I915_WRITE(GEN6_RC_CONTROL, 0);
4628 I915_WRITE(GEN6_RPNSWREQ, 1UL << 31);
4629 }
4630
4631 static void cherryview_disable_rps(struct drm_device *dev)
4632 {
4633 struct drm_i915_private *dev_priv = dev->dev_private;
4634
4635 I915_WRITE(GEN6_RC_CONTROL, 0);
4636 }
4637
4638 static void valleyview_disable_rps(struct drm_device *dev)
4639 {
4640 struct drm_i915_private *dev_priv = dev->dev_private;
4641
4642 	/* We do forcewake before disabling RC6;
4643 	 * this is what the BIOS expects when going into suspend */
4644 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4645
4646 I915_WRITE(GEN6_RC_CONTROL, 0);
4647
4648 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4649 }
4650
4651 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4652 {
4653 if (IS_VALLEYVIEW(dev)) {
4654 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4655 mode = GEN6_RC_CTL_RC6_ENABLE;
4656 else
4657 mode = 0;
4658 }
4659 if (HAS_RC6p(dev))
4660 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4661 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
4662 (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
4663 (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
4664
4665 else
4666 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4667 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
4668 }
4669
4670 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4671 {
4672 	/* No RC6 before Ironlake, and the ilk RC6 code is gone. */
4673 if (INTEL_INFO(dev)->gen < 6)
4674 return 0;
4675
4676 /* Respect the kernel parameter if it is set */
4677 if (enable_rc6 >= 0) {
4678 int mask;
4679
4680 if (HAS_RC6p(dev))
4681 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4682 INTEL_RC6pp_ENABLE;
4683 else
4684 mask = INTEL_RC6_ENABLE;
4685
4686 if ((enable_rc6 & mask) != enable_rc6)
4687 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4688 enable_rc6 & mask, enable_rc6, mask);
4689
4690 return enable_rc6 & mask;
4691 }
4692
4693 if (IS_IVYBRIDGE(dev))
4694 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4695
4696 return INTEL_RC6_ENABLE;
4697 }
4698
4699 int intel_enable_rc6(const struct drm_device *dev)
4700 {
4701 return i915.enable_rc6;
4702 }
4703
4704 static void gen6_init_rps_frequencies(struct drm_device *dev)
4705 {
4706 struct drm_i915_private *dev_priv = dev->dev_private;
4707 uint32_t rp_state_cap;
4708 u32 ddcc_status = 0;
4709 int ret;
4710
4711 /* All of these values are in units of 50MHz */
4712 dev_priv->rps.cur_freq = 0;
4713 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
4714 if (IS_BROXTON(dev)) {
4715 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4716 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4717 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4718 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff;
4719 } else {
4720 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4721 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff;
4722 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4723 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4724 }
4725
4726 /* hw_max = RP0 until we check for overclocking */
4727 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
4728
4729 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4730 if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
4731 IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4732 ret = sandybridge_pcode_read(dev_priv,
4733 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4734 &ddcc_status);
4735 if (0 == ret)
4736 dev_priv->rps.efficient_freq =
4737 clamp_t(u8,
4738 ((ddcc_status >> 8) & 0xff),
4739 dev_priv->rps.min_freq,
4740 dev_priv->rps.max_freq);
4741 }
4742
4743 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
4744 		/* Store the frequency values in 16.66 MHz units, which is
4745 		 * the natural hardware unit for SKL */
4746 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4747 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4748 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4749 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4750 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4751 }
4752
4753 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4754
4755 /* Preserve min/max settings in case of re-init */
4756 if (dev_priv->rps.max_freq_softlimit == 0)
4757 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4758
4759 if (dev_priv->rps.min_freq_softlimit == 0) {
4760 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4761 dev_priv->rps.min_freq_softlimit =
4762 max_t(int, dev_priv->rps.efficient_freq,
4763 intel_freq_opcode(dev_priv, 450));
4764 else
4765 dev_priv->rps.min_freq_softlimit =
4766 dev_priv->rps.min_freq;
4767 }
4768 }
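
/*
 * Editor's note: a worked example of the SKL/KBL scaling above (assumed raw
 * fuse value, purely illustrative).  RP_STATE_CAP reports frequencies in
 * 50 MHz units; a raw RP0 of 22 is 22 * 50 = 1100 MHz, which gets stored as
 * 22 * GEN9_FREQ_SCALER = 66 units of ~16.66 MHz so that later frequency
 * requests can be expressed in the hardware's native granularity.
 */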
4769
4770 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4771 static void gen9_enable_rps(struct drm_device *dev)
4772 {
4773 struct drm_i915_private *dev_priv = dev->dev_private;
4774
4775 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4776
4777 gen6_init_rps_frequencies(dev);
4778
4779 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4780 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
4781 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4782 return;
4783 }
4784
4785 /* Program defaults and thresholds for RPS*/
4786 I915_WRITE(GEN6_RC_VIDEO_FREQ,
4787 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4788
4789 /* 1 second timeout*/
4790 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4791 GT_INTERVAL_FROM_US(dev_priv, 1000000));
4792
4793 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4794
4795 /* Leaning on the below call to gen6_set_rps to program/setup the
4796 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
4797 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
4798 dev_priv->rps.power = HIGH_POWER; /* force a reset */
4799 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4800
4801 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4802 }
4803
4804 static void gen9_enable_rc6(struct drm_device *dev)
4805 {
4806 struct drm_i915_private *dev_priv = dev->dev_private;
4807 struct intel_engine_cs *ring;
4808 uint32_t rc6_mask = 0;
4809 int unused;
4810
4811 /* 1a: Software RC state - RC0 */
4812 I915_WRITE(GEN6_RC_STATE, 0);
4813
4814 /* 1b: Get forcewake during program sequence. Although the driver
4815 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4816 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4817
4818 /* 2a: Disable RC states. */
4819 I915_WRITE(GEN6_RC_CONTROL, 0);
4820
4821 /* 2b: Program RC6 thresholds.*/
4822
4823 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
4824 if (IS_SKYLAKE(dev))
4825 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
4826 else
4827 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4828 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4829 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4830 for_each_ring(ring, dev_priv, unused)
4831 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4832
4833 if (HAS_GUC_UCODE(dev))
4834 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
4835
4836 I915_WRITE(GEN6_RC_SLEEP, 0);
4837
4838 /* 2c: Program Coarse Power Gating Policies. */
4839 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4840 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4841
4842 /* 3a: Enable RC6 */
4843 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4844 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4845 DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4846 "on" : "off");
4847 /* WaRsUseTimeoutMode */
4848 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
4849 (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
4850 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
4851 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4852 GEN7_RC_CTL_TO_MODE |
4853 rc6_mask);
4854 } else {
4855 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4856 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4857 GEN6_RC_CTL_EI_MODE(1) |
4858 rc6_mask);
4859 }
4860
4861 /*
4862 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4863 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
4864 */
4865 if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
4866 ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
4867 I915_WRITE(GEN9_PG_ENABLE, 0);
4868 else
4869 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4870 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
4871
4872 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4873
4874 }
4875
4876 static void gen8_enable_rps(struct drm_device *dev)
4877 {
4878 struct drm_i915_private *dev_priv = dev->dev_private;
4879 struct intel_engine_cs *ring;
4880 uint32_t rc6_mask = 0;
4881 int unused;
4882
4883 /* 1a: Software RC state - RC0 */
4884 I915_WRITE(GEN6_RC_STATE, 0);
4885
4886 /* 1c & 1d: Get forcewake during program sequence. Although the driver
4887 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4888 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4889
4890 /* 2a: Disable RC states. */
4891 I915_WRITE(GEN6_RC_CONTROL, 0);
4892
4893 /* Initialize rps frequencies */
4894 gen6_init_rps_frequencies(dev);
4895
4896 /* 2b: Program RC6 thresholds.*/
4897 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4898 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4899 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4900 for_each_ring(ring, dev_priv, unused)
4901 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4902 I915_WRITE(GEN6_RC_SLEEP, 0);
4903 if (IS_BROADWELL(dev))
4904 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4905 else
4906 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4907
4908 /* 3: Enable RC6 */
4909 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4910 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4911 intel_print_rc6_info(dev, rc6_mask);
4912 if (IS_BROADWELL(dev))
4913 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4914 GEN7_RC_CTL_TO_MODE |
4915 rc6_mask);
4916 else
4917 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4918 GEN6_RC_CTL_EI_MODE(1) |
4919 rc6_mask);
4920
4921 /* 4 Program defaults and thresholds for RPS*/
4922 I915_WRITE(GEN6_RPNSWREQ,
4923 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4924 I915_WRITE(GEN6_RC_VIDEO_FREQ,
4925 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4926 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4927 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4928
4929 /* Docs recommend 900MHz, and 300 MHz respectively */
4930 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4931 dev_priv->rps.max_freq_softlimit << 24 |
4932 dev_priv->rps.min_freq_softlimit << 16);
4933
4934 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4935 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
4936 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4937 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4938
4939 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4940
4941 /* 5: Enable RPS */
4942 I915_WRITE(GEN6_RP_CONTROL,
4943 GEN6_RP_MEDIA_TURBO |
4944 GEN6_RP_MEDIA_HW_NORMAL_MODE |
4945 GEN6_RP_MEDIA_IS_GFX |
4946 GEN6_RP_ENABLE |
4947 GEN6_RP_UP_BUSY_AVG |
4948 GEN6_RP_DOWN_IDLE_AVG);
4949
4950 	/* 6: Ring frequency + overclocking (our driver does this later) */
4951
4952 dev_priv->rps.power = HIGH_POWER; /* force a reset */
4953 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4954
4955 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4956 }
4957
4958 static void gen6_enable_rps(struct drm_device *dev)
4959 {
4960 struct drm_i915_private *dev_priv = dev->dev_private;
4961 struct intel_engine_cs *ring;
4962 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4963 u32 gtfifodbg;
4964 int rc6_mode;
4965 int i, ret;
4966
4967 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4968
4969 /* Here begins a magic sequence of register writes to enable
4970 * auto-downclocking.
4971 *
4972 * Perhaps there might be some value in exposing these to
4973 * userspace...
4974 */
4975 I915_WRITE(GEN6_RC_STATE, 0);
4976
4977 /* Clear the DBG now so we don't confuse earlier errors */
4978 if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4979 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4980 I915_WRITE(GTFIFODBG, gtfifodbg);
4981 }
4982
4983 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4984
4985 /* Initialize rps frequencies */
4986 gen6_init_rps_frequencies(dev);
4987
4988 /* disable the counters and set deterministic thresholds */
4989 I915_WRITE(GEN6_RC_CONTROL, 0);
4990
4991 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4992 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4993 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4994 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4995 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4996
4997 for_each_ring(ring, dev_priv, i)
4998 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4999
5000 I915_WRITE(GEN6_RC_SLEEP, 0);
5001 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
5002 if (IS_IVYBRIDGE(dev))
5003 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
5004 else
5005 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
5006 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
5007 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
5008
5009 /* Check if we are enabling RC6 */
5010 rc6_mode = intel_enable_rc6(dev_priv->dev);
5011 if (rc6_mode & INTEL_RC6_ENABLE)
5012 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
5013
5014 /* We don't use those on Haswell */
5015 if (!IS_HASWELL(dev)) {
5016 if (rc6_mode & INTEL_RC6p_ENABLE)
5017 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5018
5019 if (rc6_mode & INTEL_RC6pp_ENABLE)
5020 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5021 }
5022
5023 intel_print_rc6_info(dev, rc6_mask);
5024
5025 I915_WRITE(GEN6_RC_CONTROL,
5026 rc6_mask |
5027 GEN6_RC_CTL_EI_MODE(1) |
5028 GEN6_RC_CTL_HW_ENABLE);
5029
5030 /* Power down if completely idle for over 50ms */
5031 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5032 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5033
5034 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
5035 if (ret)
5036 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5037
5038 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5039 if (!ret && (pcu_mbox & __BIT(31))) { /* OC supported */
5040 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5041 (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5042 (pcu_mbox & 0xff) * 50);
5043 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5044 }
5045
5046 dev_priv->rps.power = HIGH_POWER; /* force a reset */
5047 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5048
5049 rc6vids = 0;
5050 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5051 if (IS_GEN6(dev) && ret) {
5052 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5053 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5054 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5055 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5056 rc6vids &= 0xffff00;
5057 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5058 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5059 if (ret)
5060 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5061 }
5062
5063 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5064 }
5065
5066 static void __gen6_update_ring_freq(struct drm_device *dev)
5067 {
5068 struct drm_i915_private *dev_priv = dev->dev_private;
5069 int min_freq = 15;
5070 unsigned int gpu_freq;
5071 unsigned int max_ia_freq, min_ring_freq;
5072 unsigned int max_gpu_freq, min_gpu_freq;
5073 int scaling_factor = 180;
5074 #ifndef __NetBSD__
5075 struct cpufreq_policy *policy;
5076 #endif
5077
5078 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5079
5080 #ifdef __NetBSD__
5081 {
5082 extern uint64_t tsc_freq; /* x86 TSC frequency in Hz */
5083 max_ia_freq = (tsc_freq / 1000);
5084 }
5085 #else
5086 policy = cpufreq_cpu_get(0);
5087 if (policy) {
5088 max_ia_freq = policy->cpuinfo.max_freq;
5089 cpufreq_cpu_put(policy);
5090 } else {
5091 /*
5092 * Default to measured freq if none found, PCU will ensure we
5093 * don't go over
5094 */
5095 max_ia_freq = tsc_khz;
5096 }
5097 #endif
5098
5099 /* Convert from kHz to MHz */
5100 max_ia_freq /= 1000;
5101
5102 min_ring_freq = I915_READ(DCLK) & 0xf;
5103 /* convert DDR frequency from units of 266.6MHz to bandwidth */
5104 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
5105
5106 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5107 		/* Convert GT frequency to 50 MHz units */
5108 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5109 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5110 } else {
5111 min_gpu_freq = dev_priv->rps.min_freq;
5112 max_gpu_freq = dev_priv->rps.max_freq;
5113 }
5114
5115 /*
5116 * For each potential GPU frequency, load a ring frequency we'd like
5117 * to use for memory access. We do this by specifying the IA frequency
5118 * the PCU should use as a reference to determine the ring frequency.
5119 */
5120 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5121 int diff = max_gpu_freq - gpu_freq;
5122 unsigned int ia_freq = 0, ring_freq = 0;
5123
5124 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
5125 /*
5126 * ring_freq = 2 * GT. ring_freq is in 100MHz units
5127 * No floor required for ring frequency on SKL.
5128 */
5129 ring_freq = gpu_freq;
5130 } else if (INTEL_INFO(dev)->gen >= 8) {
5131 /* max(2 * GT, DDR). NB: GT is 50MHz units */
5132 ring_freq = max(min_ring_freq, gpu_freq);
5133 } else if (IS_HASWELL(dev)) {
5134 ring_freq = mult_frac(gpu_freq, 5, 4);
5135 ring_freq = max(min_ring_freq, ring_freq);
5136 /* leave ia_freq as the default, chosen by cpufreq */
5137 } else {
5138 /* On older processors, there is no separate ring
5139 * clock domain, so in order to boost the bandwidth
5140 * of the ring, we need to upclock the CPU (ia_freq).
5141 *
5142 * For GPU frequencies less than 750MHz,
5143 * just use the lowest ring freq.
5144 */
5145 if (gpu_freq < min_freq)
5146 ia_freq = 800;
5147 else
5148 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5149 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5150 }
5151
5152 sandybridge_pcode_write(dev_priv,
5153 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5154 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5155 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5156 gpu_freq);
5157 }
5158 }
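
/*
 * Editor's note: a worked example of the table built above (assumed
 * register values, purely illustrative).  With DCLK reading back 6, the DDR
 * conversion gives min_ring_freq = mult_frac(6, 8, 3) = 16.  On Haswell a
 * GPU frequency of 20 (50 MHz units, i.e. 1000 MHz) then yields
 * ring_freq = max(16, mult_frac(20, 5, 4)) = 25, with ia_freq left for
 * cpufreq to choose; older parts instead derive ia_freq from max_ia_freq
 * and the distance from the top GPU bin.
 */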
5159
5160 void gen6_update_ring_freq(struct drm_device *dev)
5161 {
5162 struct drm_i915_private *dev_priv = dev->dev_private;
5163
5164 if (!HAS_CORE_RING_FREQ(dev))
5165 return;
5166
5167 mutex_lock(&dev_priv->rps.hw_lock);
5168 __gen6_update_ring_freq(dev);
5169 mutex_unlock(&dev_priv->rps.hw_lock);
5170 }
5171
5172 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5173 {
5174 struct drm_device *dev = dev_priv->dev;
5175 u32 val, rp0;
5176
5177 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5178
5179 switch (INTEL_INFO(dev)->eu_total) {
5180 case 8:
5181 /* (2 * 4) config */
5182 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5183 break;
5184 case 12:
5185 /* (2 * 6) config */
5186 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5187 break;
5188 case 16:
5189 /* (2 * 8) config */
5190 default:
5191 /* Setting (2 * 8) Min RP0 for any other combination */
5192 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5193 break;
5194 }
5195
5196 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5197
5198 return rp0;
5199 }
5200
5201 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5202 {
5203 u32 val, rpe;
5204
5205 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5206 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5207
5208 return rpe;
5209 }
5210
5211 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5212 {
5213 u32 val, rp1;
5214
5215 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5216 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5217
5218 return rp1;
5219 }
5220
5221 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5222 {
5223 u32 val, rp1;
5224
5225 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5226
5227 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5228
5229 return rp1;
5230 }
5231
5232 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5233 {
5234 u32 val, rp0;
5235
5236 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5237
5238 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5239 /* Clamp to max */
5240 rp0 = min_t(u32, rp0, 0xea);
5241
5242 return rp0;
5243 }
5244
5245 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5246 {
5247 u32 val, rpe;
5248
5249 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5250 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5251 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5252 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5253
5254 return rpe;
5255 }
5256
5257 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5258 {
5259 return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5260 }
5261
5262 /* Check that the pctx buffer wasn't moved under us. */
5263 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5264 {
5265 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5266
5267 WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5268 dev_priv->vlv_pctx->stolen->start);
5269 }
5270
5271
5272 /* Check that the pcbr address is not empty. */
5273 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5274 {
5275 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5276
5277 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5278 }
5279
5280 static void cherryview_setup_pctx(struct drm_device *dev)
5281 {
5282 struct drm_i915_private *dev_priv = dev->dev_private;
5283 unsigned long pctx_paddr, paddr;
5284 struct i915_gtt *gtt = &dev_priv->gtt;
5285 u32 pcbr;
5286 int pctx_size = 32*1024;
5287
5288 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5289
5290 pcbr = I915_READ(VLV_PCBR);
5291 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5292 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5293 paddr = (dev_priv->mm.stolen_base +
5294 (gtt->stolen_size - pctx_size));
5295
5296 pctx_paddr = (paddr & (~4095));
5297 I915_WRITE(VLV_PCBR, pctx_paddr);
5298 }
5299
5300 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5301 }
5302
5303 static void valleyview_setup_pctx(struct drm_device *dev)
5304 {
5305 struct drm_i915_private *dev_priv = dev->dev_private;
5306 struct drm_i915_gem_object *pctx;
5307 unsigned long pctx_paddr;
5308 u32 pcbr;
5309 int pctx_size = 24*1024;
5310
5311 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5312
5313 pcbr = I915_READ(VLV_PCBR);
5314 if (pcbr) {
5315 /* BIOS set it up already, grab the pre-alloc'd space */
5316 int pcbr_offset;
5317
5318 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5319 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5320 pcbr_offset,
5321 I915_GTT_OFFSET_NONE,
5322 pctx_size);
5323 goto out;
5324 }
5325
5326 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5327
5328 /*
5329 * From the Gunit register HAS:
5330 * The Gfx driver is expected to program this register and ensure
5331 * proper allocation within Gfx stolen memory. For example, this
5332 	 * register should be programmed such that the PCBR range does not
5333 * overlap with other ranges, such as the frame buffer, protected
5334 * memory, or any other relevant ranges.
5335 */
5336 pctx = i915_gem_object_create_stolen(dev, pctx_size);
5337 if (!pctx) {
5338 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5339 return;
5340 }
5341
5342 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5343 I915_WRITE(VLV_PCBR, pctx_paddr);
5344
5345 out:
5346 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5347 dev_priv->vlv_pctx = pctx;
5348 }
5349
5350 static void valleyview_cleanup_pctx(struct drm_device *dev)
5351 {
5352 struct drm_i915_private *dev_priv = dev->dev_private;
5353
5354 if (WARN_ON(!dev_priv->vlv_pctx))
5355 return;
5356
5357 drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
5358 dev_priv->vlv_pctx = NULL;
5359 }
5360
5361 static void valleyview_init_gt_powersave(struct drm_device *dev)
5362 {
5363 struct drm_i915_private *dev_priv = dev->dev_private;
5364 u32 val;
5365
5366 valleyview_setup_pctx(dev);
5367
5368 mutex_lock(&dev_priv->rps.hw_lock);
5369
5370 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
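	/* The DDR rate is encoded in bits 7:6 of the GPU frequency status word. */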
5371 switch ((val >> 6) & 3) {
5372 case 0:
5373 case 1:
5374 dev_priv->mem_freq = 800;
5375 break;
5376 case 2:
5377 dev_priv->mem_freq = 1066;
5378 break;
5379 case 3:
5380 dev_priv->mem_freq = 1333;
5381 break;
5382 }
5383 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5384
5385 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5386 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5387 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5388 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5389 dev_priv->rps.max_freq);
5390
5391 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5392 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5393 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5394 dev_priv->rps.efficient_freq);
5395
5396 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5397 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5398 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5399 dev_priv->rps.rp1_freq);
5400
5401 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5402 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5403 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5404 dev_priv->rps.min_freq);
5405
5406 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5407
5408 /* Preserve min/max settings in case of re-init */
5409 if (dev_priv->rps.max_freq_softlimit == 0)
5410 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5411
5412 if (dev_priv->rps.min_freq_softlimit == 0)
5413 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5414
5415 mutex_unlock(&dev_priv->rps.hw_lock);
5416 }
5417
5418 static void cherryview_init_gt_powersave(struct drm_device *dev)
5419 {
5420 struct drm_i915_private *dev_priv = dev->dev_private;
5421 u32 val;
5422
5423 cherryview_setup_pctx(dev);
5424
5425 mutex_lock(&dev_priv->rps.hw_lock);
5426
5427 mutex_lock(&dev_priv->sb_lock);
5428 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5429 mutex_unlock(&dev_priv->sb_lock);
5430
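	/* The CCK fuse encodes the memory frequency selection in bits 4:2. */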
5431 switch ((val >> 2) & 0x7) {
5432 case 3:
5433 dev_priv->mem_freq = 2000;
5434 break;
5435 default:
5436 dev_priv->mem_freq = 1600;
5437 break;
5438 }
5439 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5440
5441 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5442 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5443 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5444 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5445 dev_priv->rps.max_freq);
5446
5447 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5448 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5449 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5450 dev_priv->rps.efficient_freq);
5451
5452 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5453 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5454 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5455 dev_priv->rps.rp1_freq);
5456
5457 /* PUnit validated range is only [RPe, RP0] */
5458 dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5459 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5460 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5461 dev_priv->rps.min_freq);
5462
5463 WARN_ONCE((dev_priv->rps.max_freq |
5464 dev_priv->rps.efficient_freq |
5465 dev_priv->rps.rp1_freq |
5466 dev_priv->rps.min_freq) & 1,
5467 "Odd GPU freq values\n");
5468
5469 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5470
5471 /* Preserve min/max settings in case of re-init */
5472 if (dev_priv->rps.max_freq_softlimit == 0)
5473 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5474
5475 if (dev_priv->rps.min_freq_softlimit == 0)
5476 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5477
5478 mutex_unlock(&dev_priv->rps.hw_lock);
5479 }
5480
5481 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5482 {
5483 valleyview_cleanup_pctx(dev);
5484 }
5485
5486 static void cherryview_enable_rps(struct drm_device *dev)
5487 {
5488 struct drm_i915_private *dev_priv = dev->dev_private;
5489 struct intel_engine_cs *ring;
5490 u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5491 int i;
5492
5493 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5494
5495 gtfifodbg = I915_READ(GTFIFODBG);
5496 if (gtfifodbg) {
5497 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5498 gtfifodbg);
5499 I915_WRITE(GTFIFODBG, gtfifodbg);
5500 }
5501
5502 cherryview_check_pctx(dev_priv);
5503
5504 /* 1a & 1b: Get forcewake during program sequence. Although the driver
5505 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
5506 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5507
5508 /* Disable RC states. */
5509 I915_WRITE(GEN6_RC_CONTROL, 0);
5510
5511 /* 2a: Program RC6 thresholds.*/
5512 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5513 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5514 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5515
5516 for_each_ring(ring, dev_priv, i)
5517 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5518 I915_WRITE(GEN6_RC_SLEEP, 0);
5519
5520 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
5521 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5522
5523 /* allows RC6 residency counter to work */
5524 I915_WRITE(VLV_COUNTER_CONTROL,
5525 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5526 VLV_MEDIA_RC6_COUNT_EN |
5527 VLV_RENDER_RC6_COUNT_EN));
5528
5529 /* For now we assume BIOS is allocating and populating the PCBR */
5530 pcbr = I915_READ(VLV_PCBR);
5531
5532 /* 3: Enable RC6 */
5533 if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5534 (pcbr >> VLV_PCBR_ADDR_SHIFT))
5535 rc6_mode = GEN7_RC_CTL_TO_MODE;
5536
5537 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5538
5539 /* 4 Program defaults and thresholds for RPS*/
5540 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5541 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5542 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5543 I915_WRITE(GEN6_RP_UP_EI, 66000);
5544 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5545
5546 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5547
5548 /* 5: Enable RPS */
5549 I915_WRITE(GEN6_RP_CONTROL,
5550 GEN6_RP_MEDIA_HW_NORMAL_MODE |
5551 GEN6_RP_MEDIA_IS_GFX |
5552 GEN6_RP_ENABLE |
5553 GEN6_RP_UP_BUSY_AVG |
5554 GEN6_RP_DOWN_IDLE_AVG);
5555
5556 /* Setting Fixed Bias */
5557 val = VLV_OVERRIDE_EN |
5558 VLV_SOC_TDP_EN |
5559 CHV_BIAS_CPU_50_SOC_50;
5560 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5561
5562 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5563
5564 /* RPS code assumes GPLL is used */
5565 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5566
5567 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5568 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5569
5570 dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5571 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5572 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5573 dev_priv->rps.cur_freq);
5574
5575 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5576 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5577 dev_priv->rps.efficient_freq);
5578
5579 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5580
5581 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5582 }
5583
5584 static void valleyview_enable_rps(struct drm_device *dev)
5585 {
5586 struct drm_i915_private *dev_priv = dev->dev_private;
5587 struct intel_engine_cs *ring;
5588 u32 gtfifodbg, val, rc6_mode = 0;
5589 int i;
5590
5591 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5592
5593 valleyview_check_pctx(dev_priv);
5594
5595 if ((gtfifodbg = I915_READ(GTFIFODBG))) {
5596 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5597 gtfifodbg);
5598 I915_WRITE(GTFIFODBG, gtfifodbg);
5599 }
5600
5601 /* If VLV, Forcewake all wells, else re-direct to regular path */
5602 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5603
5604 /* Disable RC states. */
5605 I915_WRITE(GEN6_RC_CONTROL, 0);
5606
5607 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5608 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5609 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5610 I915_WRITE(GEN6_RP_UP_EI, 66000);
5611 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5612
5613 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5614
5615 I915_WRITE(GEN6_RP_CONTROL,
5616 GEN6_RP_MEDIA_TURBO |
5617 GEN6_RP_MEDIA_HW_NORMAL_MODE |
5618 GEN6_RP_MEDIA_IS_GFX |
5619 GEN6_RP_ENABLE |
5620 GEN6_RP_UP_BUSY_AVG |
5621 GEN6_RP_DOWN_IDLE_CONT);
5622
5623 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5624 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5625 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5626
5627 for_each_ring(ring, dev_priv, i)
5628 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5629
5630 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5631
5632 /* allows RC6 residency counter to work */
5633 I915_WRITE(VLV_COUNTER_CONTROL,
5634 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5635 VLV_RENDER_RC0_COUNT_EN |
5636 VLV_MEDIA_RC6_COUNT_EN |
5637 VLV_RENDER_RC6_COUNT_EN));
5638
5639 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5640 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5641
5642 intel_print_rc6_info(dev, rc6_mode);
5643
5644 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5645
5646 /* Setting Fixed Bias */
5647 val = VLV_OVERRIDE_EN |
5648 VLV_SOC_TDP_EN |
5649 VLV_BIAS_CPU_125_SOC_875;
5650 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5651
5652 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5653
5654 /* RPS code assumes GPLL is used */
5655 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5656
5657 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5658 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5659
5660 dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5661 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5662 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5663 dev_priv->rps.cur_freq);
5664
5665 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5666 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5667 dev_priv->rps.efficient_freq);
5668
5669 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5670
5671 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5672 }
5673
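/*
 * Decode a PXVFREQ divider word: freq = div * 133333 / ((1 << post) * pre),
 * i.e. a 133.33 MHz reference scaled by the div/post/pre fields.
 */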
5674 static unsigned long intel_pxfreq(u32 vidfreq)
5675 {
5676 unsigned long freq;
5677 int div = (vidfreq & 0x3f0000) >> 16;
5678 int post = (vidfreq & 0x3000) >> 12;
5679 int pre = (vidfreq & 0x7);
5680
5681 if (!pre)
5682 return 0;
5683
5684 freq = ((div * 133333) / ((1<<post) * pre));
5685
5686 return freq;
5687 }
5688
5689 static const struct cparams {
5690 u16 i;
5691 u16 t;
5692 u16 m;
5693 u16 c;
5694 } cparams[] = {
5695 { 1, 1333, 301, 28664 },
5696 { 1, 1066, 294, 24460 },
5697 { 1, 800, 294, 25192 },
5698 { 0, 1333, 276, 27605 },
5699 { 0, 1066, 276, 27605 },
5700 { 0, 800, 231, 23784 },
5701 };
5702
5703 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5704 {
5705 u64 total_count, diff, ret;
5706 u32 count1, count2, count3, m = 0, c = 0;
5707 unsigned long now = jiffies_to_msecs(jiffies), diff1;
5708 int i;
5709
5710 assert_spin_locked(&mchdev_lock);
5711
5712 diff1 = now - dev_priv->ips.last_time1;
5713
5714 /* Prevent division-by-zero if we are asking too fast.
5715 * Also, we don't get interesting results if we are polling
5716 * faster than once in 10ms, so just return the saved value
5717 * in such cases.
5718 */
5719 if (diff1 <= 10)
5720 return dev_priv->ips.chipset_power;
5721
5722 count1 = I915_READ(DMIEC);
5723 count2 = I915_READ(DDREC);
5724 count3 = I915_READ(CSIEC);
5725
5726 total_count = count1 + count2 + count3;
5727
5728 /* FIXME: handle per-counter overflow */
5729 if (total_count < dev_priv->ips.last_count1) {
5730 diff = ~0UL - dev_priv->ips.last_count1;
5731 diff += total_count;
5732 } else {
5733 diff = total_count - dev_priv->ips.last_count1;
5734 }
5735
5736 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5737 if (cparams[i].i == dev_priv->ips.c_m &&
5738 cparams[i].t == dev_priv->ips.r_t) {
5739 m = cparams[i].m;
5740 c = cparams[i].c;
5741 break;
5742 }
5743 }
5744
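	/*
	 * Average the counter delta over the elapsed milliseconds, then apply
	 * the linear model (m * rate + c), scaled down by 10, to get the
	 * chipset power estimate.
	 */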
5745 diff = div_u64(diff, diff1);
5746 ret = ((m * diff) + c);
5747 ret = div_u64(ret, 10);
5748
5749 dev_priv->ips.last_count1 = total_count;
5750 dev_priv->ips.last_time1 = now;
5751
5752 dev_priv->ips.chipset_power = ret;
5753
5754 return ret;
5755 }
5756
5757 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5758 {
5759 struct drm_device *dev = dev_priv->dev;
5760 unsigned long val;
5761
5762 if (INTEL_INFO(dev)->gen != 5)
5763 return 0;
5764
5765 spin_lock_irq(&mchdev_lock);
5766
5767 val = __i915_chipset_val(dev_priv);
5768
5769 spin_unlock_irq(&mchdev_lock);
5770
5771 return val;
5772 }
5773
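/*
 * Combine the slope/intercept fields of TSFS with the current TR1 sensor
 * reading: (slope * TR1) / 127 - intercept.
 */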
5774 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5775 {
5776 unsigned long m, x, b;
5777 u32 tsfs;
5778
5779 tsfs = I915_READ(TSFS);
5780
5781 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5782 x = I915_READ8(TR1);
5783
5784 b = tsfs & TSFS_INTR_MASK;
5785
5786 return ((m * x) / 127) - b;
5787 }
5788
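/*
 * Map a PXVID code to a voltage value: a zero code returns 0, codes 8-30
 * are clamped to 31, and the result is (pxvid + 2) * 125.
 */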
5789 static int _pxvid_to_vd(u8 pxvid)
5790 {
5791 if (pxvid == 0)
5792 return 0;
5793
5794 if (pxvid >= 8 && pxvid < 31)
5795 pxvid = 31;
5796
5797 return (pxvid + 2) * 125;
5798 }
5799
5800 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5801 {
5802 struct drm_device *dev = dev_priv->dev;
5803 const int vd = _pxvid_to_vd(pxvid);
5804 const int vm = vd - 1125;
5805
5806 if (INTEL_INFO(dev)->is_mobile)
5807 return vm > 0 ? vm : 0;
5808
5809 return vd;
5810 }
5811
5812 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5813 {
5814 u64 now, diff, diffms;
5815 u32 count;
5816
5817 assert_spin_locked(&mchdev_lock);
5818
5819 now = ktime_get_raw_ns();
5820 diffms = now - dev_priv->ips.last_time2;
5821 do_div(diffms, NSEC_PER_MSEC);
5822
5823 /* Don't divide by 0 */
5824 if (!diffms)
5825 return;
5826
5827 count = I915_READ(GFXEC);
5828
5829 if (count < dev_priv->ips.last_count2) {
5830 diff = ~0UL - dev_priv->ips.last_count2;
5831 diff += count;
5832 } else {
5833 diff = count - dev_priv->ips.last_count2;
5834 }
5835
5836 dev_priv->ips.last_count2 = count;
5837 dev_priv->ips.last_time2 = now;
5838
5839 /* More magic constants... */
5840 diff = diff * 1181;
5841 diff = div_u64(diff, diffms * 10);
5842 dev_priv->ips.gfx_power = diff;
5843 }
5844
5845 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5846 {
5847 struct drm_device *dev = dev_priv->dev;
5848
5849 if (INTEL_INFO(dev)->gen != 5)
5850 return;
5851
5852 spin_lock_irq(&mchdev_lock);
5853
5854 __i915_update_gfx_val(dev_priv);
5855
5856 spin_unlock_irq(&mchdev_lock);
5857 }
5858
5859 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5860 {
5861 unsigned long t, corr, state1, corr2, state2;
5862 u32 pxvid, ext_v;
5863
5864 assert_spin_locked(&mchdev_lock);
5865
5866 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
5867 pxvid = (pxvid >> 24) & 0x7f;
5868 ext_v = pvid_to_extvid(dev_priv, pxvid);
5869
5870 state1 = ext_v;
5871
5872 t = i915_mch_val(dev_priv);
5873
5874 /* Revel in the empirically derived constants */
5875
5876 /* Correction factor in 1/100000 units */
5877 if (t > 80)
5878 corr = ((t * 2349) + 135940);
5879 else if (t >= 50)
5880 corr = ((t * 964) + 29317);
5881 else /* < 50 */
5882 corr = ((t * 301) + 1004);
5883
5884 corr = corr * ((150142 * state1) / 10000 - 78642);
5885 corr /= 100000;
5886 corr2 = (corr * dev_priv->ips.corr);
5887
5888 state2 = (corr2 * state1) / 10000;
5889 state2 /= 100; /* convert to mW */
5890
5891 __i915_update_gfx_val(dev_priv);
5892
5893 return dev_priv->ips.gfx_power + state2;
5894 }
5895
5896 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5897 {
5898 struct drm_device *dev = dev_priv->dev;
5899 unsigned long val;
5900
5901 if (INTEL_INFO(dev)->gen != 5)
5902 return 0;
5903
5904 spin_lock_irq(&mchdev_lock);
5905
5906 val = __i915_gfx_val(dev_priv);
5907
5908 spin_unlock_irq(&mchdev_lock);
5909
5910 return val;
5911 }
5912
5913 /**
5914 * i915_read_mch_val - return value for IPS use
5915 *
5916 * Calculate and return a value for the IPS driver to use when deciding whether
5917 * we have thermal and power headroom to increase CPU or GPU power budget.
5918 */
5919 unsigned long i915_read_mch_val(void)
5920 {
5921 struct drm_i915_private *dev_priv;
5922 unsigned long chipset_val, graphics_val, ret = 0;
5923
5924 spin_lock_irq(&mchdev_lock);
5925 if (!i915_mch_dev)
5926 goto out_unlock;
5927 dev_priv = i915_mch_dev;
5928
5929 chipset_val = __i915_chipset_val(dev_priv);
5930 graphics_val = __i915_gfx_val(dev_priv);
5931
5932 ret = chipset_val + graphics_val;
5933
5934 out_unlock:
5935 spin_unlock_irq(&mchdev_lock);
5936
5937 return ret;
5938 }
5939 EXPORT_SYMBOL_GPL(i915_read_mch_val);
5940
5941 /**
5942 * i915_gpu_raise - raise GPU frequency limit
5943 *
5944 * Raise the limit; IPS indicates we have thermal headroom.
5945 */
5946 bool i915_gpu_raise(void)
5947 {
5948 struct drm_i915_private *dev_priv;
5949 bool ret = true;
5950
5951 spin_lock_irq(&mchdev_lock);
5952 if (!i915_mch_dev) {
5953 ret = false;
5954 goto out_unlock;
5955 }
5956 dev_priv = i915_mch_dev;
5957
5958 if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5959 dev_priv->ips.max_delay--;
5960
5961 out_unlock:
5962 spin_unlock_irq(&mchdev_lock);
5963
5964 return ret;
5965 }
5966 EXPORT_SYMBOL_GPL(i915_gpu_raise);
5967
5968 /**
5969 * i915_gpu_lower - lower GPU frequency limit
5970 *
5971 * IPS indicates we're close to a thermal limit, so throttle back the GPU
5972 * frequency maximum.
5973 */
5974 bool i915_gpu_lower(void)
5975 {
5976 struct drm_i915_private *dev_priv;
5977 bool ret = true;
5978
5979 spin_lock_irq(&mchdev_lock);
5980 if (!i915_mch_dev) {
5981 ret = false;
5982 goto out_unlock;
5983 }
5984 dev_priv = i915_mch_dev;
5985
5986 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5987 dev_priv->ips.max_delay++;
5988
5989 out_unlock:
5990 spin_unlock_irq(&mchdev_lock);
5991
5992 return ret;
5993 }
5994 EXPORT_SYMBOL_GPL(i915_gpu_lower);
5995
5996 /**
5997 * i915_gpu_busy - indicate GPU busyness to IPS
5998 *
5999 * Tell the IPS driver whether or not the GPU is busy.
6000 */
6001 bool i915_gpu_busy(void)
6002 {
6003 struct drm_i915_private *dev_priv;
6004 struct intel_engine_cs *ring;
6005 bool ret = false;
6006 int i;
6007
6008 spin_lock_irq(&mchdev_lock);
6009 if (!i915_mch_dev)
6010 goto out_unlock;
6011 dev_priv = i915_mch_dev;
6012
6013 for_each_ring(ring, dev_priv, i)
6014 ret |= !list_empty(&ring->request_list);
6015
6016 out_unlock:
6017 spin_unlock_irq(&mchdev_lock);
6018
6019 return ret;
6020 }
6021 EXPORT_SYMBOL_GPL(i915_gpu_busy);
6022
6023 /**
6024 * i915_gpu_turbo_disable - disable graphics turbo
6025 *
6026 * Disable graphics turbo by resetting the max frequency and setting the
6027 * current frequency to the default.
6028 */
6029 bool i915_gpu_turbo_disable(void)
6030 {
6031 struct drm_i915_private *dev_priv;
6032 bool ret = true;
6033
6034 spin_lock_irq(&mchdev_lock);
6035 if (!i915_mch_dev) {
6036 ret = false;
6037 goto out_unlock;
6038 }
6039 dev_priv = i915_mch_dev;
6040
6041 dev_priv->ips.max_delay = dev_priv->ips.fstart;
6042
6043 if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
6044 ret = false;
6045
6046 out_unlock:
6047 spin_unlock_irq(&mchdev_lock);
6048
6049 return ret;
6050 }
6051 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
6052
6053 /**
6054 * Tells the intel_ips driver that the i915 driver is now loaded, if
6055 * IPS got loaded first.
6056 *
6057 * This awkward dance is so that neither module has to depend on the
6058 * other in order for IPS to do the appropriate communication of
6059 * GPU turbo limits to i915.
6060 */
6061 static void
6062 ips_ping_for_i915_load(void)
6063 {
6064 #ifndef __NetBSD__ /* XXX IPS GPU turbo limits what? */
6065 void (*link)(void);
6066
6067 link = symbol_get(ips_link_to_i915_driver);
6068 if (link) {
6069 link();
6070 symbol_put(ips_link_to_i915_driver);
6071 }
6072 #endif
6073 }
6074
6075 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6076 {
6077 /* We only register the i915 ips part with intel-ips once everything is
6078 * set up, to avoid intel-ips sneaking in and reading bogus values. */
6079 spin_lock_irq(&mchdev_lock);
6080 i915_mch_dev = dev_priv;
6081 spin_unlock_irq(&mchdev_lock);
6082
6083 ips_ping_for_i915_load();
6084 }
6085
6086 void intel_gpu_ips_teardown(void)
6087 {
6088 spin_lock_irq(&mchdev_lock);
6089 i915_mch_dev = NULL;
6090 spin_unlock_irq(&mchdev_lock);
6091 }
6092
6093 static void intel_init_emon(struct drm_device *dev)
6094 {
6095 struct drm_i915_private *dev_priv = dev->dev_private;
6096 u32 lcfuse;
6097 u8 pxw[16];
6098 int i;
6099
6100 	/* Disable PMON so the energy event weights can be programmed */
6101 I915_WRITE(ECR, 0);
6102 POSTING_READ(ECR);
6103
6104 /* Program energy weights for various events */
6105 I915_WRITE(SDEW, 0x15040d00);
6106 I915_WRITE(CSIEW0, 0x007f0000);
6107 I915_WRITE(CSIEW1, 0x1e220004);
6108 I915_WRITE(CSIEW2, 0x04000004);
6109
6110 for (i = 0; i < 5; i++)
6111 I915_WRITE(PEW(i), 0);
6112 for (i = 0; i < 3; i++)
6113 I915_WRITE(DEW(i), 0);
6114
6115 /* Program P-state weights to account for frequency power adjustment */
6116 for (i = 0; i < 16; i++) {
6117 u32 pxvidfreq = I915_READ(PXVFREQ(i));
6118 unsigned long freq = intel_pxfreq(pxvidfreq);
6119 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6120 PXVFREQ_PX_SHIFT;
6121 unsigned long val;
6122
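		/*
		 * The weight is roughly proportional to VID^2 * freq (dynamic
		 * power), scaled so the largest expected value still fits in a
		 * single byte.
		 */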
6123 val = vid * vid;
6124 val *= (freq / 1000);
6125 val *= 255;
6126 val /= (127*127*900);
6127 if (val > 0xff)
6128 DRM_ERROR("bad pxval: %ld\n", val);
6129 pxw[i] = val;
6130 }
6131 /* Render standby states get 0 weight */
6132 pxw[14] = 0;
6133 pxw[15] = 0;
6134
6135 for (i = 0; i < 4; i++) {
6136 u32 val = ((u32)pxw[i*4] << 24) | ((u32)pxw[(i*4)+1] << 16) |
6137 ((u32)pxw[(i*4)+2] << 8) | ((u32)pxw[(i*4)+3]);
6138 I915_WRITE(PXW(i), val);
6139 }
6140
6141 /* Adjust magic regs to magic values (more experimental results) */
6142 I915_WRITE(OGW0, 0);
6143 I915_WRITE(OGW1, 0);
6144 I915_WRITE(EG0, 0x00007f00);
6145 I915_WRITE(EG1, 0x0000000e);
6146 I915_WRITE(EG2, 0x000e0000);
6147 I915_WRITE(EG3, 0x68000300);
6148 I915_WRITE(EG4, 0x42000000);
6149 I915_WRITE(EG5, 0x00140031);
6150 I915_WRITE(EG6, 0);
6151 I915_WRITE(EG7, 0);
6152
6153 for (i = 0; i < 8; i++)
6154 I915_WRITE(PXWL(i), 0);
6155
6156 /* Enable PMON + select events */
6157 I915_WRITE(ECR, 0x80000019);
6158
6159 lcfuse = I915_READ(LCFUSE02);
6160
6161 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6162 }
6163
6164 void intel_init_gt_powersave(struct drm_device *dev)
6165 {
6166 i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
6167
6168 if (IS_CHERRYVIEW(dev))
6169 cherryview_init_gt_powersave(dev);
6170 else if (IS_VALLEYVIEW(dev))
6171 valleyview_init_gt_powersave(dev);
6172 }
6173
6174 void intel_cleanup_gt_powersave(struct drm_device *dev)
6175 {
6176 if (IS_CHERRYVIEW(dev))
6177 return;
6178 else if (IS_VALLEYVIEW(dev))
6179 valleyview_cleanup_gt_powersave(dev);
6180 }
6181
6182 static void gen6_suspend_rps(struct drm_device *dev)
6183 {
6184 struct drm_i915_private *dev_priv = dev->dev_private;
6185
6186 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6187
6188 gen6_disable_rps_interrupts(dev);
6189 }
6190
6191 /**
6192 * intel_suspend_gt_powersave - suspend PM work and helper threads
6193 * @dev: drm device
6194 *
6195 * We don't want to disable RC6 or other features here, we just want
6196 * to make sure any work we've queued has finished and won't bother
6197 * us while we're suspended.
6198 */
6199 void intel_suspend_gt_powersave(struct drm_device *dev)
6200 {
6201 struct drm_i915_private *dev_priv = dev->dev_private;
6202
6203 if (INTEL_INFO(dev)->gen < 6)
6204 return;
6205
6206 gen6_suspend_rps(dev);
6207
6208 /* Force GPU to min freq during suspend */
6209 gen6_rps_idle(dev_priv);
6210 }
6211
6212 void intel_disable_gt_powersave(struct drm_device *dev)
6213 {
6214 struct drm_i915_private *dev_priv = dev->dev_private;
6215
6216 if (IS_IRONLAKE_M(dev)) {
6217 ironlake_disable_drps(dev);
6218 } else if (INTEL_INFO(dev)->gen >= 6) {
6219 intel_suspend_gt_powersave(dev);
6220
6221 mutex_lock(&dev_priv->rps.hw_lock);
6222 if (INTEL_INFO(dev)->gen >= 9)
6223 gen9_disable_rps(dev);
6224 else if (IS_CHERRYVIEW(dev))
6225 cherryview_disable_rps(dev);
6226 else if (IS_VALLEYVIEW(dev))
6227 valleyview_disable_rps(dev);
6228 else
6229 gen6_disable_rps(dev);
6230
6231 dev_priv->rps.enabled = false;
6232 mutex_unlock(&dev_priv->rps.hw_lock);
6233 }
6234 }
6235
6236 static void intel_gen6_powersave_work(struct work_struct *work)
6237 {
6238 struct drm_i915_private *dev_priv =
6239 container_of(work, struct drm_i915_private,
6240 rps.delayed_resume_work.work);
6241 struct drm_device *dev = dev_priv->dev;
6242
6243 mutex_lock(&dev_priv->rps.hw_lock);
6244
6245 gen6_reset_rps_interrupts(dev);
6246
6247 if (IS_CHERRYVIEW(dev)) {
6248 cherryview_enable_rps(dev);
6249 } else if (IS_VALLEYVIEW(dev)) {
6250 valleyview_enable_rps(dev);
6251 } else if (INTEL_INFO(dev)->gen >= 9) {
6252 gen9_enable_rc6(dev);
6253 gen9_enable_rps(dev);
6254 if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
6255 __gen6_update_ring_freq(dev);
6256 } else if (IS_BROADWELL(dev)) {
6257 gen8_enable_rps(dev);
6258 __gen6_update_ring_freq(dev);
6259 } else {
6260 gen6_enable_rps(dev);
6261 __gen6_update_ring_freq(dev);
6262 }
6263
6264 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6265 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6266
6267 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6268 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6269
6270 dev_priv->rps.enabled = true;
6271
6272 gen6_enable_rps_interrupts(dev);
6273
6274 mutex_unlock(&dev_priv->rps.hw_lock);
6275
6276 intel_runtime_pm_put(dev_priv);
6277 }
6278
6279 void intel_enable_gt_powersave(struct drm_device *dev)
6280 {
6281 struct drm_i915_private *dev_priv = dev->dev_private;
6282
6283 /* Powersaving is controlled by the host when inside a VM */
6284 if (intel_vgpu_active(dev))
6285 return;
6286
6287 if (IS_IRONLAKE_M(dev)) {
6288 mutex_lock(&dev->struct_mutex);
6289 ironlake_enable_drps(dev);
6290 intel_init_emon(dev);
6291 mutex_unlock(&dev->struct_mutex);
6292 } else if (INTEL_INFO(dev)->gen >= 6) {
6293 /*
6294 * PCU communication is slow and this doesn't need to be
6295 * done at any specific time, so do this out of our fast path
6296 * to make resume and init faster.
6297 *
6298 * We depend on the HW RC6 power context save/restore
6299 * mechanism when entering D3 through runtime PM suspend. So
6300 		 * disable RPM until RPS/RC6 is properly set up. We can only
6301 * get here via the driver load/system resume/runtime resume
6302 * paths, so the _noresume version is enough (and in case of
6303 * runtime resume it's necessary).
6304 */
6305 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6306 round_jiffies_up_relative(HZ)))
6307 intel_runtime_pm_get_noresume(dev_priv);
6308 }
6309 }
6310
6311 void intel_reset_gt_powersave(struct drm_device *dev)
6312 {
6313 struct drm_i915_private *dev_priv = dev->dev_private;
6314
6315 if (INTEL_INFO(dev)->gen < 6)
6316 return;
6317
6318 gen6_suspend_rps(dev);
6319 dev_priv->rps.enabled = false;
6320 }
6321
6322 static void ibx_init_clock_gating(struct drm_device *dev)
6323 {
6324 struct drm_i915_private *dev_priv = dev->dev_private;
6325
6326 /*
6327 * On Ibex Peak and Cougar Point, we need to disable clock
6328 * gating for the panel power sequencer or it will fail to
6329 * start up when no ports are active.
6330 */
6331 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6332 }
6333
6334 static void g4x_disable_trickle_feed(struct drm_device *dev)
6335 {
6336 struct drm_i915_private *dev_priv = dev->dev_private;
6337 enum i915_pipe pipe;
6338
6339 for_each_pipe(dev_priv, pipe) {
6340 I915_WRITE(DSPCNTR(pipe),
6341 I915_READ(DSPCNTR(pipe)) |
6342 DISPPLANE_TRICKLE_FEED_DISABLE);
6343
6344 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6345 POSTING_READ(DSPSURF(pipe));
6346 }
6347 }
6348
6349 static void ilk_init_lp_watermarks(struct drm_device *dev)
6350 {
6351 struct drm_i915_private *dev_priv = dev->dev_private;
6352
6353 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6354 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6355 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6356
6357 /*
6358 * Don't touch WM1S_LP_EN here.
6359 * Doing so could cause underruns.
6360 */
6361 }
6362
6363 static void ironlake_init_clock_gating(struct drm_device *dev)
6364 {
6365 struct drm_i915_private *dev_priv = dev->dev_private;
6366 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6367
6368 /*
6369 * Required for FBC
6370 * WaFbcDisableDpfcClockGating:ilk
6371 */
6372 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6373 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6374 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6375
6376 I915_WRITE(PCH_3DCGDIS0,
6377 MARIUNIT_CLOCK_GATE_DISABLE |
6378 SVSMUNIT_CLOCK_GATE_DISABLE);
6379 I915_WRITE(PCH_3DCGDIS1,
6380 VFMUNIT_CLOCK_GATE_DISABLE);
6381
6382 /*
6383 * According to the spec the following bits should be set in
6384 * order to enable memory self-refresh
6385 * The bit 22/21 of 0x42004
6386 * The bit 5 of 0x42020
6387 * The bit 15 of 0x45000
6388 */
6389 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6390 (I915_READ(ILK_DISPLAY_CHICKEN2) |
6391 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6392 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6393 I915_WRITE(DISP_ARB_CTL,
6394 (I915_READ(DISP_ARB_CTL) |
6395 DISP_FBC_WM_DIS));
6396
6397 ilk_init_lp_watermarks(dev);
6398
6399 /*
6400 * Based on the document from hardware guys the following bits
6401 * should be set unconditionally in order to enable FBC.
6402 * The bit 22 of 0x42000
6403 * The bit 22 of 0x42004
6404 * The bit 7,8,9 of 0x42020.
6405 */
6406 if (IS_IRONLAKE_M(dev)) {
6407 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6408 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6409 I915_READ(ILK_DISPLAY_CHICKEN1) |
6410 ILK_FBCQ_DIS);
6411 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6412 I915_READ(ILK_DISPLAY_CHICKEN2) |
6413 ILK_DPARB_GATE);
6414 }
6415
6416 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6417
6418 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6419 I915_READ(ILK_DISPLAY_CHICKEN2) |
6420 ILK_ELPIN_409_SELECT);
6421 I915_WRITE(_3D_CHICKEN2,
6422 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6423 _3D_CHICKEN2_WM_READ_PIPELINED);
6424
6425 /* WaDisableRenderCachePipelinedFlush:ilk */
6426 I915_WRITE(CACHE_MODE_0,
6427 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6428
6429 /* WaDisable_RenderCache_OperationalFlush:ilk */
6430 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6431
6432 g4x_disable_trickle_feed(dev);
6433
6434 ibx_init_clock_gating(dev);
6435 }
6436
6437 static void cpt_init_clock_gating(struct drm_device *dev)
6438 {
6439 struct drm_i915_private *dev_priv = dev->dev_private;
6440 int pipe;
6441 uint32_t val;
6442
6443 /*
6444 * On Ibex Peak and Cougar Point, we need to disable clock
6445 * gating for the panel power sequencer or it will fail to
6446 * start up when no ports are active.
6447 */
6448 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6449 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6450 PCH_CPUNIT_CLOCK_GATE_DISABLE);
6451 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6452 DPLS_EDP_PPS_FIX_DIS);
6453 	/* The below fixes a weird display corruption (a few pixels shifted
6454 	 * downward) seen only on the LVDS output of some HP laptops with Ivy Bridge.
6455 */
6456 for_each_pipe(dev_priv, pipe) {
6457 val = I915_READ(TRANS_CHICKEN2(pipe));
6458 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6459 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6460 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6461 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6462 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6463 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6464 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6465 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6466 }
6467 /* WADP0ClockGatingDisable */
6468 for_each_pipe(dev_priv, pipe) {
6469 I915_WRITE(TRANS_CHICKEN1(pipe),
6470 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6471 }
6472 }
6473
6474 static void gen6_check_mch_setup(struct drm_device *dev)
6475 {
6476 struct drm_i915_private *dev_priv = dev->dev_private;
6477 uint32_t tmp;
6478
6479 tmp = I915_READ(MCH_SSKPD);
6480 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6481 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
6482 tmp);
6483 }
6484
6485 static void gen6_init_clock_gating(struct drm_device *dev)
6486 {
6487 struct drm_i915_private *dev_priv = dev->dev_private;
6488 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6489
6490 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6491
6492 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6493 I915_READ(ILK_DISPLAY_CHICKEN2) |
6494 ILK_ELPIN_409_SELECT);
6495
6496 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6497 I915_WRITE(_3D_CHICKEN,
6498 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6499
6500 /* WaDisable_RenderCache_OperationalFlush:snb */
6501 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6502
6503 /*
6504 	 * BSpec recommends 8x4 when MSAA is used,
6505 * however in practice 16x4 seems fastest.
6506 *
6507 * Note that PS/WM thread counts depend on the WIZ hashing
6508 * disable bit, which we don't touch here, but it's good
6509 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6510 */
6511 I915_WRITE(GEN6_GT_MODE,
6512 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6513
6514 ilk_init_lp_watermarks(dev);
6515
6516 I915_WRITE(CACHE_MODE_0,
6517 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6518
6519 I915_WRITE(GEN6_UCGCTL1,
6520 I915_READ(GEN6_UCGCTL1) |
6521 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6522 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6523
6524 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6525 * gating disable must be set. Failure to set it results in
6526 * flickering pixels due to Z write ordering failures after
6527 * some amount of runtime in the Mesa "fire" demo, and Unigine
6528 * Sanctuary and Tropics, and apparently anything else with
6529 * alpha test or pixel discard.
6530 *
6531 * According to the spec, bit 11 (RCCUNIT) must also be set,
6532 * but we didn't debug actual testcases to find it out.
6533 *
6534 * WaDisableRCCUnitClockGating:snb
6535 * WaDisableRCPBUnitClockGating:snb
6536 */
6537 I915_WRITE(GEN6_UCGCTL2,
6538 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6539 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6540
6541 /* WaStripsFansDisableFastClipPerformanceFix:snb */
6542 I915_WRITE(_3D_CHICKEN3,
6543 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6544
6545 /*
6546 * Bspec says:
6547 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6548 * 3DSTATE_SF number of SF output attributes is more than 16."
6549 */
6550 I915_WRITE(_3D_CHICKEN3,
6551 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6552
6553 /*
6554 * According to the spec the following bits should be
6555 * set in order to enable memory self-refresh and fbc:
6556 * The bit21 and bit22 of 0x42000
6557 * The bit21 and bit22 of 0x42004
6558 * The bit5 and bit7 of 0x42020
6559 * The bit14 of 0x70180
6560 * The bit14 of 0x71180
6561 *
6562 * WaFbcAsynchFlipDisableFbcQueue:snb
6563 */
6564 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6565 I915_READ(ILK_DISPLAY_CHICKEN1) |
6566 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6567 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6568 I915_READ(ILK_DISPLAY_CHICKEN2) |
6569 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6570 I915_WRITE(ILK_DSPCLK_GATE_D,
6571 I915_READ(ILK_DSPCLK_GATE_D) |
6572 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
6573 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6574
6575 g4x_disable_trickle_feed(dev);
6576
6577 cpt_init_clock_gating(dev);
6578
6579 gen6_check_mch_setup(dev);
6580 }
6581
6582 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6583 {
6584 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6585
6586 /*
6587 * WaVSThreadDispatchOverride:ivb,vlv
6588 *
6589 * This actually overrides the dispatch
6590 * mode for all thread types.
6591 */
6592 reg &= ~GEN7_FF_SCHED_MASK;
6593 reg |= GEN7_FF_TS_SCHED_HW;
6594 reg |= GEN7_FF_VS_SCHED_HW;
6595 reg |= GEN7_FF_DS_SCHED_HW;
6596
6597 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6598 }
6599
6600 static void lpt_init_clock_gating(struct drm_device *dev)
6601 {
6602 struct drm_i915_private *dev_priv = dev->dev_private;
6603
6604 /*
6605 * TODO: this bit should only be enabled when really needed, then
6606 * disabled when not needed anymore in order to save power.
6607 */
6608 if (HAS_PCH_LPT_LP(dev))
6609 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6610 I915_READ(SOUTH_DSPCLK_GATE_D) |
6611 PCH_LP_PARTITION_LEVEL_DISABLE);
6612
6613 /* WADPOClockGatingDisable:hsw */
6614 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6615 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6616 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6617 }
6618
6619 static void lpt_suspend_hw(struct drm_device *dev)
6620 {
6621 struct drm_i915_private *dev_priv = dev->dev_private;
6622
6623 if (HAS_PCH_LPT_LP(dev)) {
6624 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6625
6626 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6627 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6628 }
6629 }
6630
6631 static void broadwell_init_clock_gating(struct drm_device *dev)
6632 {
6633 struct drm_i915_private *dev_priv = dev->dev_private;
6634 enum i915_pipe pipe;
6635 uint32_t misccpctl;
6636
6637 ilk_init_lp_watermarks(dev);
6638
6639 /* WaSwitchSolVfFArbitrationPriority:bdw */
6640 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6641
6642 /* WaPsrDPAMaskVBlankInSRD:bdw */
6643 I915_WRITE(CHICKEN_PAR1_1,
6644 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6645
6646 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6647 for_each_pipe(dev_priv, pipe) {
6648 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6649 I915_READ(CHICKEN_PIPESL_1(pipe)) |
6650 BDW_DPRS_MASK_VBLANK_SRD);
6651 }
6652
6653 /* WaVSRefCountFullforceMissDisable:bdw */
6654 /* WaDSRefCountFullforceMissDisable:bdw */
6655 I915_WRITE(GEN7_FF_THREAD_MODE,
6656 I915_READ(GEN7_FF_THREAD_MODE) &
6657 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6658
6659 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6660 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6661
6662 /* WaDisableSDEUnitClockGating:bdw */
6663 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6664 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6665
6666 /*
6667 * WaProgramL3SqcReg1Default:bdw
6668 * WaTempDisableDOPClkGating:bdw
6669 */
6670 misccpctl = I915_READ(GEN7_MISCCPCTL);
6671 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6672 I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6673 /*
6674 * Wait at least 100 clocks before re-enabling clock gating. See
6675 * the definition of L3SQCREG1 in BSpec.
6676 */
6677 POSTING_READ(GEN8_L3SQCREG1);
6678 udelay(1);
6679 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6680
6681 /*
6682 * WaGttCachingOffByDefault:bdw
6683 * GTT cache may not work with big pages, so if those
6684 * are ever enabled GTT cache may need to be disabled.
6685 */
6686 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6687
6688 lpt_init_clock_gating(dev);
6689 }
6690
6691 static void haswell_init_clock_gating(struct drm_device *dev)
6692 {
6693 struct drm_i915_private *dev_priv = dev->dev_private;
6694
6695 ilk_init_lp_watermarks(dev);
6696
6697 /* L3 caching of data atomics doesn't work -- disable it. */
6698 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6699 I915_WRITE(HSW_ROW_CHICKEN3,
6700 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6701
6702 /* This is required by WaCatErrorRejectionIssue:hsw */
6703 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6704 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6705 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6706
6707 /* WaVSRefCountFullforceMissDisable:hsw */
6708 I915_WRITE(GEN7_FF_THREAD_MODE,
6709 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6710
6711 /* WaDisable_RenderCache_OperationalFlush:hsw */
6712 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6713
6714 /* enable HiZ Raw Stall Optimization */
6715 I915_WRITE(CACHE_MODE_0_GEN7,
6716 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6717
6718 /* WaDisable4x2SubspanOptimization:hsw */
6719 I915_WRITE(CACHE_MODE_1,
6720 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6721
6722 /*
6723 * BSpec recommends 8x4 when MSAA is used,
6724 * however in practice 16x4 seems fastest.
6725 *
6726 * Note that PS/WM thread counts depend on the WIZ hashing
6727 * disable bit, which we don't touch here, but it's good
6728 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6729 */
6730 I915_WRITE(GEN7_GT_MODE,
6731 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6732
6733 /* WaSampleCChickenBitEnable:hsw */
6734 I915_WRITE(HALF_SLICE_CHICKEN3,
6735 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6736
6737 /* WaSwitchSolVfFArbitrationPriority:hsw */
6738 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6739
6740 /* WaRsPkgCStateDisplayPMReq:hsw */
6741 I915_WRITE(CHICKEN_PAR1_1,
6742 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6743
6744 lpt_init_clock_gating(dev);
6745 }
6746
6747 static void ivybridge_init_clock_gating(struct drm_device *dev)
6748 {
6749 struct drm_i915_private *dev_priv = dev->dev_private;
6750 uint32_t snpcr;
6751
6752 ilk_init_lp_watermarks(dev);
6753
6754 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6755
6756 /* WaDisableEarlyCull:ivb */
6757 I915_WRITE(_3D_CHICKEN3,
6758 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6759
6760 /* WaDisableBackToBackFlipFix:ivb */
6761 I915_WRITE(IVB_CHICKEN3,
6762 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6763 CHICKEN3_DGMG_DONE_FIX_DISABLE);
6764
6765 /* WaDisablePSDDualDispatchEnable:ivb */
6766 if (IS_IVB_GT1(dev))
6767 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6768 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6769
6770 /* WaDisable_RenderCache_OperationalFlush:ivb */
6771 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6772
6773 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6774 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6775 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6776
6777 /* WaApplyL3ControlAndL3ChickenMode:ivb */
6778 I915_WRITE(GEN7_L3CNTLREG1,
6779 GEN7_WA_FOR_GEN7_L3_CONTROL);
6780 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6781 GEN7_WA_L3_CHICKEN_MODE);
6782 if (IS_IVB_GT1(dev))
6783 I915_WRITE(GEN7_ROW_CHICKEN2,
6784 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6785 else {
6786 /* must write both registers */
6787 I915_WRITE(GEN7_ROW_CHICKEN2,
6788 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6789 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6790 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6791 }
6792
6793 /* WaForceL3Serialization:ivb */
6794 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6795 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6796
6797 /*
6798 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6799 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6800 */
6801 I915_WRITE(GEN6_UCGCTL2,
6802 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6803
6804 /* This is required by WaCatErrorRejectionIssue:ivb */
6805 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6806 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6807 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6808
6809 g4x_disable_trickle_feed(dev);
6810
6811 gen7_setup_fixed_func_scheduler(dev_priv);
6812
6813 if (0) { /* causes HiZ corruption on ivb:gt1 */
6814 /* enable HiZ Raw Stall Optimization */
6815 I915_WRITE(CACHE_MODE_0_GEN7,
6816 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6817 }
6818
6819 /* WaDisable4x2SubspanOptimization:ivb */
6820 I915_WRITE(CACHE_MODE_1,
6821 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6822
6823 /*
6824 * BSpec recommends 8x4 when MSAA is used,
6825 * however in practice 16x4 seems fastest.
6826 *
6827 * Note that PS/WM thread counts depend on the WIZ hashing
6828 * disable bit, which we don't touch here, but it's good
6829 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6830 */
6831 I915_WRITE(GEN7_GT_MODE,
6832 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6833
6834 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6835 snpcr &= ~GEN6_MBC_SNPCR_MASK;
6836 snpcr |= GEN6_MBC_SNPCR_MED;
6837 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6838
6839 if (!HAS_PCH_NOP(dev))
6840 cpt_init_clock_gating(dev);
6841
6842 gen6_check_mch_setup(dev);
6843 }
6844
6845 static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
6846 {
6847 u32 val;
6848
6849 /*
6850 * On driver load, a pipe may be active and driving a DSI display.
6851 * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
6852 * (and never recovering) in this case. intel_dsi_post_disable() will
6853 * clear it when we turn off the display.
6854 */
6855 val = I915_READ(DSPCLK_GATE_D);
6856 val &= DPOUNIT_CLOCK_GATE_DISABLE;
6857 val |= VRHUNIT_CLOCK_GATE_DISABLE;
6858 I915_WRITE(DSPCLK_GATE_D, val);
6859
6860 /*
6861 * Disable trickle feed and enable pnd deadline calculation
6862 */
6863 I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6864 I915_WRITE(CBR1_VLV, 0);
6865 }
6866
6867 static void valleyview_init_clock_gating(struct drm_device *dev)
6868 {
6869 struct drm_i915_private *dev_priv = dev->dev_private;
6870
6871 vlv_init_display_clock_gating(dev_priv);
6872
6873 /* WaDisableEarlyCull:vlv */
6874 I915_WRITE(_3D_CHICKEN3,
6875 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6876
6877 /* WaDisableBackToBackFlipFix:vlv */
6878 I915_WRITE(IVB_CHICKEN3,
6879 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6880 CHICKEN3_DGMG_DONE_FIX_DISABLE);
6881
6882 /* WaPsdDispatchEnable:vlv */
6883 /* WaDisablePSDDualDispatchEnable:vlv */
6884 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6885 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6886 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6887
6888 /* WaDisable_RenderCache_OperationalFlush:vlv */
6889 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6890
6891 /* WaForceL3Serialization:vlv */
6892 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6893 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6894
6895 /* WaDisableDopClockGating:vlv */
6896 I915_WRITE(GEN7_ROW_CHICKEN2,
6897 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6898
6899 /* This is required by WaCatErrorRejectionIssue:vlv */
6900 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6901 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6902 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6903
6904 gen7_setup_fixed_func_scheduler(dev_priv);
6905
6906 /*
6907 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6908 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6909 */
6910 I915_WRITE(GEN6_UCGCTL2,
6911 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6912
6913 /* WaDisableL3Bank2xClockGate:vlv
6914 	 * Disabling L3 clock gating - MMIO 0x940c[25] = 1
6915 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
6916 I915_WRITE(GEN7_UCGCTL4,
6917 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6918
6919 /*
6920 * BSpec says this must be set, even though
6921 * WaDisable4x2SubspanOptimization isn't listed for VLV.
6922 */
6923 I915_WRITE(CACHE_MODE_1,
6924 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6925
6926 /*
6927 * BSpec recommends 8x4 when MSAA is used,
6928 * however in practice 16x4 seems fastest.
6929 *
6930 * Note that PS/WM thread counts depend on the WIZ hashing
6931 * disable bit, which we don't touch here, but it's good
6932 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6933 */
6934 I915_WRITE(GEN7_GT_MODE,
6935 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6936
6937 /*
6938 * WaIncreaseL3CreditsForVLVB0:vlv
6939 * This is the hardware default actually.
6940 */
6941 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6942
6943 /*
6944 * WaDisableVLVClockGating_VBIIssue:vlv
6945 	 * Disable clock gating on the GCFG unit to prevent a delay
6946 * in the reporting of vblank events.
6947 */
6948 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6949 }
6950
6951 static void cherryview_init_clock_gating(struct drm_device *dev)
6952 {
6953 struct drm_i915_private *dev_priv = dev->dev_private;
6954
6955 vlv_init_display_clock_gating(dev_priv);
6956
6957 /* WaVSRefCountFullforceMissDisable:chv */
6958 /* WaDSRefCountFullforceMissDisable:chv */
6959 I915_WRITE(GEN7_FF_THREAD_MODE,
6960 I915_READ(GEN7_FF_THREAD_MODE) &
6961 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6962
6963 /* WaDisableSemaphoreAndSyncFlipWait:chv */
6964 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6965 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6966
6967 /* WaDisableCSUnitClockGating:chv */
6968 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6969 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6970
6971 /* WaDisableSDEUnitClockGating:chv */
6972 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6973 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6974
6975 /*
6976 * GTT cache may not work with big pages, so if those
6977 * are ever enabled GTT cache may need to be disabled.
6978 */
6979 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6980 }
6981
6982 static void g4x_init_clock_gating(struct drm_device *dev)
6983 {
6984 struct drm_i915_private *dev_priv = dev->dev_private;
6985 uint32_t dspclk_gate;
6986
6987 I915_WRITE(RENCLK_GATE_D1, 0);
6988 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6989 GS_UNIT_CLOCK_GATE_DISABLE |
6990 CL_UNIT_CLOCK_GATE_DISABLE);
6991 I915_WRITE(RAMCLK_GATE_D, 0);
6992 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6993 OVRUNIT_CLOCK_GATE_DISABLE |
6994 OVCUNIT_CLOCK_GATE_DISABLE;
6995 if (IS_GM45(dev))
6996 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6997 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6998
6999 /* WaDisableRenderCachePipelinedFlush */
7000 I915_WRITE(CACHE_MODE_0,
7001 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7002
7003 /* WaDisable_RenderCache_OperationalFlush:g4x */
7004 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7005
7006 g4x_disable_trickle_feed(dev);
7007 }
7008
7009 static void crestline_init_clock_gating(struct drm_device *dev)
7010 {
7011 struct drm_i915_private *dev_priv = dev->dev_private;
7012
7013 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7014 I915_WRITE(RENCLK_GATE_D2, 0);
7015 I915_WRITE(DSPCLK_GATE_D, 0);
7016 I915_WRITE(RAMCLK_GATE_D, 0);
7017 I915_WRITE16(DEUC, 0);
7018 I915_WRITE(MI_ARB_STATE,
7019 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7020
7021 /* WaDisable_RenderCache_OperationalFlush:gen4 */
7022 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7023 }
7024
7025 static void broadwater_init_clock_gating(struct drm_device *dev)
7026 {
7027 struct drm_i915_private *dev_priv = dev->dev_private;
7028
7029 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7030 I965_RCC_CLOCK_GATE_DISABLE |
7031 I965_RCPB_CLOCK_GATE_DISABLE |
7032 I965_ISC_CLOCK_GATE_DISABLE |
7033 I965_FBC_CLOCK_GATE_DISABLE);
7034 I915_WRITE(RENCLK_GATE_D2, 0);
7035 I915_WRITE(MI_ARB_STATE,
7036 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7037
7038 /* WaDisable_RenderCache_OperationalFlush:gen4 */
7039 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7040 }
7041
7042 static void gen3_init_clock_gating(struct drm_device *dev)
7043 {
7044 struct drm_i915_private *dev_priv = dev->dev_private;
7045 u32 dstate = I915_READ(D_STATE);
7046
7047 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7048 DSTATE_DOT_CLOCK_GATING;
7049 I915_WRITE(D_STATE, dstate);
7050
7051 if (IS_PINEVIEW(dev))
7052 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7053
7054 /* IIR "flip pending" means done if this bit is set */
7055 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7056
7057 /* interrupts should cause a wake up from C3 */
7058 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7059
7060 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7061 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7062
7063 I915_WRITE(MI_ARB_STATE,
7064 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7065 }
7066
7067 static void i85x_init_clock_gating(struct drm_device *dev)
7068 {
7069 struct drm_i915_private *dev_priv = dev->dev_private;
7070
7071 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7072
7073 /* interrupts should cause a wake up from C3 */
7074 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7075 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7076
7077 I915_WRITE(MEM_MODE,
7078 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7079 }
7080
7081 static void i830_init_clock_gating(struct drm_device *dev)
7082 {
7083 struct drm_i915_private *dev_priv = dev->dev_private;
7084
7085 I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7086
7087 I915_WRITE(MEM_MODE,
7088 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7089 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7090 }
7091
7092 void intel_init_clock_gating(struct drm_device *dev)
7093 {
7094 struct drm_i915_private *dev_priv = dev->dev_private;
7095
7096 if (dev_priv->display.init_clock_gating)
7097 dev_priv->display.init_clock_gating(dev);
7098 }
7099
7100 void intel_suspend_hw(struct drm_device *dev)
7101 {
7102 if (HAS_PCH_LPT(dev))
7103 lpt_suspend_hw(dev);
7104 }
7105
7106 /* Set up chip specific power management-related functions */
7107 void intel_init_pm(struct drm_device *dev)
7108 {
7109 struct drm_i915_private *dev_priv = dev->dev_private;
7110
7111 intel_fbc_init(dev_priv);
7112
7113 /* For cxsr */
7114 if (IS_PINEVIEW(dev))
7115 i915_pineview_get_mem_freq(dev);
7116 else if (IS_GEN5(dev))
7117 i915_ironlake_get_mem_freq(dev);
7118
7119 /* For FIFO watermark updates */
7120 if (INTEL_INFO(dev)->gen >= 9) {
7121 skl_setup_wm_latency(dev);
7122
7123 if (IS_BROXTON(dev))
7124 dev_priv->display.init_clock_gating =
7125 bxt_init_clock_gating;
7126 dev_priv->display.update_wm = skl_update_wm;
7127 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
7128 } else if (HAS_PCH_SPLIT(dev)) {
7129 ilk_setup_wm_latency(dev);
7130
7131 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7132 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7133 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7134 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7135 dev_priv->display.update_wm = ilk_update_wm;
7136 dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
7137 } else {
7138 			DRM_DEBUG_KMS("Failed to read display plane latency. "
7139 				      "Disabling CxSR\n");
7140 }
7141
7142 if (IS_GEN5(dev))
7143 dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7144 else if (IS_GEN6(dev))
7145 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7146 else if (IS_IVYBRIDGE(dev))
7147 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7148 else if (IS_HASWELL(dev))
7149 dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7150 else if (INTEL_INFO(dev)->gen == 8)
7151 dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7152 } else if (IS_CHERRYVIEW(dev)) {
7153 vlv_setup_wm_latency(dev);
7154
7155 dev_priv->display.update_wm = vlv_update_wm;
7156 dev_priv->display.init_clock_gating =
7157 cherryview_init_clock_gating;
7158 } else if (IS_VALLEYVIEW(dev)) {
7159 vlv_setup_wm_latency(dev);
7160
7161 dev_priv->display.update_wm = vlv_update_wm;
7162 dev_priv->display.init_clock_gating =
7163 valleyview_init_clock_gating;
7164 } else if (IS_PINEVIEW(dev)) {
7165 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7166 dev_priv->is_ddr3,
7167 dev_priv->fsb_freq,
7168 dev_priv->mem_freq)) {
7169 DRM_INFO("failed to find known CxSR latency "
7170 "(found ddr%s fsb freq %d, mem freq %d), "
7171 "disabling CxSR\n",
7172 (dev_priv->is_ddr3 == 1) ? "3" : "2",
7173 dev_priv->fsb_freq, dev_priv->mem_freq);
7174 /* Disable CxSR and never update its watermark again */
7175 intel_set_memory_cxsr(dev_priv, false);
7176 dev_priv->display.update_wm = NULL;
7177 } else
7178 dev_priv->display.update_wm = pineview_update_wm;
7179 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7180 } else if (IS_G4X(dev)) {
7181 dev_priv->display.update_wm = g4x_update_wm;
7182 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7183 } else if (IS_GEN4(dev)) {
7184 dev_priv->display.update_wm = i965_update_wm;
7185 if (IS_CRESTLINE(dev))
7186 dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7187 else if (IS_BROADWATER(dev))
7188 dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7189 } else if (IS_GEN3(dev)) {
7190 dev_priv->display.update_wm = i9xx_update_wm;
7191 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7192 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7193 } else if (IS_GEN2(dev)) {
7194 if (INTEL_INFO(dev)->num_pipes == 1) {
7195 dev_priv->display.update_wm = i845_update_wm;
7196 dev_priv->display.get_fifo_size = i845_get_fifo_size;
7197 } else {
7198 dev_priv->display.update_wm = i9xx_update_wm;
7199 dev_priv->display.get_fifo_size = i830_get_fifo_size;
7200 }
7201
7202 if (IS_I85X(dev) || IS_I865G(dev))
7203 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7204 else
7205 dev_priv->display.init_clock_gating = i830_init_clock_gating;
7206 } else {
7207 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7208 }
7209 }
7210
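/*
 * GEN6+ pcode mailbox protocol, as implemented below: with rps.hw_lock held,
 * the request operand is written to GEN6_PCODE_DATA, the command is written
 * to GEN6_PCODE_MAILBOX with GEN6_PCODE_READY set, and the punit clears the
 * READY bit once it has processed the request.  For a read, the result is
 * then fetched back from GEN6_PCODE_DATA.  A mailbox that is still busy on
 * entry returns -EAGAIN; a stuck READY bit returns -ETIMEDOUT.
 */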
7211 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7212 {
7213 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7214
7215 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7216 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7217 return -EAGAIN;
7218 }
7219
7220 I915_WRITE(GEN6_PCODE_DATA, *val);
7221 I915_WRITE(GEN6_PCODE_DATA1, 0);
7222 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7223
7224 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7225 500)) {
7226 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7227 return -ETIMEDOUT;
7228 }
7229
7230 *val = I915_READ(GEN6_PCODE_DATA);
7231 I915_WRITE(GEN6_PCODE_DATA, 0);
7232
7233 return 0;
7234 }
7235
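/* Write-only variant of the mailbox sequence above; no result is read back. */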
7236 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7237 {
7238 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7239
7240 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7241 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7242 return -EAGAIN;
7243 }
7244
7245 I915_WRITE(GEN6_PCODE_DATA, val);
7246 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7247
7248 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7249 500)) {
7250 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7251 return -ETIMEDOUT;
7252 }
7253
7254 I915_WRITE(GEN6_PCODE_DATA, 0);
7255
7256 return 0;
7257 }
7258
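/*
 * Map the rounded CZ clock frequency (czclk_freq / 1000, i.e. MHz) to the
 * divider used when converting between VLV/CHV frequency opcodes and MHz.
 * Unknown CZ clock frequencies return -1 so callers can propagate the error.
 */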
7259 static int vlv_gpu_freq_div(unsigned int czclk_freq)
7260 {
7261 switch (czclk_freq) {
7262 case 200:
7263 return 10;
7264 case 267:
7265 return 12;
7266 case 320:
7267 case 333:
7268 return 16;
7269 case 400:
7270 return 20;
7271 default:
7272 return -1;
7273 }
7274 }
7275
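/*
 * Valleyview opcode <-> MHz conversion.  The opcode is offset by 0xb7
 * (written below as "+ 6 - 0xbd"), so the effective mapping is
 *
 *	MHz    = czclk_MHz * (opcode - 0xb7) / div
 *	opcode = MHz * div / czclk_MHz + 0xb7
 *
 * e.g. with a 320 MHz CZ clock the divider is 16, giving 20 MHz per opcode
 * step.  byt_freq_opcode() below is the inverse of byt_gpu_freq().
 */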
7276 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7277 {
7278 int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7279
7280 div = vlv_gpu_freq_div(czclk_freq);
7281 if (div < 0)
7282 return div;
7283
7284 return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
7285 }
7286
7287 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7288 {
7289 int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7290
7291 mul = vlv_gpu_freq_div(czclk_freq);
7292 if (mul < 0)
7293 return mul;
7294
7295 return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
7296 }
7297
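/*
 * Cherryview variant: the opcode granularity is half that of Valleyview, so
 * the divider from vlv_gpu_freq_div() is halved and the rounding is done in
 * two steps.  chv_freq_opcode() additionally rounds to an even opcode, which
 * the hardware requires.
 */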
7298 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7299 {
7300 int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7301
7302 	div = vlv_gpu_freq_div(czclk_freq);
7303 	if (div < 0)
7304 		return div;
	/* halve only after the error check so that -1 is not truncated to 0 */
	div /= 2;
7305
7306 return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
7307 }
7308
7309 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7310 {
7311 int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7312
7313 	mul = vlv_gpu_freq_div(czclk_freq);
7314 	if (mul < 0)
7315 		return mul;
	/* halve only after the error check so that -1 is not truncated to 0 */
	mul /= 2;
7316
7317 /* CHV needs even values */
7318 return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
7319 }
7320
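/*
 * Convert an RPS frequency opcode to MHz.  Gen9 opcodes are scaled by
 * GT_FREQUENCY_MULTIPLIER / GEN9_FREQ_SCALER, Cherryview and Valleyview use
 * the CZ-clock based helpers above, and earlier platforms use plain
 * GT_FREQUENCY_MULTIPLIER units.  intel_freq_opcode() is the inverse.
 */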
7321 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7322 {
7323 if (IS_GEN9(dev_priv->dev))
7324 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
7325 GEN9_FREQ_SCALER);
7326 else if (IS_CHERRYVIEW(dev_priv->dev))
7327 return chv_gpu_freq(dev_priv, val);
7328 else if (IS_VALLEYVIEW(dev_priv->dev))
7329 return byt_gpu_freq(dev_priv, val);
7330 else
7331 return val * GT_FREQUENCY_MULTIPLIER;
7332 }
7333
7334 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7335 {
7336 if (IS_GEN9(dev_priv->dev))
7337 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
7338 GT_FREQUENCY_MULTIPLIER);
7339 else if (IS_CHERRYVIEW(dev_priv->dev))
7340 return chv_freq_opcode(dev_priv, val);
7341 else if (IS_VALLEYVIEW(dev_priv->dev))
7342 return byt_freq_opcode(dev_priv, val);
7343 else
7344 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
7345 }
7346
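/*
 * RPS boosting for outstanding requests: a small work item holds a reference
 * on the request and, if the request has not completed by the time the worker
 * runs, asks gen6_rps_boost() to raise the GPU frequency.  The worker then
 * drops its reference and frees the work item.
 */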
7347 struct request_boost {
7348 struct work_struct work;
7349 struct drm_i915_gem_request *req;
7350 };
7351
7352 static void __intel_rps_boost_work(struct work_struct *work)
7353 {
7354 struct request_boost *boost = container_of(work, struct request_boost, work);
7355 struct drm_i915_gem_request *req = boost->req;
7356
7357 if (!i915_gem_request_completed(req, true))
7358 gen6_rps_boost(to_i915(req->ring->dev), NULL,
7359 req->emitted_jiffies);
7360
7361 i915_gem_request_unreference__unlocked(req);
7362 kfree(boost);
7363 }
7364
7365 void intel_queue_rps_boost_for_request(struct drm_device *dev,
7366 struct drm_i915_gem_request *req)
7367 {
7368 struct request_boost *boost;
7369
7370 if (req == NULL || INTEL_INFO(dev)->gen < 6)
7371 return;
7372
7373 if (i915_gem_request_completed(req, true))
7374 return;
7375
7376 boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
7377 if (boost == NULL)
7378 return;
7379
7380 i915_gem_request_reference(req);
7381 boost->req = req;
7382
7383 INIT_WORK(&boost->work, __intel_rps_boost_work);
7384 queue_work(to_i915(dev)->wq, &boost->work);
7385 }
7386
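/*
 * Early power-management setup: initialise the RPS hw_lock (via the Linux
 * mutex shim on NetBSD), the client spinlock and bookkeeping lists, and the
 * delayed work that runs intel_gen6_powersave_work().
 */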
7387 void intel_pm_setup(struct drm_device *dev)
7388 {
7389 struct drm_i915_private *dev_priv = dev->dev_private;
7390
7391 #ifdef __NetBSD__
7392 linux_mutex_init(&dev_priv->rps.hw_lock);
7393 #else
7394 mutex_init(&dev_priv->rps.hw_lock);
7395 #endif
7396 spin_lock_init(&dev_priv->rps.client_lock);
7397
7398 INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7399 intel_gen6_powersave_work);
7400 INIT_LIST_HEAD(&dev_priv->rps.clients);
7401 INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7402 INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7403
7404 dev_priv->pm.suspended = false;
7405 }
7406