1 /* $NetBSD: intel_pm.c,v 1.15 2018/08/27 15:09:35 riastradh Exp $ */
2
3 /*
4 * Copyright 2012 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 *
25 * Authors:
26 * Eugeni Dodonov <eugeni.dodonov (at) intel.com>
27 *
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: intel_pm.c,v 1.15 2018/08/27 15:09:35 riastradh Exp $");
32
33 #include <linux/bitops.h>
34 #include <linux/cpufreq.h>
35 #include <linux/export.h>
36 #include "i915_drv.h"
37 #include "i915_trace.h"
38 #include "intel_drv.h"
39 #ifndef __NetBSD__
40 #include "../../../platform/x86/intel_ips.h"
41 #endif
42 #include <linux/module.h>
43 #include <linux/log2.h>
44 #include <linux/math64.h>
45 #include <linux/time.h>
46
47 /**
48 * RC6 is a special power stage which allows the GPU to enter a very
49 * low-voltage mode when idle, using down to 0V while at this stage. This
50 * stage is entered automatically when the GPU is idle and RC6 support is
51 * enabled, and as soon as a new workload arises the GPU wakes up automatically as well.
52 *
53 * There are different RC6 modes available in Intel GPUs, which differ from
54 * each other in the latency required to enter and leave RC6 and in the
55 * voltage consumed by the GPU in different states.
56 *
57 * The combination of the following flags defines which states the GPU is
58 * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6,
59 * and RC6pp is the deepest RC6. Their support by hardware varies according to the
60 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
61 * which brings the most power savings; deeper states save more power, but
62 * require higher latency to switch to and wake up.
63 */
64 #define INTEL_RC6_ENABLE (1<<0)
65 #define INTEL_RC6p_ENABLE (1<<1)
66 #define INTEL_RC6pp_ENABLE (1<<2)
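/*
 * For example, a mask of (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE) == 0x3
 * would allow the normal and deep RC6 states while leaving the deepest
 * (RC6pp) state disabled.
 */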
67
68 static void bxt_init_clock_gating(struct drm_device *dev)
69 {
70 struct drm_i915_private *dev_priv = dev->dev_private;
71
72 /* WaDisableSDEUnitClockGating:bxt */
73 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
74 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
75
76 /*
77 * FIXME:
78 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
79 */
80 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
81 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
82 }
83
84 static void i915_pineview_get_mem_freq(struct drm_device *dev)
85 {
86 struct drm_i915_private *dev_priv = dev->dev_private;
87 u32 tmp;
88
89 tmp = I915_READ(CLKCFG);
90
91 switch (tmp & CLKCFG_FSB_MASK) {
92 case CLKCFG_FSB_533:
93 dev_priv->fsb_freq = 533; /* 133*4 */
94 break;
95 case CLKCFG_FSB_800:
96 dev_priv->fsb_freq = 800; /* 200*4 */
97 break;
98 case CLKCFG_FSB_667:
99 dev_priv->fsb_freq = 667; /* 167*4 */
100 break;
101 case CLKCFG_FSB_400:
102 dev_priv->fsb_freq = 400; /* 100*4 */
103 break;
104 }
105
106 switch (tmp & CLKCFG_MEM_MASK) {
107 case CLKCFG_MEM_533:
108 dev_priv->mem_freq = 533;
109 break;
110 case CLKCFG_MEM_667:
111 dev_priv->mem_freq = 667;
112 break;
113 case CLKCFG_MEM_800:
114 dev_priv->mem_freq = 800;
115 break;
116 }
117
118 /* detect pineview DDR3 setting */
119 tmp = I915_READ(CSHRDDR3CTL);
120 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
121 }
122
123 static void i915_ironlake_get_mem_freq(struct drm_device *dev)
124 {
125 struct drm_i915_private *dev_priv = dev->dev_private;
126 u16 ddrpll, csipll;
127
128 ddrpll = I915_READ16(DDRMPLL1);
129 csipll = I915_READ16(CSIPLL0);
130
131 switch (ddrpll & 0xff) {
132 case 0xc:
133 dev_priv->mem_freq = 800;
134 break;
135 case 0x10:
136 dev_priv->mem_freq = 1066;
137 break;
138 case 0x14:
139 dev_priv->mem_freq = 1333;
140 break;
141 case 0x18:
142 dev_priv->mem_freq = 1600;
143 break;
144 default:
145 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
146 ddrpll & 0xff);
147 dev_priv->mem_freq = 0;
148 break;
149 }
150
151 dev_priv->ips.r_t = dev_priv->mem_freq;
152
153 switch (csipll & 0x3ff) {
154 case 0x00c:
155 dev_priv->fsb_freq = 3200;
156 break;
157 case 0x00e:
158 dev_priv->fsb_freq = 3733;
159 break;
160 case 0x010:
161 dev_priv->fsb_freq = 4266;
162 break;
163 case 0x012:
164 dev_priv->fsb_freq = 4800;
165 break;
166 case 0x014:
167 dev_priv->fsb_freq = 5333;
168 break;
169 case 0x016:
170 dev_priv->fsb_freq = 5866;
171 break;
172 case 0x018:
173 dev_priv->fsb_freq = 6400;
174 break;
175 default:
176 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
177 csipll & 0x3ff);
178 dev_priv->fsb_freq = 0;
179 break;
180 }
181
182 if (dev_priv->fsb_freq == 3200) {
183 dev_priv->ips.c_m = 0;
184 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
185 dev_priv->ips.c_m = 1;
186 } else {
187 dev_priv->ips.c_m = 2;
188 }
189 }
190
191 static const struct cxsr_latency cxsr_latency_table[] = {
192 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
193 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
194 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
195 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
196 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
197
198 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
199 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
200 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
201 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
202 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
203
204 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
205 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
206 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
207 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
208 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
209
210 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
211 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
212 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
213 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
214 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
215
216 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
217 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
218 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
219 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
220 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
221
222 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
223 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
224 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
225 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
226 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
227 };
228
229 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
230 int is_ddr3,
231 int fsb,
232 int mem)
233 {
234 const struct cxsr_latency *latency;
235 int i;
236
237 if (fsb == 0 || mem == 0)
238 return NULL;
239
240 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
241 latency = &cxsr_latency_table[i];
242 if (is_desktop == latency->is_desktop &&
243 is_ddr3 == latency->is_ddr3 &&
244 fsb == latency->fsb_freq && mem == latency->mem_freq)
245 return latency;
246 }
247
248 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
249
250 return NULL;
251 }
252
253 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
254 {
255 u32 val;
256
257 mutex_lock(&dev_priv->rps.hw_lock);
258
259 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
260 if (enable)
261 val &= ~FORCE_DDR_HIGH_FREQ;
262 else
263 val |= FORCE_DDR_HIGH_FREQ;
264 val &= ~FORCE_DDR_LOW_FREQ;
265 val |= FORCE_DDR_FREQ_REQ_ACK;
266 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
267
268 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
269 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
270 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
271
272 mutex_unlock(&dev_priv->rps.hw_lock);
273 }
274
275 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
276 {
277 u32 val;
278
279 mutex_lock(&dev_priv->rps.hw_lock);
280
281 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
282 if (enable)
283 val |= DSP_MAXFIFO_PM5_ENABLE;
284 else
285 val &= ~DSP_MAXFIFO_PM5_ENABLE;
286 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
287
288 mutex_unlock(&dev_priv->rps.hw_lock);
289 }
290
291 #define FW_WM(value, plane) \
292 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
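/*
 * For example, FW_WM(wm, SR) expands to
 * (((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), i.e. the watermark value
 * shifted into the SR field of the DSPFW register and masked to that
 * field's width.
 */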
293
294 void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
295 {
296 struct drm_device *dev = dev_priv->dev;
297 u32 val;
298
299 if (IS_VALLEYVIEW(dev)) {
300 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
301 POSTING_READ(FW_BLC_SELF_VLV);
302 dev_priv->wm.vlv.cxsr = enable;
303 } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
304 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
305 POSTING_READ(FW_BLC_SELF);
306 } else if (IS_PINEVIEW(dev)) {
307 val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
308 val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
309 I915_WRITE(DSPFW3, val);
310 POSTING_READ(DSPFW3);
311 } else if (IS_I945G(dev) || IS_I945GM(dev)) {
312 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
313 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
314 I915_WRITE(FW_BLC_SELF, val);
315 POSTING_READ(FW_BLC_SELF);
316 } else if (IS_I915GM(dev)) {
317 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
318 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
319 I915_WRITE(INSTPM, val);
320 POSTING_READ(INSTPM);
321 } else {
322 return;
323 }
324
325 DRM_DEBUG_KMS("memory self-refresh is %s\n",
326 enable ? "enabled" : "disabled");
327 }
328
329
330 /*
331 * Latency for FIFO fetches is dependent on several factors:
332 * - memory configuration (speed, channels)
333 * - chipset
334 * - current MCH state
335 * It can be fairly high in some situations, so here we assume a fairly
336 * pessimal value. It's a tradeoff between extra memory fetches (if we
337 * set this value too high, the FIFO will fetch frequently to stay full)
338 * and power consumption (set it too low to save power and we might see
339 * FIFO underruns and display "flicker").
340 *
341 * A value of 5us seems to be a good balance; safe for very low end
342 * platforms but not overly aggressive on lower latency configs.
343 */
344 static const int pessimal_latency_ns = 5000;
345
346 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
347 ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
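/*
 * For example, VLV_FIFO_START(dsparb, dsparb2, 8, 4) takes bits 15:8 of
 * DSPARB as the low byte and bit 4 of DSPARB2 as bit 8, assembling the
 * 9-bit FIFO start offset used below for pipe A's sprite 1.
 */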
348
349 static int vlv_get_fifo_size(struct drm_device *dev,
350 enum i915_pipe pipe, int plane)
351 {
352 struct drm_i915_private *dev_priv = dev->dev_private;
353 int sprite0_start, sprite1_start, size;
354
355 switch (pipe) {
356 uint32_t dsparb, dsparb2, dsparb3;
357 case PIPE_A:
358 dsparb = I915_READ(DSPARB);
359 dsparb2 = I915_READ(DSPARB2);
360 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
361 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
362 break;
363 case PIPE_B:
364 dsparb = I915_READ(DSPARB);
365 dsparb2 = I915_READ(DSPARB2);
366 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
367 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
368 break;
369 case PIPE_C:
370 dsparb2 = I915_READ(DSPARB2);
371 dsparb3 = I915_READ(DSPARB3);
372 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
373 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
374 break;
375 default:
376 return 0;
377 }
378
379 switch (plane) {
380 case 0:
381 size = sprite0_start;
382 break;
383 case 1:
384 size = sprite1_start - sprite0_start;
385 break;
386 case 2:
387 size = 512 - 1 - sprite1_start;
388 break;
389 default:
390 return 0;
391 }
392
393 DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n",
394 pipe_name(pipe), plane == 0 ? "primary" : "sprite",
395 plane == 0 ? plane_name(pipe) : sprite_name(pipe, plane - 1),
396 size);
397
398 return size;
399 }
400
401 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
402 {
403 struct drm_i915_private *dev_priv = dev->dev_private;
404 uint32_t dsparb = I915_READ(DSPARB);
405 int size;
406
407 size = dsparb & 0x7f;
408 if (plane)
409 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
410
411 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
412 plane ? "B" : "A", size);
413
414 return size;
415 }
416
417 static int i830_get_fifo_size(struct drm_device *dev, int plane)
418 {
419 struct drm_i915_private *dev_priv = dev->dev_private;
420 uint32_t dsparb = I915_READ(DSPARB);
421 int size;
422
423 size = dsparb & 0x1ff;
424 if (plane)
425 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
426 size >>= 1; /* Convert to cachelines */
427
428 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
429 plane ? "B" : "A", size);
430
431 return size;
432 }
433
434 static int i845_get_fifo_size(struct drm_device *dev, int plane)
435 {
436 struct drm_i915_private *dev_priv = dev->dev_private;
437 uint32_t dsparb = I915_READ(DSPARB);
438 int size;
439
440 size = dsparb & 0x7f;
441 size >>= 2; /* Convert to cachelines */
442
443 DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
444 plane ? "B" : "A",
445 size);
446
447 return size;
448 }
449
450 /* Pineview has different values for various configs */
451 static const struct intel_watermark_params pineview_display_wm = {
452 .fifo_size = PINEVIEW_DISPLAY_FIFO,
453 .max_wm = PINEVIEW_MAX_WM,
454 .default_wm = PINEVIEW_DFT_WM,
455 .guard_size = PINEVIEW_GUARD_WM,
456 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
457 };
458 static const struct intel_watermark_params pineview_display_hplloff_wm = {
459 .fifo_size = PINEVIEW_DISPLAY_FIFO,
460 .max_wm = PINEVIEW_MAX_WM,
461 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
462 .guard_size = PINEVIEW_GUARD_WM,
463 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
464 };
465 static const struct intel_watermark_params pineview_cursor_wm = {
466 .fifo_size = PINEVIEW_CURSOR_FIFO,
467 .max_wm = PINEVIEW_CURSOR_MAX_WM,
468 .default_wm = PINEVIEW_CURSOR_DFT_WM,
469 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
470 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
471 };
472 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
473 .fifo_size = PINEVIEW_CURSOR_FIFO,
474 .max_wm = PINEVIEW_CURSOR_MAX_WM,
475 .default_wm = PINEVIEW_CURSOR_DFT_WM,
476 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
477 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
478 };
479 static const struct intel_watermark_params g4x_wm_info = {
480 .fifo_size = G4X_FIFO_SIZE,
481 .max_wm = G4X_MAX_WM,
482 .default_wm = G4X_MAX_WM,
483 .guard_size = 2,
484 .cacheline_size = G4X_FIFO_LINE_SIZE,
485 };
486 static const struct intel_watermark_params g4x_cursor_wm_info = {
487 .fifo_size = I965_CURSOR_FIFO,
488 .max_wm = I965_CURSOR_MAX_WM,
489 .default_wm = I965_CURSOR_DFT_WM,
490 .guard_size = 2,
491 .cacheline_size = G4X_FIFO_LINE_SIZE,
492 };
493 static const struct intel_watermark_params valleyview_wm_info __unused = {
494 .fifo_size = VALLEYVIEW_FIFO_SIZE,
495 .max_wm = VALLEYVIEW_MAX_WM,
496 .default_wm = VALLEYVIEW_MAX_WM,
497 .guard_size = 2,
498 .cacheline_size = G4X_FIFO_LINE_SIZE,
499 };
500 static const struct intel_watermark_params valleyview_cursor_wm_info __unused = {
501 .fifo_size = I965_CURSOR_FIFO,
502 .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
503 .default_wm = I965_CURSOR_DFT_WM,
504 .guard_size = 2,
505 .cacheline_size = G4X_FIFO_LINE_SIZE,
506 };
507 static const struct intel_watermark_params i965_cursor_wm_info = {
508 .fifo_size = I965_CURSOR_FIFO,
509 .max_wm = I965_CURSOR_MAX_WM,
510 .default_wm = I965_CURSOR_DFT_WM,
511 .guard_size = 2,
512 .cacheline_size = I915_FIFO_LINE_SIZE,
513 };
514 static const struct intel_watermark_params i945_wm_info = {
515 .fifo_size = I945_FIFO_SIZE,
516 .max_wm = I915_MAX_WM,
517 .default_wm = 1,
518 .guard_size = 2,
519 .cacheline_size = I915_FIFO_LINE_SIZE,
520 };
521 static const struct intel_watermark_params i915_wm_info = {
522 .fifo_size = I915_FIFO_SIZE,
523 .max_wm = I915_MAX_WM,
524 .default_wm = 1,
525 .guard_size = 2,
526 .cacheline_size = I915_FIFO_LINE_SIZE,
527 };
528 static const struct intel_watermark_params i830_a_wm_info = {
529 .fifo_size = I855GM_FIFO_SIZE,
530 .max_wm = I915_MAX_WM,
531 .default_wm = 1,
532 .guard_size = 2,
533 .cacheline_size = I830_FIFO_LINE_SIZE,
534 };
535 static const struct intel_watermark_params i830_bc_wm_info = {
536 .fifo_size = I855GM_FIFO_SIZE,
537 .max_wm = I915_MAX_WM/2,
538 .default_wm = 1,
539 .guard_size = 2,
540 .cacheline_size = I830_FIFO_LINE_SIZE,
541 };
542 static const struct intel_watermark_params i845_wm_info = {
543 .fifo_size = I830_FIFO_SIZE,
544 .max_wm = I915_MAX_WM,
545 .default_wm = 1,
546 .guard_size = 2,
547 .cacheline_size = I830_FIFO_LINE_SIZE,
548 };
549
550 /**
551 * intel_calculate_wm - calculate watermark level
552 * @clock_in_khz: pixel clock
553 * @wm: chip FIFO params
554 * @pixel_size: display pixel size
555 * @latency_ns: memory latency for the platform
556 *
557 * Calculate the watermark level (the level at which the display plane will
558 * start fetching from memory again). Each chip has a different display
559 * FIFO size and allocation, so the caller needs to figure that out and pass
560 * in the correct intel_watermark_params structure.
561 *
562 * As the pixel clock runs, the FIFO will be drained at a rate that depends
563 * on the pixel size. When it reaches the watermark level, it'll start
564 * fetching FIFO-line-sized chunks from memory until the FIFO fills
565 * past the watermark point. If the FIFO drains completely, a FIFO underrun
566 * will occur, and a display engine hang could result.
567 */
568 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
569 const struct intel_watermark_params *wm,
570 int fifo_size,
571 int pixel_size,
572 unsigned long latency_ns)
573 {
574 long entries_required, wm_size;
575
576 /*
577 * Note: we need to make sure we don't overflow for various clock &
578 * latency values.
579 * clocks go from a few thousand to several hundred thousand.
580 * latency is usually a few thousand
581 */
582 entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
583 1000;
584 entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
585
586 DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
587
588 wm_size = fifo_size - (entries_required + wm->guard_size);
589
590 DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
591
592 /* Don't promote wm_size to unsigned... */
593 if (wm_size > (long)wm->max_wm)
594 wm_size = wm->max_wm;
595 if (wm_size <= 0)
596 wm_size = wm->default_wm;
597
598 /*
599 * Bspec seems to indicate that the value shouldn't be lower than
600 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
601 * Let's go for 8, which is the burst size, since certain platforms
602 * already use a hardcoded 8 (which is what the spec says should be
603 * done).
604 */
605 if (wm_size <= 8)
606 wm_size = 8;
607
608 return wm_size;
609 }
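/*
 * Worked example with illustrative numbers: a 100,000 kHz pixel clock at
 * 4 bytes per pixel and a 5,000 ns latency drains
 * (100000 / 1000) * 4 * 5000 / 1000 = 2000 bytes while the fetch is
 * outstanding.  Assuming a 64-byte FIFO line, that rounds up to 32
 * entries, so a FIFO size of 512 with a guard size of 2 would give a
 * watermark of 512 - (32 + 2) = 478 (before the max_wm/minimum clamping
 * above).
 */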
610
611 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
612 {
613 struct drm_crtc *crtc, *enabled = NULL;
614
615 for_each_crtc(dev, crtc) {
616 if (intel_crtc_active(crtc)) {
617 if (enabled)
618 return NULL;
619 enabled = crtc;
620 }
621 }
622
623 return enabled;
624 }
625
626 static void pineview_update_wm(struct drm_crtc *unused_crtc)
627 {
628 struct drm_device *dev = unused_crtc->dev;
629 struct drm_i915_private *dev_priv = dev->dev_private;
630 struct drm_crtc *crtc;
631 const struct cxsr_latency *latency;
632 u32 reg;
633 unsigned long wm;
634
635 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
636 dev_priv->fsb_freq, dev_priv->mem_freq);
637 if (!latency) {
638 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
639 intel_set_memory_cxsr(dev_priv, false);
640 return;
641 }
642
643 crtc = single_enabled_crtc(dev);
644 if (crtc) {
645 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
646 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
647 int clock = adjusted_mode->crtc_clock;
648
649 /* Display SR */
650 wm = intel_calculate_wm(clock, &pineview_display_wm,
651 pineview_display_wm.fifo_size,
652 pixel_size, latency->display_sr);
653 reg = I915_READ(DSPFW1);
654 reg &= ~DSPFW_SR_MASK;
655 reg |= FW_WM(wm, SR);
656 I915_WRITE(DSPFW1, reg);
657 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
658
659 /* cursor SR */
660 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
661 pineview_display_wm.fifo_size,
662 pixel_size, latency->cursor_sr);
663 reg = I915_READ(DSPFW3);
664 reg &= ~DSPFW_CURSOR_SR_MASK;
665 reg |= FW_WM(wm, CURSOR_SR);
666 I915_WRITE(DSPFW3, reg);
667
668 /* Display HPLL off SR */
669 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
670 pineview_display_hplloff_wm.fifo_size,
671 pixel_size, latency->display_hpll_disable);
672 reg = I915_READ(DSPFW3);
673 reg &= ~DSPFW_HPLL_SR_MASK;
674 reg |= FW_WM(wm, HPLL_SR);
675 I915_WRITE(DSPFW3, reg);
676
677 /* cursor HPLL off SR */
678 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
679 pineview_display_hplloff_wm.fifo_size,
680 pixel_size, latency->cursor_hpll_disable);
681 reg = I915_READ(DSPFW3);
682 reg &= ~DSPFW_HPLL_CURSOR_MASK;
683 reg |= FW_WM(wm, HPLL_CURSOR);
684 I915_WRITE(DSPFW3, reg);
685 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
686
687 intel_set_memory_cxsr(dev_priv, true);
688 } else {
689 intel_set_memory_cxsr(dev_priv, false);
690 }
691 }
692
693 static bool g4x_compute_wm0(struct drm_device *dev,
694 int plane,
695 const struct intel_watermark_params *display,
696 int display_latency_ns,
697 const struct intel_watermark_params *cursor,
698 int cursor_latency_ns,
699 int *plane_wm,
700 int *cursor_wm)
701 {
702 struct drm_crtc *crtc;
703 const struct drm_display_mode *adjusted_mode;
704 int htotal, hdisplay, clock, pixel_size;
705 int line_time_us, line_count;
706 int entries, tlb_miss;
707
708 crtc = intel_get_crtc_for_plane(dev, plane);
709 if (!intel_crtc_active(crtc)) {
710 *cursor_wm = cursor->guard_size;
711 *plane_wm = display->guard_size;
712 return false;
713 }
714
715 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
716 clock = adjusted_mode->crtc_clock;
717 htotal = adjusted_mode->crtc_htotal;
718 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
719 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
720
721 /* Use the small buffer method to calculate plane watermark */
722 entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
723 tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
724 if (tlb_miss > 0)
725 entries += tlb_miss;
726 entries = DIV_ROUND_UP(entries, display->cacheline_size);
727 *plane_wm = entries + display->guard_size;
728 if (*plane_wm > (int)display->max_wm)
729 *plane_wm = display->max_wm;
730
731 /* Use the large buffer method to calculate cursor watermark */
732 line_time_us = max(htotal * 1000 / clock, 1);
733 line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
734 entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
735 tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
736 if (tlb_miss > 0)
737 entries += tlb_miss;
738 entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
739 *cursor_wm = entries + cursor->guard_size;
740 if (*cursor_wm > (int)cursor->max_wm)
741 *cursor_wm = (int)cursor->max_wm;
742
743 return true;
744 }
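/*
 * Worked example for the "large buffer" cursor path above (illustrative
 * numbers): with htotal = 2000 and a 100,000 kHz clock,
 * line_time_us = 2000 * 1000 / 100000 = 20 us, so a 5,000 ns cursor
 * latency gives line_count = (5000 / 20 + 1000) / 1000 = 1 line's worth
 * of cursor pixels to buffer.
 */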
745
746 /*
747 * Check the wm result.
748 *
749 * If any calculated watermark values is larger than the maximum value that
750 * can be programmed into the associated watermark register, that watermark
751 * must be disabled.
752 */
753 static bool g4x_check_srwm(struct drm_device *dev,
754 int display_wm, int cursor_wm,
755 const struct intel_watermark_params *display,
756 const struct intel_watermark_params *cursor)
757 {
758 DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
759 display_wm, cursor_wm);
760
761 if (display_wm > display->max_wm) {
762 DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
763 display_wm, display->max_wm);
764 return false;
765 }
766
767 if (cursor_wm > cursor->max_wm) {
768 DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
769 cursor_wm, cursor->max_wm);
770 return false;
771 }
772
773 if (!(display_wm || cursor_wm)) {
774 DRM_DEBUG_KMS("SR latency is 0, disabling\n");
775 return false;
776 }
777
778 return true;
779 }
780
781 static bool g4x_compute_srwm(struct drm_device *dev,
782 int plane,
783 int latency_ns,
784 const struct intel_watermark_params *display,
785 const struct intel_watermark_params *cursor,
786 int *display_wm, int *cursor_wm)
787 {
788 struct drm_crtc *crtc;
789 const struct drm_display_mode *adjusted_mode;
790 int hdisplay, htotal, pixel_size, clock;
791 unsigned long line_time_us;
792 int line_count, line_size;
793 int small, large;
794 int entries;
795
796 if (!latency_ns) {
797 *display_wm = *cursor_wm = 0;
798 return false;
799 }
800
801 crtc = intel_get_crtc_for_plane(dev, plane);
802 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
803 clock = adjusted_mode->crtc_clock;
804 htotal = adjusted_mode->crtc_htotal;
805 hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
806 pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
807
808 line_time_us = max(htotal * 1000 / clock, 1);
809 line_count = (latency_ns / line_time_us + 1000) / 1000;
810 line_size = hdisplay * pixel_size;
811
812 /* Use the minimum of the small and large buffer method for primary */
813 small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
814 large = line_count * line_size;
815
816 entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
817 *display_wm = entries + display->guard_size;
818
819 /* calculate the self-refresh watermark for display cursor */
820 entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
821 entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
822 *cursor_wm = entries + cursor->guard_size;
823
824 return g4x_check_srwm(dev,
825 *display_wm, *cursor_wm,
826 display, cursor);
827 }
828
829 #define FW_WM_VLV(value, plane) \
830 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
831
832 static void vlv_write_wm_values(struct intel_crtc *crtc,
833 const struct vlv_wm_values *wm)
834 {
835 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
836 enum i915_pipe pipe = crtc->pipe;
837
838 I915_WRITE(VLV_DDL(pipe),
839 (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) |
840 (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) |
841 (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) |
842 (wm->ddl[pipe].primary << DDL_PLANE_SHIFT));
843
844 I915_WRITE(DSPFW1,
845 FW_WM(wm->sr.plane, SR) |
846 FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) |
847 FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) |
848 FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA));
849 I915_WRITE(DSPFW2,
850 FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) |
851 FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) |
852 FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA));
853 I915_WRITE(DSPFW3,
854 FW_WM(wm->sr.cursor, CURSOR_SR));
855
856 if (IS_CHERRYVIEW(dev_priv)) {
857 I915_WRITE(DSPFW7_CHV,
858 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
859 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
860 I915_WRITE(DSPFW8_CHV,
861 FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) |
862 FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE));
863 I915_WRITE(DSPFW9_CHV,
864 FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) |
865 FW_WM(wm->pipe[PIPE_C].cursor, CURSORC));
866 I915_WRITE(DSPHOWM,
867 FW_WM(wm->sr.plane >> 9, SR_HI) |
868 FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) |
869 FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) |
870 FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) |
871 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
872 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
873 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
874 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
875 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
876 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
877 } else {
878 I915_WRITE(DSPFW7,
879 FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) |
880 FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC));
881 I915_WRITE(DSPHOWM,
882 FW_WM(wm->sr.plane >> 9, SR_HI) |
883 FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) |
884 FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) |
885 FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) |
886 FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) |
887 FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) |
888 FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
889 }
890
891 /* zero (unused) WM1 watermarks */
892 I915_WRITE(DSPFW4, 0);
893 I915_WRITE(DSPFW5, 0);
894 I915_WRITE(DSPFW6, 0);
895 I915_WRITE(DSPHOWM1, 0);
896
897 POSTING_READ(DSPFW1);
898 }
899
900 #undef FW_WM_VLV
901
902 enum vlv_wm_level {
903 VLV_WM_LEVEL_PM2,
904 VLV_WM_LEVEL_PM5,
905 VLV_WM_LEVEL_DDR_DVFS,
906 };
907
908 /* latency must be in 0.1us units. */
909 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
910 unsigned int pipe_htotal,
911 unsigned int horiz_pixels,
912 unsigned int bytes_per_pixel,
913 unsigned int latency)
914 {
915 unsigned int ret;
916
917 ret = (latency * pixel_rate) / (pipe_htotal * 10000);
918 ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
919 ret = DIV_ROUND_UP(ret, 64);
920
921 return ret;
922 }
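/*
 * Worked example (illustrative numbers): latency = 120 (12 us), a
 * 200,000 kHz pixel rate and htotal = 2200 give
 * (120 * 200000) / (2200 * 10000) = 1 extra line, so a 1920-pixel-wide
 * 4-byte-per-pixel plane needs (1 + 1) * 1920 * 4 = 15360 bytes, i.e.
 * DIV_ROUND_UP(15360, 64) = 240 64-byte FIFO lines.
 */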
923
924 static void vlv_setup_wm_latency(struct drm_device *dev)
925 {
926 struct drm_i915_private *dev_priv = dev->dev_private;
927
928 /* all latencies in usec */
929 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
930
931 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
932
933 if (IS_CHERRYVIEW(dev_priv)) {
934 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
935 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
936
937 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
938 }
939 }
940
941 static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
942 struct intel_crtc *crtc,
943 const struct intel_plane_state *state,
944 int level)
945 {
946 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
947 int clock, htotal, pixel_size, width, wm;
948
949 if (dev_priv->wm.pri_latency[level] == 0)
950 return USHRT_MAX;
951
952 if (!state->visible)
953 return 0;
954
955 pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
956 clock = crtc->config->base.adjusted_mode.crtc_clock;
957 htotal = crtc->config->base.adjusted_mode.crtc_htotal;
958 width = crtc->config->pipe_src_w;
959 if (WARN_ON(htotal == 0))
960 htotal = 1;
961
962 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
963 /*
964 * FIXME the formula gives values that are
965 * too big for the cursor FIFO, and hence we
966 * would never be able to use cursors. For
967 * now just hardcode the watermark.
968 */
969 wm = 63;
970 } else {
971 wm = vlv_wm_method2(clock, htotal, width, pixel_size,
972 dev_priv->wm.pri_latency[level] * 10);
973 }
974
975 return min_t(int, wm, USHRT_MAX);
976 }
977
978 static void vlv_compute_fifo(struct intel_crtc *crtc)
979 {
980 struct drm_device *dev = crtc->base.dev;
981 struct vlv_wm_state *wm_state = &crtc->wm_state;
982 struct intel_plane *plane;
983 unsigned int total_rate = 0;
984 const int fifo_size = 512 - 1;
985 int fifo_extra, fifo_left = fifo_size;
986
987 for_each_intel_plane_on_crtc(dev, crtc, plane) {
988 struct intel_plane_state *state =
989 to_intel_plane_state(plane->base.state);
990
991 if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
992 continue;
993
994 if (state->visible) {
995 wm_state->num_active_planes++;
996 total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
997 }
998 }
999
1000 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1001 struct intel_plane_state *state =
1002 to_intel_plane_state(plane->base.state);
1003 unsigned int rate;
1004
1005 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1006 plane->wm.fifo_size = 63;
1007 continue;
1008 }
1009
1010 if (!state->visible) {
1011 plane->wm.fifo_size = 0;
1012 continue;
1013 }
1014
1015 rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
1016 plane->wm.fifo_size = fifo_size * rate / total_rate;
1017 fifo_left -= plane->wm.fifo_size;
1018 }
1019
1020 fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);
1021
1022 /* spread the remainder evenly */
1023 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1024 int plane_extra;
1025
1026 if (fifo_left == 0)
1027 break;
1028
1029 if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
1030 continue;
1031
1032 /* give it all to the first plane if none are active */
1033 if (plane->wm.fifo_size == 0 &&
1034 wm_state->num_active_planes)
1035 continue;
1036
1037 plane_extra = min(fifo_extra, fifo_left);
1038 plane->wm.fifo_size += plane_extra;
1039 fifo_left -= plane_extra;
1040 }
1041
1042 WARN_ON(fifo_left != 0);
1043 }
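/*
 * Worked example (illustrative): with a 4-byte-per-pixel primary and a
 * 2-byte-per-pixel sprite visible, total_rate = 6, so the primary gets
 * 511 * 4 / 6 = 340 entries and the sprite 511 * 2 / 6 = 170, leaving
 * 1 entry for the spreading loop above to hand out.
 */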
1044
1045 static void vlv_invert_wms(struct intel_crtc *crtc)
1046 {
1047 struct vlv_wm_state *wm_state = &crtc->wm_state;
1048 int level;
1049
1050 for (level = 0; level < wm_state->num_levels; level++) {
1051 struct drm_device *dev = crtc->base.dev;
1052 const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1053 struct intel_plane *plane;
1054
1055 wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
1056 wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
1057
1058 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1059 switch (plane->base.type) {
1060 int sprite;
1061 case DRM_PLANE_TYPE_CURSOR:
1062 wm_state->wm[level].cursor = plane->wm.fifo_size -
1063 wm_state->wm[level].cursor;
1064 break;
1065 case DRM_PLANE_TYPE_PRIMARY:
1066 wm_state->wm[level].primary = plane->wm.fifo_size -
1067 wm_state->wm[level].primary;
1068 break;
1069 case DRM_PLANE_TYPE_OVERLAY:
1070 sprite = plane->plane;
1071 wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
1072 wm_state->wm[level].sprite[sprite];
1073 break;
1074 }
1075 }
1076 }
1077 }
1078
1079 static void vlv_compute_wm(struct intel_crtc *crtc)
1080 {
1081 struct drm_device *dev = crtc->base.dev;
1082 struct vlv_wm_state *wm_state = &crtc->wm_state;
1083 struct intel_plane *plane;
1084 int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
1085 int level;
1086
1087 memset(wm_state, 0, sizeof(*wm_state));
1088
1089 wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
1090 wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
1091
1092 wm_state->num_active_planes = 0;
1093
1094 vlv_compute_fifo(crtc);
1095
1096 if (wm_state->num_active_planes != 1)
1097 wm_state->cxsr = false;
1098
1099 if (wm_state->cxsr) {
1100 for (level = 0; level < wm_state->num_levels; level++) {
1101 wm_state->sr[level].plane = sr_fifo_size;
1102 wm_state->sr[level].cursor = 63;
1103 }
1104 }
1105
1106 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1107 struct intel_plane_state *state =
1108 to_intel_plane_state(plane->base.state);
1109
1110 if (!state->visible)
1111 continue;
1112
1113 /* normal watermarks */
1114 for (level = 0; level < wm_state->num_levels; level++) {
1115 int wm = vlv_compute_wm_level(plane, crtc, state, level);
1116 int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
1117
1118 /* hack */
1119 if (WARN_ON(level == 0 && wm > max_wm))
1120 wm = max_wm;
1121
1122 if (wm > plane->wm.fifo_size)
1123 break;
1124
1125 switch (plane->base.type) {
1126 int sprite;
1127 case DRM_PLANE_TYPE_CURSOR:
1128 wm_state->wm[level].cursor = wm;
1129 break;
1130 case DRM_PLANE_TYPE_PRIMARY:
1131 wm_state->wm[level].primary = wm;
1132 break;
1133 case DRM_PLANE_TYPE_OVERLAY:
1134 sprite = plane->plane;
1135 wm_state->wm[level].sprite[sprite] = wm;
1136 break;
1137 }
1138 }
1139
1140 wm_state->num_levels = level;
1141
1142 if (!wm_state->cxsr)
1143 continue;
1144
1145 /* maxfifo watermarks */
1146 switch (plane->base.type) {
1147 int sprite, level;
1148 case DRM_PLANE_TYPE_CURSOR:
1149 for (level = 0; level < wm_state->num_levels; level++)
1150 wm_state->sr[level].cursor =
1151 wm_state->wm[level].cursor;
1152 break;
1153 case DRM_PLANE_TYPE_PRIMARY:
1154 for (level = 0; level < wm_state->num_levels; level++)
1155 wm_state->sr[level].plane =
1156 min(wm_state->sr[level].plane,
1157 wm_state->wm[level].primary);
1158 break;
1159 case DRM_PLANE_TYPE_OVERLAY:
1160 sprite = plane->plane;
1161 for (level = 0; level < wm_state->num_levels; level++)
1162 wm_state->sr[level].plane =
1163 min(wm_state->sr[level].plane,
1164 wm_state->wm[level].sprite[sprite]);
1165 break;
1166 }
1167 }
1168
1169 /* clear any (partially) filled invalid levels */
1170 for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
1171 memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
1172 memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
1173 }
1174
1175 vlv_invert_wms(crtc);
1176 }
1177
1178 #define VLV_FIFO(plane, value) \
1179 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1180
1181 static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
1182 {
1183 struct drm_device *dev = crtc->base.dev;
1184 struct drm_i915_private *dev_priv = to_i915(dev);
1185 struct intel_plane *plane;
1186 int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
1187
1188 for_each_intel_plane_on_crtc(dev, crtc, plane) {
1189 if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
1190 WARN_ON(plane->wm.fifo_size != 63);
1191 continue;
1192 }
1193
1194 if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
1195 sprite0_start = plane->wm.fifo_size;
1196 else if (plane->plane == 0)
1197 sprite1_start = sprite0_start + plane->wm.fifo_size;
1198 else
1199 fifo_size = sprite1_start + plane->wm.fifo_size;
1200 }
1201
1202 WARN_ON(fifo_size != 512 - 1);
1203
1204 DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
1205 pipe_name(crtc->pipe), sprite0_start,
1206 sprite1_start, fifo_size);
1207
1208 switch (crtc->pipe) {
1209 uint32_t dsparb, dsparb2, dsparb3;
1210 case PIPE_A:
1211 dsparb = I915_READ(DSPARB);
1212 dsparb2 = I915_READ(DSPARB2);
1213
1214 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1215 VLV_FIFO(SPRITEB, 0xff));
1216 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1217 VLV_FIFO(SPRITEB, sprite1_start));
1218
1219 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1220 VLV_FIFO(SPRITEB_HI, 0x1));
1221 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1222 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1223
1224 I915_WRITE(DSPARB, dsparb);
1225 I915_WRITE(DSPARB2, dsparb2);
1226 break;
1227 case PIPE_B:
1228 dsparb = I915_READ(DSPARB);
1229 dsparb2 = I915_READ(DSPARB2);
1230
1231 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1232 VLV_FIFO(SPRITED, 0xff));
1233 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1234 VLV_FIFO(SPRITED, sprite1_start));
1235
1236 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
1237 VLV_FIFO(SPRITED_HI, 0xff));
1238 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
1239 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
1240
1241 I915_WRITE(DSPARB, dsparb);
1242 I915_WRITE(DSPARB2, dsparb2);
1243 break;
1244 case PIPE_C:
1245 dsparb3 = I915_READ(DSPARB3);
1246 dsparb2 = I915_READ(DSPARB2);
1247
1248 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
1249 VLV_FIFO(SPRITEF, 0xff));
1250 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
1251 VLV_FIFO(SPRITEF, sprite1_start));
1252
1253 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
1254 VLV_FIFO(SPRITEF_HI, 0xff));
1255 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
1256 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
1257
1258 I915_WRITE(DSPARB3, dsparb3);
1259 I915_WRITE(DSPARB2, dsparb2);
1260 break;
1261 default:
1262 break;
1263 }
1264 }
1265
1266 #undef VLV_FIFO
1267
1268 static void vlv_merge_wm(struct drm_device *dev,
1269 struct vlv_wm_values *wm)
1270 {
1271 struct intel_crtc *crtc;
1272 int num_active_crtcs = 0;
1273
1274 wm->level = to_i915(dev)->wm.max_level;
1275 wm->cxsr = true;
1276
1277 for_each_intel_crtc(dev, crtc) {
1278 const struct vlv_wm_state *wm_state = &crtc->wm_state;
1279
1280 if (!crtc->active)
1281 continue;
1282
1283 if (!wm_state->cxsr)
1284 wm->cxsr = false;
1285
1286 num_active_crtcs++;
1287 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
1288 }
1289
1290 if (num_active_crtcs != 1)
1291 wm->cxsr = false;
1292
1293 if (num_active_crtcs > 1)
1294 wm->level = VLV_WM_LEVEL_PM2;
1295
1296 for_each_intel_crtc(dev, crtc) {
1297 struct vlv_wm_state *wm_state = &crtc->wm_state;
1298 enum i915_pipe pipe = crtc->pipe;
1299
1300 if (!crtc->active)
1301 continue;
1302
1303 wm->pipe[pipe] = wm_state->wm[wm->level];
1304 if (wm->cxsr)
1305 wm->sr = wm_state->sr[wm->level];
1306
1307 wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
1308 wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
1309 wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
1310 wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
1311 }
1312 }
1313
1314 static void vlv_update_wm(struct drm_crtc *crtc)
1315 {
1316 struct drm_device *dev = crtc->dev;
1317 struct drm_i915_private *dev_priv = dev->dev_private;
1318 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1319 enum i915_pipe pipe = intel_crtc->pipe;
1320 struct vlv_wm_values wm = {};
1321
1322 vlv_compute_wm(intel_crtc);
1323 vlv_merge_wm(dev, &wm);
1324
1325 if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
1326 /* FIXME should be part of crtc atomic commit */
1327 vlv_pipe_set_fifo_size(intel_crtc);
1328 return;
1329 }
1330
1331 if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
1332 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
1333 chv_set_memory_dvfs(dev_priv, false);
1334
1335 if (wm.level < VLV_WM_LEVEL_PM5 &&
1336 dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
1337 chv_set_memory_pm5(dev_priv, false);
1338
1339 if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
1340 intel_set_memory_cxsr(dev_priv, false);
1341
1342 /* FIXME should be part of crtc atomic commit */
1343 vlv_pipe_set_fifo_size(intel_crtc);
1344
1345 vlv_write_wm_values(intel_crtc, &wm);
1346
1347 DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
1348 "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
1349 pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
1350 wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
1351 wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
1352
1353 if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
1354 intel_set_memory_cxsr(dev_priv, true);
1355
1356 if (wm.level >= VLV_WM_LEVEL_PM5 &&
1357 dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
1358 chv_set_memory_pm5(dev_priv, true);
1359
1360 if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
1361 dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
1362 chv_set_memory_dvfs(dev_priv, true);
1363
1364 dev_priv->wm.vlv = wm;
1365 }
1366
1367 #define single_plane_enabled(mask) is_power_of_2(mask)
1368
1369 static void g4x_update_wm(struct drm_crtc *crtc)
1370 {
1371 struct drm_device *dev = crtc->dev;
1372 static const int sr_latency_ns = 12000;
1373 struct drm_i915_private *dev_priv = dev->dev_private;
1374 int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1375 int plane_sr, cursor_sr;
1376 unsigned int enabled = 0;
1377 bool cxsr_enabled;
1378
1379 if (g4x_compute_wm0(dev, PIPE_A,
1380 &g4x_wm_info, pessimal_latency_ns,
1381 &g4x_cursor_wm_info, pessimal_latency_ns,
1382 &planea_wm, &cursora_wm))
1383 enabled |= 1 << PIPE_A;
1384
1385 if (g4x_compute_wm0(dev, PIPE_B,
1386 &g4x_wm_info, pessimal_latency_ns,
1387 &g4x_cursor_wm_info, pessimal_latency_ns,
1388 &planeb_wm, &cursorb_wm))
1389 enabled |= 1 << PIPE_B;
1390
1391 if (single_plane_enabled(enabled) &&
1392 g4x_compute_srwm(dev, ffs(enabled) - 1,
1393 sr_latency_ns,
1394 &g4x_wm_info,
1395 &g4x_cursor_wm_info,
1396 &plane_sr, &cursor_sr)) {
1397 cxsr_enabled = true;
1398 } else {
1399 cxsr_enabled = false;
1400 intel_set_memory_cxsr(dev_priv, false);
1401 plane_sr = cursor_sr = 0;
1402 }
1403
1404 DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
1405 "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1406 planea_wm, cursora_wm,
1407 planeb_wm, cursorb_wm,
1408 plane_sr, cursor_sr);
1409
1410 I915_WRITE(DSPFW1,
1411 FW_WM(plane_sr, SR) |
1412 FW_WM(cursorb_wm, CURSORB) |
1413 FW_WM(planeb_wm, PLANEB) |
1414 FW_WM(planea_wm, PLANEA));
1415 I915_WRITE(DSPFW2,
1416 (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1417 FW_WM(cursora_wm, CURSORA));
1418 /* HPLL off in SR has some issues on G4x... disable it */
1419 I915_WRITE(DSPFW3,
1420 (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1421 FW_WM(cursor_sr, CURSOR_SR));
1422
1423 if (cxsr_enabled)
1424 intel_set_memory_cxsr(dev_priv, true);
1425 }
1426
1427 static void i965_update_wm(struct drm_crtc *unused_crtc)
1428 {
1429 struct drm_device *dev = unused_crtc->dev;
1430 struct drm_i915_private *dev_priv = dev->dev_private;
1431 struct drm_crtc *crtc;
1432 int srwm = 1;
1433 int cursor_sr = 16;
1434 bool cxsr_enabled;
1435
1436 /* Calc sr entries for one plane configs */
1437 crtc = single_enabled_crtc(dev);
1438 if (crtc) {
1439 /* self-refresh has much higher latency */
1440 static const int sr_latency_ns = 12000;
1441 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1442 int clock = adjusted_mode->crtc_clock;
1443 int htotal = adjusted_mode->crtc_htotal;
1444 int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
1445 int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
1446 unsigned long line_time_us;
1447 int entries;
1448
1449 line_time_us = max(htotal * 1000 / clock, 1);
1450
1451 /* Use ns/us then divide to preserve precision */
1452 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1453 pixel_size * hdisplay;
1454 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1455 srwm = I965_FIFO_SIZE - entries;
1456 if (srwm < 0)
1457 srwm = 1;
1458 srwm &= 0x1ff;
1459 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1460 entries, srwm);
1461
1462 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1463 pixel_size * crtc->cursor->state->crtc_w;
1464 entries = DIV_ROUND_UP(entries,
1465 i965_cursor_wm_info.cacheline_size);
1466 cursor_sr = i965_cursor_wm_info.fifo_size -
1467 (entries + i965_cursor_wm_info.guard_size);
1468
1469 if (cursor_sr > i965_cursor_wm_info.max_wm)
1470 cursor_sr = i965_cursor_wm_info.max_wm;
1471
1472 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1473 "cursor %d\n", srwm, cursor_sr);
1474
1475 cxsr_enabled = true;
1476 } else {
1477 cxsr_enabled = false;
1478 /* Turn off self refresh if both pipes are enabled */
1479 intel_set_memory_cxsr(dev_priv, false);
1480 }
1481
1482 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1483 srwm);
1484
1485 /* 965 has limitations... */
1486 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
1487 FW_WM(8, CURSORB) |
1488 FW_WM(8, PLANEB) |
1489 FW_WM(8, PLANEA));
1490 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
1491 FW_WM(8, PLANEC_OLD));
1492 /* update cursor SR watermark */
1493 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
1494
1495 if (cxsr_enabled)
1496 intel_set_memory_cxsr(dev_priv, true);
1497 }
1498
1499 #undef FW_WM
1500
1501 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1502 {
1503 struct drm_device *dev = unused_crtc->dev;
1504 struct drm_i915_private *dev_priv = dev->dev_private;
1505 const struct intel_watermark_params *wm_info;
1506 uint32_t fwater_lo;
1507 uint32_t fwater_hi;
1508 int cwm, srwm = 1;
1509 int fifo_size;
1510 int planea_wm, planeb_wm;
1511 struct drm_crtc *crtc, *enabled = NULL;
1512
1513 if (IS_I945GM(dev))
1514 wm_info = &i945_wm_info;
1515 else if (!IS_GEN2(dev))
1516 wm_info = &i915_wm_info;
1517 else
1518 wm_info = &i830_a_wm_info;
1519
1520 fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1521 crtc = intel_get_crtc_for_plane(dev, 0);
1522 if (intel_crtc_active(crtc)) {
1523 const struct drm_display_mode *adjusted_mode;
1524 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1525 if (IS_GEN2(dev))
1526 cpp = 4;
1527
1528 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1529 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1530 wm_info, fifo_size, cpp,
1531 pessimal_latency_ns);
1532 enabled = crtc;
1533 } else {
1534 planea_wm = fifo_size - wm_info->guard_size;
1535 if (planea_wm > (long)wm_info->max_wm)
1536 planea_wm = wm_info->max_wm;
1537 }
1538
1539 if (IS_GEN2(dev))
1540 wm_info = &i830_bc_wm_info;
1541
1542 fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1543 crtc = intel_get_crtc_for_plane(dev, 1);
1544 if (intel_crtc_active(crtc)) {
1545 const struct drm_display_mode *adjusted_mode;
1546 int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
1547 if (IS_GEN2(dev))
1548 cpp = 4;
1549
1550 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1551 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1552 wm_info, fifo_size, cpp,
1553 pessimal_latency_ns);
1554 if (enabled == NULL)
1555 enabled = crtc;
1556 else
1557 enabled = NULL;
1558 } else {
1559 planeb_wm = fifo_size - wm_info->guard_size;
1560 if (planeb_wm > (long)wm_info->max_wm)
1561 planeb_wm = wm_info->max_wm;
1562 }
1563
1564 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1565
1566 if (IS_I915GM(dev) && enabled) {
1567 struct drm_i915_gem_object *obj;
1568
1569 obj = intel_fb_obj(enabled->primary->state->fb);
1570
1571 /* self-refresh seems busted with untiled */
1572 if (obj->tiling_mode == I915_TILING_NONE)
1573 enabled = NULL;
1574 }
1575
1576 /*
1577 * Overlay gets an aggressive default since video jitter is bad.
1578 */
1579 cwm = 2;
1580
1581 /* Play safe and disable self-refresh before adjusting watermarks. */
1582 intel_set_memory_cxsr(dev_priv, false);
1583
1584 /* Calc sr entries for one plane configs */
1585 if (HAS_FW_BLC(dev) && enabled) {
1586 /* self-refresh has much higher latency */
1587 static const int sr_latency_ns = 6000;
1588 const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
1589 int clock = adjusted_mode->crtc_clock;
1590 int htotal = adjusted_mode->crtc_htotal;
1591 int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
1592 int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
1593 unsigned long line_time_us;
1594 int entries;
1595
1596 line_time_us = max(htotal * 1000 / clock, 1);
1597
1598 /* Use ns/us then divide to preserve precision */
1599 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1600 pixel_size * hdisplay;
1601 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1602 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1603 srwm = wm_info->fifo_size - entries;
1604 if (srwm < 0)
1605 srwm = 1;
1606
1607 if (IS_I945G(dev) || IS_I945GM(dev))
1608 I915_WRITE(FW_BLC_SELF,
1609 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1610 else if (IS_I915GM(dev))
1611 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1612 }
1613
1614 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1615 planea_wm, planeb_wm, cwm, srwm);
1616
1617 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1618 fwater_hi = (cwm & 0x1f);
1619
1620 /* Set request length to 8 cachelines per fetch */
1621 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1622 fwater_hi = fwater_hi | (1 << 8);
1623
1624 I915_WRITE(FW_BLC, fwater_lo);
1625 I915_WRITE(FW_BLC2, fwater_hi);
1626
1627 if (enabled)
1628 intel_set_memory_cxsr(dev_priv, true);
1629 }
1630
1631 static void i845_update_wm(struct drm_crtc *unused_crtc)
1632 {
1633 struct drm_device *dev = unused_crtc->dev;
1634 struct drm_i915_private *dev_priv = dev->dev_private;
1635 struct drm_crtc *crtc;
1636 const struct drm_display_mode *adjusted_mode;
1637 uint32_t fwater_lo;
1638 int planea_wm;
1639
1640 crtc = single_enabled_crtc(dev);
1641 if (crtc == NULL)
1642 return;
1643
1644 adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
1645 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1646 &i845_wm_info,
1647 dev_priv->display.get_fifo_size(dev, 0),
1648 4, pessimal_latency_ns);
1649 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1650 fwater_lo |= (3<<8) | planea_wm;
1651
1652 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1653
1654 I915_WRITE(FW_BLC, fwater_lo);
1655 }
1656
1657 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
1658 {
1659 uint32_t pixel_rate;
1660
1661 pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
1662
1663 /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1664 * adjust the pixel_rate here. */
1665
1666 if (pipe_config->pch_pfit.enabled) {
1667 uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1668 uint32_t pfit_size = pipe_config->pch_pfit.size;
1669
1670 pipe_w = pipe_config->pipe_src_w;
1671 pipe_h = pipe_config->pipe_src_h;
1672
1673 pfit_w = (pfit_size >> 16) & 0xFFFF;
1674 pfit_h = pfit_size & 0xFFFF;
1675 if (pipe_w < pfit_w)
1676 pipe_w = pfit_w;
1677 if (pipe_h < pfit_h)
1678 pipe_h = pfit_h;
1679
1680 pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1681 pfit_w * pfit_h);
1682 }
1683
1684 return pixel_rate;
1685 }
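/*
 * Worked example (illustrative): a 2560x1440 pipe panel-fitted down to
 * 1920x1080 multiplies the pixel rate by (2560 * 1440) / (1920 * 1080)
 * = 16/9, so a 90,000 kHz crtc_clock becomes an effective 160,000 kHz
 * for the watermark calculations.
 */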
1686
1687 /* latency must be in 0.1us units. */
1688 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1689 uint32_t latency)
1690 {
1691 uint64_t ret;
1692
1693 if (WARN(latency == 0, "Latency value missing\n"))
1694 return UINT_MAX;
1695
1696 ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1697 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1698
1699 return ret;
1700 }
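/*
 * Worked example (illustrative numbers): pixel_rate = 160,000 kHz,
 * 4 bytes per pixel and latency = 20 (2 us) give
 * 160000 * 4 * 20 = 12,800,000, which divided (rounding up) by
 * (64 * 10000) yields 20, plus the fixed 2 gives a method-1 watermark
 * of 22.
 */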
1701
1702 /* latency must be in 0.1us units. */
1703 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1704 uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1705 uint32_t latency)
1706 {
1707 uint32_t ret;
1708
1709 if (WARN(latency == 0, "Latency value missing\n"))
1710 return UINT_MAX;
1711
1712 ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1713 ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1714 ret = DIV_ROUND_UP(ret, 64) + 2;
1715 return ret;
1716 }
1717
1718 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1719 uint8_t bytes_per_pixel)
1720 {
1721 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1722 }
1723
1724 struct skl_pipe_wm_parameters {
1725 bool active;
1726 uint32_t pipe_htotal;
1727 uint32_t pixel_rate; /* in KHz */
1728 struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1729 };
1730
1731 struct ilk_wm_maximums {
1732 uint16_t pri;
1733 uint16_t spr;
1734 uint16_t cur;
1735 uint16_t fbc;
1736 };
1737
1738 /* used in computing the new watermarks state */
1739 struct intel_wm_config {
1740 unsigned int num_pipes_active;
1741 bool sprites_enabled;
1742 bool sprites_scaled;
1743 };
1744
1745 /*
1746 * For both WM_PIPE and WM_LP.
1747 * mem_value must be in 0.1us units.
1748 */
1749 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
1750 const struct intel_plane_state *pstate,
1751 uint32_t mem_value,
1752 bool is_lp)
1753 {
1754 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1755 uint32_t method1, method2;
1756
1757 if (!cstate->base.active || !pstate->visible)
1758 return 0;
1759
1760 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1761
1762 if (!is_lp)
1763 return method1;
1764
1765 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1766 cstate->base.adjusted_mode.crtc_htotal,
1767 drm_rect_width(&pstate->dst),
1768 bpp,
1769 mem_value);
1770
1771 return min(method1, method2);
1772 }
1773
1774 /*
1775 * For both WM_PIPE and WM_LP.
1776 * mem_value must be in 0.1us units.
1777 */
1778 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
1779 const struct intel_plane_state *pstate,
1780 uint32_t mem_value)
1781 {
1782 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1783 uint32_t method1, method2;
1784
1785 if (!cstate->base.active || !pstate->visible)
1786 return 0;
1787
1788 method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
1789 method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1790 cstate->base.adjusted_mode.crtc_htotal,
1791 drm_rect_width(&pstate->dst),
1792 bpp,
1793 mem_value);
1794 return min(method1, method2);
1795 }
1796
1797 /*
1798 * For both WM_PIPE and WM_LP.
1799 * mem_value must be in 0.1us units.
1800 */
1801 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
1802 const struct intel_plane_state *pstate,
1803 uint32_t mem_value)
1804 {
1805 /*
1806 * We treat the cursor plane as always-on for the purposes of watermark
1807 * calculation. Until we have two-stage watermark programming merged,
1808 * this is necessary to avoid flickering.
1809 */
1810 int cpp = 4;
1811 int width = pstate->visible ? pstate->base.crtc_w : 64;
1812
1813 if (!cstate->base.active)
1814 return 0;
1815
1816 return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
1817 cstate->base.adjusted_mode.crtc_htotal,
1818 width, cpp, mem_value);
1819 }
1820
1821 /* Only for WM_LP. */
1822 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1823 const struct intel_plane_state *pstate,
1824 uint32_t pri_val)
1825 {
1826 int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
1827
1828 if (!cstate->base.active || !pstate->visible)
1829 return 0;
1830
1831 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
1832 }
1833
1834 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1835 {
1836 if (INTEL_INFO(dev)->gen >= 8)
1837 return 3072;
1838 else if (INTEL_INFO(dev)->gen >= 7)
1839 return 768;
1840 else
1841 return 512;
1842 }
1843
1844 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1845 int level, bool is_sprite)
1846 {
1847 if (INTEL_INFO(dev)->gen >= 8)
1848 /* BDW primary/sprite plane watermarks */
1849 return level == 0 ? 255 : 2047;
1850 else if (INTEL_INFO(dev)->gen >= 7)
1851 /* IVB/HSW primary/sprite plane watermarks */
1852 return level == 0 ? 127 : 1023;
1853 else if (!is_sprite)
1854 /* ILK/SNB primary plane watermarks */
1855 return level == 0 ? 127 : 511;
1856 else
1857 /* ILK/SNB sprite plane watermarks */
1858 return level == 0 ? 63 : 255;
1859 }
1860
1861 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1862 int level)
1863 {
1864 if (INTEL_INFO(dev)->gen >= 7)
1865 return level == 0 ? 63 : 255;
1866 else
1867 return level == 0 ? 31 : 63;
1868 }
1869
1870 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1871 {
1872 if (INTEL_INFO(dev)->gen >= 8)
1873 return 31;
1874 else
1875 return 15;
1876 }
1877
1878 /* Calculate the maximum primary/sprite plane watermark */
1879 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1880 int level,
1881 const struct intel_wm_config *config,
1882 enum intel_ddb_partitioning ddb_partitioning,
1883 bool is_sprite)
1884 {
1885 unsigned int fifo_size = ilk_display_fifo_size(dev);
1886
1887 /* if sprites aren't enabled, sprites get nothing */
1888 if (is_sprite && !config->sprites_enabled)
1889 return 0;
1890
1891 /* HSW allows LP1+ watermarks even with multiple pipes */
1892 if (level == 0 || config->num_pipes_active > 1) {
1893 fifo_size /= INTEL_INFO(dev)->num_pipes;
1894
1895 /*
1896 * For some reason the non-self-refresh
1897 * FIFO size is only half of the
1898 * self-refresh FIFO size on ILK/SNB.
1899 */
1900 if (INTEL_INFO(dev)->gen <= 6)
1901 fifo_size /= 2;
1902 }
1903
1904 if (config->sprites_enabled) {
1905 /* level 0 is always calculated with 1:1 split */
1906 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1907 if (is_sprite)
1908 fifo_size *= 5;
1909 fifo_size /= 6;
1910 } else {
1911 fifo_size /= 2;
1912 }
1913 }
1914
1915 /* clamp to max that the registers can hold */
1916 return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1917 }
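/*
 * Illustrative only (not from the original source): on a hypothetical
 * gen7 part with a single active pipe, the LP1 self-refresh FIFO stays
 * at the full 768 entries. With sprites enabled and 5:6 DDB
 * partitioning the code above then gives the primary plane
 * 768 / 6 = 128 entries and the sprite 768 * 5 / 6 = 640, both well
 * under the 1023-entry register limit for gen7 LP levels.
 */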
1918
1919 /* Calculate the maximum cursor plane watermark */
1920 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1921 int level,
1922 const struct intel_wm_config *config)
1923 {
1924 /* HSW LP1+ watermarks w/ multiple pipes */
1925 if (level > 0 && config->num_pipes_active > 1)
1926 return 64;
1927
1928 /* otherwise just report max that registers can hold */
1929 return ilk_cursor_wm_reg_max(dev, level);
1930 }
1931
1932 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1933 int level,
1934 const struct intel_wm_config *config,
1935 enum intel_ddb_partitioning ddb_partitioning,
1936 struct ilk_wm_maximums *max)
1937 {
1938 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1939 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1940 max->cur = ilk_cursor_wm_max(dev, level, config);
1941 max->fbc = ilk_fbc_wm_reg_max(dev);
1942 }
1943
1944 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1945 int level,
1946 struct ilk_wm_maximums *max)
1947 {
1948 max->pri = ilk_plane_wm_reg_max(dev, level, false);
1949 max->spr = ilk_plane_wm_reg_max(dev, level, true);
1950 max->cur = ilk_cursor_wm_reg_max(dev, level);
1951 max->fbc = ilk_fbc_wm_reg_max(dev);
1952 }
1953
1954 static bool ilk_validate_wm_level(int level,
1955 const struct ilk_wm_maximums *max,
1956 struct intel_wm_level *result)
1957 {
1958 bool ret;
1959
1960 /* already determined to be invalid? */
1961 if (!result->enable)
1962 return false;
1963
1964 result->enable = result->pri_val <= max->pri &&
1965 result->spr_val <= max->spr &&
1966 result->cur_val <= max->cur;
1967
1968 ret = result->enable;
1969
1970 /*
1971 * HACK until we can pre-compute everything,
1972 * and thus fail gracefully if LP0 watermarks
1973 * are exceeded...
1974 */
1975 if (level == 0 && !result->enable) {
1976 if (result->pri_val > max->pri)
1977 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1978 level, result->pri_val, max->pri);
1979 if (result->spr_val > max->spr)
1980 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1981 level, result->spr_val, max->spr);
1982 if (result->cur_val > max->cur)
1983 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1984 level, result->cur_val, max->cur);
1985
1986 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1987 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1988 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1989 result->enable = true;
1990 }
1991
1992 return ret;
1993 }
1994
1995 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1996 const struct intel_crtc *intel_crtc,
1997 int level,
1998 struct intel_crtc_state *cstate,
1999 struct intel_wm_level *result)
2000 {
2001 struct intel_plane *intel_plane;
2002 uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2003 uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2004 uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2005
2006 /* WM1+ latency values stored in 0.5us units */
2007 if (level > 0) {
2008 pri_latency *= 5;
2009 spr_latency *= 5;
2010 cur_latency *= 5;
2011 }
2012
2013 for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
2014 struct intel_plane_state *pstate =
2015 to_intel_plane_state(intel_plane->base.state);
2016
2017 switch (intel_plane->base.type) {
2018 case DRM_PLANE_TYPE_PRIMARY:
2019 result->pri_val = ilk_compute_pri_wm(cstate, pstate,
2020 pri_latency,
2021 level);
2022 result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
2023 result->pri_val);
2024 break;
2025 case DRM_PLANE_TYPE_OVERLAY:
2026 result->spr_val = ilk_compute_spr_wm(cstate, pstate,
2027 spr_latency);
2028 break;
2029 case DRM_PLANE_TYPE_CURSOR:
2030 result->cur_val = ilk_compute_cur_wm(cstate, pstate,
2031 cur_latency);
2032 break;
2033 }
2034 }
2035
2036 result->enable = true;
2037 }
2038
2039 static uint32_t
2040 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
2041 {
2042 struct drm_i915_private *dev_priv = dev->dev_private;
2043 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2044 const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
2045 u32 linetime, ips_linetime;
2046
2047 if (!intel_crtc->active)
2048 return 0;
2049
2050 /* The watermarks are computed based on how long it takes to fill a
2051 * single row at the given clock rate, multiplied by 8.
2052 */
2053 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2054 adjusted_mode->crtc_clock);
2055 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2056 dev_priv->cdclk_freq);
2057
2058 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2059 PIPE_WM_LINETIME_TIME(linetime);
2060 }
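/*
 * Illustrative only: for a hypothetical 2200-htotal mode at a 148500 kHz
 * crtc_clock, one line takes 2200 * 1000 / 148500 ~= 14.8us, so the
 * programmed value is DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119
 * in 0.125us units; ips_linetime is the same computation against the
 * current cdclk instead of the pipe clock.
 */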
2061
2062 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
2063 {
2064 struct drm_i915_private *dev_priv = dev->dev_private;
2065
2066 if (IS_GEN9(dev)) {
2067 uint32_t val;
2068 int ret, i;
2069 int level, max_level = ilk_wm_max_level(dev);
2070
2071 /* read the first set of memory latencies[0:3] */
2072 val = 0; /* data0 to be programmed to 0 for first set */
2073 mutex_lock(&dev_priv->rps.hw_lock);
2074 ret = sandybridge_pcode_read(dev_priv,
2075 GEN9_PCODE_READ_MEM_LATENCY,
2076 &val);
2077 mutex_unlock(&dev_priv->rps.hw_lock);
2078
2079 if (ret) {
2080 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2081 return;
2082 }
2083
2084 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2085 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2086 GEN9_MEM_LATENCY_LEVEL_MASK;
2087 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2088 GEN9_MEM_LATENCY_LEVEL_MASK;
2089 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2090 GEN9_MEM_LATENCY_LEVEL_MASK;
2091
2092 /* read the second set of memory latencies[4:7] */
2093 val = 1; /* data0 to be programmed to 1 for second set */
2094 mutex_lock(&dev_priv->rps.hw_lock);
2095 ret = sandybridge_pcode_read(dev_priv,
2096 GEN9_PCODE_READ_MEM_LATENCY,
2097 &val);
2098 mutex_unlock(&dev_priv->rps.hw_lock);
2099 if (ret) {
2100 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2101 return;
2102 }
2103
2104 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2105 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2106 GEN9_MEM_LATENCY_LEVEL_MASK;
2107 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2108 GEN9_MEM_LATENCY_LEVEL_MASK;
2109 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2110 GEN9_MEM_LATENCY_LEVEL_MASK;
2111
2112 /*
2113 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2114 * need to be disabled. We make sure to sanitize the values out
2115 * of the punit to satisfy this requirement.
2116 */
2117 for (level = 1; level <= max_level; level++) {
2118 if (wm[level] == 0) {
2119 for (i = level + 1; i <= max_level; i++)
2120 wm[i] = 0;
2121 break;
2122 }
2123 }
2124
2125 /*
2126 * WaWmMemoryReadLatency:skl
2127 *
2128 * punit doesn't take into account the read latency so we need
2129 * to add 2us to the various latency levels we retrieve from the
2130 * punit when the level 0 response data is 0us.
2131 */
2132 if (wm[0] == 0) {
2133 wm[0] += 2;
2134 for (level = 1; level <= max_level; level++) {
2135 if (wm[level] == 0)
2136 break;
2137 wm[level] += 2;
2138 }
2139 }
2140
2141 } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2142 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2143
2144 wm[0] = (sskpd >> 56) & 0xFF;
2145 if (wm[0] == 0)
2146 wm[0] = sskpd & 0xF;
2147 wm[1] = (sskpd >> 4) & 0xFF;
2148 wm[2] = (sskpd >> 12) & 0xFF;
2149 wm[3] = (sskpd >> 20) & 0x1FF;
2150 wm[4] = (sskpd >> 32) & 0x1FF;
2151 } else if (INTEL_INFO(dev)->gen >= 6) {
2152 uint32_t sskpd = I915_READ(MCH_SSKPD);
2153
2154 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2155 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2156 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2157 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2158 } else if (INTEL_INFO(dev)->gen >= 5) {
2159 uint32_t mltr = I915_READ(MLTR_ILK);
2160
2161 /* ILK primary LP0 latency is 700 ns */
2162 wm[0] = 7;
2163 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2164 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2165 }
2166 }
2167
2168 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2169 {
2170 /* ILK sprite LP0 latency is 1300 ns */
2171 if (INTEL_INFO(dev)->gen == 5)
2172 wm[0] = 13;
2173 }
2174
2175 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2176 {
2177 /* ILK cursor LP0 latency is 1300 ns */
2178 if (INTEL_INFO(dev)->gen == 5)
2179 wm[0] = 13;
2180
2181 /* WaDoubleCursorLP3Latency:ivb */
2182 if (IS_IVYBRIDGE(dev))
2183 wm[3] *= 2;
2184 }
2185
2186 int ilk_wm_max_level(const struct drm_device *dev)
2187 {
2188 /* how many WM levels are we expecting */
2189 if (INTEL_INFO(dev)->gen >= 9)
2190 return 7;
2191 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2192 return 4;
2193 else if (INTEL_INFO(dev)->gen >= 6)
2194 return 3;
2195 else
2196 return 2;
2197 }
2198
2199 static void intel_print_wm_latency(struct drm_device *dev,
2200 const char *name,
2201 const uint16_t wm[8])
2202 {
2203 int level, max_level = ilk_wm_max_level(dev);
2204
2205 for (level = 0; level <= max_level; level++) {
2206 unsigned int latency = wm[level];
2207
2208 if (latency == 0) {
2209 DRM_ERROR("%s WM%d latency not provided\n",
2210 name, level);
2211 continue;
2212 }
2213
2214 /*
2215 * - latencies are in us on gen9.
2216 * - before then, WM1+ latency values are in 0.5us units
2217 */
2218 if (IS_GEN9(dev))
2219 latency *= 10;
2220 else if (level > 0)
2221 latency *= 5;
2222
2223 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2224 name, level, wm[level],
2225 latency / 10, latency % 10);
2226 }
2227 }
2228
2229 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2230 uint16_t wm[5], uint16_t min)
2231 {
2232 int level, max_level = ilk_wm_max_level(dev_priv->dev);
2233
2234 if (wm[0] >= min)
2235 return false;
2236
2237 wm[0] = max(wm[0], min);
2238 for (level = 1; level <= max_level; level++)
2239 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2240
2241 return true;
2242 }
2243
2244 static void snb_wm_latency_quirk(struct drm_device *dev)
2245 {
2246 struct drm_i915_private *dev_priv = dev->dev_private;
2247 bool changed;
2248
2249 /*
2250 * The BIOS provided WM memory latency values are often
2251 * inadequate for high resolution displays. Adjust them.
2252 */
2253 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
2254 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
2255 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
2256
2257 if (!changed)
2258 return;
2259
2260 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
2261 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2262 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2263 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2264 }
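/*
 * Illustrative only: with min == 12 the quirk above raises WM0 to at
 * least 12 (1.2us, in the 0.1us units used for WM0) and every WM1+
 * level to at least DIV_ROUND_UP(12, 5) = 3 (1.5us, since WM1+ levels
 * are stored in 0.5us units).
 */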
2265
2266 static void ilk_setup_wm_latency(struct drm_device *dev)
2267 {
2268 struct drm_i915_private *dev_priv = dev->dev_private;
2269
2270 intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2271
2272 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2273 sizeof(dev_priv->wm.pri_latency));
2274 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2275 sizeof(dev_priv->wm.pri_latency));
2276
2277 intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2278 intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2279
2280 intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2281 intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2282 intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2283
2284 if (IS_GEN6(dev))
2285 snb_wm_latency_quirk(dev);
2286 }
2287
2288 static void skl_setup_wm_latency(struct drm_device *dev)
2289 {
2290 struct drm_i915_private *dev_priv = dev->dev_private;
2291
2292 intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
2293 intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
2294 }
2295
2296 static void ilk_compute_wm_config(struct drm_device *dev,
2297 struct intel_wm_config *config)
2298 {
2299 struct intel_crtc *intel_crtc;
2300
2301 /* Compute the currently _active_ config */
2302 for_each_intel_crtc(dev, intel_crtc) {
2303 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
2304
2305 if (!wm->pipe_enabled)
2306 continue;
2307
2308 config->sprites_enabled |= wm->sprites_enabled;
2309 config->sprites_scaled |= wm->sprites_scaled;
2310 config->num_pipes_active++;
2311 }
2312 }
2313
2314 /* Compute new watermarks for the pipe */
2315 static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
2316 struct intel_pipe_wm *pipe_wm)
2317 {
2318 struct drm_crtc *crtc = cstate->base.crtc;
2319 struct drm_device *dev = crtc->dev;
2320 const struct drm_i915_private *dev_priv = dev->dev_private;
2321 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2322 struct intel_plane *intel_plane;
2323 struct intel_plane_state *sprstate = NULL;
2324 int level, max_level = ilk_wm_max_level(dev);
2325 /* LP0 watermark maximums depend on this pipe alone */
2326 struct intel_wm_config config = {
2327 .num_pipes_active = 1,
2328 };
2329 struct ilk_wm_maximums max;
2330
2331 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
2332 if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
2333 sprstate = to_intel_plane_state(intel_plane->base.state);
2334 break;
2335 }
2336 }
2337
2338 config.sprites_enabled = sprstate->visible;
2339 config.sprites_scaled = sprstate->visible &&
2340 (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
2341 drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
2342
2343 pipe_wm->pipe_enabled = cstate->base.active;
2344 pipe_wm->sprites_enabled = sprstate->visible;
2345 pipe_wm->sprites_scaled = config.sprites_scaled;
2346
2347 /* ILK/SNB: LP2+ watermarks only w/o sprites */
2348 if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible)
2349 max_level = 1;
2350
2351 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2352 if (config.sprites_scaled)
2353 max_level = 0;
2354
2355 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
2356
2357 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2358 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2359
2360 /* LP0 watermarks always use 1/2 DDB partitioning */
2361 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2362
2363 /* At least LP0 must be valid */
2364 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
2365 return false;
2366
2367 ilk_compute_wm_reg_maximums(dev, 1, &max);
2368
2369 for (level = 1; level <= max_level; level++) {
2370 struct intel_wm_level wm = {};
2371
2372 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
2373
2374 /*
2375 * Disable any watermark level that exceeds the
2376 * register maximums since such watermarks are
2377 * always invalid.
2378 */
2379 if (!ilk_validate_wm_level(level, &max, &wm))
2380 break;
2381
2382 pipe_wm->wm[level] = wm;
2383 }
2384
2385 return true;
2386 }
2387
2388 /*
2389 * Merge the watermarks from all active pipes for a specific level.
2390 */
2391 static void ilk_merge_wm_level(struct drm_device *dev,
2392 int level,
2393 struct intel_wm_level *ret_wm)
2394 {
2395 const struct intel_crtc *intel_crtc;
2396
2397 ret_wm->enable = true;
2398
2399 for_each_intel_crtc(dev, intel_crtc) {
2400 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2401 const struct intel_wm_level *wm = &active->wm[level];
2402
2403 if (!active->pipe_enabled)
2404 continue;
2405
2406 /*
2407 * The watermark values may have been used in the past,
2408 * so we must maintain them in the registers for some
2409 * time even if the level is now disabled.
2410 */
2411 if (!wm->enable)
2412 ret_wm->enable = false;
2413
2414 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2415 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2416 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2417 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2418 }
2419 }
2420
2421 /*
2422 * Merge all low power watermarks for all active pipes.
2423 */
2424 static void ilk_wm_merge(struct drm_device *dev,
2425 const struct intel_wm_config *config,
2426 const struct ilk_wm_maximums *max,
2427 struct intel_pipe_wm *merged)
2428 {
2429 struct drm_i915_private *dev_priv = dev->dev_private;
2430 int level, max_level = ilk_wm_max_level(dev);
2431 int last_enabled_level = max_level;
2432
2433 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2434 if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2435 config->num_pipes_active > 1)
2436 return;
2437
2438 /* ILK: FBC WM must be disabled always */
2439 merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2440
2441 /* merge each WM1+ level */
2442 for (level = 1; level <= max_level; level++) {
2443 struct intel_wm_level *wm = &merged->wm[level];
2444
2445 ilk_merge_wm_level(dev, level, wm);
2446
2447 if (level > last_enabled_level)
2448 wm->enable = false;
2449 else if (!ilk_validate_wm_level(level, max, wm))
2450 /* make sure all following levels get disabled */
2451 last_enabled_level = level - 1;
2452
2453 /*
2454 * The spec says it is preferred to disable
2455 * FBC WMs instead of disabling a WM level.
2456 */
2457 if (wm->fbc_val > max->fbc) {
2458 if (wm->enable)
2459 merged->fbc_wm_enabled = false;
2460 wm->fbc_val = 0;
2461 }
2462 }
2463
2464 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2465 /*
2466 * FIXME this is racy. FBC might get enabled later.
2467 * What we should check here is whether FBC can be
2468 * enabled sometime later.
2469 */
2470 if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
2471 intel_fbc_enabled(dev_priv)) {
2472 for (level = 2; level <= max_level; level++) {
2473 struct intel_wm_level *wm = &merged->wm[level];
2474
2475 wm->enable = false;
2476 }
2477 }
2478 }
2479
2480 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2481 {
2482 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2483 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2484 }
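/*
 * Illustrative only: on a platform with five merged levels (wm[4]
 * valid and enabled, as on HSW/BDW) LP1/LP2/LP3 map to levels 1/3/4;
 * otherwise they map to levels 1/2/3, which is what the expression
 * above encodes.
 */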
2485
2486 /* The value we need to program into the WM_LPx latency field */
2487 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2488 {
2489 struct drm_i915_private *dev_priv = dev->dev_private;
2490
2491 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2492 return 2 * level;
2493 else
2494 return dev_priv->wm.pri_latency[level];
2495 }
2496
2497 static void ilk_compute_wm_results(struct drm_device *dev,
2498 const struct intel_pipe_wm *merged,
2499 enum intel_ddb_partitioning partitioning,
2500 struct ilk_wm_values *results)
2501 {
2502 struct intel_crtc *intel_crtc;
2503 int level, wm_lp;
2504
2505 results->enable_fbc_wm = merged->fbc_wm_enabled;
2506 results->partitioning = partitioning;
2507
2508 /* LP1+ register values */
2509 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2510 const struct intel_wm_level *r;
2511
2512 level = ilk_wm_lp_to_level(wm_lp, merged);
2513
2514 r = &merged->wm[level];
2515
2516 /*
2517 * Maintain the watermark values even if the level is
2518 * disabled. Doing otherwise could cause underruns.
2519 */
2520 results->wm_lp[wm_lp - 1] =
2521 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2522 (r->pri_val << WM1_LP_SR_SHIFT) |
2523 r->cur_val;
2524
2525 if (r->enable)
2526 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2527
2528 if (INTEL_INFO(dev)->gen >= 8)
2529 results->wm_lp[wm_lp - 1] |=
2530 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2531 else
2532 results->wm_lp[wm_lp - 1] |=
2533 r->fbc_val << WM1_LP_FBC_SHIFT;
2534
2535 /*
2536 * Always set WM1S_LP_EN when spr_val != 0, even if the
2537 * level is disabled. Doing otherwise could cause underruns.
2538 */
2539 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2540 WARN_ON(wm_lp != 1);
2541 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2542 } else
2543 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2544 }
2545
2546 /* LP0 register values */
2547 for_each_intel_crtc(dev, intel_crtc) {
2548 enum i915_pipe pipe = intel_crtc->pipe;
2549 const struct intel_wm_level *r =
2550 &intel_crtc->wm.active.wm[0];
2551
2552 if (WARN_ON(!r->enable))
2553 continue;
2554
2555 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2556
2557 results->wm_pipe[pipe] =
2558 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2559 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2560 r->cur_val;
2561 }
2562 }
2563
2564 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
2565 * case both are at the same level. Prefer r1 in case they're the same. */
2566 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2567 struct intel_pipe_wm *r1,
2568 struct intel_pipe_wm *r2)
2569 {
2570 int level, max_level = ilk_wm_max_level(dev);
2571 int level1 = 0, level2 = 0;
2572
2573 for (level = 1; level <= max_level; level++) {
2574 if (r1->wm[level].enable)
2575 level1 = level;
2576 if (r2->wm[level].enable)
2577 level2 = level;
2578 }
2579
2580 if (level1 == level2) {
2581 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2582 return r2;
2583 else
2584 return r1;
2585 } else if (level1 > level2) {
2586 return r1;
2587 } else {
2588 return r2;
2589 }
2590 }
2591
2592 /* dirty bits used to track which watermarks need changes */
2593 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2594 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2595 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2596 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2597 #define WM_DIRTY_FBC (1 << 24)
2598 #define WM_DIRTY_DDB (1 << 25)
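/*
 * Illustrative only: with three pipes the resulting dirty mask uses
 * bits 0-2 for WM_DIRTY_PIPE(A..C), bits 8-10 for
 * WM_DIRTY_LINETIME(A..C), bits 16-18 for WM_DIRTY_LP(1..3), bit 24
 * for FBC and bit 25 for the DDB partitioning.
 */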
2599
2600 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2601 const struct ilk_wm_values *old,
2602 const struct ilk_wm_values *new)
2603 {
2604 unsigned int dirty = 0;
2605 enum i915_pipe pipe;
2606 int wm_lp;
2607
2608 for_each_pipe(dev_priv, pipe) {
2609 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2610 dirty |= WM_DIRTY_LINETIME(pipe);
2611 /* Must disable LP1+ watermarks too */
2612 dirty |= WM_DIRTY_LP_ALL;
2613 }
2614
2615 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2616 dirty |= WM_DIRTY_PIPE(pipe);
2617 /* Must disable LP1+ watermarks too */
2618 dirty |= WM_DIRTY_LP_ALL;
2619 }
2620 }
2621
2622 if (old->enable_fbc_wm != new->enable_fbc_wm) {
2623 dirty |= WM_DIRTY_FBC;
2624 /* Must disable LP1+ watermarks too */
2625 dirty |= WM_DIRTY_LP_ALL;
2626 }
2627
2628 if (old->partitioning != new->partitioning) {
2629 dirty |= WM_DIRTY_DDB;
2630 /* Must disable LP1+ watermarks too */
2631 dirty |= WM_DIRTY_LP_ALL;
2632 }
2633
2634 /* LP1+ watermarks already deemed dirty, no need to continue */
2635 if (dirty & WM_DIRTY_LP_ALL)
2636 return dirty;
2637
2638 /* Find the lowest numbered LP1+ watermark in need of an update... */
2639 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2640 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2641 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2642 break;
2643 }
2644
2645 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2646 for (; wm_lp <= 3; wm_lp++)
2647 dirty |= WM_DIRTY_LP(wm_lp);
2648
2649 return dirty;
2650 }
2651
2652 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2653 unsigned int dirty)
2654 {
2655 struct ilk_wm_values *previous = &dev_priv->wm.hw;
2656 bool changed = false;
2657
2658 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2659 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2660 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2661 changed = true;
2662 }
2663 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2664 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2665 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2666 changed = true;
2667 }
2668 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2669 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2670 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2671 changed = true;
2672 }
2673
2674 /*
2675 * Don't touch WM1S_LP_EN here.
2676 * Doing so could cause underruns.
2677 */
2678
2679 return changed;
2680 }
2681
2682 /*
2683 * The spec says we shouldn't write when we don't need to, because every write
2684 * causes WMs to be re-evaluated, expending some power.
2685 */
2686 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2687 struct ilk_wm_values *results)
2688 {
2689 struct drm_device *dev = dev_priv->dev;
2690 struct ilk_wm_values *previous = &dev_priv->wm.hw;
2691 unsigned int dirty;
2692 uint32_t val;
2693
2694 dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2695 if (!dirty)
2696 return;
2697
2698 _ilk_disable_lp_wm(dev_priv, dirty);
2699
2700 if (dirty & WM_DIRTY_PIPE(PIPE_A))
2701 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2702 if (dirty & WM_DIRTY_PIPE(PIPE_B))
2703 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2704 if (dirty & WM_DIRTY_PIPE(PIPE_C))
2705 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2706
2707 if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2708 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2709 if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2710 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2711 if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2712 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2713
2714 if (dirty & WM_DIRTY_DDB) {
2715 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2716 val = I915_READ(WM_MISC);
2717 if (results->partitioning == INTEL_DDB_PART_1_2)
2718 val &= ~WM_MISC_DATA_PARTITION_5_6;
2719 else
2720 val |= WM_MISC_DATA_PARTITION_5_6;
2721 I915_WRITE(WM_MISC, val);
2722 } else {
2723 val = I915_READ(DISP_ARB_CTL2);
2724 if (results->partitioning == INTEL_DDB_PART_1_2)
2725 val &= ~DISP_DATA_PARTITION_5_6;
2726 else
2727 val |= DISP_DATA_PARTITION_5_6;
2728 I915_WRITE(DISP_ARB_CTL2, val);
2729 }
2730 }
2731
2732 if (dirty & WM_DIRTY_FBC) {
2733 val = I915_READ(DISP_ARB_CTL);
2734 if (results->enable_fbc_wm)
2735 val &= ~DISP_FBC_WM_DIS;
2736 else
2737 val |= DISP_FBC_WM_DIS;
2738 I915_WRITE(DISP_ARB_CTL, val);
2739 }
2740
2741 if (dirty & WM_DIRTY_LP(1) &&
2742 previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2743 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2744
2745 if (INTEL_INFO(dev)->gen >= 7) {
2746 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2747 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2748 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2749 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2750 }
2751
2752 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2753 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2754 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2755 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2756 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2757 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2758
2759 dev_priv->wm.hw = *results;
2760 }
2761
2762 static bool ilk_disable_lp_wm(struct drm_device *dev)
2763 {
2764 struct drm_i915_private *dev_priv = dev->dev_private;
2765
2766 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2767 }
2768
2769 /*
2770 * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2771 * different active planes.
2772 */
2773
2774 #define SKL_DDB_SIZE 896 /* in blocks */
2775 #define BXT_DDB_SIZE 512
2776
2777 static void
2778 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2779 struct drm_crtc *for_crtc,
2780 const struct intel_wm_config *config,
2781 const struct skl_pipe_wm_parameters *params,
2782 struct skl_ddb_entry *alloc /* out */)
2783 {
2784 struct drm_crtc *crtc;
2785 unsigned int pipe_size, ddb_size;
2786 int nth_active_pipe;
2787
2788 if (!params->active) {
2789 alloc->start = 0;
2790 alloc->end = 0;
2791 return;
2792 }
2793
2794 if (IS_BROXTON(dev))
2795 ddb_size = BXT_DDB_SIZE;
2796 else
2797 ddb_size = SKL_DDB_SIZE;
2798
2799 ddb_size -= 4; /* 4 blocks for bypass path allocation */
2800
2801 nth_active_pipe = 0;
2802 for_each_crtc(dev, crtc) {
2803 if (!to_intel_crtc(crtc)->active)
2804 continue;
2805
2806 if (crtc == for_crtc)
2807 break;
2808
2809 nth_active_pipe++;
2810 }
2811
2812 pipe_size = ddb_size / config->num_pipes_active;
2813 alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2814 alloc->end = alloc->start + pipe_size;
2815 }
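/*
 * Illustrative only (not from the original source): on a hypothetical
 * SKL setup with two active pipes, the usable DDB is 896 - 4 = 892
 * blocks, each pipe gets 892 / 2 = 446 blocks, and the second active
 * pipe ends up with start = 446, end = 892.
 */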
2816
2817 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2818 {
2819 if (config->num_pipes_active == 1)
2820 return 32;
2821
2822 return 8;
2823 }
2824
2825 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2826 {
2827 entry->start = reg & 0x3ff;
2828 entry->end = (reg >> 16) & 0x3ff;
2829 if (entry->end)
2830 entry->end += 1;
2831 }
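/*
 * Illustrative only: a hypothetical PLANE_BUF_CFG value of 0x018f0000
 * decodes to start = 0 and end = 0x18f + 1 = 400 blocks, matching the
 * exclusive-end convention assumed by skl_ddb_entry_size().
 */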
2832
2833 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2834 struct skl_ddb_allocation *ddb /* out */)
2835 {
2836 enum i915_pipe pipe;
2837 int plane;
2838 u32 val;
2839
2840 memset(ddb, 0, sizeof(*ddb));
2841
2842 for_each_pipe(dev_priv, pipe) {
2843 if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
2844 continue;
2845
2846 for_each_plane(dev_priv, pipe, plane) {
2847 val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2848 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2849 val);
2850 }
2851
2852 val = I915_READ(CUR_BUF_CFG(pipe));
2853 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
2854 val);
2855 }
2856 }
2857
2858 static unsigned int
2859 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
2860 {
2861
2862 /* for planar format */
2863 if (p->y_bytes_per_pixel) {
2864 if (y) /* y-plane data rate */
2865 return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
2866 else /* uv-plane data rate */
2867 return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
2868 }
2869
2870 /* for packed formats */
2871 return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2872 }
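/*
 * Illustrative only: for a hypothetical 1920x1080 NV12 plane
 * (y_bytes_per_pixel == 1, bytes_per_pixel == 2 for the uv plane) the
 * y-plane rate is 1920 * 1080 * 1 = 2073600 and the uv-plane rate is
 * (1920/2) * (1080/2) * 2 = 1036800; a packed 32bpp plane of the same
 * size would report 1920 * 1080 * 4 = 8294400.
 */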
2873
2874 /*
2875 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2876 * an 8192x4096@32bpp framebuffer:
2877 * 3 * 4096 * 8192 * 4 < 2^32
2878 */
2879 static unsigned int
2880 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2881 const struct skl_pipe_wm_parameters *params)
2882 {
2883 unsigned int total_data_rate = 0;
2884 int plane;
2885
2886 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2887 const struct intel_plane_wm_parameters *p;
2888
2889 p = &params->plane[plane];
2890 if (!p->enabled)
2891 continue;
2892
2893 total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
2894 if (p->y_bytes_per_pixel) {
2895 total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
2896 }
2897 }
2898
2899 return total_data_rate;
2900 }
2901
2902 static void
2903 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2904 const struct intel_wm_config *config,
2905 const struct skl_pipe_wm_parameters *params,
2906 struct skl_ddb_allocation *ddb /* out */)
2907 {
2908 struct drm_device *dev = crtc->dev;
2909 struct drm_i915_private *dev_priv = dev->dev_private;
2910 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2911 enum i915_pipe pipe = intel_crtc->pipe;
2912 struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2913 uint16_t alloc_size, start, cursor_blocks;
2914 uint16_t minimum[I915_MAX_PLANES];
2915 uint16_t y_minimum[I915_MAX_PLANES];
2916 unsigned int total_data_rate;
2917 int plane;
2918
2919 skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2920 alloc_size = skl_ddb_entry_size(alloc);
2921 if (alloc_size == 0) {
2922 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2923 memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
2924 sizeof(ddb->plane[pipe][PLANE_CURSOR]));
2925 return;
2926 }
2927
2928 cursor_blocks = skl_cursor_allocation(config);
2929 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
2930 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
2931
2932 alloc_size -= cursor_blocks;
2933 alloc->end -= cursor_blocks;
2934
2935 /* 1. Allocate the minimum required blocks for each active plane */
2936 for_each_plane(dev_priv, pipe, plane) {
2937 const struct intel_plane_wm_parameters *p;
2938
2939 p = &params->plane[plane];
2940 if (!p->enabled)
2941 continue;
2942
2943 minimum[plane] = 8;
2944 alloc_size -= minimum[plane];
2945 y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
2946 alloc_size -= y_minimum[plane];
2947 }
2948
2949 /*
2950 * 2. Distribute the remaining space in proportion to the amount of
2951 * data each plane needs to fetch from memory.
2952 *
2953 * FIXME: we may not allocate every single block here.
2954 */
2955 total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2956
2957 start = alloc->start;
2958 for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2959 const struct intel_plane_wm_parameters *p;
2960 unsigned int data_rate, y_data_rate;
2961 uint16_t plane_blocks, y_plane_blocks = 0;
2962
2963 p = &params->plane[plane];
2964 if (!p->enabled)
2965 continue;
2966
2967 data_rate = skl_plane_relative_data_rate(p, 0);
2968
2969 /*
2970 * Allocation for packed formats, or for the uv plane of a planar format:
2971 * promote the expression to 64 bits to avoid overflow; the result is
2972 * smaller than the available space since data_rate / total_data_rate < 1.
2973 */
2974 plane_blocks = minimum[plane];
2975 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
2976 total_data_rate);
2977
2978 ddb->plane[pipe][plane].start = start;
2979 ddb->plane[pipe][plane].end = start + plane_blocks;
2980
2981 start += plane_blocks;
2982
2983 /*
2984 * allocation for y_plane part of planar format:
2985 */
2986 if (p->y_bytes_per_pixel) {
2987 y_data_rate = skl_plane_relative_data_rate(p, 1);
2988 y_plane_blocks = y_minimum[plane];
2989 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
2990 total_data_rate);
2991
2992 ddb->y_plane[pipe][plane].start = start;
2993 ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
2994
2995 start += y_plane_blocks;
2996 }
2997
2998 }
2999
3000 }
3001
3002 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
3003 {
3004 /* TODO: Take into account the scalers once we support them */
3005 return config->base.adjusted_mode.crtc_clock;
3006 }
3007
3008 /*
3009 * The max latency should be 257 (max the punit can code is 255 and we add 2us
3010 * for the read latency) and bytes_per_pixel should always be <= 8, so that
3011 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
3012 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
3013 */
3014 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
3015 uint32_t latency)
3016 {
3017 uint32_t wm_intermediate_val, ret;
3018
3019 if (latency == 0)
3020 return UINT_MAX;
3021
3022 wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
3023 ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
3024
3025 return ret;
3026 }
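/*
 * Illustrative only (not from the original source): assuming pixel_rate
 * in kHz and latency in us as the callers suggest, a hypothetical
 * 148500 kHz pipe at 4 bytes/pixel with a 15us latency needs
 * DIV_ROUND_UP(15 * 148500 * 4 / 512, 1000) = 18 blocks of 512 bytes.
 */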
3027
3028 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
3029 uint32_t horiz_pixels, uint8_t bytes_per_pixel,
3030 uint64_t tiling, uint32_t latency)
3031 {
3032 uint32_t ret;
3033 uint32_t plane_bytes_per_line, plane_blocks_per_line;
3034 uint32_t wm_intermediate_val;
3035
3036 if (latency == 0)
3037 return UINT_MAX;
3038
3039 plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
3040
3041 if (tiling == I915_FORMAT_MOD_Y_TILED ||
3042 tiling == I915_FORMAT_MOD_Yf_TILED) {
3043 plane_bytes_per_line *= 4;
3044 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3045 plane_blocks_per_line /= 4;
3046 } else {
3047 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3048 }
3049
3050 wm_intermediate_val = latency * pixel_rate;
3051 ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
3052 plane_blocks_per_line;
3053
3054 return ret;
3055 }
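/*
 * Illustrative only: with the same assumed units, a linear 1920-wide,
 * 4 bytes/pixel plane has DIV_ROUND_UP(1920 * 4, 512) = 15 blocks per
 * line; at 148500 kHz on a 2200-htotal pipe a 15us latency spans
 * DIV_ROUND_UP(15 * 148500, 2200 * 1000) = 2 lines, so method2 returns
 * 2 * 15 = 30 blocks.
 */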
3056
3057 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
3058 const struct intel_crtc *intel_crtc)
3059 {
3060 struct drm_device *dev = intel_crtc->base.dev;
3061 struct drm_i915_private *dev_priv = dev->dev_private;
3062 const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
3063 enum i915_pipe pipe = intel_crtc->pipe;
3064
3065 if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
3066 sizeof(new_ddb->plane[pipe])))
3067 return true;
3068
3069 if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
3070 sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
3071 return true;
3072
3073 return false;
3074 }
3075
3076 static void skl_compute_wm_global_parameters(struct drm_device *dev,
3077 struct intel_wm_config *config)
3078 {
3079 struct drm_crtc *crtc;
3080 struct drm_plane *plane;
3081
3082 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3083 config->num_pipes_active += to_intel_crtc(crtc)->active;
3084
3085 /* FIXME: I don't think we need those two global parameters on SKL */
3086 list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3087 struct intel_plane *intel_plane = to_intel_plane(plane);
3088
3089 config->sprites_enabled |= intel_plane->wm.enabled;
3090 config->sprites_scaled |= intel_plane->wm.scaled;
3091 }
3092 }
3093
3094 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
3095 struct skl_pipe_wm_parameters *p)
3096 {
3097 struct drm_device *dev = crtc->dev;
3098 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3099 enum i915_pipe pipe = intel_crtc->pipe;
3100 struct drm_plane *plane;
3101 struct drm_framebuffer *fb;
3102 int i = 1; /* Index for sprite planes start */
3103
3104 p->active = intel_crtc->active;
3105 if (p->active) {
3106 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
3107 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
3108
3109 fb = crtc->primary->state->fb;
3110 /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
3111 if (fb) {
3112 p->plane[0].enabled = true;
3113 p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3114 drm_format_plane_cpp(fb->pixel_format, 1) :
3115 drm_format_plane_cpp(fb->pixel_format, 0);
3116 p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
3117 drm_format_plane_cpp(fb->pixel_format, 0) : 0;
3118 p->plane[0].tiling = fb->modifier[0];
3119 } else {
3120 p->plane[0].enabled = false;
3121 p->plane[0].bytes_per_pixel = 0;
3122 p->plane[0].y_bytes_per_pixel = 0;
3123 p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
3124 }
3125 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
3126 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
3127 p->plane[0].rotation = crtc->primary->state->rotation;
3128
3129 fb = crtc->cursor->state->fb;
3130 p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
3131 if (fb) {
3132 p->plane[PLANE_CURSOR].enabled = true;
3133 p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
3134 p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
3135 p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
3136 } else {
3137 p->plane[PLANE_CURSOR].enabled = false;
3138 p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
3139 p->plane[PLANE_CURSOR].horiz_pixels = 64;
3140 p->plane[PLANE_CURSOR].vert_pixels = 64;
3141 }
3142 }
3143
3144 list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
3145 struct intel_plane *intel_plane = to_intel_plane(plane);
3146
3147 if (intel_plane->pipe == pipe &&
3148 plane->type == DRM_PLANE_TYPE_OVERLAY)
3149 p->plane[i++] = intel_plane->wm;
3150 }
3151 }
3152
3153 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
3154 struct skl_pipe_wm_parameters *p,
3155 struct intel_plane_wm_parameters *p_params,
3156 uint16_t ddb_allocation,
3157 int level,
3158 uint16_t *out_blocks, /* out */
3159 uint8_t *out_lines /* out */)
3160 {
3161 uint32_t latency = dev_priv->wm.skl_latency[level];
3162 uint32_t method1, method2;
3163 uint32_t plane_bytes_per_line, plane_blocks_per_line;
3164 uint32_t res_blocks, res_lines;
3165 uint32_t selected_result;
3166 uint8_t bytes_per_pixel;
3167
3168 if (latency == 0 || !p->active || !p_params->enabled)
3169 return false;
3170
3171 bytes_per_pixel = p_params->y_bytes_per_pixel ?
3172 p_params->y_bytes_per_pixel :
3173 p_params->bytes_per_pixel;
3174 method1 = skl_wm_method1(p->pixel_rate,
3175 bytes_per_pixel,
3176 latency);
3177 method2 = skl_wm_method2(p->pixel_rate,
3178 p->pipe_htotal,
3179 p_params->horiz_pixels,
3180 bytes_per_pixel,
3181 p_params->tiling,
3182 latency);
3183
3184 plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
3185 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
3186
3187 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3188 p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
3189 uint32_t min_scanlines = 4;
3190 uint32_t y_tile_minimum;
3191 if (intel_rotation_90_or_270(p_params->rotation)) {
3192 switch (p_params->bytes_per_pixel) {
3193 case 1:
3194 min_scanlines = 16;
3195 break;
3196 case 2:
3197 min_scanlines = 8;
3198 break;
3199 case 8:
3200 WARN(1, "Unsupported pixel depth for rotation");
3201 }
3202 }
3203 y_tile_minimum = plane_blocks_per_line * min_scanlines;
3204 selected_result = max(method2, y_tile_minimum);
3205 } else {
3206 if ((ddb_allocation / plane_blocks_per_line) >= 1)
3207 selected_result = min(method1, method2);
3208 else
3209 selected_result = method1;
3210 }
3211
3212 res_blocks = selected_result + 1;
3213 res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
3214
3215 if (level >= 1 && level <= 7) {
3216 if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
3217 p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
3218 res_lines += 4;
3219 else
3220 res_blocks++;
3221 }
3222
3223 if (res_blocks >= ddb_allocation || res_lines > 31)
3224 return false;
3225
3226 *out_blocks = res_blocks;
3227 *out_lines = res_lines;
3228
3229 return true;
3230 }
3231
3232 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
3233 struct skl_ddb_allocation *ddb,
3234 struct skl_pipe_wm_parameters *p,
3235 enum i915_pipe pipe,
3236 int level,
3237 int num_planes,
3238 struct skl_wm_level *result)
3239 {
3240 uint16_t ddb_blocks;
3241 int i;
3242
3243 for (i = 0; i < num_planes; i++) {
3244 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
3245
3246 result->plane_en[i] = skl_compute_plane_wm(dev_priv,
3247 p, &p->plane[i],
3248 ddb_blocks,
3249 level,
3250 &result->plane_res_b[i],
3251 &result->plane_res_l[i]);
3252 }
3253
3254 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
3255 result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
3256 &p->plane[PLANE_CURSOR],
3257 ddb_blocks, level,
3258 &result->plane_res_b[PLANE_CURSOR],
3259 &result->plane_res_l[PLANE_CURSOR]);
3260 }
3261
3262 static uint32_t
3263 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
3264 {
3265 if (!to_intel_crtc(crtc)->active)
3266 return 0;
3267
3268 if (WARN_ON(p->pixel_rate == 0))
3269 return 0;
3270
3271 return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
3272 }
3273
3274 static void skl_compute_transition_wm(struct drm_crtc *crtc,
3275 struct skl_pipe_wm_parameters *params,
3276 struct skl_wm_level *trans_wm /* out */)
3277 {
3278 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3279 int i;
3280
3281 if (!params->active)
3282 return;
3283
3284 /* Until we know more, just disable transition WMs */
3285 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3286 trans_wm->plane_en[i] = false;
3287 trans_wm->plane_en[PLANE_CURSOR] = false;
3288 }
3289
3290 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
3291 struct skl_ddb_allocation *ddb,
3292 struct skl_pipe_wm_parameters *params,
3293 struct skl_pipe_wm *pipe_wm)
3294 {
3295 struct drm_device *dev = crtc->dev;
3296 const struct drm_i915_private *dev_priv = dev->dev_private;
3297 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3298 int level, max_level = ilk_wm_max_level(dev);
3299
3300 for (level = 0; level <= max_level; level++) {
3301 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
3302 level, intel_num_planes(intel_crtc),
3303 &pipe_wm->wm[level]);
3304 }
3305 pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
3306
3307 skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
3308 }
3309
3310 static void skl_compute_wm_results(struct drm_device *dev,
3311 struct skl_pipe_wm_parameters *p,
3312 struct skl_pipe_wm *p_wm,
3313 struct skl_wm_values *r,
3314 struct intel_crtc *intel_crtc)
3315 {
3316 int level, max_level = ilk_wm_max_level(dev);
3317 enum i915_pipe pipe = intel_crtc->pipe;
3318 uint32_t temp;
3319 int i;
3320
3321 for (level = 0; level <= max_level; level++) {
3322 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3323 temp = 0;
3324
3325 temp |= p_wm->wm[level].plane_res_l[i] <<
3326 PLANE_WM_LINES_SHIFT;
3327 temp |= p_wm->wm[level].plane_res_b[i];
3328 if (p_wm->wm[level].plane_en[i])
3329 temp |= PLANE_WM_EN;
3330
3331 r->plane[pipe][i][level] = temp;
3332 }
3333
3334 temp = 0;
3335
3336 temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3337 temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
3338
3339 if (p_wm->wm[level].plane_en[PLANE_CURSOR])
3340 temp |= PLANE_WM_EN;
3341
3342 r->plane[pipe][PLANE_CURSOR][level] = temp;
3343
3344 }
3345
3346 /* transition WMs */
3347 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3348 temp = 0;
3349 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
3350 temp |= p_wm->trans_wm.plane_res_b[i];
3351 if (p_wm->trans_wm.plane_en[i])
3352 temp |= PLANE_WM_EN;
3353
3354 r->plane_trans[pipe][i] = temp;
3355 }
3356
3357 temp = 0;
3358 temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
3359 temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
3360 if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
3361 temp |= PLANE_WM_EN;
3362
3363 r->plane_trans[pipe][PLANE_CURSOR] = temp;
3364
3365 r->wm_linetime[pipe] = p_wm->linetime;
3366 }
3367
3368 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
3369 const struct skl_ddb_entry *entry)
3370 {
3371 if (entry->end)
3372 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
3373 else
3374 I915_WRITE(reg, 0);
3375 }
3376
3377 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
3378 const struct skl_wm_values *new)
3379 {
3380 struct drm_device *dev = dev_priv->dev;
3381 struct intel_crtc *crtc;
3382
3383 list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
3384 int i, level, max_level = ilk_wm_max_level(dev);
3385 enum i915_pipe pipe = crtc->pipe;
3386
3387 if (!new->dirty[pipe])
3388 continue;
3389
3390 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
3391
3392 for (level = 0; level <= max_level; level++) {
3393 for (i = 0; i < intel_num_planes(crtc); i++)
3394 I915_WRITE(PLANE_WM(pipe, i, level),
3395 new->plane[pipe][i][level]);
3396 I915_WRITE(CUR_WM(pipe, level),
3397 new->plane[pipe][PLANE_CURSOR][level]);
3398 }
3399 for (i = 0; i < intel_num_planes(crtc); i++)
3400 I915_WRITE(PLANE_WM_TRANS(pipe, i),
3401 new->plane_trans[pipe][i]);
3402 I915_WRITE(CUR_WM_TRANS(pipe),
3403 new->plane_trans[pipe][PLANE_CURSOR]);
3404
3405 for (i = 0; i < intel_num_planes(crtc); i++) {
3406 skl_ddb_entry_write(dev_priv,
3407 PLANE_BUF_CFG(pipe, i),
3408 &new->ddb.plane[pipe][i]);
3409 skl_ddb_entry_write(dev_priv,
3410 PLANE_NV12_BUF_CFG(pipe, i),
3411 &new->ddb.y_plane[pipe][i]);
3412 }
3413
3414 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
3415 &new->ddb.plane[pipe][PLANE_CURSOR]);
3416 }
3417 }
3418
3419 /*
3420 * When setting up a new DDB allocation arrangement, we need to correctly
3421 * sequence the times at which the new allocations for the pipes are taken into
3422 * account or we'll have pipes fetching from space previously allocated to
3423 * another pipe.
3424 *
3425 * Roughly the sequence looks like:
3426 * 1. re-allocate the pipe(s) with the allocation being reduced and not
3427 * overlapping with a previous light-up pipe (another way to put it is:
3428 * pipes with their new allocation strictly included in their old ones).
3429 * 2. re-allocate the other pipes that get their allocation reduced
3430 * 3. allocate the pipes having their allocation increased
3431 *
3432 * Steps 1. and 2. are here to take care of the following case:
3433 * - Initially DDB looks like this:
3434 * | B | C |
3435 * - enable pipe A.
3436 * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
3437 * allocation
3438 * | A | B | C |
3439 *
3440 * We need to sequence the re-allocation: C, B, A (and not B, C, A).
3441 */
3442
3443 static void
3444 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum i915_pipe pipe, int pass)
3445 {
3446 int plane;
3447
3448 DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
3449
3450 for_each_plane(dev_priv, pipe, plane) {
3451 I915_WRITE(PLANE_SURF(pipe, plane),
3452 I915_READ(PLANE_SURF(pipe, plane)));
3453 }
3454 I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
3455 }
3456
3457 static bool
3458 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
3459 const struct skl_ddb_allocation *new,
3460 enum i915_pipe pipe)
3461 {
3462 uint16_t old_size, new_size;
3463
3464 old_size = skl_ddb_entry_size(&old->pipe[pipe]);
3465 new_size = skl_ddb_entry_size(&new->pipe[pipe]);
3466
3467 return old_size != new_size &&
3468 new->pipe[pipe].start >= old->pipe[pipe].start &&
3469 new->pipe[pipe].end <= old->pipe[pipe].end;
3470 }
3471
3472 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
3473 struct skl_wm_values *new_values)
3474 {
3475 struct drm_device *dev = dev_priv->dev;
3476 struct skl_ddb_allocation *cur_ddb, *new_ddb;
3477 bool reallocated[I915_MAX_PIPES] = {};
3478 struct intel_crtc *crtc;
3479 enum i915_pipe pipe;
3480
3481 new_ddb = &new_values->ddb;
3482 cur_ddb = &dev_priv->wm.skl_hw.ddb;
3483
3484 /*
3485 * First pass: flush the pipes with the new allocation contained into
3486 * the old space.
3487 *
3488 * We'll wait for the vblank on those pipes to ensure we can safely
3489 * re-allocate the freed space without this pipe fetching from it.
3490 */
3491 for_each_intel_crtc(dev, crtc) {
3492 if (!crtc->active)
3493 continue;
3494
3495 pipe = crtc->pipe;
3496
3497 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
3498 continue;
3499
3500 skl_wm_flush_pipe(dev_priv, pipe, 1);
3501 intel_wait_for_vblank(dev, pipe);
3502
3503 reallocated[pipe] = true;
3504 }
3505
3506
3507 /*
3508 * Second pass: flush the pipes that are having their allocation
3509 * reduced, but overlapping with a previous allocation.
3510 *
3511 * Here as well we need to wait for the vblank to make sure the freed
3512 * space is not used anymore.
3513 */
3514 for_each_intel_crtc(dev, crtc) {
3515 if (!crtc->active)
3516 continue;
3517
3518 pipe = crtc->pipe;
3519
3520 if (reallocated[pipe])
3521 continue;
3522
3523 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3524 skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3525 skl_wm_flush_pipe(dev_priv, pipe, 2);
3526 intel_wait_for_vblank(dev, pipe);
3527 reallocated[pipe] = true;
3528 }
3529 }
3530
3531 /*
3532 * Third pass: flush the pipes that got more space allocated.
3533 *
3534 * We don't need to actively wait for the update here, next vblank
3535 * will just get more DDB space with the correct WM values.
3536 */
3537 for_each_intel_crtc(dev, crtc) {
3538 if (!crtc->active)
3539 continue;
3540
3541 pipe = crtc->pipe;
3542
3543 /*
3544 * At this point, only the pipes that were given more space than
3545 * before are left to re-allocate.
3546 */
3547 if (reallocated[pipe])
3548 continue;
3549
3550 skl_wm_flush_pipe(dev_priv, pipe, 3);
3551 }
3552 }
3553
3554 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3555 struct skl_pipe_wm_parameters *params,
3556 struct intel_wm_config *config,
3557 struct skl_ddb_allocation *ddb, /* out */
3558 struct skl_pipe_wm *pipe_wm /* out */)
3559 {
3560 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3561
3562 skl_compute_wm_pipe_parameters(crtc, params);
3563 skl_allocate_pipe_ddb(crtc, config, params, ddb);
3564 skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3565
3566 if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3567 return false;
3568
3569 intel_crtc->wm.skl_active = *pipe_wm;
3570
3571 return true;
3572 }
3573
3574 static void skl_update_other_pipe_wm(struct drm_device *dev,
3575 struct drm_crtc *crtc,
3576 struct intel_wm_config *config,
3577 struct skl_wm_values *r)
3578 {
3579 struct intel_crtc *intel_crtc;
3580 struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3581
3582 /*
3583 * If the WM update hasn't changed the allocation for this_crtc (the
3584 * crtc we are currently computing the new WM values for), other
3585 * enabled crtcs will keep the same allocation and we don't need to
3586 * recompute anything for them.
3587 */
3588 if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3589 return;
3590
3591 /*
3592 * Otherwise, because of this_crtc being freshly enabled/disabled, the
3593 * other active pipes need new DDB allocation and WM values.
3594 */
3595 list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3596 base.head) {
3597 struct skl_pipe_wm_parameters params = {};
3598 struct skl_pipe_wm pipe_wm = {};
3599 bool wm_changed;
3600
3601 if (this_crtc->pipe == intel_crtc->pipe)
3602 continue;
3603
3604 if (!intel_crtc->active)
3605 continue;
3606
3607 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3608 						&params, config,
3609 &r->ddb, &pipe_wm);
3610
3611 /*
3612 * If we end up re-computing the other pipe WM values, it's
3613 * because it was really needed, so we expect the WM values to
3614 * be different.
3615 */
3616 WARN_ON(!wm_changed);
3617
3618 		skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3619 r->dirty[intel_crtc->pipe] = true;
3620 }
3621 }
3622
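/*
 * Reset the software book-keeping for one pipe: line time, per-plane and
 * transition watermark words, and the DDB entries, so the pipe can be
 * recomputed from scratch.
 */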
3623 static void skl_clear_wm(struct skl_wm_values *watermarks, enum i915_pipe pipe)
3624 {
3625 watermarks->wm_linetime[pipe] = 0;
3626 memset(watermarks->plane[pipe], 0,
3627 sizeof(uint32_t) * 8 * I915_MAX_PLANES);
3628 memset(watermarks->plane_trans[pipe],
3629 0, sizeof(uint32_t) * I915_MAX_PLANES);
3630 watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
3631
3632 /* Clear ddb entries for pipe */
3633 memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
3634 memset(&watermarks->ddb.plane[pipe], 0,
3635 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3636 memset(&watermarks->ddb.y_plane[pipe], 0,
3637 sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
3638 memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
3639 sizeof(struct skl_ddb_entry));
3640
3641 }
3642
3643 static void skl_update_wm(struct drm_crtc *crtc)
3644 {
3645 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3646 struct drm_device *dev = crtc->dev;
3647 struct drm_i915_private *dev_priv = dev->dev_private;
3648 struct skl_pipe_wm_parameters params = {};
3649 struct skl_wm_values *results = &dev_priv->wm.skl_results;
3650 struct skl_pipe_wm pipe_wm = {};
3651 struct intel_wm_config config = {};
3652
3653
3654 /* Clear all dirty flags */
3655 memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
3656
3657 skl_clear_wm(results, intel_crtc->pipe);
3658
3659 skl_compute_wm_global_parameters(dev, &config);
3660
3661 	if (!skl_update_pipe_wm(crtc, &params, &config,
3662 &results->ddb, &pipe_wm))
3663 return;
3664
3665 	skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3666 results->dirty[intel_crtc->pipe] = true;
3667
3668 skl_update_other_pipe_wm(dev, crtc, &config, results);
3669 skl_write_wm_values(dev_priv, results);
3670 skl_flush_wm_values(dev_priv, results);
3671
3672 /* store the new configuration */
3673 dev_priv->wm.skl_hw = *results;
3674 }
3675
3676 static void
3677 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3678 uint32_t sprite_width, uint32_t sprite_height,
3679 int pixel_size, bool enabled, bool scaled)
3680 {
3681 struct intel_plane *intel_plane = to_intel_plane(plane);
3682 struct drm_framebuffer *fb = plane->state->fb;
3683
3684 intel_plane->wm.enabled = enabled;
3685 intel_plane->wm.scaled = scaled;
3686 intel_plane->wm.horiz_pixels = sprite_width;
3687 intel_plane->wm.vert_pixels = sprite_height;
3688 intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
3689
3690 /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
3691 intel_plane->wm.bytes_per_pixel =
3692 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3693 drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
3694 intel_plane->wm.y_bytes_per_pixel =
3695 (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
3696 drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
3697
3698 /*
3699 * Framebuffer can be NULL on plane disable, but it does not
3700 * matter for watermarks if we assume no tiling in that case.
3701 */
3702 if (fb)
3703 intel_plane->wm.tiling = fb->modifier[0];
3704 intel_plane->wm.rotation = plane->state->rotation;
3705
3706 skl_update_wm(crtc);
3707 }
3708
3709 static void ilk_update_wm(struct drm_crtc *crtc)
3710 {
3711 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3712 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
3713 struct drm_device *dev = crtc->dev;
3714 struct drm_i915_private *dev_priv = dev->dev_private;
3715 struct ilk_wm_maximums max;
3716 static const struct ilk_wm_values zero_values;
3717 struct ilk_wm_values results = zero_values;
3718 enum intel_ddb_partitioning partitioning;
3719 static const struct intel_pipe_wm zero_wm;
3720 struct intel_pipe_wm pipe_wm = zero_wm;
3721 struct intel_pipe_wm lp_wm_1_2 = zero_wm, lp_wm_5_6 = zero_wm,
3722 *best_lp_wm;
3723 static const struct intel_wm_config zero_config;
3724 struct intel_wm_config config = zero_config;
3725
3726 WARN_ON(cstate->base.active != intel_crtc->active);
3727
3728 intel_compute_pipe_wm(cstate, &pipe_wm);
3729
3730 if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3731 return;
3732
3733 intel_crtc->wm.active = pipe_wm;
3734
3735 ilk_compute_wm_config(dev, &config);
3736
3737 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3738 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3739
3740 /* 5/6 split only in single pipe config on IVB+ */
3741 if (INTEL_INFO(dev)->gen >= 7 &&
3742 config.num_pipes_active == 1 && config.sprites_enabled) {
3743 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3744 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3745
3746 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3747 } else {
3748 best_lp_wm = &lp_wm_1_2;
3749 }
3750
3751 partitioning = (best_lp_wm == &lp_wm_1_2) ?
3752 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3753
3754 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3755
3756 ilk_write_wm_values(dev_priv, &results);
3757 }
3758
3759 static void
3760 ilk_update_sprite_wm(struct drm_plane *plane,
3761 struct drm_crtc *crtc,
3762 uint32_t sprite_width, uint32_t sprite_height,
3763 int pixel_size, bool enabled, bool scaled)
3764 {
3765 struct drm_device *dev = plane->dev;
3766 struct intel_plane *intel_plane = to_intel_plane(plane);
3767
3768 /*
3769 * IVB workaround: must disable low power watermarks for at least
3770 * one frame before enabling scaling. LP watermarks can be re-enabled
3771 * when scaling is disabled.
3772 *
3773 * WaCxSRDisabledForSpriteScaling:ivb
3774 */
3775 if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3776 intel_wait_for_vblank(dev, intel_plane->pipe);
3777
3778 ilk_update_wm(crtc);
3779 }
3780
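/*
 * Decode one watermark register word (enable bit, block count and line
 * count fields) into the software "active" state. The same field layout
 * is shared by the per-level, cursor and transition watermark registers,
 * which is why a single helper handles all four combinations.
 */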
3781 static void skl_pipe_wm_active_state(uint32_t val,
3782 struct skl_pipe_wm *active,
3783 bool is_transwm,
3784 bool is_cursor,
3785 int i,
3786 int level)
3787 {
3788 bool is_enabled = (val & PLANE_WM_EN) != 0;
3789
3790 if (!is_transwm) {
3791 if (!is_cursor) {
3792 active->wm[level].plane_en[i] = is_enabled;
3793 active->wm[level].plane_res_b[i] =
3794 val & PLANE_WM_BLOCKS_MASK;
3795 active->wm[level].plane_res_l[i] =
3796 (val >> PLANE_WM_LINES_SHIFT) &
3797 PLANE_WM_LINES_MASK;
3798 } else {
3799 active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
3800 active->wm[level].plane_res_b[PLANE_CURSOR] =
3801 val & PLANE_WM_BLOCKS_MASK;
3802 active->wm[level].plane_res_l[PLANE_CURSOR] =
3803 (val >> PLANE_WM_LINES_SHIFT) &
3804 PLANE_WM_LINES_MASK;
3805 }
3806 } else {
3807 if (!is_cursor) {
3808 active->trans_wm.plane_en[i] = is_enabled;
3809 active->trans_wm.plane_res_b[i] =
3810 val & PLANE_WM_BLOCKS_MASK;
3811 active->trans_wm.plane_res_l[i] =
3812 (val >> PLANE_WM_LINES_SHIFT) &
3813 PLANE_WM_LINES_MASK;
3814 } else {
3815 active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
3816 active->trans_wm.plane_res_b[PLANE_CURSOR] =
3817 val & PLANE_WM_BLOCKS_MASK;
3818 active->trans_wm.plane_res_l[PLANE_CURSOR] =
3819 (val >> PLANE_WM_LINES_SHIFT) &
3820 PLANE_WM_LINES_MASK;
3821 }
3822 }
3823 }
3824
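/*
 * Read back the SKL watermark registers for one pipe into the hardware
 * snapshot and, if the pipe is active, decode them into the software
 * "active" watermark state used for later comparisons.
 */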
3825 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3826 {
3827 struct drm_device *dev = crtc->dev;
3828 struct drm_i915_private *dev_priv = dev->dev_private;
3829 struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3830 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3831 struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3832 enum i915_pipe pipe = intel_crtc->pipe;
3833 int level, i, max_level;
3834 uint32_t temp;
3835
3836 max_level = ilk_wm_max_level(dev);
3837
3838 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3839
3840 for (level = 0; level <= max_level; level++) {
3841 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3842 hw->plane[pipe][i][level] =
3843 I915_READ(PLANE_WM(pipe, i, level));
3844 hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
3845 }
3846
3847 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3848 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3849 hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
3850
3851 if (!intel_crtc->active)
3852 return;
3853
3854 hw->dirty[pipe] = true;
3855
3856 active->linetime = hw->wm_linetime[pipe];
3857
3858 for (level = 0; level <= max_level; level++) {
3859 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3860 temp = hw->plane[pipe][i][level];
3861 skl_pipe_wm_active_state(temp, active, false,
3862 false, i, level);
3863 }
3864 temp = hw->plane[pipe][PLANE_CURSOR][level];
3865 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3866 }
3867
3868 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3869 temp = hw->plane_trans[pipe][i];
3870 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3871 }
3872
3873 temp = hw->plane_trans[pipe][PLANE_CURSOR];
3874 skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3875 }
3876
3877 void skl_wm_get_hw_state(struct drm_device *dev)
3878 {
3879 struct drm_i915_private *dev_priv = dev->dev_private;
3880 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3881 struct drm_crtc *crtc;
3882
3883 skl_ddb_get_hw_state(dev_priv, ddb);
3884 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3885 skl_pipe_wm_get_hw_state(crtc);
3886 }
3887
3888 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3889 {
3890 struct drm_device *dev = crtc->dev;
3891 struct drm_i915_private *dev_priv = dev->dev_private;
3892 struct ilk_wm_values *hw = &dev_priv->wm.hw;
3893 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3894 struct intel_pipe_wm *active = &intel_crtc->wm.active;
3895 enum i915_pipe pipe = intel_crtc->pipe;
3896 static const unsigned int wm0_pipe_reg[] = {
3897 [PIPE_A] = WM0_PIPEA_ILK,
3898 [PIPE_B] = WM0_PIPEB_ILK,
3899 [PIPE_C] = WM0_PIPEC_IVB,
3900 };
3901
3902 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3903 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3904 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3905
3906 memset(active, 0, sizeof(*active));
3907
3908 active->pipe_enabled = intel_crtc->active;
3909
3910 if (active->pipe_enabled) {
3911 u32 tmp = hw->wm_pipe[pipe];
3912
3913 /*
3914 * For active pipes LP0 watermark is marked as
3915 	 * enabled, and LP1+ watermarks as disabled since
3916 * we can't really reverse compute them in case
3917 * multiple pipes are active.
3918 */
3919 active->wm[0].enable = true;
3920 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3921 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3922 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3923 active->linetime = hw->wm_linetime[pipe];
3924 } else {
3925 int level, max_level = ilk_wm_max_level(dev);
3926
3927 /*
3928 * For inactive pipes, all watermark levels
3929 * should be marked as enabled but zeroed,
3930 * which is what we'd compute them to.
3931 */
3932 for (level = 0; level <= max_level; level++)
3933 active->wm[level].enable = true;
3934 }
3935 }
3936
3937 #define _FW_WM(value, plane) \
3938 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
3939 #define _FW_WM_VLV(value, plane) \
3940 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
3941
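/*
 * Snapshot the VLV/CHV drain-latency (DDL) and watermark (DSPFW) registers
 * into the software state; DSPHOWM supplies the extra high bit that
 * extends each watermark field read below.
 */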
3942 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
3943 struct vlv_wm_values *wm)
3944 {
3945 enum i915_pipe pipe;
3946 uint32_t tmp;
3947
3948 for_each_pipe(dev_priv, pipe) {
3949 tmp = I915_READ(VLV_DDL(pipe));
3950
3951 wm->ddl[pipe].primary =
3952 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3953 wm->ddl[pipe].cursor =
3954 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3955 wm->ddl[pipe].sprite[0] =
3956 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3957 wm->ddl[pipe].sprite[1] =
3958 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
3959 }
3960
3961 tmp = I915_READ(DSPFW1);
3962 wm->sr.plane = _FW_WM(tmp, SR);
3963 wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
3964 wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
3965 wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
3966
3967 tmp = I915_READ(DSPFW2);
3968 wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
3969 wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
3970 wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
3971
3972 tmp = I915_READ(DSPFW3);
3973 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
3974
3975 if (IS_CHERRYVIEW(dev_priv)) {
3976 tmp = I915_READ(DSPFW7_CHV);
3977 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
3978 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
3979
3980 tmp = I915_READ(DSPFW8_CHV);
3981 wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
3982 wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
3983
3984 tmp = I915_READ(DSPFW9_CHV);
3985 wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
3986 wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
3987
3988 tmp = I915_READ(DSPHOWM);
3989 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
3990 wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
3991 wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
3992 wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
3993 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
3994 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
3995 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
3996 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
3997 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
3998 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
3999 } else {
4000 tmp = I915_READ(DSPFW7);
4001 wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
4002 wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
4003
4004 tmp = I915_READ(DSPHOWM);
4005 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
4006 wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
4007 wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
4008 wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
4009 wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
4010 wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
4011 wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
4012 }
4013 }
4014
4015 #undef _FW_WM
4016 #undef _FW_WM_VLV
4017
4018 void vlv_wm_get_hw_state(struct drm_device *dev)
4019 {
4020 struct drm_i915_private *dev_priv = to_i915(dev);
4021 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
4022 struct intel_plane *plane;
4023 enum i915_pipe pipe;
4024 u32 val;
4025
4026 vlv_read_wm_values(dev_priv, wm);
4027
4028 for_each_intel_plane(dev, plane) {
4029 switch (plane->base.type) {
4030 int sprite;
4031 case DRM_PLANE_TYPE_CURSOR:
4032 plane->wm.fifo_size = 63;
4033 break;
4034 case DRM_PLANE_TYPE_PRIMARY:
4035 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
4036 break;
4037 case DRM_PLANE_TYPE_OVERLAY:
4038 sprite = plane->plane;
4039 plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
4040 break;
4041 }
4042 }
4043
4044 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
4045 wm->level = VLV_WM_LEVEL_PM2;
4046
4047 if (IS_CHERRYVIEW(dev_priv)) {
4048 mutex_lock(&dev_priv->rps.hw_lock);
4049
4050 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
4051 if (val & DSP_MAXFIFO_PM5_ENABLE)
4052 wm->level = VLV_WM_LEVEL_PM5;
4053
4054 /*
4055 * If DDR DVFS is disabled in the BIOS, Punit
4056 * will never ack the request. So if that happens
4057 * assume we don't have to enable/disable DDR DVFS
4058 * dynamically. To test that just set the REQ_ACK
4059 * bit to poke the Punit, but don't change the
4060 * HIGH/LOW bits so that we don't actually change
4061 * the current state.
4062 */
4063 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4064 val |= FORCE_DDR_FREQ_REQ_ACK;
4065 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
4066
4067 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
4068 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
4069 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
4070 "assuming DDR DVFS is disabled\n");
4071 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
4072 } else {
4073 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
4074 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
4075 wm->level = VLV_WM_LEVEL_DDR_DVFS;
4076 }
4077
4078 mutex_unlock(&dev_priv->rps.hw_lock);
4079 }
4080
4081 for_each_pipe(dev_priv, pipe)
4082 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
4083 pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
4084 wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
4085
4086 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
4087 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
4088 }
4089
4090 void ilk_wm_get_hw_state(struct drm_device *dev)
4091 {
4092 struct drm_i915_private *dev_priv = dev->dev_private;
4093 struct ilk_wm_values *hw = &dev_priv->wm.hw;
4094 struct drm_crtc *crtc;
4095
4096 for_each_crtc(dev, crtc)
4097 ilk_pipe_wm_get_hw_state(crtc);
4098
4099 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
4100 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
4101 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
4102
4103 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
4104 if (INTEL_INFO(dev)->gen >= 7) {
4105 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
4106 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
4107 }
4108
4109 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4110 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
4111 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4112 else if (IS_IVYBRIDGE(dev))
4113 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
4114 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
4115
4116 hw->enable_fbc_wm =
4117 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
4118 }
4119
4120 /**
4121 * intel_update_watermarks - update FIFO watermark values based on current modes
4122 *
4123 * Calculate watermark values for the various WM regs based on current mode
4124 * and plane configuration.
4125 *
4126 * There are several cases to deal with here:
4127 * - normal (i.e. non-self-refresh)
4128 * - self-refresh (SR) mode
4129 * - lines are large relative to FIFO size (buffer can hold up to 2)
4130 * - lines are small relative to FIFO size (buffer can hold more than 2
4131 * lines), so need to account for TLB latency
4132 *
4133 * The normal calculation is:
4134 * watermark = dotclock * bytes per pixel * latency
4135 * where latency is platform & configuration dependent (we assume pessimal
4136 * values here).
4137 *
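 * As a purely illustrative example (the numbers are not taken from any
 * real platform): a 100 MHz dotclock at 4 bytes per pixel with an assumed
 * 10 us latency needs roughly 100e6 * 4 * 10e-6 = 4000 bytes of FIFO,
 * which is then converted to FIFO entries and rounded up as noted below.
 *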
4138 * The SR calculation is:
4139 * watermark = (trunc(latency/line time)+1) * surface width *
4140 * bytes per pixel
4141 * where
4142 * line time = htotal / dotclock
4143 * surface width = hdisplay for normal plane and 64 for cursor
4144 * and latency is assumed to be high, as above.
4145 *
4146 * The final value programmed to the register should always be rounded up,
4147 * and include an extra 2 entries to account for clock crossings.
4148 *
4149 * We don't use the sprite, so we can ignore that. And on Crestline we have
4150 * to set the non-SR watermarks to 8.
4151 */
4152 void intel_update_watermarks(struct drm_crtc *crtc)
4153 {
4154 struct drm_i915_private *dev_priv = crtc->dev->dev_private;
4155
4156 if (dev_priv->display.update_wm)
4157 dev_priv->display.update_wm(crtc);
4158 }
4159
4160 void intel_update_sprite_watermarks(struct drm_plane *plane,
4161 struct drm_crtc *crtc,
4162 uint32_t sprite_width,
4163 uint32_t sprite_height,
4164 int pixel_size,
4165 bool enabled, bool scaled)
4166 {
4167 struct drm_i915_private *dev_priv = plane->dev->dev_private;
4168
4169 if (dev_priv->display.update_sprite_wm)
4170 dev_priv->display.update_sprite_wm(plane, crtc,
4171 sprite_width, sprite_height,
4172 pixel_size, enabled, scaled);
4173 }
4174
4175 /**
4176  * Lock protecting IPS-related data structures
4177 */
4178 #ifdef __NetBSD__
4179 spinlock_t mchdev_lock;
4180 #else
4181 DEFINE_SPINLOCK(mchdev_lock);
4182 #endif
4183
4184 /* Global for IPS driver to get at the current i915 device. Protected by
4185 * mchdev_lock. */
4186 static struct drm_i915_private *i915_mch_dev;
4187
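/*
 * Request a new DRPS frequency point. Returns false if the hardware is
 * still busy with a previous command, true once the new request has been
 * written to MEMSWCTL.
 */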
4188 bool ironlake_set_drps(struct drm_device *dev, u8 val)
4189 {
4190 struct drm_i915_private *dev_priv = dev->dev_private;
4191 u16 rgvswctl;
4192
4193 assert_spin_locked(&mchdev_lock);
4194
4195 rgvswctl = I915_READ16(MEMSWCTL);
4196 if (rgvswctl & MEMCTL_CMD_STS) {
4197 DRM_DEBUG("gpu busy, RCS change rejected\n");
4198 return false; /* still busy with another command */
4199 }
4200
4201 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
4202 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
4203 I915_WRITE16(MEMSWCTL, rgvswctl);
4204 POSTING_READ16(MEMSWCTL);
4205
4206 rgvswctl |= MEMCTL_CMD_STS;
4207 I915_WRITE16(MEMSWCTL, rgvswctl);
4208
4209 return true;
4210 }
4211
4212 static void ironlake_enable_drps(struct drm_device *dev)
4213 {
4214 struct drm_i915_private *dev_priv = dev->dev_private;
4215 u32 rgvmodectl = I915_READ(MEMMODECTL);
4216 u8 fmax, fmin, fstart, vstart;
4217
4218 spin_lock_irq(&mchdev_lock);
4219
4220 /* Enable temp reporting */
4221 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
4222 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
4223
4224 /* 100ms RC evaluation intervals */
4225 I915_WRITE(RCUPEI, 100000);
4226 I915_WRITE(RCDNEI, 100000);
4227
4228 /* Set max/min thresholds to 90ms and 80ms respectively */
4229 I915_WRITE(RCBMAXAVG, 90000);
4230 I915_WRITE(RCBMINAVG, 80000);
4231
4232 I915_WRITE(MEMIHYST, 1);
4233
4234 /* Set up min, max, and cur for interrupt handling */
4235 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
4236 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
4237 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
4238 MEMMODE_FSTART_SHIFT;
4239
4240 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
4241 PXVFREQ_PX_SHIFT;
4242
4243 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
4244 dev_priv->ips.fstart = fstart;
4245
4246 dev_priv->ips.max_delay = fstart;
4247 dev_priv->ips.min_delay = fmin;
4248 dev_priv->ips.cur_delay = fstart;
4249
4250 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
4251 fmax, fmin, fstart);
4252
4253 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
4254
4255 /*
4256 * Interrupts will be enabled in ironlake_irq_postinstall
4257 */
4258
4259 I915_WRITE(VIDSTART, vstart);
4260 POSTING_READ(VIDSTART);
4261
4262 rgvmodectl |= MEMMODE_SWMODE_EN;
4263 I915_WRITE(MEMMODECTL, rgvmodectl);
4264
4265 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
4266 DRM_ERROR("stuck trying to change perf mode\n");
4267 mdelay(1);
4268
4269 ironlake_set_drps(dev, fstart);
4270
4271 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
4272 I915_READ(DDREC) + I915_READ(CSIEC);
4273 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
4274 dev_priv->ips.last_count2 = I915_READ(GFXEC);
4275 dev_priv->ips.last_time2 = ktime_get_raw_ns();
4276
4277 spin_unlock_irq(&mchdev_lock);
4278 }
4279
4280 static void ironlake_disable_drps(struct drm_device *dev)
4281 {
4282 struct drm_i915_private *dev_priv = dev->dev_private;
4283 u16 rgvswctl;
4284
4285 spin_lock_irq(&mchdev_lock);
4286
4287 rgvswctl = I915_READ16(MEMSWCTL);
4288
4289 /* Ack interrupts, disable EFC interrupt */
4290 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
4291 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
4292 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
4293 I915_WRITE(DEIIR, DE_PCU_EVENT);
4294 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
4295
4296 /* Go back to the starting frequency */
4297 ironlake_set_drps(dev, dev_priv->ips.fstart);
4298 mdelay(1);
4299 rgvswctl |= MEMCTL_CMD_STS;
4300 I915_WRITE(MEMSWCTL, rgvswctl);
4301 mdelay(1);
4302
4303 spin_unlock_irq(&mchdev_lock);
4304 }
4305
4306 /* There's a funny hw issue where the hw returns all 0 when reading from
4307 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
4308  * ourselves, instead of doing an rmw cycle (which might clear all the
4309  * limits and leave the gpu stuck at whatever frequency it currently runs at).
4310 */
4311 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
4312 {
4313 u32 limits;
4314
4315 /* Only set the down limit when we've reached the lowest level to avoid
4316 * getting more interrupts, otherwise leave this clear. This prevents a
4317 * race in the hw when coming out of rc6: There's a tiny window where
4318 * the hw runs at the minimal clock before selecting the desired
4319 	 * frequency; if the down threshold expires in that window we will not
4320 * receive a down interrupt. */
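	/*
	 * Gen9 packs the max/min softlimits at bit offsets 23/14 of the
	 * limits register; earlier generations use offsets 24/16.
	 */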
4321 if (IS_GEN9(dev_priv->dev)) {
4322 limits = (dev_priv->rps.max_freq_softlimit) << 23;
4323 if (val <= dev_priv->rps.min_freq_softlimit)
4324 limits |= (dev_priv->rps.min_freq_softlimit) << 14;
4325 } else {
4326 limits = dev_priv->rps.max_freq_softlimit << 24;
4327 if (val <= dev_priv->rps.min_freq_softlimit)
4328 limits |= dev_priv->rps.min_freq_softlimit << 16;
4329 }
4330
4331 return limits;
4332 }
4333
4334 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
4335 {
4336 int new_power;
4337 u32 threshold_up = 0, threshold_down = 0; /* in % */
4338 u32 ei_up = 0, ei_down = 0;
4339
4340 new_power = dev_priv->rps.power;
4341 switch (dev_priv->rps.power) {
4342 case LOW_POWER:
4343 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
4344 new_power = BETWEEN;
4345 break;
4346
4347 case BETWEEN:
4348 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
4349 new_power = LOW_POWER;
4350 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
4351 new_power = HIGH_POWER;
4352 break;
4353
4354 case HIGH_POWER:
4355 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
4356 new_power = BETWEEN;
4357 break;
4358 }
4359 /* Max/min bins are special */
4360 if (val <= dev_priv->rps.min_freq_softlimit)
4361 new_power = LOW_POWER;
4362 if (val >= dev_priv->rps.max_freq_softlimit)
4363 new_power = HIGH_POWER;
4364 if (new_power == dev_priv->rps.power)
4365 return;
4366
4367 /* Note the units here are not exactly 1us, but 1280ns. */
4368 switch (new_power) {
4369 case LOW_POWER:
4370 /* Upclock if more than 95% busy over 16ms */
4371 ei_up = 16000;
4372 threshold_up = 95;
4373
4374 /* Downclock if less than 85% busy over 32ms */
4375 ei_down = 32000;
4376 threshold_down = 85;
4377 break;
4378
4379 case BETWEEN:
4380 /* Upclock if more than 90% busy over 13ms */
4381 ei_up = 13000;
4382 threshold_up = 90;
4383
4384 /* Downclock if less than 75% busy over 32ms */
4385 ei_down = 32000;
4386 threshold_down = 75;
4387 break;
4388
4389 case HIGH_POWER:
4390 /* Upclock if more than 85% busy over 10ms */
4391 ei_up = 10000;
4392 threshold_up = 85;
4393
4394 /* Downclock if less than 60% busy over 32ms */
4395 ei_down = 32000;
4396 threshold_down = 60;
4397 break;
4398 }
4399
4400 	/* Once BYT can survive dynamic sw freq adjustments without
4401 	 * hanging the system, this restriction can be lifted.
4402 */
4403 if (IS_VALLEYVIEW(dev_priv))
4404 goto skip_hw_write;
4405
4406 I915_WRITE(GEN6_RP_UP_EI,
4407 GT_INTERVAL_FROM_US(dev_priv, ei_up));
4408 I915_WRITE(GEN6_RP_UP_THRESHOLD,
4409 GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
4410
4411 I915_WRITE(GEN6_RP_DOWN_EI,
4412 GT_INTERVAL_FROM_US(dev_priv, ei_down));
4413 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
4414 GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
4415
4416 I915_WRITE(GEN6_RP_CONTROL,
4417 GEN6_RP_MEDIA_TURBO |
4418 GEN6_RP_MEDIA_HW_NORMAL_MODE |
4419 GEN6_RP_MEDIA_IS_GFX |
4420 GEN6_RP_ENABLE |
4421 GEN6_RP_UP_BUSY_AVG |
4422 GEN6_RP_DOWN_IDLE_AVG);
4423
4424 skip_hw_write:
4425 dev_priv->rps.power = new_power;
4426 dev_priv->rps.up_threshold = threshold_up;
4427 dev_priv->rps.down_threshold = threshold_down;
4428 dev_priv->rps.last_adj = 0;
4429 }
4430
4431 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4432 {
4433 u32 mask = 0;
4434
4435 	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
4436 if (val > dev_priv->rps.min_freq_softlimit)
4437 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4438 if (val < dev_priv->rps.max_freq_softlimit)
4439 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4440
4441 mask &= dev_priv->pm_rps_events;
4442
4443 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
4444 }
4445
4446 /* gen6_set_rps is called to update the frequency request, but should also be
4447 * called when the range (min_delay and max_delay) is modified so that we can
4448 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
4449 static void gen6_set_rps(struct drm_device *dev, u8 val)
4450 {
4451 struct drm_i915_private *dev_priv = dev->dev_private;
4452
4453 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4454 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
4455 return;
4456
4457 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4458 WARN_ON(val > dev_priv->rps.max_freq);
4459 WARN_ON(val < dev_priv->rps.min_freq);
4460
4461 /* min/max delay may still have been modified so be sure to
4462 * write the limits value.
4463 */
4464 if (val != dev_priv->rps.cur_freq) {
4465 gen6_set_rps_thresholds(dev_priv, val);
4466
4467 if (IS_GEN9(dev))
4468 I915_WRITE(GEN6_RPNSWREQ,
4469 GEN9_FREQUENCY(val));
4470 else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4471 I915_WRITE(GEN6_RPNSWREQ,
4472 HSW_FREQUENCY(val));
4473 else
4474 I915_WRITE(GEN6_RPNSWREQ,
4475 GEN6_FREQUENCY(val) |
4476 GEN6_OFFSET(0) |
4477 GEN6_AGGRESSIVE_TURBO);
4478 }
4479
4480 /* Make sure we continue to get interrupts
4481 * until we hit the minimum or maximum frequencies.
4482 */
4483 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
4484 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4485
4486 POSTING_READ(GEN6_RPNSWREQ);
4487
4488 dev_priv->rps.cur_freq = val;
4489 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4490 }
4491
4492 static void valleyview_set_rps(struct drm_device *dev, u8 val)
4493 {
4494 struct drm_i915_private *dev_priv = dev->dev_private;
4495
4496 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4497 WARN_ON(val > dev_priv->rps.max_freq);
4498 WARN_ON(val < dev_priv->rps.min_freq);
4499
4500 if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
4501 "Odd GPU freq value\n"))
4502 val &= ~1;
4503
4504 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
4505
4506 if (val != dev_priv->rps.cur_freq) {
4507 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
4508 if (!IS_CHERRYVIEW(dev_priv))
4509 gen6_set_rps_thresholds(dev_priv, val);
4510 }
4511
4512 dev_priv->rps.cur_freq = val;
4513 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
4514 }
4515
4516 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
4517 *
4518  * If Gfx is Idle, then
4519 * 1. Forcewake Media well.
4520 * 2. Request idle freq.
4521 * 3. Release Forcewake of Media well.
4522 */
4523 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
4524 {
4525 u32 val = dev_priv->rps.idle_freq;
4526
4527 if (dev_priv->rps.cur_freq <= val)
4528 return;
4529
4530 /* Wake up the media well, as that takes a lot less
4531 * power than the Render well. */
4532 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
4533 valleyview_set_rps(dev_priv->dev, val);
4534 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
4535 }
4536
4537 void gen6_rps_busy(struct drm_i915_private *dev_priv)
4538 {
4539 mutex_lock(&dev_priv->rps.hw_lock);
4540 if (dev_priv->rps.enabled) {
4541 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
4542 gen6_rps_reset_ei(dev_priv);
4543 I915_WRITE(GEN6_PMINTRMSK,
4544 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
4545 }
4546 mutex_unlock(&dev_priv->rps.hw_lock);
4547 }
4548
4549 void gen6_rps_idle(struct drm_i915_private *dev_priv)
4550 {
4551 struct drm_device *dev = dev_priv->dev;
4552
4553 mutex_lock(&dev_priv->rps.hw_lock);
4554 if (dev_priv->rps.enabled) {
4555 if (IS_VALLEYVIEW(dev))
4556 vlv_set_rps_idle(dev_priv);
4557 else
4558 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4559 dev_priv->rps.last_adj = 0;
4560 I915_WRITE(GEN6_PMINTRMSK,
4561 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
4562 }
4563 mutex_unlock(&dev_priv->rps.hw_lock);
4564
4565 spin_lock(&dev_priv->rps.client_lock);
4566 while (!list_empty(&dev_priv->rps.clients))
4567 list_del_init(dev_priv->rps.clients.next);
4568 spin_unlock(&dev_priv->rps.client_lock);
4569 }
4570
4571 void gen6_rps_boost(struct drm_i915_private *dev_priv,
4572 struct intel_rps_client *rps,
4573 unsigned long submitted)
4574 {
4575 /* This is intentionally racy! We peek at the state here, then
4576 * validate inside the RPS worker.
4577 */
4578 if (!(dev_priv->mm.busy &&
4579 dev_priv->rps.enabled &&
4580 dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
4581 return;
4582
4583 	/* Force an RPS boost (and don't count it against the client) if
4584 * the GPU is severely congested.
4585 */
4586 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
4587 rps = NULL;
4588
4589 spin_lock(&dev_priv->rps.client_lock);
4590 if (rps == NULL || list_empty(&rps->link)) {
4591 spin_lock_irq(&dev_priv->irq_lock);
4592 if (dev_priv->rps.interrupts_enabled) {
4593 dev_priv->rps.client_boost = true;
4594 queue_work(dev_priv->wq, &dev_priv->rps.work);
4595 }
4596 spin_unlock_irq(&dev_priv->irq_lock);
4597
4598 if (rps != NULL) {
4599 list_add(&rps->link, &dev_priv->rps.clients);
4600 rps->boosts++;
4601 } else
4602 dev_priv->rps.boosts++;
4603 }
4604 spin_unlock(&dev_priv->rps.client_lock);
4605 }
4606
4607 void intel_set_rps(struct drm_device *dev, u8 val)
4608 {
4609 if (IS_VALLEYVIEW(dev))
4610 valleyview_set_rps(dev, val);
4611 else
4612 gen6_set_rps(dev, val);
4613 }
4614
4615 static void gen9_disable_rps(struct drm_device *dev)
4616 {
4617 struct drm_i915_private *dev_priv = dev->dev_private;
4618
4619 I915_WRITE(GEN6_RC_CONTROL, 0);
4620 I915_WRITE(GEN9_PG_ENABLE, 0);
4621 }
4622
4623 static void gen6_disable_rps(struct drm_device *dev)
4624 {
4625 struct drm_i915_private *dev_priv = dev->dev_private;
4626
4627 I915_WRITE(GEN6_RC_CONTROL, 0);
4628 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
4629 }
4630
4631 static void cherryview_disable_rps(struct drm_device *dev)
4632 {
4633 struct drm_i915_private *dev_priv = dev->dev_private;
4634
4635 I915_WRITE(GEN6_RC_CONTROL, 0);
4636 }
4637
4638 static void valleyview_disable_rps(struct drm_device *dev)
4639 {
4640 struct drm_i915_private *dev_priv = dev->dev_private;
4641
4642 	/* We do forcewake before disabling RC6; this is what the
4643 	 * BIOS expects when going into suspend. */
4644 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4645
4646 I915_WRITE(GEN6_RC_CONTROL, 0);
4647
4648 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4649 }
4650
4651 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
4652 {
4653 if (IS_VALLEYVIEW(dev)) {
4654 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
4655 mode = GEN6_RC_CTL_RC6_ENABLE;
4656 else
4657 mode = 0;
4658 }
4659 if (HAS_RC6p(dev))
4660 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
4661 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
4662 (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
4663 (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
4664
4665 else
4666 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
4667 (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
4668 }
4669
4670 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
4671 {
4672 /* No RC6 before Ironlake and code is gone for ilk. */
4673 if (INTEL_INFO(dev)->gen < 6)
4674 return 0;
4675
4676 /* Respect the kernel parameter if it is set */
4677 if (enable_rc6 >= 0) {
4678 int mask;
4679
4680 if (HAS_RC6p(dev))
4681 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
4682 INTEL_RC6pp_ENABLE;
4683 else
4684 mask = INTEL_RC6_ENABLE;
4685
4686 if ((enable_rc6 & mask) != enable_rc6)
4687 DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
4688 enable_rc6 & mask, enable_rc6, mask);
4689
4690 return enable_rc6 & mask;
4691 }
4692
4693 if (IS_IVYBRIDGE(dev))
4694 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
4695
4696 return INTEL_RC6_ENABLE;
4697 }
4698
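/*
 * Report the RC6 mode mask requested via the i915.enable_rc6 module
 * parameter.
 */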
4699 int intel_enable_rc6(const struct drm_device *dev)
4700 {
4701 return i915.enable_rc6;
4702 }
4703
4704 static void gen6_init_rps_frequencies(struct drm_device *dev)
4705 {
4706 struct drm_i915_private *dev_priv = dev->dev_private;
4707 uint32_t rp_state_cap;
4708 u32 ddcc_status = 0;
4709 int ret;
4710
4711 /* All of these values are in units of 50MHz */
4712 dev_priv->rps.cur_freq = 0;
4713 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
4714 if (IS_BROXTON(dev)) {
4715 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
4716 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
4717 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4718 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff;
4719 } else {
4720 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
4721 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff;
4722 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff;
4723 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
4724 }
4725
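	/*
	 * Purely illustrative (not a value read from hardware): with the
	 * 50 MHz units above, an RP0 field of 22 would mean 22 * 50 MHz =
	 * 1100 MHz; SKL values are rescaled to 16.66 MHz units further down.
	 */
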
4726 /* hw_max = RP0 until we check for overclocking */
4727 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq;
4728
4729 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4730 if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
4731 ret = sandybridge_pcode_read(dev_priv,
4732 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4733 &ddcc_status);
4734 		if (ret == 0)
4735 dev_priv->rps.efficient_freq =
4736 clamp_t(u8,
4737 ((ddcc_status >> 8) & 0xff),
4738 dev_priv->rps.min_freq,
4739 dev_priv->rps.max_freq);
4740 }
4741
4742 if (IS_SKYLAKE(dev)) {
4743 		/* Store the frequency values in 16.66 MHz units, which is
4744 		 * the natural hardware unit for SKL. */
4745 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
4746 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
4747 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
4748 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
4749 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
4750 }
4751
4752 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
4753
4754 /* Preserve min/max settings in case of re-init */
4755 if (dev_priv->rps.max_freq_softlimit == 0)
4756 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4757
4758 if (dev_priv->rps.min_freq_softlimit == 0) {
4759 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4760 dev_priv->rps.min_freq_softlimit =
4761 max_t(int, dev_priv->rps.efficient_freq,
4762 intel_freq_opcode(dev_priv, 450));
4763 else
4764 dev_priv->rps.min_freq_softlimit =
4765 dev_priv->rps.min_freq;
4766 }
4767 }
4768
4769 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4770 static void gen9_enable_rps(struct drm_device *dev)
4771 {
4772 struct drm_i915_private *dev_priv = dev->dev_private;
4773
4774 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4775
4776 gen6_init_rps_frequencies(dev);
4777
4778 /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
4779 if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
4780 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4781 return;
4782 }
4783
4784 /* Program defaults and thresholds for RPS*/
4785 I915_WRITE(GEN6_RC_VIDEO_FREQ,
4786 GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
4787
4788 /* 1 second timeout*/
4789 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
4790 GT_INTERVAL_FROM_US(dev_priv, 1000000));
4791
4792 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4793
4794 	/* Rely on the gen6_set_rps() call below to program the Up/Down EI
4795 	 * & threshold registers, as well as the RP_CONTROL,
4796 	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
4797 dev_priv->rps.power = HIGH_POWER; /* force a reset */
4798 gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4799
4800 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4801 }
4802
4803 static void gen9_enable_rc6(struct drm_device *dev)
4804 {
4805 struct drm_i915_private *dev_priv = dev->dev_private;
4806 struct intel_engine_cs *ring;
4807 uint32_t rc6_mask = 0;
4808 int unused;
4809
4810 /* 1a: Software RC state - RC0 */
4811 I915_WRITE(GEN6_RC_STATE, 0);
4812
4813 /* 1b: Get forcewake during program sequence. Although the driver
4814 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4815 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4816
4817 /* 2a: Disable RC states. */
4818 I915_WRITE(GEN6_RC_CONTROL, 0);
4819
4820 /* 2b: Program RC6 thresholds.*/
4821
4822 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
4823 if (IS_SKYLAKE(dev))
4824 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
4825 else
4826 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4827 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4828 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4829 for_each_ring(ring, dev_priv, unused)
4830 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4831
4832 if (HAS_GUC_UCODE(dev))
4833 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
4834
4835 I915_WRITE(GEN6_RC_SLEEP, 0);
4836
4837 /* 2c: Program Coarse Power Gating Policies. */
4838 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4839 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4840
4841 /* 3a: Enable RC6 */
4842 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4843 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4844 DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4845 "on" : "off");
4846 /* WaRsUseTimeoutMode */
4847 if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
4848 (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
4849 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
4850 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4851 GEN7_RC_CTL_TO_MODE |
4852 rc6_mask);
4853 } else {
4854 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4855 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4856 GEN6_RC_CTL_EI_MODE(1) |
4857 rc6_mask);
4858 }
4859
4860 /*
4861 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
4862 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
4863 */
4864 if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
4865 ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
4866 I915_WRITE(GEN9_PG_ENABLE, 0);
4867 else
4868 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4869 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
4870
4871 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4872
4873 }
4874
4875 static void gen8_enable_rps(struct drm_device *dev)
4876 {
4877 struct drm_i915_private *dev_priv = dev->dev_private;
4878 struct intel_engine_cs *ring;
4879 uint32_t rc6_mask = 0;
4880 int unused;
4881
4882 /* 1a: Software RC state - RC0 */
4883 I915_WRITE(GEN6_RC_STATE, 0);
4884
4885 /* 1c & 1d: Get forcewake during program sequence. Although the driver
4886 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4887 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4888
4889 /* 2a: Disable RC states. */
4890 I915_WRITE(GEN6_RC_CONTROL, 0);
4891
4892 /* Initialize rps frequencies */
4893 gen6_init_rps_frequencies(dev);
4894
4895 /* 2b: Program RC6 thresholds.*/
4896 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4897 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4898 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4899 for_each_ring(ring, dev_priv, unused)
4900 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4901 I915_WRITE(GEN6_RC_SLEEP, 0);
4902 if (IS_BROADWELL(dev))
4903 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4904 else
4905 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4906
4907 /* 3: Enable RC6 */
4908 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4909 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4910 intel_print_rc6_info(dev, rc6_mask);
4911 if (IS_BROADWELL(dev))
4912 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4913 GEN7_RC_CTL_TO_MODE |
4914 rc6_mask);
4915 else
4916 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4917 GEN6_RC_CTL_EI_MODE(1) |
4918 rc6_mask);
4919
4920 	/* 4: Program defaults and thresholds for RPS */
4921 I915_WRITE(GEN6_RPNSWREQ,
4922 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4923 I915_WRITE(GEN6_RC_VIDEO_FREQ,
4924 HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4925 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4926 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4927
4928 /* Docs recommend 900MHz, and 300 MHz respectively */
4929 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4930 dev_priv->rps.max_freq_softlimit << 24 |
4931 dev_priv->rps.min_freq_softlimit << 16);
4932
4933 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4934 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
4935 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4936 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4937
4938 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4939
4940 /* 5: Enable RPS */
4941 I915_WRITE(GEN6_RP_CONTROL,
4942 GEN6_RP_MEDIA_TURBO |
4943 GEN6_RP_MEDIA_HW_NORMAL_MODE |
4944 GEN6_RP_MEDIA_IS_GFX |
4945 GEN6_RP_ENABLE |
4946 GEN6_RP_UP_BUSY_AVG |
4947 GEN6_RP_DOWN_IDLE_AVG);
4948
4949 	/* 6: Ring frequency + overclocking (our driver does this later) */
4950
4951 dev_priv->rps.power = HIGH_POWER; /* force a reset */
4952 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
4953
4954 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4955 }
4956
4957 static void gen6_enable_rps(struct drm_device *dev)
4958 {
4959 struct drm_i915_private *dev_priv = dev->dev_private;
4960 struct intel_engine_cs *ring;
4961 u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4962 u32 gtfifodbg;
4963 int rc6_mode;
4964 int i, ret;
4965
4966 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4967
4968 /* Here begins a magic sequence of register writes to enable
4969 * auto-downclocking.
4970 *
4971 * Perhaps there might be some value in exposing these to
4972 * userspace...
4973 */
4974 I915_WRITE(GEN6_RC_STATE, 0);
4975
4976 /* Clear the DBG now so we don't confuse earlier errors */
4977 if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4978 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4979 I915_WRITE(GTFIFODBG, gtfifodbg);
4980 }
4981
4982 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4983
4984 /* Initialize rps frequencies */
4985 gen6_init_rps_frequencies(dev);
4986
4987 /* disable the counters and set deterministic thresholds */
4988 I915_WRITE(GEN6_RC_CONTROL, 0);
4989
4990 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4991 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4992 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4993 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4994 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4995
4996 for_each_ring(ring, dev_priv, i)
4997 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4998
4999 I915_WRITE(GEN6_RC_SLEEP, 0);
5000 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
5001 if (IS_IVYBRIDGE(dev))
5002 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
5003 else
5004 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
5005 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
5006 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
5007
5008 /* Check if we are enabling RC6 */
5009 rc6_mode = intel_enable_rc6(dev_priv->dev);
5010 if (rc6_mode & INTEL_RC6_ENABLE)
5011 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
5012
5013 /* We don't use those on Haswell */
5014 if (!IS_HASWELL(dev)) {
5015 if (rc6_mode & INTEL_RC6p_ENABLE)
5016 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
5017
5018 if (rc6_mode & INTEL_RC6pp_ENABLE)
5019 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
5020 }
5021
5022 intel_print_rc6_info(dev, rc6_mask);
5023
5024 I915_WRITE(GEN6_RC_CONTROL,
5025 rc6_mask |
5026 GEN6_RC_CTL_EI_MODE(1) |
5027 GEN6_RC_CTL_HW_ENABLE);
5028
5029 /* Power down if completely idle for over 50ms */
5030 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
5031 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5032
5033 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
5034 if (ret)
5035 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
5036
5037 ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
5038 if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
5039 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
5040 (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
5041 (pcu_mbox & 0xff) * 50);
5042 dev_priv->rps.max_freq = pcu_mbox & 0xff;
5043 }
5044
5045 dev_priv->rps.power = HIGH_POWER; /* force a reset */
5046 gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
5047
5048 rc6vids = 0;
5049 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
5050 if (IS_GEN6(dev) && ret) {
5051 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
5052 } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
5053 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
5054 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
5055 rc6vids &= 0xffff00;
5056 rc6vids |= GEN6_ENCODE_RC6_VID(450);
5057 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
5058 if (ret)
5059 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
5060 }
5061
5062 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5063 }
5064
5065 static void __gen6_update_ring_freq(struct drm_device *dev)
5066 {
5067 struct drm_i915_private *dev_priv = dev->dev_private;
5068 int min_freq = 15;
5069 unsigned int gpu_freq;
5070 unsigned int max_ia_freq, min_ring_freq;
5071 unsigned int max_gpu_freq, min_gpu_freq;
5072 int scaling_factor = 180;
5073 #ifndef __NetBSD__
5074 struct cpufreq_policy *policy;
5075 #endif
5076
5077 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5078
5079 #ifdef __NetBSD__
5080 {
5081 extern uint64_t tsc_freq; /* x86 TSC frequency in Hz */
5082 max_ia_freq = (tsc_freq / 1000);
5083 }
5084 #else
5085 policy = cpufreq_cpu_get(0);
5086 if (policy) {
5087 max_ia_freq = policy->cpuinfo.max_freq;
5088 cpufreq_cpu_put(policy);
5089 } else {
5090 /*
5091 		 * Default to measured freq if none found; the PCU will
5092 		 * ensure we don't go over.
5093 */
5094 max_ia_freq = tsc_khz;
5095 }
5096 #endif
5097
5098 /* Convert from kHz to MHz */
5099 max_ia_freq /= 1000;
5100
5101 min_ring_freq = I915_READ(DCLK) & 0xf;
5102 /* convert DDR frequency from units of 266.6MHz to bandwidth */
5103 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
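	/*
	 * Illustrative example of the conversion above: a raw DCLK field of
	 * 3 becomes mult_frac(3, 8, 3) = 8 in the ring/bandwidth units used
	 * below.
	 */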
5104
5105 if (IS_SKYLAKE(dev)) {
5106 		/* Convert GT frequency to 50 MHz units */
5107 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
5108 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
5109 } else {
5110 min_gpu_freq = dev_priv->rps.min_freq;
5111 max_gpu_freq = dev_priv->rps.max_freq;
5112 }
5113
5114 /*
5115 * For each potential GPU frequency, load a ring frequency we'd like
5116 * to use for memory access. We do this by specifying the IA frequency
5117 * the PCU should use as a reference to determine the ring frequency.
5118 */
5119 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
5120 int diff = max_gpu_freq - gpu_freq;
5121 unsigned int ia_freq = 0, ring_freq = 0;
5122
5123 if (IS_SKYLAKE(dev)) {
5124 /*
5125 			 * ring_freq = 2 * GT. ring_freq is in 100 MHz units.
5126 			 * No floor required for ring frequency on SKL.
5127 */
5128 ring_freq = gpu_freq;
5129 } else if (INTEL_INFO(dev)->gen >= 8) {
5130 /* max(2 * GT, DDR). NB: GT is 50MHz units */
5131 ring_freq = max(min_ring_freq, gpu_freq);
5132 } else if (IS_HASWELL(dev)) {
5133 ring_freq = mult_frac(gpu_freq, 5, 4);
5134 ring_freq = max(min_ring_freq, ring_freq);
5135 /* leave ia_freq as the default, chosen by cpufreq */
5136 } else {
5137 /* On older processors, there is no separate ring
5138 * clock domain, so in order to boost the bandwidth
5139 * of the ring, we need to upclock the CPU (ia_freq).
5140 *
5141 * For GPU frequencies less than 750MHz,
5142 * just use the lowest ring freq.
5143 */
5144 if (gpu_freq < min_freq)
5145 ia_freq = 800;
5146 else
5147 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
5148 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
5149 }
5150
5151 sandybridge_pcode_write(dev_priv,
5152 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
5153 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
5154 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
5155 gpu_freq);
5156 }
5157 }
5158
5159 void gen6_update_ring_freq(struct drm_device *dev)
5160 {
5161 struct drm_i915_private *dev_priv = dev->dev_private;
5162
5163 if (!HAS_CORE_RING_FREQ(dev))
5164 return;
5165
5166 mutex_lock(&dev_priv->rps.hw_lock);
5167 __gen6_update_ring_freq(dev);
5168 mutex_unlock(&dev_priv->rps.hw_lock);
5169 }
5170
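/*
 * On CHV the RP0 (maximum) frequency is fused per EU configuration, so
 * pick the fuse field that matches this part's EU count.
 */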
5171 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
5172 {
5173 struct drm_device *dev = dev_priv->dev;
5174 u32 val, rp0;
5175
5176 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5177
5178 switch (INTEL_INFO(dev)->eu_total) {
5179 case 8:
5180 /* (2 * 4) config */
5181 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
5182 break;
5183 case 12:
5184 /* (2 * 6) config */
5185 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
5186 break;
5187 case 16:
5188 /* (2 * 8) config */
5189 default:
5190 /* Setting (2 * 8) Min RP0 for any other combination */
5191 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
5192 break;
5193 }
5194
5195 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
5196
5197 return rp0;
5198 }
5199
5200 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5201 {
5202 u32 val, rpe;
5203
5204 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
5205 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
5206
5207 return rpe;
5208 }
5209
5210 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
5211 {
5212 u32 val, rp1;
5213
5214 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
5215 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
5216
5217 return rp1;
5218 }
5219
5220 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
5221 {
5222 u32 val, rp1;
5223
5224 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5225
5226 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
5227
5228 return rp1;
5229 }
5230
5231 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
5232 {
5233 u32 val, rp0;
5234
5235 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
5236
5237 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
5238 /* Clamp to max */
5239 rp0 = min_t(u32, rp0, 0xea);
5240
5241 return rp0;
5242 }
5243
5244 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
5245 {
5246 u32 val, rpe;
5247
5248 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
5249 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
5250 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
5251 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
5252
5253 return rpe;
5254 }
5255
5256 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
5257 {
5258 return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
5259 }
5260
5261 /* Check that the pctx buffer wasn't moved under us. */
5262 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
5263 {
5264 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5265
5266 WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
5267 dev_priv->vlv_pctx->stolen->start);
5268 }
5269
5270
5271 /* Check that the pcbr address is not empty. */
5272 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
5273 {
5274 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
5275
5276 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
5277 }
5278
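/*
 * Ensure a power-context buffer address (PCBR) is programmed; if the BIOS
 * left it unset, point it at a carve-out at the top of stolen memory.
 */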
5279 static void cherryview_setup_pctx(struct drm_device *dev)
5280 {
5281 struct drm_i915_private *dev_priv = dev->dev_private;
5282 unsigned long pctx_paddr, paddr;
5283 struct i915_gtt *gtt = &dev_priv->gtt;
5284 u32 pcbr;
5285 int pctx_size = 32*1024;
5286
5287 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5288
5289 pcbr = I915_READ(VLV_PCBR);
5290 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
5291 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5292 paddr = (dev_priv->mm.stolen_base +
5293 (gtt->stolen_size - pctx_size));
5294
5295 pctx_paddr = (paddr & (~4095));
5296 I915_WRITE(VLV_PCBR, pctx_paddr);
5297 }
5298
5299 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5300 }
5301
5302 static void valleyview_setup_pctx(struct drm_device *dev)
5303 {
5304 struct drm_i915_private *dev_priv = dev->dev_private;
5305 struct drm_i915_gem_object *pctx;
5306 unsigned long pctx_paddr;
5307 u32 pcbr;
5308 int pctx_size = 24*1024;
5309
5310 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
5311
5312 pcbr = I915_READ(VLV_PCBR);
5313 if (pcbr) {
5314 /* BIOS set it up already, grab the pre-alloc'd space */
5315 int pcbr_offset;
5316
5317 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
5318 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
5319 pcbr_offset,
5320 I915_GTT_OFFSET_NONE,
5321 pctx_size);
5322 goto out;
5323 }
5324
5325 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
5326
5327 /*
5328 * From the Gunit register HAS:
5329 * The Gfx driver is expected to program this register and ensure
5330 * proper allocation within Gfx stolen memory. For example, this
5331 	 * register should be programmed such that the PCBR range does not
5332 * overlap with other ranges, such as the frame buffer, protected
5333 * memory, or any other relevant ranges.
5334 */
5335 pctx = i915_gem_object_create_stolen(dev, pctx_size);
5336 if (!pctx) {
5337 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
5338 return;
5339 }
5340
5341 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
5342 I915_WRITE(VLV_PCBR, pctx_paddr);
5343
5344 out:
5345 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
5346 dev_priv->vlv_pctx = pctx;
5347 }
5348
5349 static void valleyview_cleanup_pctx(struct drm_device *dev)
5350 {
5351 struct drm_i915_private *dev_priv = dev->dev_private;
5352
5353 if (WARN_ON(!dev_priv->vlv_pctx))
5354 return;
5355
5356 drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
5357 dev_priv->vlv_pctx = NULL;
5358 }
5359
5360 static void valleyview_init_gt_powersave(struct drm_device *dev)
5361 {
5362 struct drm_i915_private *dev_priv = dev->dev_private;
5363 u32 val;
5364
5365 valleyview_setup_pctx(dev);
5366
5367 mutex_lock(&dev_priv->rps.hw_lock);
5368
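	/*
	 * Bits [7:6] of the PUnit GPU frequency status word encode the DDR
	 * speed; the decode below assumes that mapping (00/01 = 800 MHz,
	 * 10 = 1066 MHz, 11 = 1333 MHz).
	 */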
5369 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5370 switch ((val >> 6) & 3) {
5371 case 0:
5372 case 1:
5373 dev_priv->mem_freq = 800;
5374 break;
5375 case 2:
5376 dev_priv->mem_freq = 1066;
5377 break;
5378 case 3:
5379 dev_priv->mem_freq = 1333;
5380 break;
5381 }
5382 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5383
5384 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
5385 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5386 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5387 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5388 dev_priv->rps.max_freq);
5389
5390 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
5391 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5392 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5393 dev_priv->rps.efficient_freq);
5394
5395 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
5396 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
5397 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5398 dev_priv->rps.rp1_freq);
5399
5400 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
5401 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5402 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5403 dev_priv->rps.min_freq);
5404
5405 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5406
5407 /* Preserve min/max settings in case of re-init */
5408 if (dev_priv->rps.max_freq_softlimit == 0)
5409 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5410
5411 if (dev_priv->rps.min_freq_softlimit == 0)
5412 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5413
5414 mutex_unlock(&dev_priv->rps.hw_lock);
5415 }
5416
5417 static void cherryview_init_gt_powersave(struct drm_device *dev)
5418 {
5419 struct drm_i915_private *dev_priv = dev->dev_private;
5420 u32 val;
5421
5422 cherryview_setup_pctx(dev);
5423
5424 mutex_lock(&dev_priv->rps.hw_lock);
5425
5426 mutex_lock(&dev_priv->sb_lock);
5427 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
5428 mutex_unlock(&dev_priv->sb_lock);
5429
5430 switch ((val >> 2) & 0x7) {
5431 case 3:
5432 dev_priv->mem_freq = 2000;
5433 break;
5434 default:
5435 dev_priv->mem_freq = 1600;
5436 break;
5437 }
5438 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
5439
5440 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
5441 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
5442 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
5443 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
5444 dev_priv->rps.max_freq);
5445
5446 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
5447 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
5448 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5449 dev_priv->rps.efficient_freq);
5450
5451 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
5452 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
5453 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
5454 dev_priv->rps.rp1_freq);
5455
5456 /* PUnit validated range is only [RPe, RP0] */
5457 dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
5458 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
5459 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
5460 dev_priv->rps.min_freq);
5461
5462 WARN_ONCE((dev_priv->rps.max_freq |
5463 dev_priv->rps.efficient_freq |
5464 dev_priv->rps.rp1_freq |
5465 dev_priv->rps.min_freq) & 1,
5466 "Odd GPU freq values\n");
5467
5468 dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
5469
5470 /* Preserve min/max settings in case of re-init */
5471 if (dev_priv->rps.max_freq_softlimit == 0)
5472 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
5473
5474 if (dev_priv->rps.min_freq_softlimit == 0)
5475 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
5476
5477 mutex_unlock(&dev_priv->rps.hw_lock);
5478 }
5479
5480 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
5481 {
5482 valleyview_cleanup_pctx(dev);
5483 }
5484
5485 static void cherryview_enable_rps(struct drm_device *dev)
5486 {
5487 struct drm_i915_private *dev_priv = dev->dev_private;
5488 struct intel_engine_cs *ring;
5489 u32 gtfifodbg, val, rc6_mode = 0, pcbr;
5490 int i;
5491
5492 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5493
5494 gtfifodbg = I915_READ(GTFIFODBG);
5495 if (gtfifodbg) {
5496 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5497 gtfifodbg);
5498 I915_WRITE(GTFIFODBG, gtfifodbg);
5499 }
5500
5501 cherryview_check_pctx(dev_priv);
5502
5503 /* 1a & 1b: Get forcewake during program sequence. Although the driver
5504 	 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
5505 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5506
5507 /* Disable RC states. */
5508 I915_WRITE(GEN6_RC_CONTROL, 0);
5509
5510 /* 2a: Program RC6 thresholds.*/
5511 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
5512 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
5513 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
5514
5515 for_each_ring(ring, dev_priv, i)
5516 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5517 I915_WRITE(GEN6_RC_SLEEP, 0);
5518
5519 	/* Timeout (TO) threshold set to 500 us (0x186 * 1.28 us) */
5520 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
5521
5522 /* allows RC6 residency counter to work */
5523 I915_WRITE(VLV_COUNTER_CONTROL,
5524 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
5525 VLV_MEDIA_RC6_COUNT_EN |
5526 VLV_RENDER_RC6_COUNT_EN));
5527
5528 /* For now we assume BIOS is allocating and populating the PCBR */
5529 pcbr = I915_READ(VLV_PCBR);
5530
5531 /* 3: Enable RC6 */
5532 if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
5533 (pcbr >> VLV_PCBR_ADDR_SHIFT))
5534 rc6_mode = GEN7_RC_CTL_TO_MODE;
5535
5536 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5537
5538 	/* 4: Program defaults and thresholds for RPS */
5539 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5540 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5541 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5542 I915_WRITE(GEN6_RP_UP_EI, 66000);
5543 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5544
5545 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5546
5547 /* 5: Enable RPS */
5548 I915_WRITE(GEN6_RP_CONTROL,
5549 GEN6_RP_MEDIA_HW_NORMAL_MODE |
5550 GEN6_RP_MEDIA_IS_GFX |
5551 GEN6_RP_ENABLE |
5552 GEN6_RP_UP_BUSY_AVG |
5553 GEN6_RP_DOWN_IDLE_AVG);
5554
5555 /* Setting Fixed Bias */
5556 val = VLV_OVERRIDE_EN |
5557 VLV_SOC_TDP_EN |
5558 CHV_BIAS_CPU_50_SOC_50;
5559 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5560
5561 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5562
5563 /* RPS code assumes GPLL is used */
5564 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5565
5566 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5567 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5568
5569 dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5570 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5571 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5572 dev_priv->rps.cur_freq);
5573
5574 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5575 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5576 dev_priv->rps.efficient_freq);
5577
5578 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5579
5580 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5581 }
5582
5583 static void valleyview_enable_rps(struct drm_device *dev)
5584 {
5585 struct drm_i915_private *dev_priv = dev->dev_private;
5586 struct intel_engine_cs *ring;
5587 u32 gtfifodbg, val, rc6_mode = 0;
5588 int i;
5589
5590 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5591
5592 valleyview_check_pctx(dev_priv);
5593
5594 if ((gtfifodbg = I915_READ(GTFIFODBG))) {
5595 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
5596 gtfifodbg);
5597 I915_WRITE(GTFIFODBG, gtfifodbg);
5598 }
5599
5600 /* If VLV, Forcewake all wells, else re-direct to regular path */
5601 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5602
5603 /* Disable RC states. */
5604 I915_WRITE(GEN6_RC_CONTROL, 0);
5605
5606 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
5607 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
5608 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
5609 I915_WRITE(GEN6_RP_UP_EI, 66000);
5610 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
5611
5612 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
5613
5614 I915_WRITE(GEN6_RP_CONTROL,
5615 GEN6_RP_MEDIA_TURBO |
5616 GEN6_RP_MEDIA_HW_NORMAL_MODE |
5617 GEN6_RP_MEDIA_IS_GFX |
5618 GEN6_RP_ENABLE |
5619 GEN6_RP_UP_BUSY_AVG |
5620 GEN6_RP_DOWN_IDLE_CONT);
5621
5622 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
5623 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
5624 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
5625
5626 for_each_ring(ring, dev_priv, i)
5627 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
5628
5629 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
5630
5631 /* allows RC6 residency counter to work */
5632 I915_WRITE(VLV_COUNTER_CONTROL,
5633 _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
5634 VLV_RENDER_RC0_COUNT_EN |
5635 VLV_MEDIA_RC6_COUNT_EN |
5636 VLV_RENDER_RC6_COUNT_EN));
5637
5638 if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
5639 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
5640
5641 intel_print_rc6_info(dev, rc6_mode);
5642
5643 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
5644
5645 /* Setting Fixed Bias */
5646 val = VLV_OVERRIDE_EN |
5647 VLV_SOC_TDP_EN |
5648 VLV_BIAS_CPU_125_SOC_875;
5649 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
5650
5651 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
5652
5653 /* RPS code assumes GPLL is used */
5654 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
5655
5656 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
5657 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
5658
5659 dev_priv->rps.cur_freq = (val >> 8) & 0xff;
5660 DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
5661 intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
5662 dev_priv->rps.cur_freq);
5663
5664 DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
5665 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
5666 dev_priv->rps.efficient_freq);
5667
5668 valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
5669
5670 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5671 }
5672
5673 static unsigned long intel_pxfreq(u32 vidfreq)
5674 {
5675 unsigned long freq;
5676 int div = (vidfreq & 0x3f0000) >> 16;
5677 int post = (vidfreq & 0x3000) >> 12;
5678 int pre = (vidfreq & 0x7);
5679
5680 if (!pre)
5681 return 0;
5682
5683 freq = ((div * 133333) / ((1<<post) * pre));
5684
5685 return freq;
5686 }
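/*
 * Worked example (illustrative only, not taken from the spec): a vidfreq
 * value of 0x000a0001 decodes to div = 10, post = 0, pre = 1, giving
 * freq = (10 * 133333) / ((1 << 0) * 1) = 1333330, i.e. ~1.33 GHz if
 * 133333 is the 133.33 MHz reference clock expressed in kHz.
 */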
5687
5688 static const struct cparams {
5689 u16 i;
5690 u16 t;
5691 u16 m;
5692 u16 c;
5693 } cparams[] = {
5694 { 1, 1333, 301, 28664 },
5695 { 1, 1066, 294, 24460 },
5696 { 1, 800, 294, 25192 },
5697 { 0, 1333, 276, 27605 },
5698 { 0, 1066, 276, 27605 },
5699 { 0, 800, 231, 23784 },
5700 };
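/*
 * Each cparams row pairs a configuration selector i (matched against
 * ips.c_m) and a frequency selector t (matched against ips.r_t) with the
 * slope m and intercept c that __i915_chipset_val() below uses to turn a
 * counter delta per millisecond into a chipset power estimate.
 */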
5701
5702 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5703 {
5704 u64 total_count, diff, ret;
5705 u32 count1, count2, count3, m = 0, c = 0;
5706 unsigned long now = jiffies_to_msecs(jiffies), diff1;
5707 int i;
5708
5709 assert_spin_locked(&mchdev_lock);
5710
5711 diff1 = now - dev_priv->ips.last_time1;
5712
5713 /* Prevent division-by-zero if we are asking too fast.
5714 * Also, we don't get interesting results if we are polling
5715 * faster than once in 10ms, so just return the saved value
5716 * in such cases.
5717 */
5718 if (diff1 <= 10)
5719 return dev_priv->ips.chipset_power;
5720
5721 count1 = I915_READ(DMIEC);
5722 count2 = I915_READ(DDREC);
5723 count3 = I915_READ(CSIEC);
5724
5725 total_count = count1 + count2 + count3;
5726
5727 /* FIXME: handle per-counter overflow */
5728 if (total_count < dev_priv->ips.last_count1) {
5729 diff = ~0UL - dev_priv->ips.last_count1;
5730 diff += total_count;
5731 } else {
5732 diff = total_count - dev_priv->ips.last_count1;
5733 }
5734
5735 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5736 if (cparams[i].i == dev_priv->ips.c_m &&
5737 cparams[i].t == dev_priv->ips.r_t) {
5738 m = cparams[i].m;
5739 c = cparams[i].c;
5740 break;
5741 }
5742 }
5743
5744 diff = div_u64(diff, diff1);
5745 ret = ((m * diff) + c);
5746 ret = div_u64(ret, 10);
5747
5748 dev_priv->ips.last_count1 = total_count;
5749 dev_priv->ips.last_time1 = now;
5750
5751 dev_priv->ips.chipset_power = ret;
5752
5753 return ret;
5754 }
5755
5756 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5757 {
5758 struct drm_device *dev = dev_priv->dev;
5759 unsigned long val;
5760
5761 if (INTEL_INFO(dev)->gen != 5)
5762 return 0;
5763
5764 spin_lock_irq(&mchdev_lock);
5765
5766 val = __i915_chipset_val(dev_priv);
5767
5768 spin_unlock_irq(&mchdev_lock);
5769
5770 return val;
5771 }
5772
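/*
 * Read the thermal value IPS uses: TSFS supplies a slope (m) and intercept
 * (b), TR1 the raw sensor reading (x), and the result below is simply
 * (m * x) / 127 - b, in whatever units the IPS heuristics expect.
 */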
5773 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5774 {
5775 unsigned long m, x, b;
5776 u32 tsfs;
5777
5778 tsfs = I915_READ(TSFS);
5779
5780 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5781 x = I915_READ8(TR1);
5782
5783 b = tsfs & TSFS_INTR_MASK;
5784
5785 return ((m * x) / 127) - b;
5786 }
5787
5788 static int _pxvid_to_vd(u8 pxvid)
5789 {
5790 if (pxvid == 0)
5791 return 0;
5792
5793 if (pxvid >= 8 && pxvid < 31)
5794 pxvid = 31;
5795
5796 return (pxvid + 2) * 125;
5797 }
5798
5799 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5800 {
5801 struct drm_device *dev = dev_priv->dev;
5802 const int vd = _pxvid_to_vd(pxvid);
5803 const int vm = vd - 1125;
5804
5805 if (INTEL_INFO(dev)->is_mobile)
5806 return vm > 0 ? vm : 0;
5807
5808 return vd;
5809 }
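/*
 * Example of the decode above (illustrative only): pxvid 6 maps to
 * (6 + 2) * 125 = 1000, while values 8..30 are first clamped to 31.  The
 * step of 125 suggests the result is a voltage in mV, but that unit is an
 * assumption, as is the 1125 mobile offset applied in pvid_to_extvid().
 */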
5810
5811 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5812 {
5813 u64 now, diff, diffms;
5814 u32 count;
5815
5816 assert_spin_locked(&mchdev_lock);
5817
5818 now = ktime_get_raw_ns();
5819 diffms = now - dev_priv->ips.last_time2;
5820 do_div(diffms, NSEC_PER_MSEC);
5821
5822 /* Don't divide by 0 */
5823 if (!diffms)
5824 return;
5825
5826 count = I915_READ(GFXEC);
5827
5828 if (count < dev_priv->ips.last_count2) {
5829 diff = ~0UL - dev_priv->ips.last_count2;
5830 diff += count;
5831 } else {
5832 diff = count - dev_priv->ips.last_count2;
5833 }
5834
5835 dev_priv->ips.last_count2 = count;
5836 dev_priv->ips.last_time2 = now;
5837
5838 /* More magic constants... */
5839 diff = diff * 1181;
5840 diff = div_u64(diff, diffms * 10);
5841 dev_priv->ips.gfx_power = diff;
5842 }
5843
5844 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5845 {
5846 struct drm_device *dev = dev_priv->dev;
5847
5848 if (INTEL_INFO(dev)->gen != 5)
5849 return;
5850
5851 spin_lock_irq(&mchdev_lock);
5852
5853 __i915_update_gfx_val(dev_priv);
5854
5855 spin_unlock_irq(&mchdev_lock);
5856 }
5857
5858 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5859 {
5860 unsigned long t, corr, state1, corr2, state2;
5861 u32 pxvid, ext_v;
5862
5863 assert_spin_locked(&mchdev_lock);
5864
5865 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
5866 pxvid = (pxvid >> 24) & 0x7f;
5867 ext_v = pvid_to_extvid(dev_priv, pxvid);
5868
5869 state1 = ext_v;
5870
5871 t = i915_mch_val(dev_priv);
5872
5873 /* Revel in the empirically derived constants */
5874
5875 /* Correction factor in 1/100000 units */
5876 if (t > 80)
5877 corr = ((t * 2349) + 135940);
5878 else if (t >= 50)
5879 corr = ((t * 964) + 29317);
5880 else /* < 50 */
5881 corr = ((t * 301) + 1004);
5882
5883 corr = corr * ((150142 * state1) / 10000 - 78642);
5884 corr /= 100000;
5885 corr2 = (corr * dev_priv->ips.corr);
5886
5887 state2 = (corr2 * state1) / 10000;
5888 state2 /= 100; /* convert to mW */
5889
5890 __i915_update_gfx_val(dev_priv);
5891
5892 return dev_priv->ips.gfx_power + state2;
5893 }
5894
5895 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5896 {
5897 struct drm_device *dev = dev_priv->dev;
5898 unsigned long val;
5899
5900 if (INTEL_INFO(dev)->gen != 5)
5901 return 0;
5902
5903 spin_lock_irq(&mchdev_lock);
5904
5905 val = __i915_gfx_val(dev_priv);
5906
5907 spin_unlock_irq(&mchdev_lock);
5908
5909 return val;
5910 }
5911
5912 /**
5913 * i915_read_mch_val - return value for IPS use
5914 *
5915 * Calculate and return a value for the IPS driver to use when deciding whether
5916 * we have thermal and power headroom to increase CPU or GPU power budget.
5917 */
5918 unsigned long i915_read_mch_val(void)
5919 {
5920 struct drm_i915_private *dev_priv;
5921 unsigned long chipset_val, graphics_val, ret = 0;
5922
5923 spin_lock_irq(&mchdev_lock);
5924 if (!i915_mch_dev)
5925 goto out_unlock;
5926 dev_priv = i915_mch_dev;
5927
5928 chipset_val = __i915_chipset_val(dev_priv);
5929 graphics_val = __i915_gfx_val(dev_priv);
5930
5931 ret = chipset_val + graphics_val;
5932
5933 out_unlock:
5934 spin_unlock_irq(&mchdev_lock);
5935
5936 return ret;
5937 }
5938 EXPORT_SYMBOL_GPL(i915_read_mch_val);
5939
5940 /**
5941 * i915_gpu_raise - raise GPU frequency limit
5942 *
5943 * Raise the limit; IPS indicates we have thermal headroom.
5944 */
5945 bool i915_gpu_raise(void)
5946 {
5947 struct drm_i915_private *dev_priv;
5948 bool ret = true;
5949
5950 spin_lock_irq(&mchdev_lock);
5951 if (!i915_mch_dev) {
5952 ret = false;
5953 goto out_unlock;
5954 }
5955 dev_priv = i915_mch_dev;
5956
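	/*
	 * Delay values run opposite to frequency: ips.fmax is the lowest
	 * delay (highest frequency), so decrementing max_delay raises the
	 * allowed GPU frequency (inferred from the comparison below).
	 */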
5957 if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5958 dev_priv->ips.max_delay--;
5959
5960 out_unlock:
5961 spin_unlock_irq(&mchdev_lock);
5962
5963 return ret;
5964 }
5965 EXPORT_SYMBOL_GPL(i915_gpu_raise);
5966
5967 /**
5968 * i915_gpu_lower - lower GPU frequency limit
5969 *
5970 * IPS indicates we're close to a thermal limit, so throttle back the GPU
5971 * frequency maximum.
5972 */
5973 bool i915_gpu_lower(void)
5974 {
5975 struct drm_i915_private *dev_priv;
5976 bool ret = true;
5977
5978 spin_lock_irq(&mchdev_lock);
5979 if (!i915_mch_dev) {
5980 ret = false;
5981 goto out_unlock;
5982 }
5983 dev_priv = i915_mch_dev;
5984
5985 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5986 dev_priv->ips.max_delay++;
5987
5988 out_unlock:
5989 spin_unlock_irq(&mchdev_lock);
5990
5991 return ret;
5992 }
5993 EXPORT_SYMBOL_GPL(i915_gpu_lower);
5994
5995 /**
5996 * i915_gpu_busy - indicate GPU business to IPS
5997 *
5998 * Tell the IPS driver whether or not the GPU is busy.
5999 */
6000 bool i915_gpu_busy(void)
6001 {
6002 struct drm_i915_private *dev_priv;
6003 struct intel_engine_cs *ring;
6004 bool ret = false;
6005 int i;
6006
6007 spin_lock_irq(&mchdev_lock);
6008 if (!i915_mch_dev)
6009 goto out_unlock;
6010 dev_priv = i915_mch_dev;
6011
6012 for_each_ring(ring, dev_priv, i)
6013 ret |= !list_empty(&ring->request_list);
6014
6015 out_unlock:
6016 spin_unlock_irq(&mchdev_lock);
6017
6018 return ret;
6019 }
6020 EXPORT_SYMBOL_GPL(i915_gpu_busy);
6021
6022 /**
6023 * i915_gpu_turbo_disable - disable graphics turbo
6024 *
6025 * Disable graphics turbo by resetting the max frequency and setting the
6026 * current frequency to the default.
6027 */
6028 bool i915_gpu_turbo_disable(void)
6029 {
6030 struct drm_i915_private *dev_priv;
6031 bool ret = true;
6032
6033 spin_lock_irq(&mchdev_lock);
6034 if (!i915_mch_dev) {
6035 ret = false;
6036 goto out_unlock;
6037 }
6038 dev_priv = i915_mch_dev;
6039
6040 dev_priv->ips.max_delay = dev_priv->ips.fstart;
6041
6042 if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
6043 ret = false;
6044
6045 out_unlock:
6046 spin_unlock_irq(&mchdev_lock);
6047
6048 return ret;
6049 }
6050 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
6051
6052 /**
6053 * Tells the intel_ips driver that the i915 driver is now loaded, if
6054 * IPS got loaded first.
6055 *
6056 * This awkward dance is so that neither module has to depend on the
6057 * other in order for IPS to do the appropriate communication of
6058 * GPU turbo limits to i915.
6059 */
6060 static void
6061 ips_ping_for_i915_load(void)
6062 {
6063 #ifndef __NetBSD__ /* XXX IPS GPU turbo limits what? */
6064 void (*link)(void);
6065
6066 link = symbol_get(ips_link_to_i915_driver);
6067 if (link) {
6068 link();
6069 symbol_put(ips_link_to_i915_driver);
6070 }
6071 #endif
6072 }
6073
6074 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
6075 {
6076 /* We only register the i915 ips part with intel-ips once everything is
6077 * set up, to avoid intel-ips sneaking in and reading bogus values. */
6078 spin_lock_irq(&mchdev_lock);
6079 i915_mch_dev = dev_priv;
6080 spin_unlock_irq(&mchdev_lock);
6081
6082 ips_ping_for_i915_load();
6083 }
6084
6085 void intel_gpu_ips_teardown(void)
6086 {
6087 spin_lock_irq(&mchdev_lock);
6088 i915_mch_dev = NULL;
6089 spin_unlock_irq(&mchdev_lock);
6090 }
6091
6092 static void intel_init_emon(struct drm_device *dev)
6093 {
6094 struct drm_i915_private *dev_priv = dev->dev_private;
6095 u32 lcfuse;
6096 u8 pxw[16];
6097 int i;
6098
6099 	/* Disable PMON while we program the event weights */
6100 I915_WRITE(ECR, 0);
6101 POSTING_READ(ECR);
6102
6103 /* Program energy weights for various events */
6104 I915_WRITE(SDEW, 0x15040d00);
6105 I915_WRITE(CSIEW0, 0x007f0000);
6106 I915_WRITE(CSIEW1, 0x1e220004);
6107 I915_WRITE(CSIEW2, 0x04000004);
6108
6109 for (i = 0; i < 5; i++)
6110 I915_WRITE(PEW(i), 0);
6111 for (i = 0; i < 3; i++)
6112 I915_WRITE(DEW(i), 0);
6113
6114 /* Program P-state weights to account for frequency power adjustment */
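	/*
	 * Roughly: weight = vid^2 * freq[MHz] * 255 / (127 * 127 * 900),
	 * squeezed into 8 bits; anything above 0xff indicates a bogus
	 * PXVFREQ entry and is reported below.  (Assumes intel_pxfreq()
	 * returns kHz, hence the freq / 1000.)
	 */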
6115 for (i = 0; i < 16; i++) {
6116 u32 pxvidfreq = I915_READ(PXVFREQ(i));
6117 unsigned long freq = intel_pxfreq(pxvidfreq);
6118 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
6119 PXVFREQ_PX_SHIFT;
6120 unsigned long val;
6121
6122 val = vid * vid;
6123 val *= (freq / 1000);
6124 val *= 255;
6125 val /= (127*127*900);
6126 if (val > 0xff)
6127 DRM_ERROR("bad pxval: %ld\n", val);
6128 pxw[i] = val;
6129 }
6130 /* Render standby states get 0 weight */
6131 pxw[14] = 0;
6132 pxw[15] = 0;
6133
6134 for (i = 0; i < 4; i++) {
6135 u32 val = ((u32)pxw[i*4] << 24) | ((u32)pxw[(i*4)+1] << 16) |
6136 ((u32)pxw[(i*4)+2] << 8) | ((u32)pxw[(i*4)+3]);
6137 I915_WRITE(PXW(i), val);
6138 }
6139
6140 /* Adjust magic regs to magic values (more experimental results) */
6141 I915_WRITE(OGW0, 0);
6142 I915_WRITE(OGW1, 0);
6143 I915_WRITE(EG0, 0x00007f00);
6144 I915_WRITE(EG1, 0x0000000e);
6145 I915_WRITE(EG2, 0x000e0000);
6146 I915_WRITE(EG3, 0x68000300);
6147 I915_WRITE(EG4, 0x42000000);
6148 I915_WRITE(EG5, 0x00140031);
6149 I915_WRITE(EG6, 0);
6150 I915_WRITE(EG7, 0);
6151
6152 for (i = 0; i < 8; i++)
6153 I915_WRITE(PXWL(i), 0);
6154
6155 /* Enable PMON + select events */
6156 I915_WRITE(ECR, 0x80000019);
6157
6158 lcfuse = I915_READ(LCFUSE02);
6159
6160 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
6161 }
6162
6163 void intel_init_gt_powersave(struct drm_device *dev)
6164 {
6165 i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
6166
6167 if (IS_CHERRYVIEW(dev))
6168 cherryview_init_gt_powersave(dev);
6169 else if (IS_VALLEYVIEW(dev))
6170 valleyview_init_gt_powersave(dev);
6171 }
6172
6173 void intel_cleanup_gt_powersave(struct drm_device *dev)
6174 {
6175 if (IS_CHERRYVIEW(dev))
6176 return;
6177 else if (IS_VALLEYVIEW(dev))
6178 valleyview_cleanup_gt_powersave(dev);
6179 }
6180
6181 static void gen6_suspend_rps(struct drm_device *dev)
6182 {
6183 struct drm_i915_private *dev_priv = dev->dev_private;
6184
6185 flush_delayed_work(&dev_priv->rps.delayed_resume_work);
6186
6187 gen6_disable_rps_interrupts(dev);
6188 }
6189
6190 /**
6191 * intel_suspend_gt_powersave - suspend PM work and helper threads
6192 * @dev: drm device
6193 *
6194  * We don't want to disable RC6 or other features here; we just want
6195 * to make sure any work we've queued has finished and won't bother
6196 * us while we're suspended.
6197 */
6198 void intel_suspend_gt_powersave(struct drm_device *dev)
6199 {
6200 struct drm_i915_private *dev_priv = dev->dev_private;
6201
6202 if (INTEL_INFO(dev)->gen < 6)
6203 return;
6204
6205 gen6_suspend_rps(dev);
6206
6207 /* Force GPU to min freq during suspend */
6208 gen6_rps_idle(dev_priv);
6209 }
6210
6211 void intel_disable_gt_powersave(struct drm_device *dev)
6212 {
6213 struct drm_i915_private *dev_priv = dev->dev_private;
6214
6215 if (IS_IRONLAKE_M(dev)) {
6216 ironlake_disable_drps(dev);
6217 } else if (INTEL_INFO(dev)->gen >= 6) {
6218 intel_suspend_gt_powersave(dev);
6219
6220 mutex_lock(&dev_priv->rps.hw_lock);
6221 if (INTEL_INFO(dev)->gen >= 9)
6222 gen9_disable_rps(dev);
6223 else if (IS_CHERRYVIEW(dev))
6224 cherryview_disable_rps(dev);
6225 else if (IS_VALLEYVIEW(dev))
6226 valleyview_disable_rps(dev);
6227 else
6228 gen6_disable_rps(dev);
6229
6230 dev_priv->rps.enabled = false;
6231 mutex_unlock(&dev_priv->rps.hw_lock);
6232 }
6233 }
6234
6235 static void intel_gen6_powersave_work(struct work_struct *work)
6236 {
6237 struct drm_i915_private *dev_priv =
6238 container_of(work, struct drm_i915_private,
6239 rps.delayed_resume_work.work);
6240 struct drm_device *dev = dev_priv->dev;
6241
6242 mutex_lock(&dev_priv->rps.hw_lock);
6243
6244 gen6_reset_rps_interrupts(dev);
6245
6246 if (IS_CHERRYVIEW(dev)) {
6247 cherryview_enable_rps(dev);
6248 } else if (IS_VALLEYVIEW(dev)) {
6249 valleyview_enable_rps(dev);
6250 } else if (INTEL_INFO(dev)->gen >= 9) {
6251 gen9_enable_rc6(dev);
6252 gen9_enable_rps(dev);
6253 if (IS_SKYLAKE(dev))
6254 __gen6_update_ring_freq(dev);
6255 } else if (IS_BROADWELL(dev)) {
6256 gen8_enable_rps(dev);
6257 __gen6_update_ring_freq(dev);
6258 } else {
6259 gen6_enable_rps(dev);
6260 __gen6_update_ring_freq(dev);
6261 }
6262
6263 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
6264 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
6265
6266 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
6267 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
6268
6269 dev_priv->rps.enabled = true;
6270
6271 gen6_enable_rps_interrupts(dev);
6272
6273 mutex_unlock(&dev_priv->rps.hw_lock);
6274
6275 intel_runtime_pm_put(dev_priv);
6276 }
6277
6278 void intel_enable_gt_powersave(struct drm_device *dev)
6279 {
6280 struct drm_i915_private *dev_priv = dev->dev_private;
6281
6282 /* Powersaving is controlled by the host when inside a VM */
6283 if (intel_vgpu_active(dev))
6284 return;
6285
6286 if (IS_IRONLAKE_M(dev)) {
6287 mutex_lock(&dev->struct_mutex);
6288 ironlake_enable_drps(dev);
6289 intel_init_emon(dev);
6290 mutex_unlock(&dev->struct_mutex);
6291 } else if (INTEL_INFO(dev)->gen >= 6) {
6292 /*
6293 * PCU communication is slow and this doesn't need to be
6294 * done at any specific time, so do this out of our fast path
6295 * to make resume and init faster.
6296 *
6297 * We depend on the HW RC6 power context save/restore
6298 * mechanism when entering D3 through runtime PM suspend. So
6299 * disable RPM until RPS/RC6 is properly setup. We can only
6300 * get here via the driver load/system resume/runtime resume
6301 * paths, so the _noresume version is enough (and in case of
6302 * runtime resume it's necessary).
6303 */
6304 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
6305 round_jiffies_up_relative(HZ)))
6306 intel_runtime_pm_get_noresume(dev_priv);
6307 }
6308 }
6309
6310 void intel_reset_gt_powersave(struct drm_device *dev)
6311 {
6312 struct drm_i915_private *dev_priv = dev->dev_private;
6313
6314 if (INTEL_INFO(dev)->gen < 6)
6315 return;
6316
6317 gen6_suspend_rps(dev);
6318 dev_priv->rps.enabled = false;
6319 }
6320
6321 static void ibx_init_clock_gating(struct drm_device *dev)
6322 {
6323 struct drm_i915_private *dev_priv = dev->dev_private;
6324
6325 /*
6326 * On Ibex Peak and Cougar Point, we need to disable clock
6327 * gating for the panel power sequencer or it will fail to
6328 * start up when no ports are active.
6329 */
6330 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
6331 }
6332
6333 static void g4x_disable_trickle_feed(struct drm_device *dev)
6334 {
6335 struct drm_i915_private *dev_priv = dev->dev_private;
6336 enum i915_pipe pipe;
6337
6338 for_each_pipe(dev_priv, pipe) {
6339 I915_WRITE(DSPCNTR(pipe),
6340 I915_READ(DSPCNTR(pipe)) |
6341 DISPPLANE_TRICKLE_FEED_DISABLE);
6342
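		/*
		 * Writing DSPSURF back to itself arms the double-buffered
		 * plane registers so the DSPCNTR change above actually
		 * latches (the usual display plane self-write trick).
		 */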
6343 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
6344 POSTING_READ(DSPSURF(pipe));
6345 }
6346 }
6347
6348 static void ilk_init_lp_watermarks(struct drm_device *dev)
6349 {
6350 struct drm_i915_private *dev_priv = dev->dev_private;
6351
6352 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6353 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6354 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6355
6356 /*
6357 * Don't touch WM1S_LP_EN here.
6358 * Doing so could cause underruns.
6359 */
6360 }
6361
6362 static void ironlake_init_clock_gating(struct drm_device *dev)
6363 {
6364 struct drm_i915_private *dev_priv = dev->dev_private;
6365 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6366
6367 /*
6368 * Required for FBC
6369 * WaFbcDisableDpfcClockGating:ilk
6370 */
6371 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
6372 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
6373 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
6374
6375 I915_WRITE(PCH_3DCGDIS0,
6376 MARIUNIT_CLOCK_GATE_DISABLE |
6377 SVSMUNIT_CLOCK_GATE_DISABLE);
6378 I915_WRITE(PCH_3DCGDIS1,
6379 VFMUNIT_CLOCK_GATE_DISABLE);
6380
6381 /*
6382 * According to the spec the following bits should be set in
6383 	 * order to enable memory self-refresh:
6384 * The bit 22/21 of 0x42004
6385 * The bit 5 of 0x42020
6386 * The bit 15 of 0x45000
6387 */
6388 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6389 (I915_READ(ILK_DISPLAY_CHICKEN2) |
6390 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
6391 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
6392 I915_WRITE(DISP_ARB_CTL,
6393 (I915_READ(DISP_ARB_CTL) |
6394 DISP_FBC_WM_DIS));
6395
6396 ilk_init_lp_watermarks(dev);
6397
6398 /*
6399 * Based on the document from hardware guys the following bits
6400 * should be set unconditionally in order to enable FBC.
6401 * The bit 22 of 0x42000
6402 * The bit 22 of 0x42004
6403 * The bit 7,8,9 of 0x42020.
6404 */
6405 if (IS_IRONLAKE_M(dev)) {
6406 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
6407 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6408 I915_READ(ILK_DISPLAY_CHICKEN1) |
6409 ILK_FBCQ_DIS);
6410 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6411 I915_READ(ILK_DISPLAY_CHICKEN2) |
6412 ILK_DPARB_GATE);
6413 }
6414
6415 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6416
6417 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6418 I915_READ(ILK_DISPLAY_CHICKEN2) |
6419 ILK_ELPIN_409_SELECT);
6420 I915_WRITE(_3D_CHICKEN2,
6421 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
6422 _3D_CHICKEN2_WM_READ_PIPELINED);
6423
6424 /* WaDisableRenderCachePipelinedFlush:ilk */
6425 I915_WRITE(CACHE_MODE_0,
6426 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6427
6428 /* WaDisable_RenderCache_OperationalFlush:ilk */
6429 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6430
6431 g4x_disable_trickle_feed(dev);
6432
6433 ibx_init_clock_gating(dev);
6434 }
6435
6436 static void cpt_init_clock_gating(struct drm_device *dev)
6437 {
6438 struct drm_i915_private *dev_priv = dev->dev_private;
6439 int pipe;
6440 uint32_t val;
6441
6442 /*
6443 * On Ibex Peak and Cougar Point, we need to disable clock
6444 * gating for the panel power sequencer or it will fail to
6445 * start up when no ports are active.
6446 */
6447 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
6448 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
6449 PCH_CPUNIT_CLOCK_GATE_DISABLE);
6450 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
6451 DPLS_EDP_PPS_FIX_DIS);
6452 	/* The below fixes a display corruption (a few pixels shifted downward)
6453 	 * seen only on the LVDS panels of some HP laptops with Ivy Bridge.
6454 */
6455 for_each_pipe(dev_priv, pipe) {
6456 val = I915_READ(TRANS_CHICKEN2(pipe));
6457 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
6458 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6459 if (dev_priv->vbt.fdi_rx_polarity_inverted)
6460 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
6461 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
6462 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
6463 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
6464 I915_WRITE(TRANS_CHICKEN2(pipe), val);
6465 }
6466 /* WADP0ClockGatingDisable */
6467 for_each_pipe(dev_priv, pipe) {
6468 I915_WRITE(TRANS_CHICKEN1(pipe),
6469 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6470 }
6471 }
6472
6473 static void gen6_check_mch_setup(struct drm_device *dev)
6474 {
6475 struct drm_i915_private *dev_priv = dev->dev_private;
6476 uint32_t tmp;
6477
6478 tmp = I915_READ(MCH_SSKPD);
6479 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
6480 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
6481 tmp);
6482 }
6483
6484 static void gen6_init_clock_gating(struct drm_device *dev)
6485 {
6486 struct drm_i915_private *dev_priv = dev->dev_private;
6487 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
6488
6489 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
6490
6491 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6492 I915_READ(ILK_DISPLAY_CHICKEN2) |
6493 ILK_ELPIN_409_SELECT);
6494
6495 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
6496 I915_WRITE(_3D_CHICKEN,
6497 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
6498
6499 /* WaDisable_RenderCache_OperationalFlush:snb */
6500 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6501
6502 /*
6503 	 * BSpec recommends 8x4 when MSAA is used,
6504 * however in practice 16x4 seems fastest.
6505 *
6506 * Note that PS/WM thread counts depend on the WIZ hashing
6507 * disable bit, which we don't touch here, but it's good
6508 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6509 */
6510 I915_WRITE(GEN6_GT_MODE,
6511 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6512
6513 ilk_init_lp_watermarks(dev);
6514
6515 I915_WRITE(CACHE_MODE_0,
6516 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
6517
6518 I915_WRITE(GEN6_UCGCTL1,
6519 I915_READ(GEN6_UCGCTL1) |
6520 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
6521 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6522
6523 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
6524 * gating disable must be set. Failure to set it results in
6525 * flickering pixels due to Z write ordering failures after
6526 * some amount of runtime in the Mesa "fire" demo, and Unigine
6527 * Sanctuary and Tropics, and apparently anything else with
6528 * alpha test or pixel discard.
6529 *
6530 * According to the spec, bit 11 (RCCUNIT) must also be set,
6531 * but we didn't debug actual testcases to find it out.
6532 *
6533 * WaDisableRCCUnitClockGating:snb
6534 * WaDisableRCPBUnitClockGating:snb
6535 */
6536 I915_WRITE(GEN6_UCGCTL2,
6537 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
6538 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
6539
6540 /* WaStripsFansDisableFastClipPerformanceFix:snb */
6541 I915_WRITE(_3D_CHICKEN3,
6542 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
6543
6544 /*
6545 * Bspec says:
6546 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
6547 * 3DSTATE_SF number of SF output attributes is more than 16."
6548 */
6549 I915_WRITE(_3D_CHICKEN3,
6550 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
6551
6552 /*
6553 * According to the spec the following bits should be
6554 * set in order to enable memory self-refresh and fbc:
6555 * The bit21 and bit22 of 0x42000
6556 * The bit21 and bit22 of 0x42004
6557 * The bit5 and bit7 of 0x42020
6558 * The bit14 of 0x70180
6559 * The bit14 of 0x71180
6560 *
6561 * WaFbcAsynchFlipDisableFbcQueue:snb
6562 */
6563 I915_WRITE(ILK_DISPLAY_CHICKEN1,
6564 I915_READ(ILK_DISPLAY_CHICKEN1) |
6565 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
6566 I915_WRITE(ILK_DISPLAY_CHICKEN2,
6567 I915_READ(ILK_DISPLAY_CHICKEN2) |
6568 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
6569 I915_WRITE(ILK_DSPCLK_GATE_D,
6570 I915_READ(ILK_DSPCLK_GATE_D) |
6571 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
6572 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
6573
6574 g4x_disable_trickle_feed(dev);
6575
6576 cpt_init_clock_gating(dev);
6577
6578 gen6_check_mch_setup(dev);
6579 }
6580
6581 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
6582 {
6583 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
6584
6585 /*
6586 * WaVSThreadDispatchOverride:ivb,vlv
6587 *
6588 * This actually overrides the dispatch
6589 * mode for all thread types.
6590 */
6591 reg &= ~GEN7_FF_SCHED_MASK;
6592 reg |= GEN7_FF_TS_SCHED_HW;
6593 reg |= GEN7_FF_VS_SCHED_HW;
6594 reg |= GEN7_FF_DS_SCHED_HW;
6595
6596 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
6597 }
6598
6599 static void lpt_init_clock_gating(struct drm_device *dev)
6600 {
6601 struct drm_i915_private *dev_priv = dev->dev_private;
6602
6603 /*
6604 * TODO: this bit should only be enabled when really needed, then
6605 * disabled when not needed anymore in order to save power.
6606 */
6607 if (HAS_PCH_LPT_LP(dev))
6608 I915_WRITE(SOUTH_DSPCLK_GATE_D,
6609 I915_READ(SOUTH_DSPCLK_GATE_D) |
6610 PCH_LP_PARTITION_LEVEL_DISABLE);
6611
6612 /* WADPOClockGatingDisable:hsw */
6613 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
6614 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
6615 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
6616 }
6617
6618 static void lpt_suspend_hw(struct drm_device *dev)
6619 {
6620 struct drm_i915_private *dev_priv = dev->dev_private;
6621
6622 if (HAS_PCH_LPT_LP(dev)) {
6623 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
6624
6625 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
6626 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
6627 }
6628 }
6629
6630 static void broadwell_init_clock_gating(struct drm_device *dev)
6631 {
6632 struct drm_i915_private *dev_priv = dev->dev_private;
6633 enum i915_pipe pipe;
6634 uint32_t misccpctl;
6635
6636 ilk_init_lp_watermarks(dev);
6637
6638 /* WaSwitchSolVfFArbitrationPriority:bdw */
6639 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6640
6641 /* WaPsrDPAMaskVBlankInSRD:bdw */
6642 I915_WRITE(CHICKEN_PAR1_1,
6643 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
6644
6645 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
6646 for_each_pipe(dev_priv, pipe) {
6647 I915_WRITE(CHICKEN_PIPESL_1(pipe),
6648 I915_READ(CHICKEN_PIPESL_1(pipe)) |
6649 BDW_DPRS_MASK_VBLANK_SRD);
6650 }
6651
6652 /* WaVSRefCountFullforceMissDisable:bdw */
6653 /* WaDSRefCountFullforceMissDisable:bdw */
6654 I915_WRITE(GEN7_FF_THREAD_MODE,
6655 I915_READ(GEN7_FF_THREAD_MODE) &
6656 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6657
6658 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6659 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6660
6661 /* WaDisableSDEUnitClockGating:bdw */
6662 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6663 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6664
6665 /*
6666 * WaProgramL3SqcReg1Default:bdw
6667 * WaTempDisableDOPClkGating:bdw
6668 */
6669 misccpctl = I915_READ(GEN7_MISCCPCTL);
6670 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
6671 I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
6672 /*
6673 * Wait at least 100 clocks before re-enabling clock gating. See
6674 * the definition of L3SQCREG1 in BSpec.
6675 */
6676 POSTING_READ(GEN8_L3SQCREG1);
6677 udelay(1);
6678 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
6679
6680 /*
6681 * WaGttCachingOffByDefault:bdw
6682 * GTT cache may not work with big pages, so if those
6683 * are ever enabled GTT cache may need to be disabled.
6684 */
6685 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6686
6687 lpt_init_clock_gating(dev);
6688 }
6689
6690 static void haswell_init_clock_gating(struct drm_device *dev)
6691 {
6692 struct drm_i915_private *dev_priv = dev->dev_private;
6693
6694 ilk_init_lp_watermarks(dev);
6695
6696 /* L3 caching of data atomics doesn't work -- disable it. */
6697 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6698 I915_WRITE(HSW_ROW_CHICKEN3,
6699 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6700
6701 /* This is required by WaCatErrorRejectionIssue:hsw */
6702 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6703 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6704 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6705
6706 /* WaVSRefCountFullforceMissDisable:hsw */
6707 I915_WRITE(GEN7_FF_THREAD_MODE,
6708 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6709
6710 /* WaDisable_RenderCache_OperationalFlush:hsw */
6711 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6712
6713 /* enable HiZ Raw Stall Optimization */
6714 I915_WRITE(CACHE_MODE_0_GEN7,
6715 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6716
6717 /* WaDisable4x2SubspanOptimization:hsw */
6718 I915_WRITE(CACHE_MODE_1,
6719 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6720
6721 /*
6722 * BSpec recommends 8x4 when MSAA is used,
6723 * however in practice 16x4 seems fastest.
6724 *
6725 * Note that PS/WM thread counts depend on the WIZ hashing
6726 * disable bit, which we don't touch here, but it's good
6727 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6728 */
6729 I915_WRITE(GEN7_GT_MODE,
6730 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6731
6732 /* WaSampleCChickenBitEnable:hsw */
6733 I915_WRITE(HALF_SLICE_CHICKEN3,
6734 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6735
6736 /* WaSwitchSolVfFArbitrationPriority:hsw */
6737 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6738
6739 /* WaRsPkgCStateDisplayPMReq:hsw */
6740 I915_WRITE(CHICKEN_PAR1_1,
6741 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6742
6743 lpt_init_clock_gating(dev);
6744 }
6745
6746 static void ivybridge_init_clock_gating(struct drm_device *dev)
6747 {
6748 struct drm_i915_private *dev_priv = dev->dev_private;
6749 uint32_t snpcr;
6750
6751 ilk_init_lp_watermarks(dev);
6752
6753 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6754
6755 /* WaDisableEarlyCull:ivb */
6756 I915_WRITE(_3D_CHICKEN3,
6757 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6758
6759 /* WaDisableBackToBackFlipFix:ivb */
6760 I915_WRITE(IVB_CHICKEN3,
6761 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6762 CHICKEN3_DGMG_DONE_FIX_DISABLE);
6763
6764 /* WaDisablePSDDualDispatchEnable:ivb */
6765 if (IS_IVB_GT1(dev))
6766 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6767 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6768
6769 /* WaDisable_RenderCache_OperationalFlush:ivb */
6770 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6771
6772 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6773 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6774 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6775
6776 /* WaApplyL3ControlAndL3ChickenMode:ivb */
6777 I915_WRITE(GEN7_L3CNTLREG1,
6778 GEN7_WA_FOR_GEN7_L3_CONTROL);
6779 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6780 GEN7_WA_L3_CHICKEN_MODE);
6781 if (IS_IVB_GT1(dev))
6782 I915_WRITE(GEN7_ROW_CHICKEN2,
6783 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6784 else {
6785 /* must write both registers */
6786 I915_WRITE(GEN7_ROW_CHICKEN2,
6787 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6788 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6789 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6790 }
6791
6792 /* WaForceL3Serialization:ivb */
6793 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6794 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6795
6796 /*
6797 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6798 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6799 */
6800 I915_WRITE(GEN6_UCGCTL2,
6801 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6802
6803 /* This is required by WaCatErrorRejectionIssue:ivb */
6804 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6805 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6806 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6807
6808 g4x_disable_trickle_feed(dev);
6809
6810 gen7_setup_fixed_func_scheduler(dev_priv);
6811
6812 if (0) { /* causes HiZ corruption on ivb:gt1 */
6813 /* enable HiZ Raw Stall Optimization */
6814 I915_WRITE(CACHE_MODE_0_GEN7,
6815 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6816 }
6817
6818 /* WaDisable4x2SubspanOptimization:ivb */
6819 I915_WRITE(CACHE_MODE_1,
6820 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6821
6822 /*
6823 * BSpec recommends 8x4 when MSAA is used,
6824 * however in practice 16x4 seems fastest.
6825 *
6826 * Note that PS/WM thread counts depend on the WIZ hashing
6827 * disable bit, which we don't touch here, but it's good
6828 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6829 */
6830 I915_WRITE(GEN7_GT_MODE,
6831 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6832
6833 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6834 snpcr &= ~GEN6_MBC_SNPCR_MASK;
6835 snpcr |= GEN6_MBC_SNPCR_MED;
6836 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6837
6838 if (!HAS_PCH_NOP(dev))
6839 cpt_init_clock_gating(dev);
6840
6841 gen6_check_mch_setup(dev);
6842 }
6843
6844 static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
6845 {
6846 u32 val;
6847
6848 /*
6849 * On driver load, a pipe may be active and driving a DSI display.
6850 * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
6851 * (and never recovering) in this case. intel_dsi_post_disable() will
6852 * clear it when we turn off the display.
6853 */
6854 val = I915_READ(DSPCLK_GATE_D);
6855 val &= DPOUNIT_CLOCK_GATE_DISABLE;
6856 val |= VRHUNIT_CLOCK_GATE_DISABLE;
6857 I915_WRITE(DSPCLK_GATE_D, val);
6858
6859 /*
6860 * Disable trickle feed and enable pnd deadline calculation
6861 */
6862 I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6863 I915_WRITE(CBR1_VLV, 0);
6864 }
6865
6866 static void valleyview_init_clock_gating(struct drm_device *dev)
6867 {
6868 struct drm_i915_private *dev_priv = dev->dev_private;
6869
6870 vlv_init_display_clock_gating(dev_priv);
6871
6872 /* WaDisableEarlyCull:vlv */
6873 I915_WRITE(_3D_CHICKEN3,
6874 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6875
6876 /* WaDisableBackToBackFlipFix:vlv */
6877 I915_WRITE(IVB_CHICKEN3,
6878 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6879 CHICKEN3_DGMG_DONE_FIX_DISABLE);
6880
6881 /* WaPsdDispatchEnable:vlv */
6882 /* WaDisablePSDDualDispatchEnable:vlv */
6883 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6884 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6885 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6886
6887 /* WaDisable_RenderCache_OperationalFlush:vlv */
6888 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6889
6890 /* WaForceL3Serialization:vlv */
6891 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6892 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6893
6894 /* WaDisableDopClockGating:vlv */
6895 I915_WRITE(GEN7_ROW_CHICKEN2,
6896 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6897
6898 /* This is required by WaCatErrorRejectionIssue:vlv */
6899 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6900 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6901 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6902
6903 gen7_setup_fixed_func_scheduler(dev_priv);
6904
6905 /*
6906 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6907 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6908 */
6909 I915_WRITE(GEN6_UCGCTL2,
6910 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6911
6912 /* WaDisableL3Bank2xClockGate:vlv
6913 	 * Disabling L3 clock gating - MMIO 940c[25] = 1
6914 	 * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
6915 I915_WRITE(GEN7_UCGCTL4,
6916 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6917
6918 /*
6919 * BSpec says this must be set, even though
6920 * WaDisable4x2SubspanOptimization isn't listed for VLV.
6921 */
6922 I915_WRITE(CACHE_MODE_1,
6923 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6924
6925 /*
6926 * BSpec recommends 8x4 when MSAA is used,
6927 * however in practice 16x4 seems fastest.
6928 *
6929 * Note that PS/WM thread counts depend on the WIZ hashing
6930 * disable bit, which we don't touch here, but it's good
6931 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6932 */
6933 I915_WRITE(GEN7_GT_MODE,
6934 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6935
6936 /*
6937 * WaIncreaseL3CreditsForVLVB0:vlv
6938 * This is the hardware default actually.
6939 */
6940 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6941
6942 /*
6943 * WaDisableVLVClockGating_VBIIssue:vlv
6944 	 * Disable clock gating on the GCFG unit to prevent a delay
6945 * in the reporting of vblank events.
6946 */
6947 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6948 }
6949
6950 static void cherryview_init_clock_gating(struct drm_device *dev)
6951 {
6952 struct drm_i915_private *dev_priv = dev->dev_private;
6953
6954 vlv_init_display_clock_gating(dev_priv);
6955
6956 /* WaVSRefCountFullforceMissDisable:chv */
6957 /* WaDSRefCountFullforceMissDisable:chv */
6958 I915_WRITE(GEN7_FF_THREAD_MODE,
6959 I915_READ(GEN7_FF_THREAD_MODE) &
6960 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6961
6962 /* WaDisableSemaphoreAndSyncFlipWait:chv */
6963 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6964 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6965
6966 /* WaDisableCSUnitClockGating:chv */
6967 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6968 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6969
6970 /* WaDisableSDEUnitClockGating:chv */
6971 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6972 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6973
6974 /*
6975 * GTT cache may not work with big pages, so if those
6976 * are ever enabled GTT cache may need to be disabled.
6977 */
6978 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
6979 }
6980
6981 static void g4x_init_clock_gating(struct drm_device *dev)
6982 {
6983 struct drm_i915_private *dev_priv = dev->dev_private;
6984 uint32_t dspclk_gate;
6985
6986 I915_WRITE(RENCLK_GATE_D1, 0);
6987 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6988 GS_UNIT_CLOCK_GATE_DISABLE |
6989 CL_UNIT_CLOCK_GATE_DISABLE);
6990 I915_WRITE(RAMCLK_GATE_D, 0);
6991 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6992 OVRUNIT_CLOCK_GATE_DISABLE |
6993 OVCUNIT_CLOCK_GATE_DISABLE;
6994 if (IS_GM45(dev))
6995 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6996 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6997
6998 /* WaDisableRenderCachePipelinedFlush */
6999 I915_WRITE(CACHE_MODE_0,
7000 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
7001
7002 /* WaDisable_RenderCache_OperationalFlush:g4x */
7003 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7004
7005 g4x_disable_trickle_feed(dev);
7006 }
7007
7008 static void crestline_init_clock_gating(struct drm_device *dev)
7009 {
7010 struct drm_i915_private *dev_priv = dev->dev_private;
7011
7012 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
7013 I915_WRITE(RENCLK_GATE_D2, 0);
7014 I915_WRITE(DSPCLK_GATE_D, 0);
7015 I915_WRITE(RAMCLK_GATE_D, 0);
7016 I915_WRITE16(DEUC, 0);
7017 I915_WRITE(MI_ARB_STATE,
7018 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7019
7020 /* WaDisable_RenderCache_OperationalFlush:gen4 */
7021 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7022 }
7023
7024 static void broadwater_init_clock_gating(struct drm_device *dev)
7025 {
7026 struct drm_i915_private *dev_priv = dev->dev_private;
7027
7028 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
7029 I965_RCC_CLOCK_GATE_DISABLE |
7030 I965_RCPB_CLOCK_GATE_DISABLE |
7031 I965_ISC_CLOCK_GATE_DISABLE |
7032 I965_FBC_CLOCK_GATE_DISABLE);
7033 I915_WRITE(RENCLK_GATE_D2, 0);
7034 I915_WRITE(MI_ARB_STATE,
7035 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7036
7037 /* WaDisable_RenderCache_OperationalFlush:gen4 */
7038 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
7039 }
7040
7041 static void gen3_init_clock_gating(struct drm_device *dev)
7042 {
7043 struct drm_i915_private *dev_priv = dev->dev_private;
7044 u32 dstate = I915_READ(D_STATE);
7045
7046 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
7047 DSTATE_DOT_CLOCK_GATING;
7048 I915_WRITE(D_STATE, dstate);
7049
7050 if (IS_PINEVIEW(dev))
7051 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
7052
7053 /* IIR "flip pending" means done if this bit is set */
7054 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
7055
7056 /* interrupts should cause a wake up from C3 */
7057 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
7058
7059 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
7060 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
7061
7062 I915_WRITE(MI_ARB_STATE,
7063 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
7064 }
7065
7066 static void i85x_init_clock_gating(struct drm_device *dev)
7067 {
7068 struct drm_i915_private *dev_priv = dev->dev_private;
7069
7070 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
7071
7072 /* interrupts should cause a wake up from C3 */
7073 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
7074 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
7075
7076 I915_WRITE(MEM_MODE,
7077 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
7078 }
7079
7080 static void i830_init_clock_gating(struct drm_device *dev)
7081 {
7082 struct drm_i915_private *dev_priv = dev->dev_private;
7083
7084 I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
7085
7086 I915_WRITE(MEM_MODE,
7087 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
7088 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
7089 }
7090
7091 void intel_init_clock_gating(struct drm_device *dev)
7092 {
7093 struct drm_i915_private *dev_priv = dev->dev_private;
7094
7095 if (dev_priv->display.init_clock_gating)
7096 dev_priv->display.init_clock_gating(dev);
7097 }
7098
7099 void intel_suspend_hw(struct drm_device *dev)
7100 {
7101 if (HAS_PCH_LPT(dev))
7102 lpt_suspend_hw(dev);
7103 }
7104
7105 /* Set up chip specific power management-related functions */
7106 void intel_init_pm(struct drm_device *dev)
7107 {
7108 struct drm_i915_private *dev_priv = dev->dev_private;
7109
7110 intel_fbc_init(dev_priv);
7111
7112 /* For cxsr */
7113 if (IS_PINEVIEW(dev))
7114 i915_pineview_get_mem_freq(dev);
7115 else if (IS_GEN5(dev))
7116 i915_ironlake_get_mem_freq(dev);
7117
7118 /* For FIFO watermark updates */
7119 if (INTEL_INFO(dev)->gen >= 9) {
7120 skl_setup_wm_latency(dev);
7121
7122 if (IS_BROXTON(dev))
7123 dev_priv->display.init_clock_gating =
7124 bxt_init_clock_gating;
7125 dev_priv->display.update_wm = skl_update_wm;
7126 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
7127 } else if (HAS_PCH_SPLIT(dev)) {
7128 ilk_setup_wm_latency(dev);
7129
7130 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
7131 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
7132 (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
7133 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
7134 dev_priv->display.update_wm = ilk_update_wm;
7135 dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
7136 } else {
7137 DRM_DEBUG_KMS("Failed to read display plane latency. "
7138 "Disable CxSR\n");
7139 }
7140
7141 if (IS_GEN5(dev))
7142 dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
7143 else if (IS_GEN6(dev))
7144 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
7145 else if (IS_IVYBRIDGE(dev))
7146 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
7147 else if (IS_HASWELL(dev))
7148 dev_priv->display.init_clock_gating = haswell_init_clock_gating;
7149 else if (INTEL_INFO(dev)->gen == 8)
7150 dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
7151 } else if (IS_CHERRYVIEW(dev)) {
7152 vlv_setup_wm_latency(dev);
7153
7154 dev_priv->display.update_wm = vlv_update_wm;
7155 dev_priv->display.init_clock_gating =
7156 cherryview_init_clock_gating;
7157 } else if (IS_VALLEYVIEW(dev)) {
7158 vlv_setup_wm_latency(dev);
7159
7160 dev_priv->display.update_wm = vlv_update_wm;
7161 dev_priv->display.init_clock_gating =
7162 valleyview_init_clock_gating;
7163 } else if (IS_PINEVIEW(dev)) {
7164 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
7165 dev_priv->is_ddr3,
7166 dev_priv->fsb_freq,
7167 dev_priv->mem_freq)) {
7168 DRM_INFO("failed to find known CxSR latency "
7169 "(found ddr%s fsb freq %d, mem freq %d), "
7170 "disabling CxSR\n",
7171 (dev_priv->is_ddr3 == 1) ? "3" : "2",
7172 dev_priv->fsb_freq, dev_priv->mem_freq);
7173 /* Disable CxSR and never update its watermark again */
7174 intel_set_memory_cxsr(dev_priv, false);
7175 dev_priv->display.update_wm = NULL;
7176 } else
7177 dev_priv->display.update_wm = pineview_update_wm;
7178 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7179 } else if (IS_G4X(dev)) {
7180 dev_priv->display.update_wm = g4x_update_wm;
7181 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
7182 } else if (IS_GEN4(dev)) {
7183 dev_priv->display.update_wm = i965_update_wm;
7184 if (IS_CRESTLINE(dev))
7185 dev_priv->display.init_clock_gating = crestline_init_clock_gating;
7186 else if (IS_BROADWATER(dev))
7187 dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
7188 } else if (IS_GEN3(dev)) {
7189 dev_priv->display.update_wm = i9xx_update_wm;
7190 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
7191 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
7192 } else if (IS_GEN2(dev)) {
7193 if (INTEL_INFO(dev)->num_pipes == 1) {
7194 dev_priv->display.update_wm = i845_update_wm;
7195 dev_priv->display.get_fifo_size = i845_get_fifo_size;
7196 } else {
7197 dev_priv->display.update_wm = i9xx_update_wm;
7198 dev_priv->display.get_fifo_size = i830_get_fifo_size;
7199 }
7200
7201 if (IS_I85X(dev) || IS_I865G(dev))
7202 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
7203 else
7204 dev_priv->display.init_clock_gating = i830_init_clock_gating;
7205 } else {
7206 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
7207 }
7208 }
7209
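/*
 * GEN6+ pcode mailbox read.  The caller must hold rps.hw_lock.  The
 * protocol is: make sure the mailbox is idle (GEN6_PCODE_READY clear),
 * write the data register(s), kick off the command by writing the mailbox
 * register with GEN6_PCODE_READY set, then poll (up to 500ms here) for the
 * hardware to clear GEN6_PCODE_READY and read back the result.
 */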
7210 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
7211 {
7212 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7213
7214 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7215 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
7216 return -EAGAIN;
7217 }
7218
7219 I915_WRITE(GEN6_PCODE_DATA, *val);
7220 I915_WRITE(GEN6_PCODE_DATA1, 0);
7221 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7222
7223 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7224 500)) {
7225 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
7226 return -ETIMEDOUT;
7227 }
7228
7229 *val = I915_READ(GEN6_PCODE_DATA);
7230 I915_WRITE(GEN6_PCODE_DATA, 0);
7231
7232 return 0;
7233 }
7234
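/*
 * Same mailbox protocol as sandybridge_pcode_read(), but the data register
 * is only written, never read back.  A minimal usage sketch (the mailbox
 * command shown is just one of the commands defined in i915_reg.h):
 *
 *	mutex_lock(&dev_priv->rps.hw_lock);
 *	ret = sandybridge_pcode_write(dev_priv,
 *				      GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
 *	mutex_unlock(&dev_priv->rps.hw_lock);
 */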
7235 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
7236 {
7237 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
7238
7239 if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
7240 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
7241 return -EAGAIN;
7242 }
7243
7244 I915_WRITE(GEN6_PCODE_DATA, val);
7245 I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
7246
7247 if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
7248 500)) {
7249 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
7250 return -ETIMEDOUT;
7251 }
7252
7253 I915_WRITE(GEN6_PCODE_DATA, 0);
7254
7255 return 0;
7256 }
7257
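/*
 * The Valleyview/Cherryview GPU frequency opcodes are expressed relative to
 * the CZ clock.  Map the CZ clock frequency (in MHz) to the divider used by
 * the opcode <-> MHz conversions below; -1 means the CZ clock is one we do
 * not know how to handle.
 */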
7258 static int vlv_gpu_freq_div(unsigned int czclk_freq)
7259 {
7260 switch (czclk_freq) {
7261 case 200:
7262 return 10;
7263 case 267:
7264 return 12;
7265 case 320:
7266 case 333:
7267 return 16;
7268 case 400:
7269 return 20;
7270 default:
7271 return -1;
7272 }
7273 }
7274
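/* Convert a Valleyview RPS opcode to a GPU frequency in MHz. */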
7275 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
7276 {
7277 int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7278
7279 div = vlv_gpu_freq_div(czclk_freq);
7280 if (div < 0)
7281 return div;
7282
7283 return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
7284 }
7285
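/* Inverse of byt_gpu_freq(): convert a frequency in MHz back to an opcode. */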
7286 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
7287 {
7288 int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7289
7290 mul = vlv_gpu_freq_div(czclk_freq);
7291 if (mul < 0)
7292 return mul;
7293
7294 return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
7295 }
7296
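/* Convert a Cherryview RPS opcode to a GPU frequency in MHz. */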
7297 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
7298 {
7299 int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7300
7301 div = vlv_gpu_freq_div(czclk_freq) / 2;
7302 if (div < 0)
7303 return div;
7304
7305 return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
7306 }
7307
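/* Inverse of chv_gpu_freq(); the result is rounded to an even opcode. */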
7308 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
7309 {
7310 int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
7311
7312 mul = vlv_gpu_freq_div(czclk_freq) / 2;
7313 if (mul < 0)
7314 return mul;
7315
7316 /* CHV needs even values */
7317 return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
7318 }
7319
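/*
 * Convert a platform-specific RPS frequency value to MHz.  On GEN9 the value
 * is scaled by GT_FREQUENCY_MULTIPLIER/GEN9_FREQ_SCALER, on VLV/CHV it is a
 * CZ-clock-relative opcode, and everywhere else it is a simple multiple of
 * GT_FREQUENCY_MULTIPLIER (e.g. a ratio of 10 would be 500 MHz, assuming the
 * usual 50 MHz multiplier).
 */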
7320 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
7321 {
7322 if (IS_GEN9(dev_priv->dev))
7323 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
7324 GEN9_FREQ_SCALER);
7325 else if (IS_CHERRYVIEW(dev_priv->dev))
7326 return chv_gpu_freq(dev_priv, val);
7327 else if (IS_VALLEYVIEW(dev_priv->dev))
7328 return byt_gpu_freq(dev_priv, val);
7329 else
7330 return val * GT_FREQUENCY_MULTIPLIER;
7331 }
7332
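/* Inverse of intel_gpu_freq(): convert MHz to the platform's RPS value. */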
7333 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
7334 {
7335 if (IS_GEN9(dev_priv->dev))
7336 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
7337 GT_FREQUENCY_MULTIPLIER);
7338 else if (IS_CHERRYVIEW(dev_priv->dev))
7339 return chv_freq_opcode(dev_priv, val);
7340 else if (IS_VALLEYVIEW(dev_priv->dev))
7341 return byt_freq_opcode(dev_priv, val);
7342 else
7343 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
7344 }
7345
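/*
 * Deferred RPS boost: a small work item that bumps the GPU frequency on
 * behalf of a request that is still outstanding by the time the worker runs.
 */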
7346 struct request_boost {
7347 struct work_struct work;
7348 struct drm_i915_gem_request *req;
7349 };
7350
7351 static void __intel_rps_boost_work(struct work_struct *work)
7352 {
7353 struct request_boost *boost = container_of(work, struct request_boost, work);
7354 struct drm_i915_gem_request *req = boost->req;
7355
7356 if (!i915_gem_request_completed(req, true))
7357 gen6_rps_boost(to_i915(req->ring->dev), NULL,
7358 req->emitted_jiffies);
7359
7360 i915_gem_request_unreference__unlocked(req);
7361 kfree(boost);
7362 }
7363
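/*
 * Queue an RPS boost for @req unless it has already completed.  The
 * allocation is GFP_ATOMIC, presumably because callers may not be able to
 * sleep; a failed allocation simply means no boost.
 */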
7364 void intel_queue_rps_boost_for_request(struct drm_device *dev,
7365 struct drm_i915_gem_request *req)
7366 {
7367 struct request_boost *boost;
7368
7369 if (req == NULL || INTEL_INFO(dev)->gen < 6)
7370 return;
7371
7372 if (i915_gem_request_completed(req, true))
7373 return;
7374
7375 boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
7376 if (boost == NULL)
7377 return;
7378
7379 i915_gem_request_reference(req);
7380 boost->req = req;
7381
7382 INIT_WORK(&boost->work, __intel_rps_boost_work);
7383 queue_work(to_i915(dev)->wq, &boost->work);
7384 }
7385
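/*
 * One-time power management setup: initialize the RPS locks, lists and the
 * delayed powersave worker, and mark the runtime PM state as not suspended.
 */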
7386 void intel_pm_setup(struct drm_device *dev)
7387 {
7388 struct drm_i915_private *dev_priv = dev->dev_private;
7389
7390 #ifdef __NetBSD__
7391 linux_mutex_init(&dev_priv->rps.hw_lock);
7392 #else
7393 mutex_init(&dev_priv->rps.hw_lock);
7394 #endif
7395 spin_lock_init(&dev_priv->rps.client_lock);
7396
7397 INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
7398 intel_gen6_powersave_work);
7399 INIT_LIST_HEAD(&dev_priv->rps.clients);
7400 INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
7401 INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
7402
7403 dev_priv->pm.suspended = false;
7404 }
7405