/* $NetBSD: acpi_cpu_md.c,v 1.71.2.2 2013/11/25 08:23:31 bouyer Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.71.2.2 2013/11/25 08:23:31 bouyer Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>
/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)
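
/*
 * Note: in IA32_MISC_ENABLE (MSR 0x1a0), bit 16 enables Enhanced Intel
 * SpeedStep and bit 38, when set, disables Turbo Boost. This is only a
 * summary; the Intel SDM has the authoritative bit definitions.
 */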

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))
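/*
 * The macro above maps a FID to the corresponding "VCO FID" used during
 * family 0Fh frequency transitions: FIDs below 8 map to 8 + 2 * FID,
 * larger FIDs map to themselves (e.g. FID 2 -> 12, FID 10 -> 10). The
 * naming follows the AMD family 0Fh BKDG.
 */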

static char	native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void	 acpicpu_md_pstate_hwf_reset(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		     uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		     uint32_t, uint32_t);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog	    *acpicpu_log = NULL;
struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that both the local APIC timer
	 * and the TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		/*
		 * Detect whether the TSC is invariant. If it is not,
		 * we keep the flag to note that the TSC will not run
		 * at a constant rate. Depending on the CPU, this may
		 * affect P- and T-state changes, but especially
		 * relevant are C-states; with a variant TSC, states
		 * deeper than C1 may completely stop the counter.
		 */
		x86_cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000007) {

			x86_cpuid(0x80000007, regs);

			if ((regs[3] & __BIT(8)) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect invariant TSC,
			 * MSR-based P-states, and AMD's "turbo"
			 * (Core Performance Boost), respectively.
			 */
			if ((regs[3] & CPUID_APM_TSC) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;

			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for the PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

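/*
 * Disable the so-called C1E ("enhanced halt") promotion by clearing the
 * SMI and C1E trigger bits in MSR_CMPHALT. When these bits are set, a
 * plain HLT may be promoted to C1E behind the back of the operating
 * system, which on some systems is known to stop the local APIC timer.
 */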
void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_cstate_stop(void)
{
	static char text[16];
	void (*func)(void);
	uint64_t xc;
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

	ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

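		/*
		 * The MWAIT hint carries the target C-state in bits
		 * 7:4 of EAX, so C1, C2, and C3 encode as 0x00, 0x10,
		 * and 0x20, respectively (an informal summary of the
		 * MONITOR/MWAIT extensions).
		 */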
		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
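		 *
		 * For example (illustrative numbers): a CPU whose P1
		 * frequency is 2500 MHz would typically advertise a
		 * 2501 MHz turbo P0-state, which the check below then
		 * tags with ACPICPU_FLAG_P_TURBO.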
		 */
		if (sc->sc_pstate_count >= 2 &&
		   (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. The latter
 * increments at the rate of the fixed maximum frequency
 * configured during boot, whereas IA32_APERF counts at the
 * rate of the actual frequency. Note that the MSRs must be
 * read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
 *
 * The function thus returns the percentage of the actual
 * frequency in terms of the maximum frequency of the calling
 * CPU since the last call. A value of zero implies an error.
 *
 * For further details, refer to:
 *
 *	Intel Corporation: Intel 64 and IA-32 Architectures
 *	Software Developer's Manual. Section 13.2, Volume 3A:
 *	System Programming Guide, Part 1. July, 2008.
 *
 *	Advanced Micro Devices: BIOS and Kernel Developer's
 *	Guide (BKDG) for AMD Family 10h Processors. Section
 *	2.4.5, Revision 3.48, April 2010.
 */
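/*
 * As a worked example with made-up deltas: if IA32_APERF advanced by
 * 1,500,000 ticks and IA32_MPERF by 3,000,000 ticks between two calls,
 * the function returns 50, i.e. the CPU averaged roughly half of its
 * maximum frequency over that interval.
 */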
uint8_t
acpicpu_md_pstate_hwf(struct cpu_info *ci)
{
	struct acpicpu_softc *sc;
	uint64_t aperf, mperf;
	uint8_t rv = 0;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
		return 0;

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	x86_disable_intr();

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

static void
acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return;

	x86_disable_intr();

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	x86_enable_intr();

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	/*
	 * Pick any P-state for the status address.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	/*
	 * Search for the value from known P-states.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	/*
	 * If the value was not found, try APERF/MPERF.
	 * The state is P0 if the return value is 100 %.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {

		KASSERT(sc->sc_pstate_count > 0);
		KASSERT(sc->sc_pstate[0].ps_freq != 0);

		if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
			*freq = sc->sc_pstate[0].ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	uint64_t val = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	/*
	 * If the mask is set, do a read-modify-write.
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it uses ACPI, it does not comply
	 * with the ACPI specification.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

	/*
	 * Phase 1: step the current VID towards the target,
	 * one maximum voltage step (MVS) at a time.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2: step the current FID towards the target,
	 * keeping each step within two VCO FIDs.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3: write the final VID.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i < 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

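	/*
	 * Only the low bits of the control value are meaningful here:
	 * MSR_THERM_CONTROL is IA32_CLOCK_MODULATION, where the duty
	 * cycle field and the on-demand enable bit occupy bits 0-4
	 * (a summary of the Intel SDM; the values themselves come
	 * from the firmware-provided T-state tables).
	 */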
	val = ts->ts_control;
	val = val & __BITS(0, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}
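
/*
 * With the nodes created above, the legacy interface appears to
 * userland roughly as follows (example output only; the frequencies
 * are illustrative and depend on the CPU):
 *
 *	$ sysctl machdep.est.frequency
 *	machdep.est.frequency.target = 1600
 *	machdep.est.frequency.current = 1600
 *	machdep.est.frequency.available = 2400 2000 1600 800
 *
 * On AMD systems the subtree is named "powernow" instead of "est".
 */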

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}