/* $NetBSD: acpi_cpu_md.c,v 1.78 2016/12/08 11:31:12 nat Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen (at) iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.78 2016/12/08 11:31:12 nat Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>
#include <x86/x86/tsc.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

static char	  native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void	 acpicpu_md_pstate_hwf_reset(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		     uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		     uint32_t, uint32_t);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog *acpicpu_log = NULL;

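/*
 * MD match and attach hooks for acpicpu(4). Both return the
 * cpu_info supplied in the cpufeature attach arguments; the
 * match hook only accepts the "frequency" feature.
 */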
struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

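/*
 * Detect MD capabilities and quirks for the calling CPU. The
 * returned ACPICPU_FLAG_* bits guide the MI code in choosing
 * C-, P-, and T-state methods, and note known chipset bugs.
 */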
uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	/*
	 * Detect whether TSC is invariant. If it is not, we keep the flag to
	 * note that TSC will not run at constant rate. Depending on the CPU,
	 * this may affect P- and T-state changes, but especially relevant
	 * are C-states; with a variant TSC, states deeper than C1 may
	 * completely stop the counter.
	 */
	if (tsc_is_invariant())
		val &= ~ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;

		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect MSR-based P-states,
			 * and AMD's "turbo" (Core Performance Boost),
			 * respectively.
			 */
			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in the family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

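/*
 * Disable the AMD C1E feature: if either the SMI or the C1E
 * bit is set in MSR_CMPHALT, clear both, so that the regular
 * C1 halt state is not silently promoted to a deeper one.
 */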
void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

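/*
 * Install the ACPI C-state idle loop. The native cpu_idle(9)
 * loop is saved first so that it can be restored on detach.
 * A wakeup IPI is requested only if at least one state uses
 * the HLT method, as MWAIT is woken by a plain store to the
 * monitored word.
 */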
int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

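/*
 * Restore the native idle loop and wait, via a dummy
 * cross-call, until every CPU has left acpicpu_cstate_idle().
 */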
int
acpicpu_md_cstate_stop(void)
{
	static char text[16];
	void (*func)(void);
	uint64_t xc;
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

	ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

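/*
 * Prepare the MD parts of P-state support: enable Enhanced
 * SpeedStep in IA32_MISC_ENABLE where needed, zero the
 * APERF/MPERF counters on all CPUs, and register the
 * backwards-compatible sysctl nodes.
 */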
int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

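/*
 * Fix up the P-states obtained from the firmware with MSR
 * addresses and masks that are known to be correct for the
 * detected vendor and CPU family.
 */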
int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if (sc->sc_pstate_count >= 2 &&
		    (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. The latter
 * increments at the rate of the fixed maximum frequency
 * configured during the boot, whereas IA32_APERF counts at
 * the rate of the actual frequency. Note that the MSRs must
 * be read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
 *
 * The function thus returns the percentage of the actual
 * frequency in terms of the maximum frequency of the calling
 * CPU since the last call. A value of zero implies an error.
 *
 * For further details, refer to:
 *
 *	Intel Corporation: Intel 64 and IA-32 Architectures
 *	Software Developer's Manual. Section 13.2, Volume 3A:
 *	System Programming Guide, Part 1. July, 2008.
 *
 *	Advanced Micro Devices: BIOS and Kernel Developer's
 *	Guide (BKDG) for AMD Family 10h Processors. Section
 *	2.4.5, Revision 3.48, April 2010.
 */
uint8_t
acpicpu_md_pstate_hwf(struct cpu_info *ci)
{
	struct acpicpu_softc *sc;
	uint64_t aperf, mperf;
	uint8_t rv = 0;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
		return 0;

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	x86_disable_intr();

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

static void
acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return;

	x86_disable_intr();

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	x86_enable_intr();

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

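/*
 * Determine the current P-state by reading the status MSR and
 * matching the (masked) value against the known P-states.
 */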
int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	/*
	 * Pick any P-state for the status address.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	/*
	 * Search for the value from known P-states.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	/*
	 * If the value was not found, try APERF/MPERF.
	 * The state is P0 if the return value is 100 %.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {

		KASSERT(sc->sc_pstate_count > 0);
		KASSERT(sc->sc_pstate[0].ps_freq != 0);

		if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
			*freq = sc->sc_pstate[0].ps_freq;
			return 0;
		}
	}

	return EIO;
}

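/*
 * Switch to the requested P-state by writing its control value
 * to the control MSR, preserving the bits outside the mask.
 */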
int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	uint64_t val = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	/*
	 * If the mask is set, do a read-modify-write.
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

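/*
 * Perform a FID/VID frequency transition for AMD family 0Fh,
 * presumably following AMD's documented FID/VID transition
 * protocol: (1) step the voltage towards the target VID,
 * (2) step the FID towards the target in increments bounded
 * by the VCO frequency, and (3) set the final VID. The VST,
 * PLL, and IRT fields of the control value give the required
 * settling times.
 */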
static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

	/*
	 * Phase 1.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i == 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

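/*
 * Determine the current T-state by reading the thermal
 * clock-modulation control MSR and matching the value
 * against the known T-states.
 */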
int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

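/*
 * Set the given T-state by programming the clock-modulation
 * duty cycle, and poll until the new status is visible.
 */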
int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

	val = ts->ts_control;
	val = val & __BITS(0, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		if (len >= sizeof(buf))
			break;
		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}