/* $NetBSD: acpi_cpu_md.c,v 1.41 2011/02/25 09:16:00 jruoho Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen (at) iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.41 2011/02/25 09:16:00 jruoho Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, and 14h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

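/*
 * Convert a frequency ID to the corresponding VCO frequency ID,
 * used when stepping the FID in acpicpu_md_pstate_fidvid_set().
 */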
#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

static char	 native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirks_piix4(struct pci_attach_args *);
static void	 acpicpu_md_pstate_percent_reset(struct acpicpu_softc *);
static void	 acpicpu_md_pstate_percent_status(void *, void *);
static void	 acpicpu_md_pstate_status(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		     uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		     uint32_t, uint32_t);
static void	 acpicpu_md_tstate_status(void *, void *);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static bool acpicpu_pstate_status = false;
static struct sysctllog *acpicpu_log = NULL;

uint32_t
acpicpu_md_cap(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t val = 0;

	if (cpu_vendor != CPUVENDOR_IDT &&
	    cpu_vendor != CPUVENDOR_INTEL)
		return val;

	/*
	 * Basic SMP C-states (required for _CST).
	 */
	val |= ACPICPU_PDC_C_C1PT | ACPICPU_PDC_C_C2C3;

	/*
	 * If MONITOR/MWAIT is available, announce
	 * support for native instructions in all C-states.
	 */
	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_PDC_C_C1_FFH | ACPICPU_PDC_C_C2C3_FFH;

	/*
	 * Set native P- and T-states, if available.
	 */
	if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
		val |= ACPICPU_PDC_P_FFH;

	if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
		val |= ACPICPU_PDC_T_FFH;

	return val;
}

uint32_t
acpicpu_md_quirks(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x06, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HW;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		/*
		 * Detect whether TSC is invariant. If it is not,
		 * we keep the flag to note that TSC will not run
		 * at constant rate. Depending on the CPU, this may
		 * affect P- and T-state changes, but especially
		 * relevant are C-states; with variant TSC, states
		 * larger than C1 may completely stop the counter.
		 */
		x86_cpuid(0x80000000, regs);

		if (regs[0] >= 0x80000007) {

			x86_cpuid(0x80000007, regs);

			if ((regs[3] & __BIT(8)) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:
			val |= ACPICPU_FLAG_C_C1E;
			/* FALLTHROUGH */

		case 0x14: /* AMD Fusion */

			if ((regs[3] & CPUID_APM_TSC) != 0)
				val &= ~ACPICPU_FLAG_C_TSC;

			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirks_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirks_piix4(struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

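/*
 * Disable C1E ("enhanced halt") by clearing the SMI and C1E
 * trigger bits in MSR_CMPHALT, so that a plain HLT is not
 * promoted to C1E behind our back.
 */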
void
acpicpu_md_quirks_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

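/*
 * Install the ACPI C-state idle loop. The native idle routine is
 * saved for acpicpu_md_idle_stop(). A wakeup IPI is requested if
 * at least one C-state uses the HLT method.
 */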
int
acpicpu_md_idle_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_idle_stop(void)
{
	uint64_t xc;
	bool ipi;

	ipi = (native_idle != x86_cpu_idle_halt) ? false : true;
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts disabled.
 * Caller should enable interrupts after return.
 */
void
acpicpu_md_idle_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	switch (method) {

	case ACPICPU_C_STATE_FFH:

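		/*
		 * Monitor a write to ci_want_resched and enter the
		 * state with MWAIT; the target C-state is encoded
		 * in the upper nibble of the MWAIT hint (C1 = 0x00,
		 * C2 = 0x10, and so forth).
		 */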
		x86_enable_intr();
		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	const uint64_t est = __BIT(16);
	uint64_t val;

	if ((sc->sc_flags & ACPICPU_FLAG_P) == 0)
		return ENODEV;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		val = rdmsr(MSR_MISC_ENABLE);

		if ((val & est) == 0) {

			val |= est;

			wrmsr(MSR_MISC_ENABLE, val);
			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & est) == 0)
				return ENOTTY;
		}

		/*
		 * Reset the APERF and MPERF counters.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_HW) != 0)
			acpicpu_md_pstate_percent_reset(sc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log != NULL)
		sysctl_teardown(&acpicpu_log);

	return 0;
}

int
acpicpu_md_pstate_pss(struct acpicpu_softc *sc)
{
	struct acpicpu_pstate *ps, msr;
	struct cpu_info *ci = curcpu();
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0)
			sc->sc_pstate[0].ps_flags |= ACPICPU_FLAG_P_TURBO;

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID2FAMILY(ci->ci_signature);

		if (family == 0xf)
			family += CPUID2EXTFAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x14: /* AMD Fusion */
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:

			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Returns the percentage of the actual frequency in
 * terms of the maximum frequency of the calling CPU
 * since the last call. A value zero implies an error.
 */
uint8_t
acpicpu_md_pstate_percent(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	uint64_t aperf, mperf;
	uint64_t xc, rv = 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P) == 0))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HW) == 0))
		return 0;

	/*
	 * Read the IA32_APERF and IA32_MPERF counters. The first
	 * increments at the rate of the fixed maximum frequency
	 * configured during the boot, whereas APERF counts at the
	 * rate of the actual frequency. Note that the MSRs must be
	 * read without delay, and that only the ratio between
	 * IA32_APERF and IA32_MPERF is architecturally defined.
	 *
	 * For further details, refer to:
	 *
	 *	Intel Corporation: Intel 64 and IA-32 Architectures
	 *	Software Developer's Manual. Section 13.2, Volume 3A:
	 *	System Programming Guide, Part 1. July, 2008.
	 */
	x86_disable_intr();

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	xc = xc_unicast(0, acpicpu_md_pstate_percent_status, sc, NULL, ci);
	xc_wait(xc);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

static void
acpicpu_md_pstate_percent_status(void *arg1, void *arg2)
{
	struct acpicpu_softc *sc = arg1;

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);
}

static void
acpicpu_md_pstate_percent_reset(struct acpicpu_softc *sc)
{

	KASSERT((sc->sc_flags & ACPICPU_FLAG_P) != 0);
	KASSERT((sc->sc_flags & ACPICPU_FLAG_P_HW) != 0);

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	struct msr_rw_info msr;
	uint64_t xc;
	int rv = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	msr.msr_read = false;
	msr.msr_type = ps->ps_control_addr;
	msr.msr_value = ps->ps_control;

	if (__predict_true(ps->ps_control_mask != 0)) {
		msr.msr_mask = ps->ps_control_mask;
		msr.msr_read = true;
	}

	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	/*
	 * Due to several problems, we bypass the
	 * relatively expensive status check.
	 */
	if (acpicpu_pstate_status != true) {
		DELAY(ps->ps_latency);
		return 0;
	}

	xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_pstate_status, ps, &rv);
	xc_wait(xc);

	return rv;
}

static void
acpicpu_md_pstate_status(void *arg1, void *arg2)
{
	struct acpicpu_pstate *ps = arg1;
	uint64_t val;
	int i;

	for (i = val = 0; i < ACPICPU_P_STATE_RETRY; i++) {

		val = rdmsr(ps->ps_status_addr);

		if (__predict_true(ps->ps_status_mask != 0))
			val = val & ps->ps_status_mask;

		if (val == ps->ps_status)
			return;

		DELAY(ps->ps_latency);
	}

	*(uintptr_t *)arg2 = EAGAIN;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

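	/*
	 * The transition is done in three phases, as in the code
	 * below: the current VID is first stepped towards the
	 * target VID (phase 1), the FID is then stepped in
	 * increments of two until the VCO frequency IDs differ
	 * by at most two (phase 2), and finally the VID is set
	 * to its target value (phase 3).
	 */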
	/*
	 * Phase 1.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	if (cfid != fid || cvid != vid)
		return EIO;

	return 0;
}

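/*
 * Wait until a pending FID/VID change has completed and
 * return the current FID and VID from the status register.
 */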
static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i < 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

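/*
 * Request a FID/VID change by writing the control register on
 * all CPUs, and wait for the given time (in microseconds).
 */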
static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	struct msr_rw_info msr;
	uint64_t xc;

	msr.msr_read = false;
	msr.msr_type = MSR_0FH_CONTROL;
	msr.msr_value = 0;

	msr.msr_value |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	msr.msr_value |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	msr.msr_value |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	msr.msr_value |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	DELAY(tmo);
}

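/*
 * Get the current throttling state by matching the value of
 * MSR_THERM_CONTROL against the known T-states.
 */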
int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

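/*
 * Set the throttling state by modifying bits 1 to 4 of
 * MSR_THERM_CONTROL on all CPUs.
 */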
int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	struct msr_rw_info msr;
	uint64_t xc;
	int rv = 0;

	msr.msr_read = true;
	msr.msr_type = MSR_THERM_CONTROL;
	msr.msr_value = ts->ts_control;
	msr.msr_mask = __BITS(1, 4);

	xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
	xc_wait(xc);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_tstate_status, ts, &rv);
	xc_wait(xc);

	return rv;
}

static void
acpicpu_md_tstate_status(void *arg1, void *arg2)
{
	struct acpicpu_tstate *ts = arg1;
	uint64_t val;
	int i;

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return;

		DELAY(ts->ts_latency);
	}

	*(uintptr_t *)arg2 = EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	uint32_t freq;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	err = acpicpu_pstate_get(sc, &freq);

	if (err != 0)
		return err;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	uint32_t freq;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	err = acpicpu_pstate_get(sc, &freq);

	if (err != 0)
		return err;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	err = acpicpu_pstate_set(sc, freq);

	if (err != 0)
		return err;

	return 0;
}

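/*
 * Return a space-separated list of the available frequencies,
 * starting from the current maximum P-state.
 */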
static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}