acpi_cpu_md.c revision 1.23 1 /* $NetBSD: acpi_cpu_md.c,v 1.23 2010/08/21 04:36:29 jruoho Exp $ */
2
3 /*-
4 * Copyright (c) 2010 Jukka Ruohonen <jruohonen (at) iki.fi>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.23 2010/08/21 04:36:29 jruoho Exp $");
31
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/kcore.h>
35 #include <sys/sysctl.h>
36 #include <sys/xcall.h>
37
38 #include <x86/cpu.h>
39 #include <x86/cpufunc.h>
40 #include <x86/cputypes.h>
41 #include <x86/cpuvar.h>
42 #include <x86/cpu_msr.h>
43 #include <x86/machdep.h>
44
45 #include <dev/acpi/acpica.h>
46 #include <dev/acpi/acpi_cpu.h>
47
48 #include <dev/pci/pcivar.h>
49 #include <dev/pci/pcidevs.h>
50
/*
 * Bit tested in the fourth register returned by
 * CPUID leaf 0x80000007 on Intel (invariant TSC).
 */
#define CPUID_INTEL_TSC		__BIT(8)

/*
 * AMD MSRs, Family 0Fh (and K7).
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

/*
 * AMD MSRs, Families 10h and 11h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * The idle routine (and its name) that was installed before
 * acpicpu_md_idle_start() replaced it; restored on stop.
 */
static char	  native_idle_text[16];
void		(*native_idle)(void) = NULL;

/*
 * File-local helpers.
 */
static int	  acpicpu_md_quirks_piix4(struct pci_attach_args *);
static void	  acpicpu_md_pstate_status(void *, void *);
static void	  acpicpu_md_tstate_status(void *, void *);
static int	  acpicpu_md_pstate_sysctl_init(void);
static int	  acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	  acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	  acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

/*
 * State defined elsewhere, plus the log of sysctl nodes
 * created by this file (torn down on stop or on failure).
 */
extern uint32_t			  cpus_running;
extern struct acpicpu_softc	**acpicpu_sc;
static struct sysctllog		 *acpicpu_log = NULL;
75
76 uint32_t
77 acpicpu_md_cap(void)
78 {
79 struct cpu_info *ci = curcpu();
80 uint32_t val = 0;
81
82 if (cpu_vendor != CPUVENDOR_IDT &&
83 cpu_vendor != CPUVENDOR_INTEL)
84 return val;
85
86 /*
87 * Basic SMP C-states (required for _CST).
88 */
89 val |= ACPICPU_PDC_C_C1PT | ACPICPU_PDC_C_C2C3;
90
91 /*
92 * If MONITOR/MWAIT is available, announce
93 * support for native instructions in all C-states.
94 */
95 if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
96 val |= ACPICPU_PDC_C_C1_FFH | ACPICPU_PDC_C_C2C3_FFH;
97
98 /*
99 * Set native P- and T-states, if available.
100 */
101 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
102 val |= ACPICPU_PDC_P_FFH;
103
104 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
105 val |= ACPICPU_PDC_T_FFH;
106
107 return val;
108 }
109
110 uint32_t
111 acpicpu_md_quirks(void)
112 {
113 struct cpu_info *ci = curcpu();
114 struct pci_attach_args pa;
115 uint32_t family, val = 0;
116 uint32_t regs[4];
117
118 if (acpicpu_md_cpus_running() == 1)
119 val |= ACPICPU_FLAG_C_BM;
120
121 if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
122 val |= ACPICPU_FLAG_C_FFH;
123
124 val |= ACPICPU_FLAG_C_TSC;
125
126 switch (cpu_vendor) {
127
128 case CPUVENDOR_IDT:
129
130 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
131 val |= ACPICPU_FLAG_P_FFH;
132
133 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
134 val |= ACPICPU_FLAG_T_FFH;
135
136 break;
137
138 case CPUVENDOR_INTEL:
139
140 val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;
141
142 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
143 val |= ACPICPU_FLAG_P_FFH;
144
145 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
146 val |= ACPICPU_FLAG_T_FFH;
147
148 /*
149 * See if MSR_APERF, MSR_MPERF,
150 * and Turbo Boost are available.
151 */
152 if (cpuid_level >= 0x06) {
153
154 x86_cpuid(0x06, regs);
155
156 if ((regs[2] & __BIT(0)) != 0) /* ECX.06[0] */
157 val |= ACPICPU_FLAG_P_HW;
158
159 if ((regs[0] & __BIT(1)) != 0)
160 val |= ACPICPU_FLAG_P_TURBO; /* EAX.06[1] */
161 }
162
163 /*
164 * Detect whether TSC is invariant. If it is not,
165 * we keep the flag to note that TSC will not run
166 * at constant rate. Depending on the CPU, this may
167 * affect P- and T-state changes, but especially
168 * relevant are C-states; with variant TSC, states
169 * larger than C1 will completely stop the timer.
170 */
171 x86_cpuid(0x80000000, regs);
172
173 if (regs[0] >= 0x80000007) {
174
175 x86_cpuid(0x80000007, regs);
176
177 if ((regs[3] & CPUID_INTEL_TSC) != 0)
178 val &= ~ACPICPU_FLAG_C_TSC;
179 }
180
181 break;
182
183 case CPUVENDOR_AMD:
184
185 family = CPUID2FAMILY(ci->ci_signature);
186
187 if (family == 0xf)
188 family += CPUID2EXTFAMILY(ci->ci_signature);
189
190 switch (family) {
191
192 case 0x0f:
193 case 0x10:
194 case 0x11:
195
196 x86_cpuid(0x80000007, regs);
197
198 if ((regs[3] & CPUID_APM_TSC) != 0)
199 val &= ~ACPICPU_FLAG_C_TSC;
200
201 if ((regs[3] & CPUID_APM_HWP) != 0)
202 val |= ACPICPU_FLAG_P_FFH;
203
204 if ((regs[3] & CPUID_APM_CPB) != 0)
205 val |= ACPICPU_FLAG_P_TURBO;
206 }
207
208 break;
209 }
210
211 /*
212 * There are several erratums for PIIX4.
213 */
214 if (pci_find_device(&pa, acpicpu_md_quirks_piix4) != 0)
215 val |= ACPICPU_FLAG_PIIX4;
216
217 return val;
218 }
219
220 static int
221 acpicpu_md_quirks_piix4(struct pci_attach_args *pa)
222 {
223
224 /*
225 * XXX: The pci_find_device(9) function only
226 * deals with attached devices. Change this
227 * to use something like pci_device_foreach().
228 */
229 if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
230 return 0;
231
232 if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
233 PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
234 return 1;
235
236 return 0;
237 }
238
239 uint32_t
240 acpicpu_md_cpus_running(void)
241 {
242
243 return popcount32(cpus_running);
244 }
245
246 int
247 acpicpu_md_idle_start(void)
248 {
249 const size_t size = sizeof(native_idle_text);
250
251 x86_disable_intr();
252 x86_cpu_idle_get(&native_idle, native_idle_text, size);
253 x86_cpu_idle_set(acpicpu_cstate_idle, "acpi");
254 x86_enable_intr();
255
256 return 0;
257 }
258
259 int
260 acpicpu_md_idle_stop(void)
261 {
262 uint64_t xc;
263
264 x86_disable_intr();
265 x86_cpu_idle_set(native_idle, native_idle_text);
266 x86_enable_intr();
267
268 /*
269 * Run a cross-call to ensure that all CPUs are
270 * out from the ACPI idle-loop before detachment.
271 */
272 xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
273 xc_wait(xc);
274
275 return 0;
276 }
277
278 /*
279 * The MD idle loop. Called with interrupts disabled.
280 */
281 void
282 acpicpu_md_idle_enter(int method, int state)
283 {
284 struct cpu_info *ci = curcpu();
285
286 switch (method) {
287
288 case ACPICPU_C_STATE_FFH:
289
290 x86_enable_intr();
291 x86_monitor(&ci->ci_want_resched, 0, 0);
292
293 if (__predict_false(ci->ci_want_resched) != 0)
294 return;
295
296 x86_mwait((state - 1) << 4, 0);
297 break;
298
299 case ACPICPU_C_STATE_HALT:
300
301 if (__predict_false(ci->ci_want_resched) != 0) {
302 x86_enable_intr();
303 return;
304 }
305
306 x86_stihlt();
307 break;
308 }
309 }
310
311 int
312 acpicpu_md_pstate_start(void)
313 {
314 const uint64_t est = __BIT(16);
315 uint64_t val;
316
317 switch (cpu_vendor) {
318
319 case CPUVENDOR_IDT:
320 case CPUVENDOR_INTEL:
321
322 val = rdmsr(MSR_MISC_ENABLE);
323
324 if ((val & est) == 0) {
325
326 val |= est;
327
328 wrmsr(MSR_MISC_ENABLE, val);
329 val = rdmsr(MSR_MISC_ENABLE);
330
331 if ((val & est) == 0)
332 return ENOTTY;
333 }
334 }
335
336 return acpicpu_md_pstate_sysctl_init();
337 }
338
339 int
340 acpicpu_md_pstate_stop(void)
341 {
342
343 if (acpicpu_log != NULL)
344 sysctl_teardown(&acpicpu_log);
345
346 return 0;
347 }
348
349 int
350 acpicpu_md_pstate_pss(struct acpicpu_softc *sc)
351 {
352 struct acpicpu_pstate *ps, msr;
353 struct cpu_info *ci = curcpu();
354 uint32_t family, i = 0;
355
356 (void)memset(&msr, 0, sizeof(struct acpicpu_pstate));
357
358 switch (cpu_vendor) {
359
360 case CPUVENDOR_IDT:
361 case CPUVENDOR_INTEL:
362 msr.ps_control_addr = MSR_PERF_CTL;
363 msr.ps_control_mask = __BITS(0, 15);
364
365 msr.ps_status_addr = MSR_PERF_STATUS;
366 msr.ps_status_mask = __BITS(0, 15);
367 break;
368
369 case CPUVENDOR_AMD:
370
371 family = CPUID2FAMILY(ci->ci_signature);
372
373 if (family == 0xf)
374 family += CPUID2EXTFAMILY(ci->ci_signature);
375
376 switch (family) {
377
378 case 0x10:
379 case 0x11:
380 msr.ps_control_addr = MSR_10H_CONTROL;
381 msr.ps_control_mask = __BITS(0, 2);
382
383 msr.ps_status_addr = MSR_10H_STATUS;
384 msr.ps_status_mask = __BITS(0, 2);
385 break;
386
387 default:
388
389 if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
390 return EOPNOTSUPP;
391 }
392
393 break;
394
395 default:
396 return ENODEV;
397 }
398
399 while (i < sc->sc_pstate_count) {
400
401 ps = &sc->sc_pstate[i];
402
403 if (ps->ps_status_addr == 0)
404 ps->ps_status_addr = msr.ps_status_addr;
405
406 if (ps->ps_status_mask == 0)
407 ps->ps_status_mask = msr.ps_status_mask;
408
409 if (ps->ps_control_addr == 0)
410 ps->ps_control_addr = msr.ps_control_addr;
411
412 if (ps->ps_control_mask == 0)
413 ps->ps_control_mask = msr.ps_control_mask;
414
415 i++;
416 }
417
418 return 0;
419 }
420
421 int
422 acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
423 {
424 struct acpicpu_pstate *ps = NULL;
425 uint64_t val;
426 uint32_t i;
427
428 for (i = 0; i < sc->sc_pstate_count; i++) {
429
430 ps = &sc->sc_pstate[i];
431
432 if (ps->ps_freq != 0)
433 break;
434 }
435
436 if (__predict_false(ps == NULL))
437 return ENODEV;
438
439 if (ps->ps_status_addr == 0)
440 return EINVAL;
441
442 val = rdmsr(ps->ps_status_addr);
443
444 if (ps->ps_status_mask != 0)
445 val = val & ps->ps_status_mask;
446
447 for (i = 0; i < sc->sc_pstate_count; i++) {
448
449 ps = &sc->sc_pstate[i];
450
451 if (ps->ps_freq == 0)
452 continue;
453
454 if (val == ps->ps_status) {
455 *freq = ps->ps_freq;
456 return 0;
457 }
458 }
459
460 return EIO;
461 }
462
463 int
464 acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
465 {
466 struct msr_rw_info msr;
467 uint64_t xc;
468 int rv = 0;
469
470 msr.msr_read = false;
471 msr.msr_type = ps->ps_control_addr;
472 msr.msr_value = ps->ps_control;
473
474 if (ps->ps_control_mask != 0) {
475 msr.msr_mask = ps->ps_control_mask;
476 msr.msr_read = true;
477 }
478
479 xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
480 xc_wait(xc);
481
482 if (ps->ps_status_addr == 0)
483 return 0;
484
485 xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_pstate_status, ps, &rv);
486 xc_wait(xc);
487
488 return rv;
489 }
490
491 static void
492 acpicpu_md_pstate_status(void *arg1, void *arg2)
493 {
494 struct acpicpu_pstate *ps = arg1;
495 uint64_t val;
496 int i;
497
498 for (i = val = 0; i < ACPICPU_P_STATE_RETRY; i++) {
499
500 val = rdmsr(ps->ps_status_addr);
501
502 if (ps->ps_status_mask != 0)
503 val = val & ps->ps_status_mask;
504
505 if (val == ps->ps_status)
506 return;
507
508 DELAY(ps->ps_latency);
509 }
510
511 *(uintptr_t *)arg2 = EAGAIN;
512 }
513
514 int
515 acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
516 {
517 struct acpicpu_tstate *ts;
518 uint64_t val;
519 uint32_t i;
520
521 val = rdmsr(MSR_THERM_CONTROL);
522
523 for (i = 0; i < sc->sc_tstate_count; i++) {
524
525 ts = &sc->sc_tstate[i];
526
527 if (ts->ts_percent == 0)
528 continue;
529
530 if (val == ts->ts_control || val == ts->ts_status) {
531 *percent = ts->ts_percent;
532 return 0;
533 }
534 }
535
536 return EIO;
537 }
538
539 int
540 acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
541 {
542 struct msr_rw_info msr;
543 uint64_t xc;
544 int rv = 0;
545
546 msr.msr_read = true;
547 msr.msr_type = MSR_THERM_CONTROL;
548 msr.msr_value = ts->ts_control;
549 msr.msr_mask = __BITS(1, 4);
550
551 xc = xc_broadcast(0, (xcfunc_t)x86_msr_xcall, &msr, NULL);
552 xc_wait(xc);
553
554 if (ts->ts_status == 0)
555 return 0;
556
557 xc = xc_broadcast(0, (xcfunc_t)acpicpu_md_tstate_status, ts, &rv);
558 xc_wait(xc);
559
560 return rv;
561 }
562
563 static void
564 acpicpu_md_tstate_status(void *arg1, void *arg2)
565 {
566 struct acpicpu_tstate *ts = arg1;
567 uint64_t val;
568 int i;
569
570 for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {
571
572 val = rdmsr(MSR_THERM_CONTROL);
573
574 if (val == ts->ts_status)
575 return;
576
577 DELAY(ts->ts_latency);
578 }
579
580 *(uintptr_t *)arg2 = EAGAIN;
581 }
582
583 /*
584 * A kludge for backwards compatibility.
585 */
586 static int
587 acpicpu_md_pstate_sysctl_init(void)
588 {
589 const struct sysctlnode *fnode, *mnode, *rnode;
590 const char *str;
591 int rv;
592
593 switch (cpu_vendor) {
594
595 case CPUVENDOR_IDT:
596 case CPUVENDOR_INTEL:
597 str = "est";
598 break;
599
600 case CPUVENDOR_AMD:
601 str = "powernow";
602 break;
603
604 default:
605 return ENODEV;
606 }
607
608
609 rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
610 CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
611 NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);
612
613 if (rv != 0)
614 goto fail;
615
616 rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
617 0, CTLTYPE_NODE, str, NULL,
618 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
619
620 if (rv != 0)
621 goto fail;
622
623 rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
624 0, CTLTYPE_NODE, "frequency", NULL,
625 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
626
627 if (rv != 0)
628 goto fail;
629
630 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
631 CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
632 acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);
633
634 if (rv != 0)
635 goto fail;
636
637 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
638 CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
639 acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);
640
641 if (rv != 0)
642 goto fail;
643
644 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
645 CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
646 acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);
647
648 if (rv != 0)
649 goto fail;
650
651 return 0;
652
653 fail:
654 if (acpicpu_log != NULL) {
655 sysctl_teardown(&acpicpu_log);
656 acpicpu_log = NULL;
657 }
658
659 return rv;
660 }
661
662 static int
663 acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
664 {
665 struct cpu_info *ci = curcpu();
666 struct acpicpu_softc *sc;
667 struct sysctlnode node;
668 uint32_t freq;
669 int err;
670
671 sc = acpicpu_sc[ci->ci_acpiid];
672
673 if (sc == NULL)
674 return ENXIO;
675
676 err = acpicpu_pstate_get(sc, &freq);
677
678 if (err != 0)
679 return err;
680
681 node = *rnode;
682 node.sysctl_data = &freq;
683
684 err = sysctl_lookup(SYSCTLFN_CALL(&node));
685
686 if (err != 0 || newp == NULL)
687 return err;
688
689 return 0;
690 }
691
692 static int
693 acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
694 {
695 struct cpu_info *ci = curcpu();
696 struct acpicpu_softc *sc;
697 struct sysctlnode node;
698 uint32_t freq;
699 int err;
700
701 sc = acpicpu_sc[ci->ci_acpiid];
702
703 if (sc == NULL)
704 return ENXIO;
705
706 err = acpicpu_pstate_get(sc, &freq);
707
708 if (err != 0)
709 return err;
710
711 node = *rnode;
712 node.sysctl_data = &freq;
713
714 err = sysctl_lookup(SYSCTLFN_CALL(&node));
715
716 if (err != 0 || newp == NULL)
717 return err;
718
719 err = acpicpu_pstate_set(sc, freq);
720
721 if (err != 0)
722 return err;
723
724 return 0;
725 }
726
727 static int
728 acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
729 {
730 struct cpu_info *ci = curcpu();
731 struct acpicpu_softc *sc;
732 struct sysctlnode node;
733 char buf[1024];
734 size_t len;
735 uint32_t i;
736 int err;
737
738 sc = acpicpu_sc[ci->ci_acpiid];
739
740 if (sc == NULL)
741 return ENXIO;
742
743 (void)memset(&buf, 0, sizeof(buf));
744
745 mutex_enter(&sc->sc_mtx);
746
747 for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {
748
749 if (sc->sc_pstate[i].ps_freq == 0)
750 continue;
751
752 len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
753 sc->sc_pstate[i].ps_freq,
754 i < (sc->sc_pstate_count - 1) ? " " : "");
755 }
756
757 mutex_exit(&sc->sc_mtx);
758
759 node = *rnode;
760 node.sysctl_data = buf;
761
762 err = sysctl_lookup(SYSCTLFN_CALL(&node));
763
764 if (err != 0 || newp == NULL)
765 return err;
766
767 return 0;
768 }
769
770