apei.c revision 1.8 1 /* $NetBSD: apei.c,v 1.8 2024/10/27 17:27:11 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * APEI: ACPI Platform Error Interface
31 *
32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 *
34 * XXX dtrace probes
35 *
36 * XXX call _OSC appropriately to announce to the platform that we, the
37 * OSPM, support APEI
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.8 2024/10/27 17:27:11 riastradh Exp $");
42
43 #include <sys/param.h>
44 #include <sys/types.h>
45
46 #include <sys/atomic.h>
47 #include <sys/endian.h>
48 #include <sys/device.h>
49 #include <sys/module.h>
50 #include <sys/sysctl.h>
51 #include <sys/uuid.h>
52
53 #include <dev/acpi/acpireg.h>
54 #include <dev/acpi/acpivar.h>
55 #include <dev/acpi/apei_bertvar.h>
56 #include <dev/acpi/apei_cper.h>
57 #include <dev/acpi/apei_einjvar.h>
58 #include <dev/acpi/apei_erstvar.h>
59 #include <dev/acpi/apei_hestvar.h>
60 #include <dev/acpi/apei_interp.h>
61 #include <dev/acpi/apeivar.h>
62 #include <dev/pci/pcireg.h>
63
64 #define _COMPONENT ACPI_RESOURCE_COMPONENT
65 ACPI_MODULE_NAME ("apei")
66
67 static int apei_match(device_t, cfdata_t, void *);
68 static void apei_attach(device_t, device_t, void *);
69 static int apei_detach(device_t, int);
70
71 static void apei_get_tables(struct apei_tab *);
72 static void apei_put_tables(struct apei_tab *);
73
74 static void apei_identify(struct apei_softc *, const char *,
75 const ACPI_TABLE_HEADER *);
76
77 CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
78 apei_match, apei_attach, apei_detach, NULL);
79
80 static int
81 apei_match(device_t parent, cfdata_t match, void *aux)
82 {
83 struct apei_tab tab;
84 int prio = 0;
85
86 /*
87 * If we have any of the APEI tables, match.
88 */
89 apei_get_tables(&tab);
90 if (tab.bert || tab.einj || tab.erst || tab.hest)
91 prio = 1;
92 apei_put_tables(&tab);
93
94 return prio;
95 }
96
97 static void
98 apei_attach(device_t parent, device_t self, void *aux)
99 {
100 struct apei_softc *sc = device_private(self);
101 const struct sysctlnode *sysctl_hw_acpi;
102 int error;
103
104 aprint_naive("\n");
105 aprint_normal(": ACPI Platform Error Interface\n");
106
107 pmf_device_register(self, NULL, NULL);
108
109 sc->sc_dev = self;
110 apei_get_tables(&sc->sc_tab);
111
112 /*
113 * Get the sysctl hw.acpi node. This should already be created
114 * but I don't see an easy way to get at it. If this fails,
115 * something is seriously wrong, so let's stop here.
116 */
117 error = sysctl_createv(&sc->sc_sysctllog, 0,
118 NULL, &sysctl_hw_acpi, 0,
119 CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
120 CTL_HW, CTL_CREATE, CTL_EOL);
121 if (error) {
122 aprint_error_dev(sc->sc_dev,
123 "failed to create sysctl hw.acpi: %d\n", error);
124 return;
125 }
126
127 /*
128 * Create sysctl hw.acpi.apei.
129 */
130 error = sysctl_createv(&sc->sc_sysctllog, 0,
131 &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
132 CTLTYPE_NODE, "apei",
133 SYSCTL_DESCR("ACPI Platform Error Interface"),
134 NULL, 0, NULL, 0,
135 CTL_CREATE, CTL_EOL);
136 if (error) {
137 aprint_error_dev(sc->sc_dev,
138 "failed to create sysctl hw.acpi.apei: %d\n", error);
139 return;
140 }
141
142 /*
143 * Set up BERT, EINJ, ERST, and HEST.
144 */
145 if (sc->sc_tab.bert) {
146 apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
147 apei_bert_attach(sc);
148 }
149 if (sc->sc_tab.einj) {
150 apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
151 apei_einj_attach(sc);
152 }
153 if (sc->sc_tab.erst) {
154 apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
155 apei_erst_attach(sc);
156 }
157 if (sc->sc_tab.hest) {
158 apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
159 apei_hest_attach(sc);
160 }
161 }
162
163 static int
164 apei_detach(device_t self, int flags)
165 {
166 struct apei_softc *sc = device_private(self);
167 int error;
168
169 /*
170 * Detach children. We don't currently have any but this is
171 * harmless without children and mandatory if we ever sprouted
172 * them, so let's just leave it here for good measure.
173 *
174 * After this point, we are committed to detaching; failure is
175 * forbidden.
176 */
177 error = config_detach_children(self, flags);
178 if (error)
179 return error;
180
181 /*
182 * Tear down all the sysctl nodes first, before the software
183 * state backing them goes away.
184 */
185 sysctl_teardown(&sc->sc_sysctllog);
186 sc->sc_sysctlroot = NULL;
187
188 /*
189 * Detach the software state for the APEI tables.
190 */
191 if (sc->sc_tab.hest)
192 apei_hest_detach(sc);
193 if (sc->sc_tab.erst)
194 apei_erst_detach(sc);
195 if (sc->sc_tab.einj)
196 apei_einj_detach(sc);
197 if (sc->sc_tab.bert)
198 apei_bert_detach(sc);
199
200 /*
201 * Release the APEI tables and we're done.
202 */
203 apei_put_tables(&sc->sc_tab);
204 pmf_device_deregister(self);
205 return 0;
206 }
207
208 /*
209 * apei_get_tables(tab)
210 *
211 * Get references to whichever APEI-related tables -- BERT, EINJ,
212 * ERST, HEST -- are available in the system.
213 */
214 static void
215 apei_get_tables(struct apei_tab *tab)
216 {
217 ACPI_STATUS rv;
218
219 /*
220 * Probe the BERT -- Boot Error Record Table.
221 */
222 rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
223 if (ACPI_FAILURE(rv))
224 tab->bert = NULL;
225
226 /*
227 * Probe the EINJ -- Error Injection Table.
228 */
229 rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
230 if (ACPI_FAILURE(rv))
231 tab->einj = NULL;
232
233 /*
234 * Probe the ERST -- Error Record Serialization Table.
235 */
236 rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
237 if (ACPI_FAILURE(rv))
238 tab->erst = NULL;
239
240 /*
241 * Probe the HEST -- Hardware Error Source Table.
242 */
243 rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
244 if (ACPI_FAILURE(rv))
245 tab->hest = NULL;
246 }
247
248 /*
249 * apei_put_tables(tab)
250 *
251 * Release the tables acquired by apei_get_tables.
252 */
253 static void
254 apei_put_tables(struct apei_tab *tab)
255 {
256
257 if (tab->bert != NULL) {
258 AcpiPutTable(&tab->bert->Header);
259 tab->bert = NULL;
260 }
261 if (tab->einj != NULL) {
262 AcpiPutTable(&tab->einj->Header);
263 tab->einj = NULL;
264 }
265 if (tab->erst != NULL) {
266 AcpiPutTable(&tab->erst->Header);
267 tab->erst = NULL;
268 }
269 if (tab->hest != NULL) {
270 AcpiPutTable(&tab->hest->Header);
271 tab->hest = NULL;
272 }
273 }
274
275 /*
276 * apei_identify(sc, name, header)
277 *
278 * Identify the APEI-related table header for dmesg.
279 */
280 static void
281 apei_identify(struct apei_softc *sc, const char *name,
282 const ACPI_TABLE_HEADER *h)
283 {
284
285 aprint_normal_dev(sc->sc_dev, "%s:"
286 " OemId <%6.6s,%8.8s,%08x>"
287 " AslId <%4.4s,%08x>\n",
288 name,
289 h->OemId, h->OemTableId, h->OemRevision,
290 h->AslCompilerId, h->AslCompilerRevision);
291 }
292
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record GUID stored
 *	at buf, as found in an ACPI table, into a struct uuid.  This
 *	is a thin wrapper around uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
305
306 /*
307 * apei_format_guid(uuid, s)
308 *
309 * Format a UUID as a string. This uses C initializer notation,
310 * not UUID notation, in order to match the text in the UEFI
311 * specification.
312 */
313 static void
314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
315 {
316
317 snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
318 "{0x%02x,%02x,"
319 "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
320 uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
321 uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
322 uuid->node[0], uuid->node[1], uuid->node[2],
323 uuid->node[3], uuid->node[4], uuid->node[5]);
324 }
325
326 /*
327 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
328 */
329
330 static const char *const cper_memory_error_type[] = {
331 #define F(LN, SN, V) [LN] = #SN,
332 CPER_MEMORY_ERROR_TYPES(F)
333 #undef F
334 };
335
/*
 * apei_gesb_severity
 *
 *	Names for the severity values of a Generic Error Status Block,
 *	indexed by the numeric severity.
 *
 *	The acpica constants ACPI_HEST_GEN_ERROR_* seem to have the
 *	same values, but acpica designates them for Generic Error Data
 *	Entries rather than Generic Error Status Blocks, so the
 *	indices are spelled out numerically here.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 */
static const char *const apei_gesb_severity[] = {
	[0]	= "recoverable",
	[1]	= "fatal",
	[2]	= "corrected",
	[3]	= "none",
};
349
350 /*
351 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
352 */
353 static const char *const apei_gede_severity[] = {
354 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
355 [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
356 [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
357 [ACPI_HEST_GEN_ERROR_NONE] = "none",
358 };
359
360 /*
361 * N.2.5. Memory Error Section
362 *
363 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
364 */
365 static const struct uuid CPER_MEMORY_ERROR_SECTION =
366 {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
367
368 static void
369 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
370 size_t len, const char *ctx, bool ratelimitok)
371 {
372 const struct cper_memory_error *ME = buf;
373 char bitbuf[1024];
374
375 /*
376 * If we've hit the rate limit, skip printing the error.
377 */
378 if (!ratelimitok)
379 goto out;
380
381 snprintb(bitbuf, sizeof(bitbuf),
382 CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
383 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
384 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
385 /*
386 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
387 */
388 /* XXX define this format somewhere */
389 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
390 "f\010\010" "ErrorType\0"
391 "=\001" "ERR_INTERNAL\0"
392 "=\004" "ERR_MEM\0"
393 "=\005" "ERR_TLB\0"
394 "=\006" "ERR_CACHE\0"
395 "=\007" "ERR_FUNCTION\0"
396 "=\010" "ERR_SELFTEST\0"
397 "=\011" "ERR_FLOW\0"
398 "=\020" "ERR_BUS\0"
399 "=\021" "ERR_MAP\0"
400 "=\022" "ERR_IMPROPER\0"
401 "=\023" "ERR_UNIMPL\0"
402 "=\024" "ERR_LOL\0"
403 "=\025" "ERR_RESPONSE\0"
404 "=\026" "ERR_PARITY\0"
405 "=\027" "ERR_PROTOCOL\0"
406 "=\030" "ERR_ERROR\0"
407 "=\031" "ERR_TIMEOUT\0"
408 "=\032" "ERR_POISONED\0"
409 "b\020" "AddressError\0"
410 "b\021" "ControlError\0"
411 "b\022" "DataError\0"
412 "b\023" "ResponderDetected\0"
413 "b\024" "RequesterDetected\0"
414 "b\025" "FirstError\0"
415 "b\026" "Overflow\0"
416 "\0", ME->ErrorStatus);
417 device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
418 }
419 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
420 device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
421 ctx, ME->PhysicalAddress);
422 }
423 if (ME->ValidationBits &
424 CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
425 device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
426 "\n", ctx, ME->PhysicalAddressMask);
427 }
428 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
429 device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
430 ME->Node);
431 }
432 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
433 device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
434 ME->Card);
435 }
436 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
437 device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
438 ME->Module);
439 }
440 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
441 device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
442 ME->Bank);
443 }
444 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
445 device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
446 ME->Device);
447 }
448 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
449 device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
450 ME->Row);
451 }
452 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
453 device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
454 ME->Column);
455 }
456 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
457 device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
458 ctx, ME->BitPosition);
459 }
460 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
461 device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
462 ctx, ME->RequestorId);
463 }
464 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
465 device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
466 ctx, ME->ResponderId);
467 }
468 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
469 device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
470 ctx, ME->TargetId);
471 }
472 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
473 const uint8_t t = ME->MemoryErrorType;
474 const char *n = t < __arraycount(cper_memory_error_type)
475 ? cper_memory_error_type[t] : NULL;
476
477 if (n) {
478 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
479 " (%s)\n", ctx, t, n);
480 } else {
481 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
482 ctx, t);
483 }
484 }
485
486 out: /*
487 * XXX pass this through to uvm(9) or userland for decisions
488 * like page retirement
489 */
490 return;
491 }
492
493 /*
494 * N.2.7. PCI Express Error Section
495 *
496 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
497 */
498 static const struct uuid CPER_PCIE_ERROR_SECTION =
499 {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
500
501 static const char *const cper_pcie_error_port_type[] = {
502 #define F(LN, SN, V) [LN] = #SN,
503 CPER_PCIE_ERROR_PORT_TYPES(F)
504 #undef F
505 };
506
507 static void
508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
509 const char *ctx, bool ratelimitok)
510 {
511 const struct cper_pcie_error *PE = buf;
512 char bitbuf[1024];
513
514 /*
515 * If we've hit the rate limit, skip printing the error.
516 */
517 if (!ratelimitok)
518 goto out;
519
520 snprintb(bitbuf, sizeof(bitbuf),
521 CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
522 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
523 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
524 const uint32_t t = PE->PortType;
525 const char *n = t < __arraycount(cper_pcie_error_port_type)
526 ? cper_pcie_error_port_type[t] : NULL;
527
528 if (n) {
529 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
530 " (%s)\n", ctx, t, n);
531 } else {
532 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
533 ctx, t);
534 }
535 }
536 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
537 /* XXX BCD */
538 device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
539 ctx, PE->Version);
540 }
541 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
542 /* XXX move me to pcireg.h */
543 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
544 /* command */
545 "b\000" "IO_ENABLE\0"
546 "b\001" "MEM_ENABLE\0"
547 "b\002" "MASTER_ENABLE\0"
548 "b\003" "SPECIAL_ENABLE\0"
549 "b\004" "INVALIDATE_ENABLE\0"
550 "b\005" "PALETTE_ENABLE\0"
551 "b\006" "PARITY_ENABLE\0"
552 "b\007" "STEPPING_ENABLE\0"
553 "b\010" "SERR_ENABLE\0"
554 "b\011" "BACKTOBACK_ENABLE\0"
555 "b\012" "INTERRUPT_DISABLE\0"
556 /* status */
557 "b\023" "INT_STATUS\0"
558 "b\024" "CAPLIST_SUPPORT\0"
559 "b\025" "66MHZ_SUPPORT\0"
560 "b\026" "UDF_SUPPORT\0"
561 "b\027" "BACKTOBACK_SUPPORT\0"
562 "b\030" "PARITY_ERROR\0"
563 "f\031\002" "DEVSEL\0"
564 "=\000" "FAST\0"
565 "=\001" "MEDIUM\0"
566 "=\002" "SLOW\0"
567 "b\033" "TARGET_TARGET_ABORT\0"
568 "b\034" "MASTER_TARGET_ABORT\0"
569 "b\035" "MASTER_ABORT\0"
570 "b\036" "SPECIAL_ERROR\0"
571 "b\037" "PARITY_DETECT\0"
572 "\0", PE->CommandStatus);
573 device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
574 ctx, bitbuf);
575 }
576 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
577 device_printf(sc->sc_dev, "%s: DeviceID:"
578 " VendorID=0x%04"PRIx16
579 " DeviceID=0x%04"PRIx16
580 " ClassCode=0x%06"PRIx32
581 " Function=%"PRIu8
582 " Device=%"PRIu8
583 " Segment=%"PRIu16
584 " Bus=%"PRIu8
585 " SecondaryBus=%"PRIu8
586 " Slot=0x%04"PRIx16
587 " Reserved0=0x%02"PRIx8
588 "\n",
589 ctx,
590 le16dec(PE->DeviceID.VendorID),
591 le16dec(PE->DeviceID.DeviceID),
592 (PE->DeviceID.ClassCode[0] | /* le24dec */
593 ((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
594 ((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
595 PE->DeviceID.Function, PE->DeviceID.Device,
596 le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
597 PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
598 PE->DeviceID.Reserved0);
599 }
600 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
601 device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
602 ctx, PE->DeviceSerial);
603 }
604 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
605 /* XXX snprintb */
606 device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
607 "\n", ctx, PE->BridgeControlStatus);
608 }
609 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
610 uint32_t dcsr, dsr;
611 char hex[9*sizeof(PE->CapabilityStructure)/4];
612 unsigned i;
613
614 /*
615 * Display a hex dump of each 32-bit register in the
616 * PCIe capability structure.
617 */
618 __CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0);
619 for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) {
620 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
621 le32dec(&PE->CapabilityStructure[4*i]));
622 }
623 hex[sizeof(hex) - 1] = '\0';
624 device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
625 ctx, hex);
626
627 /*
628 * If the Device Status Register has any bits set,
629 * highlight it in particular -- these are probably
630 * error bits.
631 */
632 dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
633 dsr = __SHIFTOUT(dcsr, __BITS(31,16));
634 if (dsr != 0) {
635 /*
636 * XXX move me to pcireg.h; note: high
637 * half of DCSR
638 */
639 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
640 "b\000" "CORRECTABLE_ERROR\0"
641 "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0"
642 "b\002" "FATAL_ERROR\0"
643 "b\003" "UNSUPPORTED_REQUEST\0"
644 "b\004" "AUX_POWER\0"
645 "b\005" "TRANSACTIONS_PENDING\0"
646 "\0", dsr);
647 device_printf(sc->sc_dev, "%s: PCIe Device Status:"
648 " %s\n",
649 ctx, bitbuf);
650 }
651 }
652 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
653 uint32_t uc_status, uc_sev;
654 uint32_t cor_status;
655 uint32_t control;
656 char hex[9*sizeof(PE->AERInfo)/4];
657 unsigned i;
658
659 /*
660 * Display a hex dump of each 32-bit register in the
661 * PCIe Advanced Error Reporting extended capability
662 * structure.
663 */
664 __CTASSERT(sizeof(PE->AERInfo) % 4 == 0);
665 for (i = 0; i < sizeof(PE->AERInfo)/4; i++) {
666 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
667 le32dec(&PE->AERInfo[4*i]));
668 }
669 hex[sizeof(hex) - 1] = '\0';
670 device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
671
672 /* XXX move me to pcireg.h */
673 #define PCI_AER_UC_STATUS_FMT "\177\020" \
674 "b\000" "UNDEFINED\0" \
675 "b\004" "DL_PROTOCOL_ERROR\0" \
676 "b\005" "SURPRISE_DOWN_ERROR\0" \
677 "b\014" "POISONED_TLP\0" \
678 "b\015" "FC_PROTOCOL_ERROR\0" \
679 "b\016" "COMPLETION_TIMEOUT\0" \
680 "b\017" "COMPLETION_ABORT\0" \
681 "b\020" "UNEXPECTED_COMPLETION\0" \
682 "b\021" "RECEIVER_OVERFLOW\0" \
683 "b\022" "MALFORMED_TLP\0" \
684 "b\023" "ECRC_ERROR\0" \
685 "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \
686 "b\025" "ACS_VIOLATION\0" \
687 "b\026" "INTERNAL_ERROR\0" \
688 "b\027" "MC_BLOCKED_TLP\0" \
689 "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \
690 "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \
691 "b\032" "POISONTLP_EGRESS_BLOCKED\0" \
692 "\0"
693
694 /*
695 * If there are any hardware error status bits set,
696 * highlight them in particular, in three groups:
697 *
698 * - uncorrectable fatal (UC_STATUS and UC_SEVERITY)
699 * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY)
700 * - corrected (COR_STATUS)
701 *
702 * And if there are any uncorrectable errors, show
703 * which one was reported first, according to
704 * CAP_CONTROL.
705 */
706 uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
707 uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
708 cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
709 control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
710
711 if (uc_status & uc_sev) {
712 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
713 uc_status & uc_sev);
714 device_printf(sc->sc_dev, "%s:"
715 " AER hardware fatal uncorrectable errors: %s\n",
716 ctx, bitbuf);
717 }
718 if (uc_status & ~uc_sev) {
719 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
720 uc_status & uc_sev);
721 device_printf(sc->sc_dev, "%s:"
722 " AER hardware fatal uncorrectable errors: %s\n",
723 ctx, bitbuf);
724 }
725 if (uc_status) {
726 unsigned first = __SHIFTOUT(control,
727 PCI_AER_FIRST_ERROR_PTR);
728 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
729 (uint32_t)1 << first);
730 device_printf(sc->sc_dev, "%s:"
731 " AER hardware first uncorrectable error: %s\n",
732 ctx, bitbuf);
733 }
734 if (cor_status) {
735 /* XXX move me to pcireg.h */
736 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
737 "b\000" "RECEIVER_ERROR\0"
738 "b\006" "BAD_TLP\0"
739 "b\007" "BAD_DLLP\0"
740 "b\010" "REPLAY_NUM_ROLLOVER\0"
741 "b\014" "REPLAY_TIMER_TIMEOUT\0"
742 "b\015" "ADVISORY_NF_ERROR\0"
743 "b\016" "INTERNAL_ERROR\0"
744 "b\017" "HEADER_LOG_OVERFLOW\0"
745 "\0", cor_status);
746 device_printf(sc->sc_dev, "%s:"
747 " AER hardware corrected error: %s\n",
748 ctx, bitbuf);
749 }
750 }
751
752 out: /*
753 * XXX pass this on to the PCI subsystem to handle
754 */
755 return;
756 }
757
758 /*
759 * apei_cper_reports
760 *
761 * Table of known Common Platform Error Record types, symbolic
762 * names, minimum data lengths, and functions to report them.
763 *
764 * The section types and corresponding section layouts are listed
765 * at:
766 *
767 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
768 */
769 static const struct apei_cper_report {
770 const char *name;
771 const struct uuid *type;
772 size_t minlength;
773 void (*func)(struct apei_softc *, const void *, size_t, const char *,
774 bool);
775 } apei_cper_reports[] = {
776 { "memory", &CPER_MEMORY_ERROR_SECTION,
777 sizeof(struct cper_memory_error),
778 apei_cper_memory_error_report },
779 { "PCIe", &CPER_PCIE_ERROR_SECTION,
780 sizeof(struct cper_pcie_error),
781 apei_cper_pcie_error_report },
782 };
783
784 /*
785 * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
786 *
787 * Report the header of the ith Generic Error Data Entry in the
788 * given context, if ratelimitok is true.
789 *
790 * Return the actual length of the header in headerlen, or 0 if
791 * not known because the revision isn't recognized.
792 *
793 * Return the report type in report, or NULL if not known because
794 * the section type isn't recognized.
795 */
796 static void
797 apei_gede_report_header(struct apei_softc *sc,
798 const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
799 size_t *headerlenp, const struct apei_cper_report **reportp)
800 {
801 const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
802 struct uuid sectype;
803 char guidstr[69];
804 char buf[128];
805 unsigned i;
806
807 /*
808 * Print the section type as a C initializer. It would be
809 * prettier to use standard hyphenated UUID notation, but that
810 * notation is slightly ambiguous here (two octets could be
811 * written either way, depending on Microsoft convention --
812 * which influenced ACPI and UEFI -- or internet convention),
813 * and the UEFI spec writes the C initializer notation, so this
814 * makes it easier to search for.
815 *
816 * Also print out a symbolic name, if we know it.
817 */
818 apei_cper_guid_dec(gede->SectionType, §ype);
819 apei_format_guid(§ype, guidstr);
820 for (i = 0; i < __arraycount(apei_cper_reports); i++) {
821 const struct apei_cper_report *const report =
822 &apei_cper_reports[i];
823
824 if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
825 continue;
826 if (ratelimitok) {
827 device_printf(sc->sc_dev, "%s:"
828 " SectionType=%s (%s error)\n",
829 ctx, guidstr, report->name);
830 }
831 *reportp = report;
832 break;
833 }
834 if (i == __arraycount(apei_cper_reports)) {
835 if (ratelimitok) {
836 device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
837 guidstr);
838 }
839 *reportp = NULL;
840 }
841
842 /*
843 * Print the numeric severity and, if we have it, a symbolic
844 * name for it.
845 */
846 if (ratelimitok) {
847 device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
848 ctx,
849 gede->ErrorSeverity,
850 (gede->ErrorSeverity < __arraycount(apei_gede_severity)
851 ? apei_gede_severity[gede->ErrorSeverity]
852 : "unknown"));
853 }
854
855 /*
856 * The Revision may not often be useful, but this is only ever
857 * shown at the time of a hardware error report, not something
858 * you can glean at your convenience with acpidump. So print
859 * it anyway.
860 */
861 if (ratelimitok) {
862 device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
863 gede->Revision);
864 }
865
866 /*
867 * Don't touch anything past the Revision until we've
868 * determined we understand it. Return the header length to
869 * the caller, or return zero -- and stop here -- if we don't
870 * know what the actual header length is.
871 */
872 if (gede->Revision < 0x0300) {
873 *headerlenp = sizeof(*gede);
874 } else if (gede->Revision < 0x0400) {
875 *headerlenp = sizeof(*gede_v3);
876 } else {
877 *headerlenp = 0;
878 return;
879 }
880
881 /*
882 * Print the validation bits at debug level. Only really
883 * helpful if there are bits we _don't_ know about.
884 */
885 if (ratelimitok) {
886 /* XXX define this format somewhere */
887 snprintb(buf, sizeof(buf), "\177\020"
888 "b\000" "FRU_ID\0"
889 "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
890 "b\002" "TIMESTAMP\0"
891 "\0", gede->ValidationBits);
892 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
893 buf);
894 }
895
896 /*
897 * Print the CPER section flags.
898 */
899 if (ratelimitok) {
900 snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
901 gede->Flags);
902 device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
903 }
904
905 /*
906 * The ErrorDataLength is unlikely to be useful for the log, so
907 * print it at debug level only.
908 */
909 if (ratelimitok) {
910 aprint_debug_dev(sc->sc_dev, "%s:"
911 " ErrorDataLength=0x%"PRIu32"\n",
912 ctx, gede->ErrorDataLength);
913 }
914
915 /*
916 * Print the FRU Id and text, if available.
917 */
918 if (ratelimitok &&
919 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
920 struct uuid fruid;
921
922 apei_cper_guid_dec(gede->FruId, &fruid);
923 apei_format_guid(&fruid, guidstr);
924 device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
925 }
926 if (ratelimitok &&
927 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
928 device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
929 ctx, gede->FruText);
930 }
931
932 /*
933 * Print the timestamp, if available by the revision number and
934 * the validation bits.
935 */
936 if (ratelimitok &&
937 gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
938 gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
939 const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
940 const uint8_t s = t[0];
941 const uint8_t m = t[1];
942 const uint8_t h = t[2];
943 const uint8_t f = t[3];
944 const uint8_t D = t[4];
945 const uint8_t M = t[5];
946 const uint8_t Y = t[6];
947 const uint8_t C = t[7];
948
949 device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
950 " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
951 ctx, gede_v3->TimeStamp,
952 C,Y, M, D, h,m,s,
953 f & __BIT(0) ? " (event time)" : " (collect time)");
954 }
955 }
956
957 /*
958 * apei_gesb_ratelimit
959 *
960 * State to limit the rate of console log messages about hardware
961 * errors. For each of the four severity levels in a Generic
962 * Error Status Block,
963 *
964 * 0 - Recoverable (uncorrectable),
965 * 1 - Fatal (uncorrectable),
966 * 2 - Corrected, and
967 * 3 - None (including ill-formed errors),
968 *
969 * we record the last time it happened, protected by a CPU simple
970 * lock that we only try-acquire so it is safe to use in any
971 * context, including non-maskable interrupt context.
972 */
973
974 static struct {
975 __cpu_simple_lock_t lock;
976 struct timeval lasttime;
977 volatile uint32_t suppressed;
978 } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
979 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
980 [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
981 [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
982 [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
983 };
984
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment *p, saturating at UINT_MAX: once the
 *	value reaches UINT_MAX it is left unchanged.  Retries the
 *	compare-and-swap until it succeeds or saturation is observed.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{

	for (;;) {
		const uint32_t old = atomic_load_relaxed(p);

		/* Already saturated -- nothing to do. */
		if (__predict_false(old == UINT_MAX))
			return;

		/* Done unless someone else changed *p under us. */
		if (__predict_true(atomic_cas_32(p, old, old + 1) == old))
			return;
	}
}
997
998 /*
999 * apei_gesb_ratecheck(sc, severity, suppressed)
1000 *
1001 * Check for a rate limit on errors of the specified severity.
1002 *
1003 * => Return true if the error should be printed, and format into
1004 * the buffer suppressed a message saying how many errors were
1005 * previously suppressed.
1006 *
1007 * => Return false if the error should be suppressed because the
1008 * last one printed was too recent.
1009 */
1010 static bool
1011 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
1012 char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
1013 {
1014 /* one of each type per minute (XXX worth making configurable?) */
1015 const struct timeval mininterval = {60, 0};
1016 unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
1017 bool ok = false;
1018
1019 /*
1020 * If the lock is contended, the rate limit is probably
1021 * exceeded, so it's not OK to print.
1022 *
1023 * Otherwise, with the lock held, ask ratecheck(9) whether it's
1024 * OK to print.
1025 */
1026 if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
1027 goto out;
1028 ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
1029 __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
1030
1031 out: /*
1032 * If it's OK to print, report the number of errors that were
1033 * suppressed. If it's not OK to print, count a suppressed
1034 * error.
1035 */
1036 if (ok) {
1037 const uint32_t n =
1038 atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
1039
1040 if (n == 0) {
1041 suppressed[0] = '\0';
1042 } else {
1043 snprintf(suppressed,
1044 sizeof(" (4294967295 or more errors suppressed)"),
1045 " (%u%s error%s suppressed)",
1046 n,
1047 n == UINT32_MAX ? " or more" : "",
1048 n == 1 ? "" : "s");
1049 }
1050 } else {
1051 atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
1052 suppressed[0] = '\0';
1053 }
1054 return ok;
1055 }
1056
1057 /*
1058 * apei_gesb_report(sc, gesb, size, ctx)
1059 *
1060 * Check a Generic Error Status Block, of at most the specified
1061 * size in bytes, and report any errors in it. Return the 32-bit
1062 * Block Status in case the caller needs it to acknowledge the
1063 * report to firmware.
1064 */
1065 uint32_t
1066 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
1067 size_t size, const char *ctx, bool *fatalp)
1068 {
1069 uint32_t status, unknownstatus, severity, nentries, i;
1070 uint32_t datalen, rawdatalen;
1071 const ACPI_HEST_GENERIC_DATA *gede0, *gede;
1072 const unsigned char *rawdata;
1073 bool ratelimitok = false;
1074 char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
1075 bool fatal = false;
1076
1077 /*
1078 * Verify the buffer is large enough for a Generic Error Status
1079 * Block before we try to touch anything in it.
1080 */
1081 if (size < sizeof(*gesb)) {
1082 ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
1083 suppressed);
1084 if (ratelimitok) {
1085 device_printf(sc->sc_dev,
1086 "%s: truncated GESB, %zu < %zu%s\n",
1087 ctx, size, sizeof(*gesb), suppressed);
1088 }
1089 status = 0;
1090 goto out;
1091 }
1092 size -= sizeof(*gesb);
1093
1094 /*
1095 * Load the status. Access ordering rules are unclear in the
1096 * ACPI specification; I'm guessing that load-acquire of the
1097 * block status is a good idea before any other access to the
1098 * GESB.
1099 */
1100 status = atomic_load_acquire(&gesb->BlockStatus);
1101
1102 /*
1103 * If there are no status bits set, the rest of the GESB is
1104 * garbage, so stop here.
1105 */
1106 if (status == 0) {
1107 /* XXX dtrace */
1108 /* XXX DPRINTF */
1109 goto out;
1110 }
1111
1112 /*
1113 * Read out the severity and get the number of entries in this
1114 * status block.
1115 */
1116 severity = gesb->ErrorSeverity;
1117 nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
1118
1119 /*
1120 * Print a message to the console and dmesg about the severity
1121 * of the error.
1122 */
1123 ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
1124 if (ratelimitok) {
1125 char statusbuf[128];
1126
1127 /* XXX define this format somewhere */
1128 snprintb(statusbuf, sizeof(statusbuf), "\177\020"
1129 "b\000" "UE\0"
1130 "b\001" "CE\0"
1131 "b\002" "MULTI_UE\0"
1132 "b\003" "MULTI_CE\0"
1133 "f\004\010" "GEDE_COUNT\0"
1134 "\0", status);
1135
1136 if (severity < __arraycount(apei_gesb_severity)) {
1137 device_printf(sc->sc_dev, "%s"
1138 " reported hardware error%s:"
1139 " severity=%s nentries=%u status=%s\n",
1140 ctx, suppressed,
1141 apei_gesb_severity[severity], nentries, statusbuf);
1142 } else {
1143 device_printf(sc->sc_dev, "%s reported error%s:"
1144 " severity=%"PRIu32" nentries=%u status=%s\n",
1145 ctx, suppressed,
1146 severity, nentries, statusbuf);
1147 }
1148 }
1149
1150 /*
1151 * Make a determination about whether the error is fatal.
1152 *
1153 * XXX Currently we don't have any mechanism to recover from
1154 * uncorrectable but recoverable errors, so we treat those --
1155 * and anything else we don't recognize -- as fatal.
1156 */
1157 switch (severity) {
1158 case ACPI_HEST_GEN_ERROR_CORRECTED:
1159 case ACPI_HEST_GEN_ERROR_NONE:
1160 fatal = false;
1161 break;
1162 case ACPI_HEST_GEN_ERROR_FATAL:
1163 case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
1164 default:
1165 fatal = true;
1166 break;
1167 }
1168
1169 /*
1170 * Clear the bits we know about to warn if there's anything
1171 * left we don't understand.
1172 */
1173 unknownstatus = status;
1174 unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
1175 unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
1176 unknownstatus &= ~ACPI_HEST_CORRECTABLE;
1177 unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
1178 unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
1179 if (ratelimitok && unknownstatus != 0) {
1180 /* XXX dtrace */
1181 device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
1182 " 0x%"PRIx32"\n", ctx, unknownstatus);
1183 }
1184
1185 /*
1186 * Advance past the Generic Error Status Block (GESB) header to
1187 * the Generic Error Data Entries (GEDEs).
1188 */
1189 gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
1190
1191 /*
1192 * Verify that the data length (GEDEs) fits within the size.
1193 * If not, truncate the GEDEs.
1194 */
1195 datalen = gesb->DataLength;
1196 if (size < datalen) {
1197 if (ratelimitok) {
1198 device_printf(sc->sc_dev, "%s:"
1199 " GESB DataLength exceeds bounds:"
1200 " %zu < %"PRIu32"\n",
1201 ctx, size, datalen);
1202 }
1203 datalen = size;
1204 }
1205 size -= datalen;
1206
1207 /*
1208 * Report each of the Generic Error Data Entries.
1209 */
1210 for (i = 0; i < nentries; i++) {
1211 size_t headerlen;
1212 const struct apei_cper_report *report;
1213 char subctx[128];
1214
1215 /*
1216 * Format a subcontext to show this numbered entry of
1217 * the GESB.
1218 */
1219 snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
1220
1221 /*
1222 * If the remaining GESB data length isn't enough for a
1223 * GEDE header, stop here.
1224 */
1225 if (datalen < sizeof(*gede)) {
1226 if (ratelimitok) {
1227 device_printf(sc->sc_dev, "%s:"
1228 " truncated GEDE: %"PRIu32" < %zu bytes\n",
1229 subctx, datalen, sizeof(*gede));
1230 }
1231 break;
1232 }
1233
1234 /*
1235 * Print the GEDE header and get the full length (may
1236 * vary from revision to revision of the GEDE) and the
1237 * CPER report function if possible.
1238 */
1239 apei_gede_report_header(sc, gede, subctx, ratelimitok,
1240 &headerlen, &report);
1241
1242 /*
1243 * If we don't know the header length because of an
1244 * unfamiliar revision, stop here.
1245 */
1246 if (headerlen == 0) {
1247 if (ratelimitok) {
1248 device_printf(sc->sc_dev, "%s:"
1249 " unknown revision: 0x%"PRIx16"\n",
1250 subctx, gede->Revision);
1251 }
1252 break;
1253 }
1254
1255 /*
1256 * Stop here if what we mapped is too small for the
1257 * error data length.
1258 */
1259 datalen -= headerlen;
1260 if (datalen < gede->ErrorDataLength) {
1261 if (ratelimitok) {
1262 device_printf(sc->sc_dev, "%s:"
1263 " truncated GEDE payload:"
1264 " %"PRIu32" < %"PRIu32" bytes\n",
1265 subctx, datalen, gede->ErrorDataLength);
1266 }
1267 break;
1268 }
1269
1270 /*
1271 * Report the Common Platform Error Record appendix to
1272 * this Generic Error Data Entry.
1273 */
1274 if (report == NULL) {
1275 if (ratelimitok) {
1276 device_printf(sc->sc_dev, "%s:"
1277 " [unknown type]\n", ctx);
1278 }
1279 } else {
1280 /* XXX pass ratelimit through */
1281 (*report->func)(sc, (const char *)gede + headerlen,
1282 gede->ErrorDataLength, subctx, ratelimitok);
1283 }
1284
1285 /*
1286 * Advance past the GEDE header and CPER data to the
1287 * next GEDE.
1288 */
1289 gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1290 + headerlen + gede->ErrorDataLength);
1291 }
1292
1293 /*
1294 * Advance past the Generic Error Data Entries (GEDEs) to the
1295 * raw error data.
1296 *
1297 * XXX Provide Max Raw Data Length as a parameter, as found in
1298 * various HEST entry types.
1299 */
1300 rawdata = (const unsigned char *)gede0 + datalen;
1301
1302 /*
1303 * Verify that the raw data length fits within the size. If
1304 * not, truncate the raw data.
1305 */
1306 rawdatalen = gesb->RawDataLength;
1307 if (size < rawdatalen) {
1308 if (ratelimitok) {
1309 device_printf(sc->sc_dev, "%s:"
1310 " GESB RawDataLength exceeds bounds:"
1311 " %zu < %"PRIu32"\n",
1312 ctx, size, rawdatalen);
1313 }
1314 rawdatalen = size;
1315 }
1316 size -= rawdatalen;
1317
1318 /*
1319 * Hexdump the raw data, if any.
1320 */
1321 if (ratelimitok && rawdatalen > 0) {
1322 char devctx[128];
1323
1324 snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1325 device_xname(sc->sc_dev), ctx);
1326 hexdump(printf, devctx, rawdata, rawdatalen);
1327 }
1328
1329 /*
1330 * If there's anything left after the raw data, warn.
1331 */
1332 if (ratelimitok && size > 0) {
1333 device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1334 ctx, size);
1335 }
1336
1337 /*
1338 * Return the status so the caller can ack it, and tell the
1339 * caller whether this error is fatal.
1340 */
1341 out: *fatalp = fatal;
1342 return status;
1343 }
1344
/*
 * Declare the apei kernel module with class MODULE_CLASS_DRIVER.  The
 * third argument is NULL -- presumably meaning no other modules are
 * required; see module(9) to confirm.
 */
MODULE(MODULE_CLASS_DRIVER, apei, NULL);

/*
 * When built as a loadable module (_MODULE), pull in ioconf.c, which
 * presumably supplies the cfdriver_ioconf_apei, cfattach_ioconf_apei,
 * and cfdata_ioconf_apei tables that apei_modcmd passes to
 * config_init_component/config_fini_component.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
1350
1351 static int
1352 apei_modcmd(modcmd_t cmd, void *opaque)
1353 {
1354 int error = 0;
1355
1356 switch (cmd) {
1357 case MODULE_CMD_INIT:
1358 #ifdef _MODULE
1359 error = config_init_component(cfdriver_ioconf_apei,
1360 cfattach_ioconf_apei, cfdata_ioconf_apei);
1361 #endif
1362 return error;
1363 case MODULE_CMD_FINI:
1364 #ifdef _MODULE
1365 error = config_fini_component(cfdriver_ioconf_apei,
1366 cfattach_ioconf_apei, cfdata_ioconf_apei);
1367 #endif
1368 return error;
1369 default:
1370 return ENOTTY;
1371 }
1372 }
1373