apei.c revision 1.9 1 /* $NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * APEI: ACPI Platform Error Interface
31 *
32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 *
34 * XXX dtrace probes
35 *
36 * XXX call _OSC appropriately to announce to the platform that we, the
37 * OSPM, support APEI
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $");
42
43 #include <sys/param.h>
44 #include <sys/types.h>
45
46 #include <sys/atomic.h>
47 #include <sys/endian.h>
48 #include <sys/device.h>
49 #include <sys/module.h>
50 #include <sys/sysctl.h>
51 #include <sys/uuid.h>
52
53 #include <dev/acpi/acpireg.h>
54 #include <dev/acpi/acpivar.h>
55 #include <dev/acpi/apei_bertvar.h>
56 #include <dev/acpi/apei_cper.h>
57 #include <dev/acpi/apei_einjvar.h>
58 #include <dev/acpi/apei_erstvar.h>
59 #include <dev/acpi/apei_hestvar.h>
60 #include <dev/acpi/apei_interp.h>
61 #include <dev/acpi/apeivar.h>
62 #include <dev/pci/pcireg.h>
63
64 #define _COMPONENT ACPI_RESOURCE_COMPONENT
65 ACPI_MODULE_NAME ("apei")
66
67 static int apei_match(device_t, cfdata_t, void *);
68 static void apei_attach(device_t, device_t, void *);
69 static int apei_detach(device_t, int);
70
71 static void apei_get_tables(struct apei_tab *);
72 static void apei_put_tables(struct apei_tab *);
73
74 static void apei_identify(struct apei_softc *, const char *,
75 const ACPI_TABLE_HEADER *);
76
77 CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
78 apei_match, apei_attach, apei_detach, NULL);
79
80 static int
81 apei_match(device_t parent, cfdata_t match, void *aux)
82 {
83 struct apei_tab tab;
84 int prio = 0;
85
86 /*
87 * If we have any of the APEI tables, match.
88 */
89 apei_get_tables(&tab);
90 if (tab.bert || tab.einj || tab.erst || tab.hest)
91 prio = 1;
92 apei_put_tables(&tab);
93
94 return prio;
95 }
96
97 static void
98 apei_attach(device_t parent, device_t self, void *aux)
99 {
100 struct apei_softc *sc = device_private(self);
101 const struct sysctlnode *sysctl_hw_acpi;
102 int error;
103
104 aprint_naive("\n");
105 aprint_normal(": ACPI Platform Error Interface\n");
106
107 pmf_device_register(self, NULL, NULL);
108
109 sc->sc_dev = self;
110 apei_get_tables(&sc->sc_tab);
111
112 /*
113 * Get the sysctl hw.acpi node. This should already be created
114 * but I don't see an easy way to get at it. If this fails,
115 * something is seriously wrong, so let's stop here.
116 */
117 error = sysctl_createv(&sc->sc_sysctllog, 0,
118 NULL, &sysctl_hw_acpi, 0,
119 CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
120 CTL_HW, CTL_CREATE, CTL_EOL);
121 if (error) {
122 aprint_error_dev(sc->sc_dev,
123 "failed to create sysctl hw.acpi: %d\n", error);
124 return;
125 }
126
127 /*
128 * Create sysctl hw.acpi.apei.
129 */
130 error = sysctl_createv(&sc->sc_sysctllog, 0,
131 &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
132 CTLTYPE_NODE, "apei",
133 SYSCTL_DESCR("ACPI Platform Error Interface"),
134 NULL, 0, NULL, 0,
135 CTL_CREATE, CTL_EOL);
136 if (error) {
137 aprint_error_dev(sc->sc_dev,
138 "failed to create sysctl hw.acpi.apei: %d\n", error);
139 return;
140 }
141
142 /*
143 * Set up BERT, EINJ, ERST, and HEST.
144 */
145 if (sc->sc_tab.bert) {
146 apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
147 apei_bert_attach(sc);
148 }
149 if (sc->sc_tab.einj) {
150 apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
151 apei_einj_attach(sc);
152 }
153 if (sc->sc_tab.erst) {
154 apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
155 apei_erst_attach(sc);
156 }
157 if (sc->sc_tab.hest) {
158 apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
159 apei_hest_attach(sc);
160 }
161 }
162
163 static int
164 apei_detach(device_t self, int flags)
165 {
166 struct apei_softc *sc = device_private(self);
167 int error;
168
169 /*
170 * Detach children. We don't currently have any but this is
171 * harmless without children and mandatory if we ever sprouted
172 * them, so let's just leave it here for good measure.
173 *
174 * After this point, we are committed to detaching; failure is
175 * forbidden.
176 */
177 error = config_detach_children(self, flags);
178 if (error)
179 return error;
180
181 /*
182 * Tear down all the sysctl nodes first, before the software
183 * state backing them goes away.
184 */
185 sysctl_teardown(&sc->sc_sysctllog);
186 sc->sc_sysctlroot = NULL;
187
188 /*
189 * Detach the software state for the APEI tables.
190 */
191 if (sc->sc_tab.hest)
192 apei_hest_detach(sc);
193 if (sc->sc_tab.erst)
194 apei_erst_detach(sc);
195 if (sc->sc_tab.einj)
196 apei_einj_detach(sc);
197 if (sc->sc_tab.bert)
198 apei_bert_detach(sc);
199
200 /*
201 * Release the APEI tables and we're done.
202 */
203 apei_put_tables(&sc->sc_tab);
204 pmf_device_deregister(self);
205 return 0;
206 }
207
208 /*
209 * apei_get_tables(tab)
210 *
211 * Get references to whichever APEI-related tables -- BERT, EINJ,
212 * ERST, HEST -- are available in the system.
213 */
214 static void
215 apei_get_tables(struct apei_tab *tab)
216 {
217 ACPI_STATUS rv;
218
219 /*
220 * Probe the BERT -- Boot Error Record Table.
221 */
222 rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
223 if (ACPI_FAILURE(rv))
224 tab->bert = NULL;
225
226 /*
227 * Probe the EINJ -- Error Injection Table.
228 */
229 rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
230 if (ACPI_FAILURE(rv))
231 tab->einj = NULL;
232
233 /*
234 * Probe the ERST -- Error Record Serialization Table.
235 */
236 rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
237 if (ACPI_FAILURE(rv))
238 tab->erst = NULL;
239
240 /*
241 * Probe the HEST -- Hardware Error Source Table.
242 */
243 rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
244 if (ACPI_FAILURE(rv))
245 tab->hest = NULL;
246 }
247
248 /*
249 * apei_put_tables(tab)
250 *
251 * Release the tables acquired by apei_get_tables.
252 */
253 static void
254 apei_put_tables(struct apei_tab *tab)
255 {
256
257 if (tab->bert != NULL) {
258 AcpiPutTable(&tab->bert->Header);
259 tab->bert = NULL;
260 }
261 if (tab->einj != NULL) {
262 AcpiPutTable(&tab->einj->Header);
263 tab->einj = NULL;
264 }
265 if (tab->erst != NULL) {
266 AcpiPutTable(&tab->erst->Header);
267 tab->erst = NULL;
268 }
269 if (tab->hest != NULL) {
270 AcpiPutTable(&tab->hest->Header);
271 tab->hest = NULL;
272 }
273 }
274
275 /*
276 * apei_identify(sc, name, header)
277 *
278 * Identify the APEI-related table header for dmesg.
279 */
280 static void
281 apei_identify(struct apei_softc *sc, const char *name,
282 const ACPI_TABLE_HEADER *h)
283 {
284
285 aprint_normal_dev(sc->sc_dev, "%s:"
286 " OemId <%6.6s,%8.8s,%08x>"
287 " AslId <%4.4s,%08x>\n",
288 name,
289 h->OemId, h->OemTableId, h->OemRevision,
290 h->AslCompilerId, h->AslCompilerRevision);
291 }
292
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte GUID stored at buf, as found in ACPI
 *	tables and Common Platform Error Records, into the struct uuid
 *	pointed to by uuid.  This is a thin wrapper around
 *	uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
305
306 /*
307 * apei_format_guid(uuid, s)
308 *
309 * Format a UUID as a string. This uses C initializer notation,
310 * not UUID notation, in order to match the text in the UEFI
311 * specification.
312 */
313 static void
314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
315 {
316
317 snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
318 "{0x%02x,%02x,"
319 "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
320 uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
321 uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
322 uuid->node[0], uuid->node[1], uuid->node[2],
323 uuid->node[3], uuid->node[4], uuid->node[5]);
324 }
325
326 /*
327 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
328 */
329
330 static const char *const cper_memory_error_type[] = {
331 #define F(LN, SN, V) [LN] = #SN,
332 CPER_MEMORY_ERROR_TYPES(F)
333 #undef F
334 };
335
/*
 * apei_gesb_severity
 *
 *	Names of the Generic Error Status Block severity levels,
 *	indexed by the numeric severity 0..3.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with
 * these values, but acpica designates them as being for Generic Error
 * Data Entries rather than Generic Error Status Blocks, so the
 * numbers are spelled out here instead.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",	/* 0 */
	"fatal",	/* 1 */
	"corrected",	/* 2 */
	"none",		/* 3 */
};
349
350 /*
351 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
352 */
353 static const char *const apei_gede_severity[] = {
354 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
355 [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
356 [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
357 [ACPI_HEST_GEN_ERROR_NONE] = "none",
358 };
359
360 /*
361 * N.2.5. Memory Error Section
362 *
363 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
364 */
365 static const struct uuid CPER_MEMORY_ERROR_SECTION =
366 {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
367
368 static void
369 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
370 size_t len, const char *ctx, bool ratelimitok)
371 {
372 const struct cper_memory_error *ME = buf;
373 char bitbuf[1024];
374
375 /*
376 * If we've hit the rate limit, skip printing the error.
377 */
378 if (!ratelimitok)
379 goto out;
380
381 snprintb(bitbuf, sizeof(bitbuf),
382 CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
383 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
384 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
385 /*
386 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
387 */
388 /* XXX define this format somewhere */
389 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
390 "f\010\010" "ErrorType\0"
391 "=\001" "ERR_INTERNAL\0"
392 "=\004" "ERR_MEM\0"
393 "=\005" "ERR_TLB\0"
394 "=\006" "ERR_CACHE\0"
395 "=\007" "ERR_FUNCTION\0"
396 "=\010" "ERR_SELFTEST\0"
397 "=\011" "ERR_FLOW\0"
398 "=\020" "ERR_BUS\0"
399 "=\021" "ERR_MAP\0"
400 "=\022" "ERR_IMPROPER\0"
401 "=\023" "ERR_UNIMPL\0"
402 "=\024" "ERR_LOL\0"
403 "=\025" "ERR_RESPONSE\0"
404 "=\026" "ERR_PARITY\0"
405 "=\027" "ERR_PROTOCOL\0"
406 "=\030" "ERR_ERROR\0"
407 "=\031" "ERR_TIMEOUT\0"
408 "=\032" "ERR_POISONED\0"
409 "b\020" "AddressError\0"
410 "b\021" "ControlError\0"
411 "b\022" "DataError\0"
412 "b\023" "ResponderDetected\0"
413 "b\024" "RequesterDetected\0"
414 "b\025" "FirstError\0"
415 "b\026" "Overflow\0"
416 "\0", ME->ErrorStatus);
417 device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
418 }
419 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
420 device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
421 ctx, ME->PhysicalAddress);
422 }
423 if (ME->ValidationBits &
424 CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
425 device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
426 "\n", ctx, ME->PhysicalAddressMask);
427 }
428 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
429 device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
430 ME->Node);
431 }
432 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
433 device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
434 ME->Card);
435 }
436 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
437 device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
438 ME->Module);
439 }
440 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
441 device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
442 ME->Bank);
443 }
444 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
445 device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
446 ME->Device);
447 }
448 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
449 device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
450 ME->Row);
451 }
452 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
453 device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
454 ME->Column);
455 }
456 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
457 device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
458 ctx, ME->BitPosition);
459 }
460 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
461 device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
462 ctx, ME->RequestorId);
463 }
464 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
465 device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
466 ctx, ME->ResponderId);
467 }
468 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
469 device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
470 ctx, ME->TargetId);
471 }
472 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
473 const uint8_t t = ME->MemoryErrorType;
474 const char *n = t < __arraycount(cper_memory_error_type)
475 ? cper_memory_error_type[t] : NULL;
476
477 if (n) {
478 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
479 " (%s)\n", ctx, t, n);
480 } else {
481 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
482 ctx, t);
483 }
484 }
485
486 out: /*
487 * XXX pass this through to uvm(9) or userland for decisions
488 * like page retirement
489 */
490 return;
491 }
492
493 /*
494 * N.2.7. PCI Express Error Section
495 *
496 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
497 */
498 static const struct uuid CPER_PCIE_ERROR_SECTION =
499 {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
500
501 static const char *const cper_pcie_error_port_type[] = {
502 #define F(LN, SN, V) [LN] = #SN,
503 CPER_PCIE_ERROR_PORT_TYPES(F)
504 #undef F
505 };
506
507 static void
508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
509 const char *ctx, bool ratelimitok)
510 {
511 const struct cper_pcie_error *PE = buf;
512 char bitbuf[1024];
513
514 /*
515 * If we've hit the rate limit, skip printing the error.
516 */
517 if (!ratelimitok)
518 goto out;
519
520 snprintb(bitbuf, sizeof(bitbuf),
521 CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
522 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
523 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
524 const uint32_t t = PE->PortType;
525 const char *n = t < __arraycount(cper_pcie_error_port_type)
526 ? cper_pcie_error_port_type[t] : NULL;
527
528 if (n) {
529 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
530 " (%s)\n", ctx, t, n);
531 } else {
532 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
533 ctx, t);
534 }
535 }
536 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
537 /* XXX BCD */
538 device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
539 ctx, PE->Version);
540 }
541 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
542 /* XXX move me to pcireg.h */
543 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
544 /* command */
545 "b\000" "IO_ENABLE\0"
546 "b\001" "MEM_ENABLE\0"
547 "b\002" "MASTER_ENABLE\0"
548 "b\003" "SPECIAL_ENABLE\0"
549 "b\004" "INVALIDATE_ENABLE\0"
550 "b\005" "PALETTE_ENABLE\0"
551 "b\006" "PARITY_ENABLE\0"
552 "b\007" "STEPPING_ENABLE\0"
553 "b\010" "SERR_ENABLE\0"
554 "b\011" "BACKTOBACK_ENABLE\0"
555 "b\012" "INTERRUPT_DISABLE\0"
556 /* status */
557 "b\023" "INT_STATUS\0"
558 "b\024" "CAPLIST_SUPPORT\0"
559 "b\025" "66MHZ_SUPPORT\0"
560 "b\026" "UDF_SUPPORT\0"
561 "b\027" "BACKTOBACK_SUPPORT\0"
562 "b\030" "PARITY_ERROR\0"
563 "f\031\002" "DEVSEL\0"
564 "=\000" "FAST\0"
565 "=\001" "MEDIUM\0"
566 "=\002" "SLOW\0"
567 "b\033" "TARGET_TARGET_ABORT\0"
568 "b\034" "MASTER_TARGET_ABORT\0"
569 "b\035" "MASTER_ABORT\0"
570 "b\036" "SPECIAL_ERROR\0"
571 "b\037" "PARITY_DETECT\0"
572 "\0", PE->CommandStatus);
573 device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
574 ctx, bitbuf);
575 }
576 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
577 device_printf(sc->sc_dev, "%s: DeviceID:"
578 " VendorID=0x%04"PRIx16
579 " DeviceID=0x%04"PRIx16
580 " ClassCode=0x%06"PRIx32
581 " Function=%"PRIu8
582 " Device=%"PRIu8
583 " Segment=%"PRIu16
584 " Bus=%"PRIu8
585 " SecondaryBus=%"PRIu8
586 " Slot=0x%04"PRIx16
587 " Reserved0=0x%02"PRIx8
588 "\n",
589 ctx,
590 le16dec(PE->DeviceID.VendorID),
591 le16dec(PE->DeviceID.DeviceID),
592 (PE->DeviceID.ClassCode[0] | /* le24dec */
593 ((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
594 ((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
595 PE->DeviceID.Function, PE->DeviceID.Device,
596 le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
597 PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
598 PE->DeviceID.Reserved0);
599 }
600 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
601 device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
602 ctx, PE->DeviceSerial);
603 }
604 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
605 /* XXX snprintb */
606 device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
607 "\n", ctx, PE->BridgeControlStatus);
608 }
609 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
610 uint32_t dcsr, dsr;
611 char hex[9*sizeof(PE->CapabilityStructure)/4];
612 unsigned i;
613
614 /*
615 * Display a hex dump of each 32-bit register in the
616 * PCIe capability structure.
617 */
618 __CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0);
619 for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) {
620 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
621 le32dec(&PE->CapabilityStructure[4*i]));
622 }
623 hex[sizeof(hex) - 1] = '\0';
624 device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
625 ctx, hex);
626
627 /*
628 * If the Device Status Register has any bits set,
629 * highlight it in particular -- these are probably
630 * error bits.
631 */
632 dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
633 dsr = __SHIFTOUT(dcsr, __BITS(31,16));
634 if (dsr != 0) {
635 /*
636 * XXX move me to pcireg.h; note: high
637 * half of DCSR
638 */
639 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
640 "b\000" "CORRECTABLE_ERROR\0"
641 "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0"
642 "b\002" "FATAL_ERROR\0"
643 "b\003" "UNSUPPORTED_REQUEST\0"
644 "b\004" "AUX_POWER\0"
645 "b\005" "TRANSACTIONS_PENDING\0"
646 "\0", dsr);
647 device_printf(sc->sc_dev, "%s: PCIe Device Status:"
648 " %s\n",
649 ctx, bitbuf);
650 }
651 }
652 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
653 uint32_t uc_status, uc_sev;
654 uint32_t cor_status;
655 uint32_t control;
656 char hex[9*sizeof(PE->AERInfo)/4];
657 unsigned i;
658
659 /*
660 * Display a hex dump of each 32-bit register in the
661 * PCIe Advanced Error Reporting extended capability
662 * structure.
663 */
664 __CTASSERT(sizeof(PE->AERInfo) % 4 == 0);
665 for (i = 0; i < sizeof(PE->AERInfo)/4; i++) {
666 snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
667 le32dec(&PE->AERInfo[4*i]));
668 }
669 hex[sizeof(hex) - 1] = '\0';
670 device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
671
672 /* XXX move me to pcireg.h */
673 #define PCI_AER_UC_STATUS_FMT "\177\020" \
674 "b\000" "UNDEFINED\0" \
675 "b\004" "DL_PROTOCOL_ERROR\0" \
676 "b\005" "SURPRISE_DOWN_ERROR\0" \
677 "b\014" "POISONED_TLP\0" \
678 "b\015" "FC_PROTOCOL_ERROR\0" \
679 "b\016" "COMPLETION_TIMEOUT\0" \
680 "b\017" "COMPLETION_ABORT\0" \
681 "b\020" "UNEXPECTED_COMPLETION\0" \
682 "b\021" "RECEIVER_OVERFLOW\0" \
683 "b\022" "MALFORMED_TLP\0" \
684 "b\023" "ECRC_ERROR\0" \
685 "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \
686 "b\025" "ACS_VIOLATION\0" \
687 "b\026" "INTERNAL_ERROR\0" \
688 "b\027" "MC_BLOCKED_TLP\0" \
689 "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \
690 "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \
691 "b\032" "POISONTLP_EGRESS_BLOCKED\0" \
692 "\0"
693
694 /*
695 * If there are any hardware error status bits set,
696 * highlight them in particular, in three groups:
697 *
698 * - uncorrectable fatal (UC_STATUS and UC_SEVERITY)
699 * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY)
700 * - corrected (COR_STATUS)
701 *
702 * And if there are any uncorrectable errors, show
703 * which one was reported first, according to
704 * CAP_CONTROL.
705 */
706 uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
707 uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
708 cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
709 control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
710
711 if (uc_status & uc_sev) {
712 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
713 uc_status & uc_sev);
714 device_printf(sc->sc_dev, "%s:"
715 " AER hardware fatal uncorrectable errors: %s\n",
716 ctx, bitbuf);
717 }
718 if (uc_status & ~uc_sev) {
719 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
720 uc_status & ~uc_sev);
721 device_printf(sc->sc_dev, "%s:"
722 " AER hardware non-fatal uncorrectable errors:"
723 " %s\n",
724 ctx, bitbuf);
725 }
726 if (uc_status) {
727 unsigned first = __SHIFTOUT(control,
728 PCI_AER_FIRST_ERROR_PTR);
729 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
730 (uint32_t)1 << first);
731 device_printf(sc->sc_dev, "%s:"
732 " AER hardware first uncorrectable error: %s\n",
733 ctx, bitbuf);
734 }
735 if (cor_status) {
736 /* XXX move me to pcireg.h */
737 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
738 "b\000" "RECEIVER_ERROR\0"
739 "b\006" "BAD_TLP\0"
740 "b\007" "BAD_DLLP\0"
741 "b\010" "REPLAY_NUM_ROLLOVER\0"
742 "b\014" "REPLAY_TIMER_TIMEOUT\0"
743 "b\015" "ADVISORY_NF_ERROR\0"
744 "b\016" "INTERNAL_ERROR\0"
745 "b\017" "HEADER_LOG_OVERFLOW\0"
746 "\0", cor_status);
747 device_printf(sc->sc_dev, "%s:"
748 " AER hardware corrected error: %s\n",
749 ctx, bitbuf);
750 }
751 }
752
753 out: /*
754 * XXX pass this on to the PCI subsystem to handle
755 */
756 return;
757 }
758
759 /*
760 * apei_cper_reports
761 *
762 * Table of known Common Platform Error Record types, symbolic
763 * names, minimum data lengths, and functions to report them.
764 *
765 * The section types and corresponding section layouts are listed
766 * at:
767 *
768 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
769 */
770 static const struct apei_cper_report {
771 const char *name;
772 const struct uuid *type;
773 size_t minlength;
774 void (*func)(struct apei_softc *, const void *, size_t, const char *,
775 bool);
776 } apei_cper_reports[] = {
777 { "memory", &CPER_MEMORY_ERROR_SECTION,
778 sizeof(struct cper_memory_error),
779 apei_cper_memory_error_report },
780 { "PCIe", &CPER_PCIE_ERROR_SECTION,
781 sizeof(struct cper_pcie_error),
782 apei_cper_pcie_error_report },
783 };
784
785 /*
786 * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
787 *
788 * Report the header of the ith Generic Error Data Entry in the
789 * given context, if ratelimitok is true.
790 *
791 * Return the actual length of the header in headerlen, or 0 if
792 * not known because the revision isn't recognized.
793 *
794 * Return the report type in report, or NULL if not known because
795 * the section type isn't recognized.
796 */
797 static void
798 apei_gede_report_header(struct apei_softc *sc,
799 const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
800 size_t *headerlenp, const struct apei_cper_report **reportp)
801 {
802 const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
803 struct uuid sectype;
804 char guidstr[69];
805 char buf[128];
806 unsigned i;
807
808 /*
809 * Print the section type as a C initializer. It would be
810 * prettier to use standard hyphenated UUID notation, but that
811 * notation is slightly ambiguous here (two octets could be
812 * written either way, depending on Microsoft convention --
813 * which influenced ACPI and UEFI -- or internet convention),
814 * and the UEFI spec writes the C initializer notation, so this
815 * makes it easier to search for.
816 *
817 * Also print out a symbolic name, if we know it.
818 */
819 apei_cper_guid_dec(gede->SectionType, §ype);
820 apei_format_guid(§ype, guidstr);
821 for (i = 0; i < __arraycount(apei_cper_reports); i++) {
822 const struct apei_cper_report *const report =
823 &apei_cper_reports[i];
824
825 if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
826 continue;
827 if (ratelimitok) {
828 device_printf(sc->sc_dev, "%s:"
829 " SectionType=%s (%s error)\n",
830 ctx, guidstr, report->name);
831 }
832 *reportp = report;
833 break;
834 }
835 if (i == __arraycount(apei_cper_reports)) {
836 if (ratelimitok) {
837 device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
838 guidstr);
839 }
840 *reportp = NULL;
841 }
842
843 /*
844 * Print the numeric severity and, if we have it, a symbolic
845 * name for it.
846 */
847 if (ratelimitok) {
848 device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
849 ctx,
850 gede->ErrorSeverity,
851 (gede->ErrorSeverity < __arraycount(apei_gede_severity)
852 ? apei_gede_severity[gede->ErrorSeverity]
853 : "unknown"));
854 }
855
856 /*
857 * The Revision may not often be useful, but this is only ever
858 * shown at the time of a hardware error report, not something
859 * you can glean at your convenience with acpidump. So print
860 * it anyway.
861 */
862 if (ratelimitok) {
863 device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
864 gede->Revision);
865 }
866
867 /*
868 * Don't touch anything past the Revision until we've
869 * determined we understand it. Return the header length to
870 * the caller, or return zero -- and stop here -- if we don't
871 * know what the actual header length is.
872 */
873 if (gede->Revision < 0x0300) {
874 *headerlenp = sizeof(*gede);
875 } else if (gede->Revision < 0x0400) {
876 *headerlenp = sizeof(*gede_v3);
877 } else {
878 *headerlenp = 0;
879 return;
880 }
881
882 /*
883 * Print the validation bits at debug level. Only really
884 * helpful if there are bits we _don't_ know about.
885 */
886 if (ratelimitok) {
887 /* XXX define this format somewhere */
888 snprintb(buf, sizeof(buf), "\177\020"
889 "b\000" "FRU_ID\0"
890 "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
891 "b\002" "TIMESTAMP\0"
892 "\0", gede->ValidationBits);
893 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
894 buf);
895 }
896
897 /*
898 * Print the CPER section flags.
899 */
900 if (ratelimitok) {
901 snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
902 gede->Flags);
903 device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
904 }
905
906 /*
907 * The ErrorDataLength is unlikely to be useful for the log, so
908 * print it at debug level only.
909 */
910 if (ratelimitok) {
911 aprint_debug_dev(sc->sc_dev, "%s:"
912 " ErrorDataLength=0x%"PRIu32"\n",
913 ctx, gede->ErrorDataLength);
914 }
915
916 /*
917 * Print the FRU Id and text, if available.
918 */
919 if (ratelimitok &&
920 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
921 struct uuid fruid;
922
923 apei_cper_guid_dec(gede->FruId, &fruid);
924 apei_format_guid(&fruid, guidstr);
925 device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
926 }
927 if (ratelimitok &&
928 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
929 device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
930 ctx, gede->FruText);
931 }
932
933 /*
934 * Print the timestamp, if available by the revision number and
935 * the validation bits.
936 */
937 if (ratelimitok &&
938 gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
939 gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
940 const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
941 const uint8_t s = t[0];
942 const uint8_t m = t[1];
943 const uint8_t h = t[2];
944 const uint8_t f = t[3];
945 const uint8_t D = t[4];
946 const uint8_t M = t[5];
947 const uint8_t Y = t[6];
948 const uint8_t C = t[7];
949
950 device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
951 " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
952 ctx, gede_v3->TimeStamp,
953 C,Y, M, D, h,m,s,
954 f & __BIT(0) ? " (event time)" : " (collect time)");
955 }
956 }
957
958 /*
959 * apei_gesb_ratelimit
960 *
961 * State to limit the rate of console log messages about hardware
962 * errors. For each of the four severity levels in a Generic
963 * Error Status Block,
964 *
965 * 0 - Recoverable (uncorrectable),
966 * 1 - Fatal (uncorrectable),
967 * 2 - Corrected, and
968 * 3 - None (including ill-formed errors),
969 *
970 * we record the last time it happened, protected by a CPU simple
971 * lock that we only try-acquire so it is safe to use in any
972 * context, including non-maskable interrupt context.
973 */
974
975 static struct {
976 __cpu_simple_lock_t lock;
977 struct timeval lasttime;
978 volatile uint32_t suppressed;
979 } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
980 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
981 [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
982 [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
983 [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
984 };
985
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment *p, saturating at UINT_MAX: if *p is
 *	already UINT_MAX, leave it alone.  Retries the
 *	compare-and-swap until it succeeds or the counter is observed
 *	to be saturated.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{

	for (;;) {
		const uint32_t old = atomic_load_relaxed(p);

		/* Already saturated -- nothing to do. */
		if (__predict_false(old == UINT_MAX))
			break;

		/* Done if nobody changed *p under us; else try again. */
		if (atomic_cas_32(p, old, old + 1) == old)
			break;
	}
}
998
999 /*
1000 * apei_gesb_ratecheck(sc, severity, suppressed)
1001 *
1002 * Check for a rate limit on errors of the specified severity.
1003 *
1004 * => Return true if the error should be printed, and format into
1005 * the buffer suppressed a message saying how many errors were
1006 * previously suppressed.
1007 *
1008 * => Return false if the error should be suppressed because the
1009 * last one printed was too recent.
1010 */
1011 static bool
1012 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
1013 char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
1014 {
1015 /* one of each type per minute (XXX worth making configurable?) */
1016 const struct timeval mininterval = {60, 0};
1017 unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
1018 bool ok = false;
1019
1020 /*
1021 * If the lock is contended, the rate limit is probably
1022 * exceeded, so it's not OK to print.
1023 *
1024 * Otherwise, with the lock held, ask ratecheck(9) whether it's
1025 * OK to print.
1026 */
1027 if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
1028 goto out;
1029 ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
1030 __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
1031
1032 out: /*
1033 * If it's OK to print, report the number of errors that were
1034 * suppressed. If it's not OK to print, count a suppressed
1035 * error.
1036 */
1037 if (ok) {
1038 const uint32_t n =
1039 atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
1040
1041 if (n == 0) {
1042 suppressed[0] = '\0';
1043 } else {
1044 snprintf(suppressed,
1045 sizeof(" (4294967295 or more errors suppressed)"),
1046 " (%u%s error%s suppressed)",
1047 n,
1048 n == UINT32_MAX ? " or more" : "",
1049 n == 1 ? "" : "s");
1050 }
1051 } else {
1052 atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
1053 suppressed[0] = '\0';
1054 }
1055 return ok;
1056 }
1057
1058 /*
1059 * apei_gesb_report(sc, gesb, size, ctx)
1060 *
1061 * Check a Generic Error Status Block, of at most the specified
1062 * size in bytes, and report any errors in it. Return the 32-bit
1063 * Block Status in case the caller needs it to acknowledge the
1064 * report to firmware.
1065 */
1066 uint32_t
1067 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
1068 size_t size, const char *ctx, bool *fatalp)
1069 {
1070 uint32_t status, unknownstatus, severity, nentries, i;
1071 uint32_t datalen, rawdatalen;
1072 const ACPI_HEST_GENERIC_DATA *gede0, *gede;
1073 const unsigned char *rawdata;
1074 bool ratelimitok = false;
1075 char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
1076 bool fatal = false;
1077
1078 /*
1079 * Verify the buffer is large enough for a Generic Error Status
1080 * Block before we try to touch anything in it.
1081 */
1082 if (size < sizeof(*gesb)) {
1083 ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
1084 suppressed);
1085 if (ratelimitok) {
1086 device_printf(sc->sc_dev,
1087 "%s: truncated GESB, %zu < %zu%s\n",
1088 ctx, size, sizeof(*gesb), suppressed);
1089 }
1090 status = 0;
1091 goto out;
1092 }
1093 size -= sizeof(*gesb);
1094
1095 /*
1096 * Load the status. Access ordering rules are unclear in the
1097 * ACPI specification; I'm guessing that load-acquire of the
1098 * block status is a good idea before any other access to the
1099 * GESB.
1100 */
1101 status = atomic_load_acquire(&gesb->BlockStatus);
1102
1103 /*
1104 * If there are no status bits set, the rest of the GESB is
1105 * garbage, so stop here.
1106 */
1107 if (status == 0) {
1108 /* XXX dtrace */
1109 /* XXX DPRINTF */
1110 goto out;
1111 }
1112
1113 /*
1114 * Read out the severity and get the number of entries in this
1115 * status block.
1116 */
1117 severity = gesb->ErrorSeverity;
1118 nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
1119
1120 /*
1121 * Print a message to the console and dmesg about the severity
1122 * of the error.
1123 */
1124 ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
1125 if (ratelimitok) {
1126 char statusbuf[128];
1127
1128 /* XXX define this format somewhere */
1129 snprintb(statusbuf, sizeof(statusbuf), "\177\020"
1130 "b\000" "UE\0"
1131 "b\001" "CE\0"
1132 "b\002" "MULTI_UE\0"
1133 "b\003" "MULTI_CE\0"
1134 "f\004\010" "GEDE_COUNT\0"
1135 "\0", status);
1136
1137 if (severity < __arraycount(apei_gesb_severity)) {
1138 device_printf(sc->sc_dev, "%s"
1139 " reported hardware error%s:"
1140 " severity=%s nentries=%u status=%s\n",
1141 ctx, suppressed,
1142 apei_gesb_severity[severity], nentries, statusbuf);
1143 } else {
1144 device_printf(sc->sc_dev, "%s reported error%s:"
1145 " severity=%"PRIu32" nentries=%u status=%s\n",
1146 ctx, suppressed,
1147 severity, nentries, statusbuf);
1148 }
1149 }
1150
1151 /*
1152 * Make a determination about whether the error is fatal.
1153 *
1154 * XXX Currently we don't have any mechanism to recover from
1155 * uncorrectable but recoverable errors, so we treat those --
1156 * and anything else we don't recognize -- as fatal.
1157 */
1158 switch (severity) {
1159 case ACPI_HEST_GEN_ERROR_CORRECTED:
1160 case ACPI_HEST_GEN_ERROR_NONE:
1161 fatal = false;
1162 break;
1163 case ACPI_HEST_GEN_ERROR_FATAL:
1164 case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
1165 default:
1166 fatal = true;
1167 break;
1168 }
1169
1170 /*
1171 * Clear the bits we know about to warn if there's anything
1172 * left we don't understand.
1173 */
1174 unknownstatus = status;
1175 unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
1176 unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
1177 unknownstatus &= ~ACPI_HEST_CORRECTABLE;
1178 unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
1179 unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
1180 if (ratelimitok && unknownstatus != 0) {
1181 /* XXX dtrace */
1182 device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
1183 " 0x%"PRIx32"\n", ctx, unknownstatus);
1184 }
1185
1186 /*
1187 * Advance past the Generic Error Status Block (GESB) header to
1188 * the Generic Error Data Entries (GEDEs).
1189 */
1190 gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
1191
1192 /*
1193 * Verify that the data length (GEDEs) fits within the size.
1194 * If not, truncate the GEDEs.
1195 */
1196 datalen = gesb->DataLength;
1197 if (size < datalen) {
1198 if (ratelimitok) {
1199 device_printf(sc->sc_dev, "%s:"
1200 " GESB DataLength exceeds bounds:"
1201 " %zu < %"PRIu32"\n",
1202 ctx, size, datalen);
1203 }
1204 datalen = size;
1205 }
1206 size -= datalen;
1207
1208 /*
1209 * Report each of the Generic Error Data Entries.
1210 */
1211 for (i = 0; i < nentries; i++) {
1212 size_t headerlen;
1213 const struct apei_cper_report *report;
1214 char subctx[128];
1215
1216 /*
1217 * Format a subcontext to show this numbered entry of
1218 * the GESB.
1219 */
1220 snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
1221
1222 /*
1223 * If the remaining GESB data length isn't enough for a
1224 * GEDE header, stop here.
1225 */
1226 if (datalen < sizeof(*gede)) {
1227 if (ratelimitok) {
1228 device_printf(sc->sc_dev, "%s:"
1229 " truncated GEDE: %"PRIu32" < %zu bytes\n",
1230 subctx, datalen, sizeof(*gede));
1231 }
1232 break;
1233 }
1234
1235 /*
1236 * Print the GEDE header and get the full length (may
1237 * vary from revision to revision of the GEDE) and the
1238 * CPER report function if possible.
1239 */
1240 apei_gede_report_header(sc, gede, subctx, ratelimitok,
1241 &headerlen, &report);
1242
1243 /*
1244 * If we don't know the header length because of an
1245 * unfamiliar revision, stop here.
1246 */
1247 if (headerlen == 0) {
1248 if (ratelimitok) {
1249 device_printf(sc->sc_dev, "%s:"
1250 " unknown revision: 0x%"PRIx16"\n",
1251 subctx, gede->Revision);
1252 }
1253 break;
1254 }
1255
1256 /*
1257 * Stop here if what we mapped is too small for the
1258 * error data length.
1259 */
1260 datalen -= headerlen;
1261 if (datalen < gede->ErrorDataLength) {
1262 if (ratelimitok) {
1263 device_printf(sc->sc_dev, "%s:"
1264 " truncated GEDE payload:"
1265 " %"PRIu32" < %"PRIu32" bytes\n",
1266 subctx, datalen, gede->ErrorDataLength);
1267 }
1268 break;
1269 }
1270
1271 /*
1272 * Report the Common Platform Error Record appendix to
1273 * this Generic Error Data Entry.
1274 */
1275 if (report == NULL) {
1276 if (ratelimitok) {
1277 device_printf(sc->sc_dev, "%s:"
1278 " [unknown type]\n", ctx);
1279 }
1280 } else {
1281 /* XXX pass ratelimit through */
1282 (*report->func)(sc, (const char *)gede + headerlen,
1283 gede->ErrorDataLength, subctx, ratelimitok);
1284 }
1285
1286 /*
1287 * Advance past the GEDE header and CPER data to the
1288 * next GEDE.
1289 */
1290 gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1291 + headerlen + gede->ErrorDataLength);
1292 }
1293
1294 /*
1295 * Advance past the Generic Error Data Entries (GEDEs) to the
1296 * raw error data.
1297 *
1298 * XXX Provide Max Raw Data Length as a parameter, as found in
1299 * various HEST entry types.
1300 */
1301 rawdata = (const unsigned char *)gede0 + datalen;
1302
1303 /*
1304 * Verify that the raw data length fits within the size. If
1305 * not, truncate the raw data.
1306 */
1307 rawdatalen = gesb->RawDataLength;
1308 if (size < rawdatalen) {
1309 if (ratelimitok) {
1310 device_printf(sc->sc_dev, "%s:"
1311 " GESB RawDataLength exceeds bounds:"
1312 " %zu < %"PRIu32"\n",
1313 ctx, size, rawdatalen);
1314 }
1315 rawdatalen = size;
1316 }
1317 size -= rawdatalen;
1318
1319 /*
1320 * Hexdump the raw data, if any.
1321 */
1322 if (ratelimitok && rawdatalen > 0) {
1323 char devctx[128];
1324
1325 snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1326 device_xname(sc->sc_dev), ctx);
1327 hexdump(printf, devctx, rawdata, rawdatalen);
1328 }
1329
1330 /*
1331 * If there's anything left after the raw data, warn.
1332 */
1333 if (ratelimitok && size > 0) {
1334 device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1335 ctx, size);
1336 }
1337
1338 /*
1339 * Return the status so the caller can ack it, and tell the
1340 * caller whether this error is fatal.
1341 */
1342 out: *fatalp = fatal;
1343 return status;
1344 }
1345
/*
 * Declare the apei kernel module, of class MODULE_CLASS_DRIVER.  The
 * third argument is NULL -- per module(9) that is the list of
 * required modules, i.e. none here (confirm against module(9)).
 */
MODULE(MODULE_CLASS_DRIVER, apei, NULL);

/*
 * Only when built as a loadable module (_MODULE): pull in ioconf.c,
 * which is expected to provide the cfdriver_ioconf_apei,
 * cfattach_ioconf_apei, and cfdata_ioconf_apei tables that
 * apei_modcmd hands to config_init_component/config_fini_component.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
1351
1352 static int
1353 apei_modcmd(modcmd_t cmd, void *opaque)
1354 {
1355 int error = 0;
1356
1357 switch (cmd) {
1358 case MODULE_CMD_INIT:
1359 #ifdef _MODULE
1360 error = config_init_component(cfdriver_ioconf_apei,
1361 cfattach_ioconf_apei, cfdata_ioconf_apei);
1362 #endif
1363 return error;
1364 case MODULE_CMD_FINI:
1365 #ifdef _MODULE
1366 error = config_fini_component(cfdriver_ioconf_apei,
1367 cfattach_ioconf_apei, cfdata_ioconf_apei);
1368 #endif
1369 return error;
1370 default:
1371 return ENOTTY;
1372 }
1373 }
1374