apei.c revision 1.7 1 /* $NetBSD: apei.c,v 1.7 2024/10/27 12:59:08 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * APEI: ACPI Platform Error Interface
31 *
32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 *
34 * XXX dtrace probes
35 *
36 * XXX call _OSC appropriately to announce to the platform that we, the
37 * OSPM, support APEI
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.7 2024/10/27 12:59:08 riastradh Exp $");
42
43 #include <sys/param.h>
44 #include <sys/types.h>
45
46 #include <sys/atomic.h>
47 #include <sys/endian.h>
48 #include <sys/device.h>
49 #include <sys/module.h>
50 #include <sys/sysctl.h>
51 #include <sys/uuid.h>
52
53 #include <dev/acpi/acpireg.h>
54 #include <dev/acpi/acpivar.h>
55 #include <dev/acpi/apei_bertvar.h>
56 #include <dev/acpi/apei_cper.h>
57 #include <dev/acpi/apei_einjvar.h>
58 #include <dev/acpi/apei_erstvar.h>
59 #include <dev/acpi/apei_hestvar.h>
60 #include <dev/acpi/apei_interp.h>
61 #include <dev/acpi/apeivar.h>
62 #include <dev/pci/pcireg.h>
63
64 #define _COMPONENT ACPI_RESOURCE_COMPONENT
65 ACPI_MODULE_NAME ("apei")
66
67 static int apei_match(device_t, cfdata_t, void *);
68 static void apei_attach(device_t, device_t, void *);
69 static int apei_detach(device_t, int);
70
71 static void apei_get_tables(struct apei_tab *);
72 static void apei_put_tables(struct apei_tab *);
73
74 static void apei_identify(struct apei_softc *, const char *,
75 const ACPI_TABLE_HEADER *);
76
77 CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
78 apei_match, apei_attach, apei_detach, NULL);
79
80 static int
81 apei_match(device_t parent, cfdata_t match, void *aux)
82 {
83 struct apei_tab tab;
84 int prio = 0;
85
86 /*
87 * If we have any of the APEI tables, match.
88 */
89 apei_get_tables(&tab);
90 if (tab.bert || tab.einj || tab.erst || tab.hest)
91 prio = 1;
92 apei_put_tables(&tab);
93
94 return prio;
95 }
96
97 static void
98 apei_attach(device_t parent, device_t self, void *aux)
99 {
100 struct apei_softc *sc = device_private(self);
101 const struct sysctlnode *sysctl_hw_acpi;
102 int error;
103
104 aprint_naive("\n");
105 aprint_normal(": ACPI Platform Error Interface\n");
106
107 pmf_device_register(self, NULL, NULL);
108
109 sc->sc_dev = self;
110 apei_get_tables(&sc->sc_tab);
111
112 /*
113 * Get the sysctl hw.acpi node. This should already be created
114 * but I don't see an easy way to get at it. If this fails,
115 * something is seriously wrong, so let's stop here.
116 */
117 error = sysctl_createv(&sc->sc_sysctllog, 0,
118 NULL, &sysctl_hw_acpi, 0,
119 CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
120 CTL_HW, CTL_CREATE, CTL_EOL);
121 if (error) {
122 aprint_error_dev(sc->sc_dev,
123 "failed to create sysctl hw.acpi: %d\n", error);
124 return;
125 }
126
127 /*
128 * Create sysctl hw.acpi.apei.
129 */
130 error = sysctl_createv(&sc->sc_sysctllog, 0,
131 &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
132 CTLTYPE_NODE, "apei",
133 SYSCTL_DESCR("ACPI Platform Error Interface"),
134 NULL, 0, NULL, 0,
135 CTL_CREATE, CTL_EOL);
136 if (error) {
137 aprint_error_dev(sc->sc_dev,
138 "failed to create sysctl hw.acpi.apei: %d\n", error);
139 return;
140 }
141
142 /*
143 * Set up BERT, EINJ, ERST, and HEST.
144 */
145 if (sc->sc_tab.bert) {
146 apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
147 apei_bert_attach(sc);
148 }
149 if (sc->sc_tab.einj) {
150 apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
151 apei_einj_attach(sc);
152 }
153 if (sc->sc_tab.erst) {
154 apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
155 apei_erst_attach(sc);
156 }
157 if (sc->sc_tab.hest) {
158 apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
159 apei_hest_attach(sc);
160 }
161 }
162
163 static int
164 apei_detach(device_t self, int flags)
165 {
166 struct apei_softc *sc = device_private(self);
167 int error;
168
169 /*
170 * Detach children. We don't currently have any but this is
171 * harmless without children and mandatory if we ever sprouted
172 * them, so let's just leave it here for good measure.
173 *
174 * After this point, we are committed to detaching; failure is
175 * forbidden.
176 */
177 error = config_detach_children(self, flags);
178 if (error)
179 return error;
180
181 /*
182 * Tear down all the sysctl nodes first, before the software
183 * state backing them goes away.
184 */
185 sysctl_teardown(&sc->sc_sysctllog);
186 sc->sc_sysctlroot = NULL;
187
188 /*
189 * Detach the software state for the APEI tables.
190 */
191 if (sc->sc_tab.hest)
192 apei_hest_detach(sc);
193 if (sc->sc_tab.erst)
194 apei_erst_detach(sc);
195 if (sc->sc_tab.einj)
196 apei_einj_detach(sc);
197 if (sc->sc_tab.bert)
198 apei_bert_detach(sc);
199
200 /*
201 * Release the APEI tables and we're done.
202 */
203 apei_put_tables(&sc->sc_tab);
204 pmf_device_deregister(self);
205 return 0;
206 }
207
208 /*
209 * apei_get_tables(tab)
210 *
211 * Get references to whichever APEI-related tables -- BERT, EINJ,
212 * ERST, HEST -- are available in the system.
213 */
214 static void
215 apei_get_tables(struct apei_tab *tab)
216 {
217 ACPI_STATUS rv;
218
219 /*
220 * Probe the BERT -- Boot Error Record Table.
221 */
222 rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
223 if (ACPI_FAILURE(rv))
224 tab->bert = NULL;
225
226 /*
227 * Probe the EINJ -- Error Injection Table.
228 */
229 rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
230 if (ACPI_FAILURE(rv))
231 tab->einj = NULL;
232
233 /*
234 * Probe the ERST -- Error Record Serialization Table.
235 */
236 rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
237 if (ACPI_FAILURE(rv))
238 tab->erst = NULL;
239
240 /*
241 * Probe the HEST -- Hardware Error Source Table.
242 */
243 rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
244 if (ACPI_FAILURE(rv))
245 tab->hest = NULL;
246 }
247
248 /*
249 * apei_put_tables(tab)
250 *
251 * Release the tables acquired by apei_get_tables.
252 */
253 static void
254 apei_put_tables(struct apei_tab *tab)
255 {
256
257 if (tab->bert != NULL) {
258 AcpiPutTable(&tab->bert->Header);
259 tab->bert = NULL;
260 }
261 if (tab->einj != NULL) {
262 AcpiPutTable(&tab->einj->Header);
263 tab->einj = NULL;
264 }
265 if (tab->erst != NULL) {
266 AcpiPutTable(&tab->erst->Header);
267 tab->erst = NULL;
268 }
269 if (tab->hest != NULL) {
270 AcpiPutTable(&tab->hest->Header);
271 tab->hest = NULL;
272 }
273 }
274
275 /*
276 * apei_identify(sc, name, header)
277 *
278 * Identify the APEI-related table header for dmesg.
279 */
280 static void
281 apei_identify(struct apei_softc *sc, const char *name,
282 const ACPI_TABLE_HEADER *h)
283 {
284
285 aprint_normal_dev(sc->sc_dev, "%s:"
286 " OemId <%6.6s,%8.8s,%08x>"
287 " AslId <%4.4s,%08x>\n",
288 name,
289 h->OemId, h->OemTableId, h->OemRevision,
290 h->AslCompilerId, h->AslCompilerRevision);
291 }
292
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record UUID/GUID
 *	stored at buf, as found in an ACPI table, into the sys/uuid.h
 *	representation *uuid, using uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
305
306 /*
307 * apei_format_guid(uuid, s)
308 *
309 * Format a UUID as a string. This uses C initializer notation,
310 * not UUID notation, in order to match the text in the UEFI
311 * specification.
312 */
313 static void
314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
315 {
316
317 snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
318 "{0x%02x,%02x,"
319 "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
320 uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
321 uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
322 uuid->node[0], uuid->node[1], uuid->node[2],
323 uuid->node[3], uuid->node[4], uuid->node[5]);
324 }
325
326 /*
327 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
328 */
329
330 static const char *const cper_memory_error_type[] = {
331 #define F(LN, SN, V) [LN] = #SN,
332 CPER_MEMORY_ERROR_TYPES(F)
333 #undef F
334 };
335
/*
 * Names of the Generic Error Status Block severity levels, indexed by
 * the numeric severity 0 through 3.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica ACPI_HEST_GEN_ERROR_* constants seem to have the same
 * values, but acpica designates them for Generic Error Data Entries
 * rather than Generic Error Status Blocks, so plain numbers are used
 * here.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",		/* 0 */
	"fatal",		/* 1 */
	"corrected",		/* 2 */
	"none",			/* 3 */
};
349
350 /*
351 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
352 */
353 static const char *const apei_gede_severity[] = {
354 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
355 [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
356 [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
357 [ACPI_HEST_GEN_ERROR_NONE] = "none",
358 };
359
360 /*
361 * N.2.5. Memory Error Section
362 *
363 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
364 */
365 static const struct uuid CPER_MEMORY_ERROR_SECTION =
366 {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
367
368 static void
369 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
370 size_t len, const char *ctx, bool ratelimitok)
371 {
372 const struct cper_memory_error *ME = buf;
373 char bitbuf[1024];
374
375 /*
376 * If we've hit the rate limit, skip printing the error.
377 */
378 if (!ratelimitok)
379 goto out;
380
381 snprintb(bitbuf, sizeof(bitbuf),
382 CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
383 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
384 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
385 /*
386 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
387 */
388 /* XXX define this format somewhere */
389 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
390 "f\010\010" "ErrorType\0"
391 "=\001" "ERR_INTERNAL\0"
392 "=\004" "ERR_MEM\0"
393 "=\005" "ERR_TLB\0"
394 "=\006" "ERR_CACHE\0"
395 "=\007" "ERR_FUNCTION\0"
396 "=\010" "ERR_SELFTEST\0"
397 "=\011" "ERR_FLOW\0"
398 "=\020" "ERR_BUS\0"
399 "=\021" "ERR_MAP\0"
400 "=\022" "ERR_IMPROPER\0"
401 "=\023" "ERR_UNIMPL\0"
402 "=\024" "ERR_LOL\0"
403 "=\025" "ERR_RESPONSE\0"
404 "=\026" "ERR_PARITY\0"
405 "=\027" "ERR_PROTOCOL\0"
406 "=\030" "ERR_ERROR\0"
407 "=\031" "ERR_TIMEOUT\0"
408 "=\032" "ERR_POISONED\0"
409 "b\020" "AddressError\0"
410 "b\021" "ControlError\0"
411 "b\022" "DataError\0"
412 "b\023" "ResponderDetected\0"
413 "b\024" "RequesterDetected\0"
414 "b\025" "FirstError\0"
415 "b\026" "Overflow\0"
416 "\0", ME->ErrorStatus);
417 device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
418 }
419 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
420 device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
421 ctx, ME->PhysicalAddress);
422 }
423 if (ME->ValidationBits &
424 CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
425 device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
426 "\n", ctx, ME->PhysicalAddressMask);
427 }
428 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
429 device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
430 ME->Node);
431 }
432 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
433 device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
434 ME->Card);
435 }
436 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
437 device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
438 ME->Module);
439 }
440 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
441 device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
442 ME->Bank);
443 }
444 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
445 device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
446 ME->Device);
447 }
448 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
449 device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
450 ME->Row);
451 }
452 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
453 device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
454 ME->Column);
455 }
456 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
457 device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
458 ctx, ME->BitPosition);
459 }
460 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
461 device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
462 ctx, ME->RequestorId);
463 }
464 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
465 device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
466 ctx, ME->ResponderId);
467 }
468 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
469 device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
470 ctx, ME->TargetId);
471 }
472 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
473 const uint8_t t = ME->MemoryErrorType;
474 const char *n = t < __arraycount(cper_memory_error_type)
475 ? cper_memory_error_type[t] : NULL;
476
477 if (n) {
478 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
479 " (%s)\n", ctx, t, n);
480 } else {
481 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
482 ctx, t);
483 }
484 }
485
486 out: /*
487 * XXX pass this through to uvm(9) or userland for decisions
488 * like page retirement
489 */
490 return;
491 }
492
493 /*
494 * N.2.7. PCI Express Error Section
495 *
496 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
497 */
498 static const struct uuid CPER_PCIE_ERROR_SECTION =
499 {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
500
501 static const char *const cper_pcie_error_port_type[] = {
502 #define F(LN, SN, V) [LN] = #SN,
503 CPER_PCIE_ERROR_PORT_TYPES(F)
504 #undef F
505 };
506
507 static void
508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
509 const char *ctx, bool ratelimitok)
510 {
511 const struct cper_pcie_error *PE = buf;
512 char bitbuf[1024];
513
514 /*
515 * If we've hit the rate limit, skip printing the error.
516 */
517 if (!ratelimitok)
518 goto out;
519
520 snprintb(bitbuf, sizeof(bitbuf),
521 CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
522 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
523 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
524 const uint32_t t = PE->PortType;
525 const char *n = t < __arraycount(cper_pcie_error_port_type)
526 ? cper_pcie_error_port_type[t] : NULL;
527
528 if (n) {
529 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
530 " (%s)\n", ctx, t, n);
531 } else {
532 device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
533 ctx, t);
534 }
535 }
536 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
537 /* XXX BCD */
538 device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
539 ctx, PE->Version);
540 }
541 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
542 /* XXX move me to pcireg.h */
543 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
544 /* command */
545 "b\000" "IO_ENABLE\0"
546 "b\001" "MEM_ENABLE\0"
547 "b\002" "MASTER_ENABLE\0"
548 "b\003" "SPECIAL_ENABLE\0"
549 "b\004" "INVALIDATE_ENABLE\0"
550 "b\005" "PALETTE_ENABLE\0"
551 "b\006" "PARITY_ENABLE\0"
552 "b\007" "STEPPING_ENABLE\0"
553 "b\010" "SERR_ENABLE\0"
554 "b\011" "BACKTOBACK_ENABLE\0"
555 "b\012" "INTERRUPT_DISABLE\0"
556 /* status */
557 "b\023" "INT_STATUS\0"
558 "b\024" "CAPLIST_SUPPORT\0"
559 "b\025" "66MHZ_SUPPORT\0"
560 "b\026" "UDF_SUPPORT\0"
561 "b\027" "BACKTOBACK_SUPPORT\0"
562 "b\030" "PARITY_ERROR\0"
563 "f\031\002" "DEVSEL\0"
564 "=\000" "FAST\0"
565 "=\001" "MEDIUM\0"
566 "=\002" "SLOW\0"
567 "b\033" "TARGET_TARGET_ABORT\0"
568 "b\034" "MASTER_TARGET_ABORT\0"
569 "b\035" "MASTER_ABORT\0"
570 "b\036" "SPECIAL_ERROR\0"
571 "b\037" "PARITY_DETECT\0"
572 "\0", PE->CommandStatus);
573 device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
574 ctx, bitbuf);
575 }
576 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
577 device_printf(sc->sc_dev, "%s: DeviceID:"
578 " VendorID=0x%04"PRIx16
579 " DeviceID=0x%04"PRIx16
580 " ClassCode=0x%06"PRIx32
581 " Function=%"PRIu8
582 " Device=%"PRIu8
583 " Segment=%"PRIu16
584 " Bus=%"PRIu8
585 " SecondaryBus=%"PRIu8
586 " Slot=0x%04"PRIx16
587 " Reserved0=0x%02"PRIx8
588 "\n",
589 ctx,
590 le16dec(PE->DeviceID.VendorID),
591 le16dec(PE->DeviceID.DeviceID),
592 (PE->DeviceID.ClassCode[0] | /* le24dec */
593 ((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
594 ((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
595 PE->DeviceID.Function, PE->DeviceID.Device,
596 le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
597 PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
598 PE->DeviceID.Reserved0);
599 }
600 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
601 device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
602 ctx, PE->DeviceSerial);
603 }
604 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
605 /* XXX snprintb */
606 device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
607 "\n", ctx, PE->BridgeControlStatus);
608 }
609 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
610 uint32_t dcsr, dsr;
611 char hex[2*sizeof(PE->CapabilityStructure) + 1];
612 unsigned i;
613
614 for (i = 0; i < sizeof(PE->CapabilityStructure); i++) {
615 snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
616 PE->CapabilityStructure[i]);
617 }
618 device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
619 ctx, hex);
620
621 dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
622 dsr = __SHIFTOUT(dcsr, __BITS(31,16));
623 if (dsr != 0) {
624 /*
625 * XXX move me to pcireg.h; note: high
626 * half of DCSR
627 */
628 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
629 "b\000" "CORRECTABLE_ERROR\0"
630 "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0"
631 "b\002" "FATAL_ERROR\0"
632 "b\003" "UNSUPPORTED_REQUEST\0"
633 "b\004" "AUX_POWER\0"
634 "b\005" "TRANSACTIONS_PENDING\0"
635 "\0", dsr);
636 device_printf(sc->sc_dev, "%s: PCIe Device Status:"
637 " %s\n",
638 ctx, bitbuf);
639 }
640 }
641 if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
642 uint32_t uc_status, uc_sev;
643 uint32_t cor_status;
644 uint32_t control;
645 char hex[2*sizeof(PE->AERInfo) + 1];
646 unsigned i;
647
648 for (i = 0; i < sizeof(PE->AERInfo); i++) {
649 snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
650 PE->AERInfo[i]);
651 }
652 device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
653
654 /* XXX move me to pcireg.h */
655 #define PCI_AER_UC_STATUS_FMT "\177\020" \
656 "b\000" "UNDEFINED\0" \
657 "b\004" "DL_PROTOCOL_ERROR\0" \
658 "b\005" "SURPRISE_DOWN_ERROR\0" \
659 "b\014" "POISONED_TLP\0" \
660 "b\015" "FC_PROTOCOL_ERROR\0" \
661 "b\016" "COMPLETION_TIMEOUT\0" \
662 "b\017" "COMPLETION_ABORT\0" \
663 "b\020" "UNEXPECTED_COMPLETION\0" \
664 "b\021" "RECEIVER_OVERFLOW\0" \
665 "b\022" "MALFORMED_TLP\0" \
666 "b\023" "ECRC_ERROR\0" \
667 "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \
668 "b\025" "ACS_VIOLATION\0" \
669 "b\026" "INTERNAL_ERROR\0" \
670 "b\027" "MC_BLOCKED_TLP\0" \
671 "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \
672 "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \
673 "b\032" "POISONTLP_EGRESS_BLOCKED\0" \
674 "\0"
675
676 uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
677 uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
678 cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
679 control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
680
681 if (uc_status & uc_sev) {
682 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
683 uc_status & uc_sev);
684 device_printf(sc->sc_dev, "%s:"
685 " AER hardware fatal uncorrectable errors: %s\n",
686 ctx, bitbuf);
687 }
688 if (uc_status & ~uc_sev) {
689 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
690 uc_status & uc_sev);
691 device_printf(sc->sc_dev, "%s:"
692 " AER hardware fatal uncorrectable errors: %s\n",
693 ctx, bitbuf);
694 }
695 if (uc_status) {
696 unsigned first = __SHIFTOUT(control,
697 PCI_AER_FIRST_ERROR_PTR);
698 snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
699 (uint32_t)1 << first);
700 device_printf(sc->sc_dev, "%s:"
701 " AER hardware first uncorrectable error: %s\n",
702 ctx, bitbuf);
703 }
704 if (cor_status) {
705 /* XXX move me to pcireg.h */
706 snprintb(bitbuf, sizeof(bitbuf), "\177\020"
707 "b\000" "RECEIVER_ERROR\0"
708 "b\006" "BAD_TLP\0"
709 "b\007" "BAD_DLLP\0"
710 "b\010" "REPLAY_NUM_ROLLOVER\0"
711 "b\014" "REPLAY_TIMER_TIMEOUT\0"
712 "b\015" "ADVISORY_NF_ERROR\0"
713 "b\016" "INTERNAL_ERROR\0"
714 "b\017" "HEADER_LOG_OVERFLOW\0"
715 "\0", cor_status);
716 device_printf(sc->sc_dev, "%s:"
717 " AER hardware corrected error: %s\n",
718 ctx, bitbuf);
719 }
720 }
721
722 out: /*
723 * XXX pass this on to the PCI subsystem to handle
724 */
725 return;
726 }
727
728 /*
729 * apei_cper_reports
730 *
731 * Table of known Common Platform Error Record types, symbolic
732 * names, minimum data lengths, and functions to report them.
733 *
734 * The section types and corresponding section layouts are listed
735 * at:
736 *
737 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
738 */
739 static const struct apei_cper_report {
740 const char *name;
741 const struct uuid *type;
742 size_t minlength;
743 void (*func)(struct apei_softc *, const void *, size_t, const char *,
744 bool);
745 } apei_cper_reports[] = {
746 { "memory", &CPER_MEMORY_ERROR_SECTION,
747 sizeof(struct cper_memory_error),
748 apei_cper_memory_error_report },
749 { "PCIe", &CPER_PCIE_ERROR_SECTION,
750 sizeof(struct cper_pcie_error),
751 apei_cper_pcie_error_report },
752 };
753
754 /*
755 * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
756 *
757 * Report the header of the ith Generic Error Data Entry in the
758 * given context, if ratelimitok is true.
759 *
760 * Return the actual length of the header in headerlen, or 0 if
761 * not known because the revision isn't recognized.
762 *
763 * Return the report type in report, or NULL if not known because
764 * the section type isn't recognized.
765 */
766 static void
767 apei_gede_report_header(struct apei_softc *sc,
768 const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
769 size_t *headerlenp, const struct apei_cper_report **reportp)
770 {
771 const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
772 struct uuid sectype;
773 char guidstr[69];
774 char buf[128];
775 unsigned i;
776
777 /*
778 * Print the section type as a C initializer. It would be
779 * prettier to use standard hyphenated UUID notation, but that
780 * notation is slightly ambiguous here (two octets could be
781 * written either way, depending on Microsoft convention --
782 * which influenced ACPI and UEFI -- or internet convention),
783 * and the UEFI spec writes the C initializer notation, so this
784 * makes it easier to search for.
785 *
786 * Also print out a symbolic name, if we know it.
787 */
788 apei_cper_guid_dec(gede->SectionType, §ype);
789 apei_format_guid(§ype, guidstr);
790 for (i = 0; i < __arraycount(apei_cper_reports); i++) {
791 const struct apei_cper_report *const report =
792 &apei_cper_reports[i];
793
794 if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
795 continue;
796 if (ratelimitok) {
797 device_printf(sc->sc_dev, "%s:"
798 " SectionType=%s (%s error)\n",
799 ctx, guidstr, report->name);
800 }
801 *reportp = report;
802 break;
803 }
804 if (i == __arraycount(apei_cper_reports)) {
805 if (ratelimitok) {
806 device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
807 guidstr);
808 }
809 *reportp = NULL;
810 }
811
812 /*
813 * Print the numeric severity and, if we have it, a symbolic
814 * name for it.
815 */
816 if (ratelimitok) {
817 device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
818 ctx,
819 gede->ErrorSeverity,
820 (gede->ErrorSeverity < __arraycount(apei_gede_severity)
821 ? apei_gede_severity[gede->ErrorSeverity]
822 : "unknown"));
823 }
824
825 /*
826 * The Revision may not often be useful, but this is only ever
827 * shown at the time of a hardware error report, not something
828 * you can glean at your convenience with acpidump. So print
829 * it anyway.
830 */
831 if (ratelimitok) {
832 device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
833 gede->Revision);
834 }
835
836 /*
837 * Don't touch anything past the Revision until we've
838 * determined we understand it. Return the header length to
839 * the caller, or return zero -- and stop here -- if we don't
840 * know what the actual header length is.
841 */
842 if (gede->Revision < 0x0300) {
843 *headerlenp = sizeof(*gede);
844 } else if (gede->Revision < 0x0400) {
845 *headerlenp = sizeof(*gede_v3);
846 } else {
847 *headerlenp = 0;
848 return;
849 }
850
851 /*
852 * Print the validation bits at debug level. Only really
853 * helpful if there are bits we _don't_ know about.
854 */
855 if (ratelimitok) {
856 /* XXX define this format somewhere */
857 snprintb(buf, sizeof(buf), "\177\020"
858 "b\000" "FRU_ID\0"
859 "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
860 "b\002" "TIMESTAMP\0"
861 "\0", gede->ValidationBits);
862 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
863 buf);
864 }
865
866 /*
867 * Print the CPER section flags.
868 */
869 if (ratelimitok) {
870 snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
871 gede->Flags);
872 device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
873 }
874
875 /*
876 * The ErrorDataLength is unlikely to be useful for the log, so
877 * print it at debug level only.
878 */
879 if (ratelimitok) {
880 aprint_debug_dev(sc->sc_dev, "%s:"
881 " ErrorDataLength=0x%"PRIu32"\n",
882 ctx, gede->ErrorDataLength);
883 }
884
885 /*
886 * Print the FRU Id and text, if available.
887 */
888 if (ratelimitok &&
889 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
890 struct uuid fruid;
891
892 apei_cper_guid_dec(gede->FruId, &fruid);
893 apei_format_guid(&fruid, guidstr);
894 device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
895 }
896 if (ratelimitok &&
897 (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
898 device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
899 ctx, gede->FruText);
900 }
901
902 /*
903 * Print the timestamp, if available by the revision number and
904 * the validation bits.
905 */
906 if (ratelimitok &&
907 gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
908 gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
909 const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
910 const uint8_t s = t[0];
911 const uint8_t m = t[1];
912 const uint8_t h = t[2];
913 const uint8_t f = t[3];
914 const uint8_t D = t[4];
915 const uint8_t M = t[5];
916 const uint8_t Y = t[6];
917 const uint8_t C = t[7];
918
919 device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
920 " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
921 ctx, gede_v3->TimeStamp,
922 C,Y, M, D, h,m,s,
923 f & __BIT(0) ? " (event time)" : " (collect time)");
924 }
925 }
926
927 /*
928 * apei_gesb_ratelimit
929 *
930 * State to limit the rate of console log messages about hardware
931 * errors. For each of the four severity levels in a Generic
932 * Error Status Block,
933 *
934 * 0 - Recoverable (uncorrectable),
935 * 1 - Fatal (uncorrectable),
936 * 2 - Corrected, and
937 * 3 - None (including ill-formed errors),
938 *
939 * we record the last time it happened, protected by a CPU simple
940 * lock that we only try-acquire so it is safe to use in any
941 * context, including non-maskable interrupt context.
942 */
943
944 static struct {
945 __cpu_simple_lock_t lock;
946 struct timeval lasttime;
947 volatile uint32_t suppressed;
948 } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
949 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
950 [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
951 [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
952 [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
953 };
954
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX instead of wrapping around to zero.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		/*
		 * The bound is that of the counter's own type, and is
		 * the same value apei_gesb_ratecheck tests for when
		 * it reports "or more".
		 */
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
		/* Retry if someone else changed the counter meanwhile. */
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
967
968 /*
969 * apei_gesb_ratecheck(sc, severity, suppressed)
970 *
971 * Check for a rate limit on errors of the specified severity.
972 *
973 * => Return true if the error should be printed, and format into
974 * the buffer suppressed a message saying how many errors were
975 * previously suppressed.
976 *
977 * => Return false if the error should be suppressed because the
978 * last one printed was too recent.
979 */
980 static bool
981 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
982 char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
983 {
984 /* one of each type per minute (XXX worth making configurable?) */
985 const struct timeval mininterval = {60, 0};
986 unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
987 bool ok = false;
988
989 /*
990 * If the lock is contended, the rate limit is probably
991 * exceeded, so it's not OK to print.
992 *
993 * Otherwise, with the lock held, ask ratecheck(9) whether it's
994 * OK to print.
995 */
996 if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
997 goto out;
998 ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
999 __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
1000
1001 out: /*
1002 * If it's OK to print, report the number of errors that were
1003 * suppressed. If it's not OK to print, count a suppressed
1004 * error.
1005 */
1006 if (ok) {
1007 const uint32_t n =
1008 atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
1009
1010 if (n == 0) {
1011 suppressed[0] = '\0';
1012 } else {
1013 snprintf(suppressed,
1014 sizeof(" (4294967295 or more errors suppressed)"),
1015 " (%u%s error%s suppressed)",
1016 n,
1017 n == UINT32_MAX ? " or more" : "",
1018 n == 1 ? "" : "s");
1019 }
1020 } else {
1021 atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
1022 suppressed[0] = '\0';
1023 }
1024 return ok;
1025 }
1026
1027 /*
1028 * apei_gesb_report(sc, gesb, size, ctx)
1029 *
1030 * Check a Generic Error Status Block, of at most the specified
1031 * size in bytes, and report any errors in it. Return the 32-bit
1032 * Block Status in case the caller needs it to acknowledge the
1033 * report to firmware.
1034 */
1035 uint32_t
1036 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
1037 size_t size, const char *ctx, bool *fatalp)
1038 {
1039 uint32_t status, unknownstatus, severity, nentries, i;
1040 uint32_t datalen, rawdatalen;
1041 const ACPI_HEST_GENERIC_DATA *gede0, *gede;
1042 const unsigned char *rawdata;
1043 bool ratelimitok = false;
1044 char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
1045 bool fatal = false;
1046
1047 /*
1048 * Verify the buffer is large enough for a Generic Error Status
1049 * Block before we try to touch anything in it.
1050 */
1051 if (size < sizeof(*gesb)) {
1052 ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
1053 suppressed);
1054 if (ratelimitok) {
1055 device_printf(sc->sc_dev,
1056 "%s: truncated GESB, %zu < %zu%s\n",
1057 ctx, size, sizeof(*gesb), suppressed);
1058 }
1059 status = 0;
1060 goto out;
1061 }
1062 size -= sizeof(*gesb);
1063
1064 /*
1065 * Load the status. Access ordering rules are unclear in the
1066 * ACPI specification; I'm guessing that load-acquire of the
1067 * block status is a good idea before any other access to the
1068 * GESB.
1069 */
1070 status = atomic_load_acquire(&gesb->BlockStatus);
1071
1072 /*
1073 * If there are no status bits set, the rest of the GESB is
1074 * garbage, so stop here.
1075 */
1076 if (status == 0) {
1077 /* XXX dtrace */
1078 /* XXX DPRINTF */
1079 goto out;
1080 }
1081
1082 /*
1083 * Read out the severity and get the number of entries in this
1084 * status block.
1085 */
1086 severity = gesb->ErrorSeverity;
1087 nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
1088
1089 /*
1090 * Print a message to the console and dmesg about the severity
1091 * of the error.
1092 */
1093 ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
1094 if (ratelimitok) {
1095 char statusbuf[128];
1096
1097 /* XXX define this format somewhere */
1098 snprintb(statusbuf, sizeof(statusbuf), "\177\020"
1099 "b\000" "UE\0"
1100 "b\001" "CE\0"
1101 "b\002" "MULTI_UE\0"
1102 "b\003" "MULTI_CE\0"
1103 "f\004\010" "GEDE_COUNT\0"
1104 "\0", status);
1105
1106 if (severity < __arraycount(apei_gesb_severity)) {
1107 device_printf(sc->sc_dev, "%s"
1108 " reported hardware error%s:"
1109 " severity=%s nentries=%u status=%s\n",
1110 ctx, suppressed,
1111 apei_gesb_severity[severity], nentries, statusbuf);
1112 } else {
1113 device_printf(sc->sc_dev, "%s reported error%s:"
1114 " severity=%"PRIu32" nentries=%u status=%s\n",
1115 ctx, suppressed,
1116 severity, nentries, statusbuf);
1117 }
1118 }
1119
1120 /*
1121 * Make a determination about whether the error is fatal.
1122 *
1123 * XXX Currently we don't have any mechanism to recover from
1124 * uncorrectable but recoverable errors, so we treat those --
1125 * and anything else we don't recognize -- as fatal.
1126 */
1127 switch (severity) {
1128 case ACPI_HEST_GEN_ERROR_CORRECTED:
1129 case ACPI_HEST_GEN_ERROR_NONE:
1130 fatal = false;
1131 break;
1132 case ACPI_HEST_GEN_ERROR_FATAL:
1133 case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
1134 default:
1135 fatal = true;
1136 break;
1137 }
1138
1139 /*
1140 * Clear the bits we know about to warn if there's anything
1141 * left we don't understand.
1142 */
1143 unknownstatus = status;
1144 unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
1145 unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
1146 unknownstatus &= ~ACPI_HEST_CORRECTABLE;
1147 unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
1148 unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
1149 if (ratelimitok && unknownstatus != 0) {
1150 /* XXX dtrace */
1151 device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
1152 " 0x%"PRIx32"\n", ctx, unknownstatus);
1153 }
1154
1155 /*
1156 * Advance past the Generic Error Status Block (GESB) header to
1157 * the Generic Error Data Entries (GEDEs).
1158 */
1159 gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
1160
1161 /*
1162 * Verify that the data length (GEDEs) fits within the size.
1163 * If not, truncate the GEDEs.
1164 */
1165 datalen = gesb->DataLength;
1166 if (size < datalen) {
1167 if (ratelimitok) {
1168 device_printf(sc->sc_dev, "%s:"
1169 " GESB DataLength exceeds bounds:"
1170 " %zu < %"PRIu32"\n",
1171 ctx, size, datalen);
1172 }
1173 datalen = size;
1174 }
1175 size -= datalen;
1176
1177 /*
1178 * Report each of the Generic Error Data Entries.
1179 */
1180 for (i = 0; i < nentries; i++) {
1181 size_t headerlen;
1182 const struct apei_cper_report *report;
1183 char subctx[128];
1184
1185 /*
1186 * Format a subcontext to show this numbered entry of
1187 * the GESB.
1188 */
1189 snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
1190
1191 /*
1192 * If the remaining GESB data length isn't enough for a
1193 * GEDE header, stop here.
1194 */
1195 if (datalen < sizeof(*gede)) {
1196 if (ratelimitok) {
1197 device_printf(sc->sc_dev, "%s:"
1198 " truncated GEDE: %"PRIu32" < %zu bytes\n",
1199 subctx, datalen, sizeof(*gede));
1200 }
1201 break;
1202 }
1203
1204 /*
1205 * Print the GEDE header and get the full length (may
1206 * vary from revision to revision of the GEDE) and the
1207 * CPER report function if possible.
1208 */
1209 apei_gede_report_header(sc, gede, subctx, ratelimitok,
1210 &headerlen, &report);
1211
1212 /*
1213 * If we don't know the header length because of an
1214 * unfamiliar revision, stop here.
1215 */
1216 if (headerlen == 0) {
1217 if (ratelimitok) {
1218 device_printf(sc->sc_dev, "%s:"
1219 " unknown revision: 0x%"PRIx16"\n",
1220 subctx, gede->Revision);
1221 }
1222 break;
1223 }
1224
1225 /*
1226 * Stop here if what we mapped is too small for the
1227 * error data length.
1228 */
1229 datalen -= headerlen;
1230 if (datalen < gede->ErrorDataLength) {
1231 if (ratelimitok) {
1232 device_printf(sc->sc_dev, "%s:"
1233 " truncated GEDE payload:"
1234 " %"PRIu32" < %"PRIu32" bytes\n",
1235 subctx, datalen, gede->ErrorDataLength);
1236 }
1237 break;
1238 }
1239
1240 /*
1241 * Report the Common Platform Error Record appendix to
1242 * this Generic Error Data Entry.
1243 */
1244 if (report == NULL) {
1245 if (ratelimitok) {
1246 device_printf(sc->sc_dev, "%s:"
1247 " [unknown type]\n", ctx);
1248 }
1249 } else {
1250 /* XXX pass ratelimit through */
1251 (*report->func)(sc, (const char *)gede + headerlen,
1252 gede->ErrorDataLength, subctx, ratelimitok);
1253 }
1254
1255 /*
1256 * Advance past the GEDE header and CPER data to the
1257 * next GEDE.
1258 */
1259 gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1260 + headerlen + gede->ErrorDataLength);
1261 }
1262
1263 /*
1264 * Advance past the Generic Error Data Entries (GEDEs) to the
1265 * raw error data.
1266 *
1267 * XXX Provide Max Raw Data Length as a parameter, as found in
1268 * various HEST entry types.
1269 */
1270 rawdata = (const unsigned char *)gede0 + datalen;
1271
1272 /*
1273 * Verify that the raw data length fits within the size. If
1274 * not, truncate the raw data.
1275 */
1276 rawdatalen = gesb->RawDataLength;
1277 if (size < rawdatalen) {
1278 if (ratelimitok) {
1279 device_printf(sc->sc_dev, "%s:"
1280 " GESB RawDataLength exceeds bounds:"
1281 " %zu < %"PRIu32"\n",
1282 ctx, size, rawdatalen);
1283 }
1284 rawdatalen = size;
1285 }
1286 size -= rawdatalen;
1287
1288 /*
1289 * Hexdump the raw data, if any.
1290 */
1291 if (ratelimitok && rawdatalen > 0) {
1292 char devctx[128];
1293
1294 snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1295 device_xname(sc->sc_dev), ctx);
1296 hexdump(printf, devctx, rawdata, rawdatalen);
1297 }
1298
1299 /*
1300 * If there's anything left after the raw data, warn.
1301 */
1302 if (ratelimitok && size > 0) {
1303 device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1304 ctx, size);
1305 }
1306
1307 /*
1308 * Return the status so the caller can ack it, and tell the
1309 * caller whether this error is fatal.
1310 */
1311 out: *fatalp = fatal;
1312 return status;
1313 }
1314
1315 MODULE(MODULE_CLASS_DRIVER, apei, NULL);
1316
1317 #ifdef _MODULE
1318 #include "ioconf.c"
1319 #endif
1320
1321 static int
1322 apei_modcmd(modcmd_t cmd, void *opaque)
1323 {
1324 int error = 0;
1325
1326 switch (cmd) {
1327 case MODULE_CMD_INIT:
1328 #ifdef _MODULE
1329 error = config_init_component(cfdriver_ioconf_apei,
1330 cfattach_ioconf_apei, cfdata_ioconf_apei);
1331 #endif
1332 return error;
1333 case MODULE_CMD_FINI:
1334 #ifdef _MODULE
1335 error = config_fini_component(cfdriver_ioconf_apei,
1336 cfattach_ioconf_apei, cfdata_ioconf_apei);
1337 #endif
1338 return error;
1339 default:
1340 return ENOTTY;
1341 }
1342 }
1343