Home | History | Annotate | Line # | Download | only in acpi
      1 /*	$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * APEI: ACPI Platform Error Interface
     31  *
     32  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
     33  *
     34  * XXX dtrace probes
     35  *
     36  * XXX call _OSC appropriately to announce to the platform that we, the
     37  * OSPM, support APEI
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.9 2024/10/27 21:28:54 riastradh Exp $");
     42 
     43 #include <sys/param.h>
     44 #include <sys/types.h>
     45 
     46 #include <sys/atomic.h>
     47 #include <sys/endian.h>
     48 #include <sys/device.h>
     49 #include <sys/module.h>
     50 #include <sys/sysctl.h>
     51 #include <sys/uuid.h>
     52 
     53 #include <dev/acpi/acpireg.h>
     54 #include <dev/acpi/acpivar.h>
     55 #include <dev/acpi/apei_bertvar.h>
     56 #include <dev/acpi/apei_cper.h>
     57 #include <dev/acpi/apei_einjvar.h>
     58 #include <dev/acpi/apei_erstvar.h>
     59 #include <dev/acpi/apei_hestvar.h>
     60 #include <dev/acpi/apei_interp.h>
     61 #include <dev/acpi/apeivar.h>
     62 #include <dev/pci/pcireg.h>
     63 
     64 #define	_COMPONENT	ACPI_RESOURCE_COMPONENT
     65 ACPI_MODULE_NAME	("apei")
     66 
/*
 * Autoconfiguration entry points, defined below.
 */
static int apei_match(device_t, cfdata_t, void *);
static void apei_attach(device_t, device_t, void *);
static int apei_detach(device_t, int);

/*
 * Acquire and release references to the APEI-related ACPI tables.
 */
static void apei_get_tables(struct apei_tab *);
static void apei_put_tables(struct apei_tab *);

/*
 * Print a table's OEM and ASL compiler identification at attach time.
 */
static void apei_identify(struct apei_softc *, const char *,
    const ACPI_TABLE_HEADER *);

/*
 * Autoconfiguration glue for the apei driver: the softc size and the
 * match, attach, and detach routines.  The final hook argument is left
 * NULL.
 */
CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
    apei_match, apei_attach, apei_detach, NULL);
     79 
     80 static int
     81 apei_match(device_t parent, cfdata_t match, void *aux)
     82 {
     83 	struct apei_tab tab;
     84 	int prio = 0;
     85 
     86 	/*
     87 	 * If we have any of the APEI tables, match.
     88 	 */
     89 	apei_get_tables(&tab);
     90 	if (tab.bert || tab.einj || tab.erst || tab.hest)
     91 		prio = 1;
     92 	apei_put_tables(&tab);
     93 
     94 	return prio;
     95 }
     96 
     97 static void
     98 apei_attach(device_t parent, device_t self, void *aux)
     99 {
    100 	struct apei_softc *sc = device_private(self);
    101 	const struct sysctlnode *sysctl_hw_acpi;
    102 	int error;
    103 
    104 	aprint_naive("\n");
    105 	aprint_normal(": ACPI Platform Error Interface\n");
    106 
    107 	pmf_device_register(self, NULL, NULL);
    108 
    109 	sc->sc_dev = self;
    110 	apei_get_tables(&sc->sc_tab);
    111 
    112 	/*
    113 	 * Get the sysctl hw.acpi node.  This should already be created
    114 	 * but I don't see an easy way to get at it.  If this fails,
    115 	 * something is seriously wrong, so let's stop here.
    116 	 */
    117 	error = sysctl_createv(&sc->sc_sysctllog, 0,
    118 	    NULL, &sysctl_hw_acpi, 0,
    119 	    CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
    120 	    CTL_HW, CTL_CREATE, CTL_EOL);
    121 	if (error) {
    122 		aprint_error_dev(sc->sc_dev,
    123 		    "failed to create sysctl hw.acpi: %d\n", error);
    124 		return;
    125 	}
    126 
    127 	/*
    128 	 * Create sysctl hw.acpi.apei.
    129 	 */
    130 	error = sysctl_createv(&sc->sc_sysctllog, 0,
    131 	    &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
    132 	    CTLTYPE_NODE, "apei",
    133 	    SYSCTL_DESCR("ACPI Platform Error Interface"),
    134 	    NULL, 0, NULL, 0,
    135 	    CTL_CREATE, CTL_EOL);
    136 	if (error) {
    137 		aprint_error_dev(sc->sc_dev,
    138 		    "failed to create sysctl hw.acpi.apei: %d\n", error);
    139 		return;
    140 	}
    141 
    142 	/*
    143 	 * Set up BERT, EINJ, ERST, and HEST.
    144 	 */
    145 	if (sc->sc_tab.bert) {
    146 		apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
    147 		apei_bert_attach(sc);
    148 	}
    149 	if (sc->sc_tab.einj) {
    150 		apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
    151 		apei_einj_attach(sc);
    152 	}
    153 	if (sc->sc_tab.erst) {
    154 		apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
    155 		apei_erst_attach(sc);
    156 	}
    157 	if (sc->sc_tab.hest) {
    158 		apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
    159 		apei_hest_attach(sc);
    160 	}
    161 }
    162 
    163 static int
    164 apei_detach(device_t self, int flags)
    165 {
    166 	struct apei_softc *sc = device_private(self);
    167 	int error;
    168 
    169 	/*
    170 	 * Detach children.  We don't currently have any but this is
    171 	 * harmless without children and mandatory if we ever sprouted
    172 	 * them, so let's just leave it here for good measure.
    173 	 *
    174 	 * After this point, we are committed to detaching; failure is
    175 	 * forbidden.
    176 	 */
    177 	error = config_detach_children(self, flags);
    178 	if (error)
    179 		return error;
    180 
    181 	/*
    182 	 * Tear down all the sysctl nodes first, before the software
    183 	 * state backing them goes away.
    184 	 */
    185 	sysctl_teardown(&sc->sc_sysctllog);
    186 	sc->sc_sysctlroot = NULL;
    187 
    188 	/*
    189 	 * Detach the software state for the APEI tables.
    190 	 */
    191 	if (sc->sc_tab.hest)
    192 		apei_hest_detach(sc);
    193 	if (sc->sc_tab.erst)
    194 		apei_erst_detach(sc);
    195 	if (sc->sc_tab.einj)
    196 		apei_einj_detach(sc);
    197 	if (sc->sc_tab.bert)
    198 		apei_bert_detach(sc);
    199 
    200 	/*
    201 	 * Release the APEI tables and we're done.
    202 	 */
    203 	apei_put_tables(&sc->sc_tab);
    204 	pmf_device_deregister(self);
    205 	return 0;
    206 }
    207 
    208 /*
    209  * apei_get_tables(tab)
    210  *
    211  *	Get references to whichever APEI-related tables -- BERT, EINJ,
    212  *	ERST, HEST -- are available in the system.
    213  */
    214 static void
    215 apei_get_tables(struct apei_tab *tab)
    216 {
    217 	ACPI_STATUS rv;
    218 
    219 	/*
    220 	 * Probe the BERT -- Boot Error Record Table.
    221 	 */
    222 	rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
    223 	if (ACPI_FAILURE(rv))
    224 		tab->bert = NULL;
    225 
    226 	/*
    227 	 * Probe the EINJ -- Error Injection Table.
    228 	 */
    229 	rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
    230 	if (ACPI_FAILURE(rv))
    231 		tab->einj = NULL;
    232 
    233 	/*
    234 	 * Probe the ERST -- Error Record Serialization Table.
    235 	 */
    236 	rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
    237 	if (ACPI_FAILURE(rv))
    238 		tab->erst = NULL;
    239 
    240 	/*
    241 	 * Probe the HEST -- Hardware Error Source Table.
    242 	 */
    243 	rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
    244 	if (ACPI_FAILURE(rv))
    245 		tab->hest = NULL;
    246 }
    247 
    248 /*
    249  * apei_put_tables(tab)
    250  *
    251  *	Release the tables acquired by apei_get_tables.
    252  */
    253 static void
    254 apei_put_tables(struct apei_tab *tab)
    255 {
    256 
    257 	if (tab->bert != NULL) {
    258 		AcpiPutTable(&tab->bert->Header);
    259 		tab->bert = NULL;
    260 	}
    261 	if (tab->einj != NULL) {
    262 		AcpiPutTable(&tab->einj->Header);
    263 		tab->einj = NULL;
    264 	}
    265 	if (tab->erst != NULL) {
    266 		AcpiPutTable(&tab->erst->Header);
    267 		tab->erst = NULL;
    268 	}
    269 	if (tab->hest != NULL) {
    270 		AcpiPutTable(&tab->hest->Header);
    271 		tab->hest = NULL;
    272 	}
    273 }
    274 
    275 /*
    276  * apei_identify(sc, name, header)
    277  *
    278  *	Identify the APEI-related table header for dmesg.
    279  */
    280 static void
    281 apei_identify(struct apei_softc *sc, const char *name,
    282     const ACPI_TABLE_HEADER *h)
    283 {
    284 
    285 	aprint_normal_dev(sc->sc_dev, "%s:"
    286 	    " OemId <%6.6s,%8.8s,%08x>"
    287 	    " AslId <%4.4s,%08x>\n",
    288 	    name,
    289 	    h->OemId, h->OemTableId, h->OemRevision,
    290 	    h->AslCompilerId, h->AslCompilerRevision);
    291 }
    292 
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Decode the 16-byte Common Platform Error Record UUID/GUID found
 *	in an ACPI table at buf into the sys/uuid.h struct uuid pointed
 *	to by uuid.
 *
 *	This is a thin wrapper around uuid_dec_le.
 *	NOTE(review): going by the name, that treats the multi-byte
 *	fields as little-endian -- confirm against uuid(9).
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
    305 
    306 /*
    307  * apei_format_guid(uuid, s)
    308  *
    309  *	Format a UUID as a string.  This uses C initializer notation,
    310  *	not UUID notation, in order to match the text in the UEFI
    311  *	specification.
    312  */
    313 static void
    314 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
    315 {
    316 
    317 	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
    318 	    "{0x%02x,%02x,"
    319 	    "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
    320 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
    321 	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
    322 	    uuid->node[0], uuid->node[1], uuid->node[2],
    323 	    uuid->node[3], uuid->node[4], uuid->node[5]);
    324 }
    325 
/*
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Symbolic names for the MemoryErrorType field of a memory error
 * section.  The table is generated from the CPER_MEMORY_ERROR_TYPES
 * list: for each (LN, SN, V) entry, the stringified SN is stored at
 * index LN.  Indices not named in the list are left NULL, so users
 * must check for NULL as well as for the array bound.
 */

static const char *const cper_memory_error_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_MEMORY_ERROR_TYPES(F)
#undef	F
};
    335 
/*
 * Names of the error severities found in a Generic Error Status
 * Block, indexed by the numeric severity:
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica ACPI_HEST_GEN_ERROR_* names appear to have the same
 * values, but acpica designates them for Generic Error Data Entries
 * rather than Generic Error Status Blocks, so the indices are spelled
 * out as plain numbers here.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",		/* 0 */
	"fatal",		/* 1 */
	"corrected",		/* 2 */
	"none",			/* 3 */
};
    349 
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
 *
 * Names of the error severities found in a Generic Error Data Entry,
 * indexed by the acpica ACPI_HEST_GEN_ERROR_* constants.  Users must
 * bounds-check the severity against the array size before indexing.
 */
static const char *const apei_gede_severity[] = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
	[ACPI_HEST_GEN_ERROR_NONE] = "none",
};
    359 
/*
 * N.2.5. Memory Error Section
 *
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Section type UUID identifying a memory error section; referenced
 * from the apei_cper_reports table.
 */
static const struct uuid CPER_MEMORY_ERROR_SECTION =
    {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
    367 
/*
 * apei_cper_memory_error_report(sc, buf, len, ctx, ratelimitok)
 *
 *	Print the contents of the CPER memory error section at buf,
 *	one field per line, each line prefixed with the device name
 *	and ctx.  A field is printed only if its bit is set in the
 *	section's ValidationBits.  Nothing at all is printed if
 *	ratelimitok is false.
 *
 *	len is not examined here.
 *	NOTE(review): presumably the caller has already checked len
 *	against the minlength in apei_cper_reports -- confirm.
 */
static void
apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
    size_t len, const char *ctx, bool ratelimitok)
{
	const struct cper_memory_error *ME = buf;
	char bitbuf[1024];

	/*
	 * If we've hit the rate limit, skip printing the error.
	 */
	if (!ratelimitok)
		goto out;

	/*
	 * The validation bits themselves are only of interest for
	 * debugging, so they go out at debug level.
	 */
	snprintb(bitbuf, sizeof(bitbuf),
	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
		/*
		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
		 *
		 * ErrorType is an 8-bit field starting at bit 8; the
		 * remaining entries are single-bit flags at bits 16-22.
		 */
		/* XXX define this format somewhere */
		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
		    "f\010\010"	"ErrorType\0"
			"=\001"		"ERR_INTERNAL\0"
			"=\004"		"ERR_MEM\0"
			"=\005"		"ERR_TLB\0"
			"=\006"		"ERR_CACHE\0"
			"=\007"		"ERR_FUNCTION\0"
			"=\010"		"ERR_SELFTEST\0"
			"=\011"		"ERR_FLOW\0"
			"=\020"		"ERR_BUS\0"
			"=\021"		"ERR_MAP\0"
			"=\022"		"ERR_IMPROPER\0"
			"=\023"		"ERR_UNIMPL\0"
			"=\024"		"ERR_LOL\0"
			"=\025"		"ERR_RESPONSE\0"
			"=\026"		"ERR_PARITY\0"
			"=\027"		"ERR_PROTOCOL\0"
			"=\030"		"ERR_ERROR\0"
			"=\031"		"ERR_TIMEOUT\0"
			"=\032"		"ERR_POISONED\0"
		    "b\020"	"AddressError\0"
		    "b\021"	"ControlError\0"
		    "b\022"	"DataError\0"
		    "b\023"	"ResponderDetected\0"
		    "b\024"	"RequesterDetected\0"
		    "b\025"	"FirstError\0"
		    "b\026"	"Overflow\0"
		    "\0", ME->ErrorStatus);
		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
		    ctx, ME->PhysicalAddress);
	}
	if (ME->ValidationBits &
	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
		    "\n", ctx, ME->PhysicalAddressMask);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
		    ME->Node);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
		    ME->Card);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
		    ME->Module);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
		    ME->Bank);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
		    ME->Device);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
		    ME->Row);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
		    ME->Column);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
		    ctx, ME->BitPosition);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
		    ctx, ME->RequestorId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
		    ctx, ME->ResponderId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
		    ctx, ME->TargetId);
	}
	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
		/*
		 * Look up a symbolic name for the type.  The name is
		 * NULL both when the type is past the end of the
		 * table and when it falls on a gap within the table,
		 * in which case only the number is printed.
		 */
		const uint8_t t = ME->MemoryErrorType;
		const char *n = t < __arraycount(cper_memory_error_type)
		    ? cper_memory_error_type[t] : NULL;

		if (n) {
			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
			    " (%s)\n", ctx, t, n);
		} else {
			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
			    ctx, t);
		}
	}

out:	/*
	 * XXX pass this through to uvm(9) or userland for decisions
	 * like page retirement
	 */
	return;
}
    492 
/*
 * N.2.7. PCI Express Error Section
 *
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
 *
 * Section type UUID identifying a PCI Express error section;
 * referenced from the apei_cper_reports table.
 */
static const struct uuid CPER_PCIE_ERROR_SECTION =
    {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
    500 
/*
 * Symbolic names for the PortType field of a PCIe error section.  The
 * table is generated from the CPER_PCIE_ERROR_PORT_TYPES list: for
 * each (LN, SN, V) entry, the stringified SN is stored at index LN.
 * Indices not named in the list are left NULL.
 */
static const char *const cper_pcie_error_port_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_PCIE_ERROR_PORT_TYPES(F)
#undef	F
};
    506 
    507 static void
    508 apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
    509     const char *ctx, bool ratelimitok)
    510 {
    511 	const struct cper_pcie_error *PE = buf;
    512 	char bitbuf[1024];
    513 
    514 	/*
    515 	 * If we've hit the rate limit, skip printing the error.
    516 	 */
    517 	if (!ratelimitok)
    518 		goto out;
    519 
    520 	snprintb(bitbuf, sizeof(bitbuf),
    521 	    CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
    522 	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
    523 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
    524 		const uint32_t t = PE->PortType;
    525 		const char *n = t < __arraycount(cper_pcie_error_port_type)
    526 		    ? cper_pcie_error_port_type[t] : NULL;
    527 
    528 		if (n) {
    529 			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
    530 			    " (%s)\n", ctx, t, n);
    531 		} else {
    532 			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
    533 			    ctx, t);
    534 		}
    535 	}
    536 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
    537 		/* XXX BCD */
    538 		device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
    539 		    ctx, PE->Version);
    540 	}
    541 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
    542 		/* XXX move me to pcireg.h */
    543 		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
    544 			/* command */
    545 		    "b\000"	"IO_ENABLE\0"
    546 		    "b\001"	"MEM_ENABLE\0"
    547 		    "b\002"	"MASTER_ENABLE\0"
    548 		    "b\003"	"SPECIAL_ENABLE\0"
    549 		    "b\004"	"INVALIDATE_ENABLE\0"
    550 		    "b\005"	"PALETTE_ENABLE\0"
    551 		    "b\006"	"PARITY_ENABLE\0"
    552 		    "b\007"	"STEPPING_ENABLE\0"
    553 		    "b\010"	"SERR_ENABLE\0"
    554 		    "b\011"	"BACKTOBACK_ENABLE\0"
    555 		    "b\012"	"INTERRUPT_DISABLE\0"
    556 			/* status */
    557 		    "b\023"	"INT_STATUS\0"
    558 		    "b\024"	"CAPLIST_SUPPORT\0"
    559 		    "b\025"	"66MHZ_SUPPORT\0"
    560 		    "b\026"	"UDF_SUPPORT\0"
    561 		    "b\027"	"BACKTOBACK_SUPPORT\0"
    562 		    "b\030"	"PARITY_ERROR\0"
    563 		    "f\031\002"	"DEVSEL\0"
    564 			"=\000"		"FAST\0"
    565 			"=\001"		"MEDIUM\0"
    566 			"=\002"		"SLOW\0"
    567 		    "b\033"	"TARGET_TARGET_ABORT\0"
    568 		    "b\034"	"MASTER_TARGET_ABORT\0"
    569 		    "b\035"	"MASTER_ABORT\0"
    570 		    "b\036"	"SPECIAL_ERROR\0"
    571 		    "b\037"	"PARITY_DETECT\0"
    572 		    "\0", PE->CommandStatus);
    573 		device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
    574 		    ctx, bitbuf);
    575 	}
    576 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
    577 		device_printf(sc->sc_dev, "%s: DeviceID:"
    578 		    " VendorID=0x%04"PRIx16
    579 		    " DeviceID=0x%04"PRIx16
    580 		    " ClassCode=0x%06"PRIx32
    581 		    " Function=%"PRIu8
    582 		    " Device=%"PRIu8
    583 		    " Segment=%"PRIu16
    584 		    " Bus=%"PRIu8
    585 		    " SecondaryBus=%"PRIu8
    586 		    " Slot=0x%04"PRIx16
    587 		    " Reserved0=0x%02"PRIx8
    588 		    "\n",
    589 		    ctx,
    590 		    le16dec(PE->DeviceID.VendorID),
    591 		    le16dec(PE->DeviceID.DeviceID),
    592 		    (PE->DeviceID.ClassCode[0] |	/* le24dec */
    593 			((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
    594 			((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
    595 		    PE->DeviceID.Function, PE->DeviceID.Device,
    596 		    le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
    597 		    PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
    598 		    PE->DeviceID.Reserved0);
    599 	}
    600 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
    601 		device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
    602 		    ctx, PE->DeviceSerial);
    603 	}
    604 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
    605 		/* XXX snprintb */
    606 		device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
    607 		    "\n", ctx, PE->BridgeControlStatus);
    608 	}
    609 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
    610 		uint32_t dcsr, dsr;
    611 		char hex[9*sizeof(PE->CapabilityStructure)/4];
    612 		unsigned i;
    613 
    614 		/*
    615 		 * Display a hex dump of each 32-bit register in the
    616 		 * PCIe capability structure.
    617 		 */
    618 		__CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0);
    619 		for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) {
    620 			snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
    621 			    le32dec(&PE->CapabilityStructure[4*i]));
    622 		}
    623 		hex[sizeof(hex) - 1] = '\0';
    624 		device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
    625 		    ctx, hex);
    626 
    627 		/*
    628 		 * If the Device Status Register has any bits set,
    629 		 * highlight it in particular -- these are probably
    630 		 * error bits.
    631 		 */
    632 		dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
    633 		dsr = __SHIFTOUT(dcsr, __BITS(31,16));
    634 		if (dsr != 0) {
    635 			/*
    636 			 * XXX move me to pcireg.h; note: high
    637 			 * half of DCSR
    638 			 */
    639 			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
    640 			    "b\000"	"CORRECTABLE_ERROR\0"
    641 			    "b\001"	"NONFATAL_UNCORRECTABLE_ERROR\0"
    642 			    "b\002"	"FATAL_ERROR\0"
    643 			    "b\003"	"UNSUPPORTED_REQUEST\0"
    644 			    "b\004"	"AUX_POWER\0"
    645 			    "b\005"	"TRANSACTIONS_PENDING\0"
    646 			    "\0", dsr);
    647 			device_printf(sc->sc_dev, "%s: PCIe Device Status:"
    648 			    " %s\n",
    649 			    ctx, bitbuf);
    650 		}
    651 	}
    652 	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
    653 		uint32_t uc_status, uc_sev;
    654 		uint32_t cor_status;
    655 		uint32_t control;
    656 		char hex[9*sizeof(PE->AERInfo)/4];
    657 		unsigned i;
    658 
    659 		/*
    660 		 * Display a hex dump of each 32-bit register in the
    661 		 * PCIe Advanced Error Reporting extended capability
    662 		 * structure.
    663 		 */
    664 		__CTASSERT(sizeof(PE->AERInfo) % 4 == 0);
    665 		for (i = 0; i < sizeof(PE->AERInfo)/4; i++) {
    666 			snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
    667 			    le32dec(&PE->AERInfo[4*i]));
    668 		}
    669 		hex[sizeof(hex) - 1] = '\0';
    670 		device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
    671 
    672 			/* XXX move me to pcireg.h */
    673 #define	PCI_AER_UC_STATUS_FMT	"\177\020"				      \
    674 	"b\000"	"UNDEFINED\0"						      \
    675 	"b\004"	"DL_PROTOCOL_ERROR\0"					      \
    676 	"b\005"	"SURPRISE_DOWN_ERROR\0"					      \
    677 	"b\014"	"POISONED_TLP\0"					      \
    678 	"b\015"	"FC_PROTOCOL_ERROR\0"					      \
    679 	"b\016"	"COMPLETION_TIMEOUT\0"					      \
    680 	"b\017"	"COMPLETION_ABORT\0"					      \
    681 	"b\020"	"UNEXPECTED_COMPLETION\0"				      \
    682 	"b\021"	"RECEIVER_OVERFLOW\0"					      \
    683 	"b\022"	"MALFORMED_TLP\0"					      \
    684 	"b\023"	"ECRC_ERROR\0"						      \
    685 	"b\024"	"UNSUPPORTED_REQUEST_ERROR\0"				      \
    686 	"b\025"	"ACS_VIOLATION\0"					      \
    687 	"b\026"	"INTERNAL_ERROR\0"					      \
    688 	"b\027"	"MC_BLOCKED_TLP\0"					      \
    689 	"b\030"	"ATOMIC_OP_EGRESS_BLOCKED\0"				      \
    690 	"b\031"	"TLP_PREFIX_BLOCKED_ERROR\0"				      \
    691 	"b\032"	"POISONTLP_EGRESS_BLOCKED\0"				      \
    692 	"\0"
    693 
    694 		/*
    695 		 * If there are any hardware error status bits set,
    696 		 * highlight them in particular, in three groups:
    697 		 *
    698 		 * - uncorrectable fatal (UC_STATUS and UC_SEVERITY)
    699 		 * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY)
    700 		 * - corrected (COR_STATUS)
    701 		 *
    702 		 * And if there are any uncorrectable errors, show
    703 		 * which one was reported first, according to
    704 		 * CAP_CONTROL.
    705 		 */
    706 		uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
    707 		uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
    708 		cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
    709 		control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
    710 
    711 		if (uc_status & uc_sev) {
    712 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
    713 			    uc_status & uc_sev);
    714 			device_printf(sc->sc_dev, "%s:"
    715 			    " AER hardware fatal uncorrectable errors: %s\n",
    716 			    ctx, bitbuf);
    717 		}
    718 		if (uc_status & ~uc_sev) {
    719 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
    720 			    uc_status & ~uc_sev);
    721 			device_printf(sc->sc_dev, "%s:"
    722 			    " AER hardware non-fatal uncorrectable errors:"
    723 			    " %s\n",
    724 			    ctx, bitbuf);
    725 		}
    726 		if (uc_status) {
    727 			unsigned first = __SHIFTOUT(control,
    728 			    PCI_AER_FIRST_ERROR_PTR);
    729 			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
    730 			    (uint32_t)1 << first);
    731 			device_printf(sc->sc_dev, "%s:"
    732 			    " AER hardware first uncorrectable error: %s\n",
    733 			    ctx, bitbuf);
    734 		}
    735 		if (cor_status) {
    736 			/* XXX move me to pcireg.h */
    737 			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
    738 			    "b\000"	"RECEIVER_ERROR\0"
    739 			    "b\006"	"BAD_TLP\0"
    740 			    "b\007"	"BAD_DLLP\0"
    741 			    "b\010"	"REPLAY_NUM_ROLLOVER\0"
    742 			    "b\014"	"REPLAY_TIMER_TIMEOUT\0"
    743 			    "b\015"	"ADVISORY_NF_ERROR\0"
    744 			    "b\016"	"INTERNAL_ERROR\0"
    745 			    "b\017"	"HEADER_LOG_OVERFLOW\0"
    746 			    "\0", cor_status);
    747 			device_printf(sc->sc_dev, "%s:"
    748 			    " AER hardware corrected error: %s\n",
    749 			    ctx, bitbuf);
    750 		}
    751 	}
    752 
    753 out:	/*
    754 	 * XXX pass this on to the PCI subsystem to handle
    755 	 */
    756 	return;
    757 }
    758 
/*
 * apei_cper_reports
 *
 *	Table of known Common Platform Error Record types, symbolic
 *	names, minimum data lengths, and functions to report them.
 *
 *	Each entry holds:
 *
 *	- name: short human-readable name of the error type
 *	- type: pointer to the section type UUID it is matched against
 *	- minlength: size of the structure the report function reads
 *	- func: report function, taking (sc, buf, len, ctx, ratelimitok)
 *
 *	The section types and corresponding section layouts are listed
 *	at:
 *
 *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
 */
static const struct apei_cper_report {
	const char *name;
	const struct uuid *type;
	size_t minlength;
	void (*func)(struct apei_softc *, const void *, size_t, const char *,
	    bool);
} apei_cper_reports[] = {
	{ "memory", &CPER_MEMORY_ERROR_SECTION,
	  sizeof(struct cper_memory_error),
	  apei_cper_memory_error_report },
	{ "PCIe", &CPER_PCIE_ERROR_SECTION,
	  sizeof(struct cper_pcie_error),
	  apei_cper_pcie_error_report },
};
    784 
    785 /*
    786  * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
    787  *
    788  *	Report the header of the ith Generic Error Data Entry in the
    789  *	given context, if ratelimitok is true.
    790  *
    791  *	Return the actual length of the header in headerlen, or 0 if
    792  *	not known because the revision isn't recognized.
    793  *
    794  *	Return the report type in report, or NULL if not known because
    795  *	the section type isn't recognized.
    796  */
    797 static void
    798 apei_gede_report_header(struct apei_softc *sc,
    799     const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
    800     size_t *headerlenp, const struct apei_cper_report **reportp)
    801 {
    802 	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
    803 	struct uuid sectype;
    804 	char guidstr[69];
    805 	char buf[128];
    806 	unsigned i;
    807 
    808 	/*
    809 	 * Print the section type as a C initializer.  It would be
    810 	 * prettier to use standard hyphenated UUID notation, but that
    811 	 * notation is slightly ambiguous here (two octets could be
    812 	 * written either way, depending on Microsoft convention --
    813 	 * which influenced ACPI and UEFI -- or internet convention),
    814 	 * and the UEFI spec writes the C initializer notation, so this
    815 	 * makes it easier to search for.
    816 	 *
    817 	 * Also print out a symbolic name, if we know it.
    818 	 */
    819 	apei_cper_guid_dec(gede->SectionType, &sectype);
    820 	apei_format_guid(&sectype, guidstr);
    821 	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
    822 		const struct apei_cper_report *const report =
    823 		    &apei_cper_reports[i];
    824 
    825 		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
    826 			continue;
    827 		if (ratelimitok) {
    828 			device_printf(sc->sc_dev, "%s:"
    829 			    " SectionType=%s (%s error)\n",
    830 			    ctx, guidstr, report->name);
    831 		}
    832 		*reportp = report;
    833 		break;
    834 	}
    835 	if (i == __arraycount(apei_cper_reports)) {
    836 		if (ratelimitok) {
    837 			device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
    838 			    guidstr);
    839 		}
    840 		*reportp = NULL;
    841 	}
    842 
    843 	/*
    844 	 * Print the numeric severity and, if we have it, a symbolic
    845 	 * name for it.
    846 	 */
    847 	if (ratelimitok) {
    848 		device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
    849 		    ctx,
    850 		    gede->ErrorSeverity,
    851 		    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
    852 			? apei_gede_severity[gede->ErrorSeverity]
    853 			: "unknown"));
    854 	}
    855 
    856 	/*
    857 	 * The Revision may not often be useful, but this is only ever
    858 	 * shown at the time of a hardware error report, not something
    859 	 * you can glean at your convenience with acpidump.  So print
    860 	 * it anyway.
    861 	 */
    862 	if (ratelimitok) {
    863 		device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
    864 		    gede->Revision);
    865 	}
    866 
    867 	/*
    868 	 * Don't touch anything past the Revision until we've
    869 	 * determined we understand it.  Return the header length to
    870 	 * the caller, or return zero -- and stop here -- if we don't
    871 	 * know what the actual header length is.
    872 	 */
    873 	if (gede->Revision < 0x0300) {
    874 		*headerlenp = sizeof(*gede);
    875 	} else if (gede->Revision < 0x0400) {
    876 		*headerlenp = sizeof(*gede_v3);
    877 	} else {
    878 		*headerlenp = 0;
    879 		return;
    880 	}
    881 
    882 	/*
    883 	 * Print the validation bits at debug level.  Only really
    884 	 * helpful if there are bits we _don't_ know about.
    885 	 */
    886 	if (ratelimitok) {
    887 		/* XXX define this format somewhere */
    888 		snprintb(buf, sizeof(buf), "\177\020"
    889 		    "b\000"	"FRU_ID\0"
    890 		    "b\001"	"FRU_TEXT\0" /* `FRU string', sometimes */
    891 		    "b\002"	"TIMESTAMP\0"
    892 		    "\0", gede->ValidationBits);
    893 		aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
    894 		    buf);
    895 	}
    896 
    897 	/*
    898 	 * Print the CPER section flags.
    899 	 */
    900 	if (ratelimitok) {
    901 		snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
    902 		    gede->Flags);
    903 		device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
    904 	}
    905 
    906 	/*
    907 	 * The ErrorDataLength is unlikely to be useful for the log, so
    908 	 * print it at debug level only.
    909 	 */
    910 	if (ratelimitok) {
    911 		aprint_debug_dev(sc->sc_dev, "%s:"
    912 		    " ErrorDataLength=0x%"PRIu32"\n",
    913 		    ctx, gede->ErrorDataLength);
    914 	}
    915 
    916 	/*
    917 	 * Print the FRU Id and text, if available.
    918 	 */
    919 	if (ratelimitok &&
    920 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
    921 		struct uuid fruid;
    922 
    923 		apei_cper_guid_dec(gede->FruId, &fruid);
    924 		apei_format_guid(&fruid, guidstr);
    925 		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
    926 	}
    927 	if (ratelimitok &&
    928 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
    929 		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
    930 		    ctx, gede->FruText);
    931 	}
    932 
    933 	/*
    934 	 * Print the timestamp, if available by the revision number and
    935 	 * the validation bits.
    936 	 */
    937 	if (ratelimitok &&
    938 	    gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
    939 	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
    940 		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
    941 		const uint8_t s = t[0];
    942 		const uint8_t m = t[1];
    943 		const uint8_t h = t[2];
    944 		const uint8_t f = t[3];
    945 		const uint8_t D = t[4];
    946 		const uint8_t M = t[5];
    947 		const uint8_t Y = t[6];
    948 		const uint8_t C = t[7];
    949 
    950 		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
    951 		    " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
    952 		    ctx, gede_v3->TimeStamp,
    953 		    C,Y, M, D, h,m,s,
    954 		    f & __BIT(0) ? " (event time)" : " (collect time)");
    955 	}
    956 }
    957 
/*
 * apei_gesb_ratelimit
 *
 *	State to limit the rate of console log messages about hardware
 *	errors.  For each of the four severity levels in a Generic
 *	Error Status Block,
 *
 *	0 - Recoverable (uncorrectable),
 *	1 - Fatal (uncorrectable),
 *	2 - Corrected, and
 *	3 - None (including ill-formed errors),
 *
 *	we record the last time it happened, protected by a CPU simple
 *	lock that we only try-acquire so it is safe to use in any
 *	context, including non-maskable interrupt context.
 *
 *	We also keep, outside the lock, a saturating count of the
 *	reports that were not printed since the last one that was, so
 *	the next printed report can say how many were dropped.
 *
 *	Each element is aligned to COHERENCY_UNIT, presumably so that
 *	the severities do not share a cache line.
 */

static struct {
	/* try-locked around the ratecheck(9) call on lasttime */
	__cpu_simple_lock_t	lock;
	/* ratecheck(9) state: when a report was last allowed through */
	struct timeval		lasttime;
	/* reports suppressed since then; updated with atomics only */
	volatile uint32_t	suppressed;
} __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
};
    985 
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX rather than wrapping around to zero.
 *
 *	Lock-free: retries the compare-and-swap until it observes the
 *	same old value it computed the new value from, so it can be
 *	used where taking a lock is not an option.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		/*
		 * Already pegged at the maximum: leave it alone.  The
		 * limit must be that of uint32_t, not of unsigned int,
		 * to match the counter's type.
		 */
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
    998 
    999 /*
   1000  * apei_gesb_ratecheck(sc, severity, suppressed)
   1001  *
   1002  *	Check for a rate limit on errors of the specified severity.
   1003  *
   1004  *	=> Return true if the error should be printed, and format into
   1005  *	   the buffer suppressed a message saying how many errors were
   1006  *	   previously suppressed.
   1007  *
   1008  *	=> Return false if the error should be suppressed because the
   1009  *	   last one printed was too recent.
   1010  */
   1011 static bool
   1012 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
   1013     char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
   1014 {
   1015 	/* one of each type per minute (XXX worth making configurable?) */
   1016 	const struct timeval mininterval = {60, 0};
   1017 	unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
   1018 	bool ok = false;
   1019 
   1020 	/*
   1021 	 * If the lock is contended, the rate limit is probably
   1022 	 * exceeded, so it's not OK to print.
   1023 	 *
   1024 	 * Otherwise, with the lock held, ask ratecheck(9) whether it's
   1025 	 * OK to print.
   1026 	 */
   1027 	if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
   1028 		goto out;
   1029 	ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
   1030 	__cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
   1031 
   1032 out:	/*
   1033 	 * If it's OK to print, report the number of errors that were
   1034 	 * suppressed.  If it's not OK to print, count a suppressed
   1035 	 * error.
   1036 	 */
   1037 	if (ok) {
   1038 		const uint32_t n =
   1039 		    atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
   1040 
   1041 		if (n == 0) {
   1042 			suppressed[0] = '\0';
   1043 		} else {
   1044 			snprintf(suppressed,
   1045 			    sizeof(" (4294967295 or more errors suppressed)"),
   1046 			    " (%u%s error%s suppressed)",
   1047 			    n,
   1048 			    n == UINT32_MAX ? " or more" : "",
   1049 			    n == 1 ? "" : "s");
   1050 		}
   1051 	} else {
   1052 		atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
   1053 		suppressed[0] = '\0';
   1054 	}
   1055 	return ok;
   1056 }
   1057 
   1058 /*
   1059  * apei_gesb_report(sc, gesb, size, ctx)
   1060  *
   1061  *	Check a Generic Error Status Block, of at most the specified
   1062  *	size in bytes, and report any errors in it.  Return the 32-bit
   1063  *	Block Status in case the caller needs it to acknowledge the
   1064  *	report to firmware.
   1065  */
   1066 uint32_t
   1067 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
   1068     size_t size, const char *ctx, bool *fatalp)
   1069 {
   1070 	uint32_t status, unknownstatus, severity, nentries, i;
   1071 	uint32_t datalen, rawdatalen;
   1072 	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
   1073 	const unsigned char *rawdata;
   1074 	bool ratelimitok = false;
   1075 	char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
   1076 	bool fatal = false;
   1077 
   1078 	/*
   1079 	 * Verify the buffer is large enough for a Generic Error Status
   1080 	 * Block before we try to touch anything in it.
   1081 	 */
   1082 	if (size < sizeof(*gesb)) {
   1083 		ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
   1084 		    suppressed);
   1085 		if (ratelimitok) {
   1086 			device_printf(sc->sc_dev,
   1087 			    "%s: truncated GESB, %zu < %zu%s\n",
   1088 			    ctx, size, sizeof(*gesb), suppressed);
   1089 		}
   1090 		status = 0;
   1091 		goto out;
   1092 	}
   1093 	size -= sizeof(*gesb);
   1094 
   1095 	/*
   1096 	 * Load the status.  Access ordering rules are unclear in the
   1097 	 * ACPI specification; I'm guessing that load-acquire of the
   1098 	 * block status is a good idea before any other access to the
   1099 	 * GESB.
   1100 	 */
   1101 	status = atomic_load_acquire(&gesb->BlockStatus);
   1102 
   1103 	/*
   1104 	 * If there are no status bits set, the rest of the GESB is
   1105 	 * garbage, so stop here.
   1106 	 */
   1107 	if (status == 0) {
   1108 		/* XXX dtrace */
   1109 		/* XXX DPRINTF */
   1110 		goto out;
   1111 	}
   1112 
   1113 	/*
   1114 	 * Read out the severity and get the number of entries in this
   1115 	 * status block.
   1116 	 */
   1117 	severity = gesb->ErrorSeverity;
   1118 	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
   1119 
   1120 	/*
   1121 	 * Print a message to the console and dmesg about the severity
   1122 	 * of the error.
   1123 	 */
   1124 	ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
   1125 	if (ratelimitok) {
   1126 		char statusbuf[128];
   1127 
   1128 		/* XXX define this format somewhere */
   1129 		snprintb(statusbuf, sizeof(statusbuf), "\177\020"
   1130 		    "b\000"	"UE\0"
   1131 		    "b\001"	"CE\0"
   1132 		    "b\002"	"MULTI_UE\0"
   1133 		    "b\003"	"MULTI_CE\0"
   1134 		    "f\004\010"	"GEDE_COUNT\0"
   1135 		    "\0", status);
   1136 
   1137 		if (severity < __arraycount(apei_gesb_severity)) {
   1138 			device_printf(sc->sc_dev, "%s"
   1139 			    " reported hardware error%s:"
   1140 			    " severity=%s nentries=%u status=%s\n",
   1141 			    ctx, suppressed,
   1142 			    apei_gesb_severity[severity], nentries, statusbuf);
   1143 		} else {
   1144 			device_printf(sc->sc_dev, "%s reported error%s:"
   1145 			    " severity=%"PRIu32" nentries=%u status=%s\n",
   1146 			    ctx, suppressed,
   1147 			    severity, nentries, statusbuf);
   1148 		}
   1149 	}
   1150 
   1151 	/*
   1152 	 * Make a determination about whether the error is fatal.
   1153 	 *
   1154 	 * XXX Currently we don't have any mechanism to recover from
   1155 	 * uncorrectable but recoverable errors, so we treat those --
   1156 	 * and anything else we don't recognize -- as fatal.
   1157 	 */
   1158 	switch (severity) {
   1159 	case ACPI_HEST_GEN_ERROR_CORRECTED:
   1160 	case ACPI_HEST_GEN_ERROR_NONE:
   1161 		fatal = false;
   1162 		break;
   1163 	case ACPI_HEST_GEN_ERROR_FATAL:
   1164 	case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
   1165 	default:
   1166 		fatal = true;
   1167 		break;
   1168 	}
   1169 
   1170 	/*
   1171 	 * Clear the bits we know about to warn if there's anything
   1172 	 * left we don't understand.
   1173 	 */
   1174 	unknownstatus = status;
   1175 	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
   1176 	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
   1177 	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
   1178 	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
   1179 	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
   1180 	if (ratelimitok && unknownstatus != 0) {
   1181 		/* XXX dtrace */
   1182 		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
   1183 		    " 0x%"PRIx32"\n", ctx, unknownstatus);
   1184 	}
   1185 
   1186 	/*
   1187 	 * Advance past the Generic Error Status Block (GESB) header to
   1188 	 * the Generic Error Data Entries (GEDEs).
   1189 	 */
   1190 	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
   1191 
   1192 	/*
   1193 	 * Verify that the data length (GEDEs) fits within the size.
   1194 	 * If not, truncate the GEDEs.
   1195 	 */
   1196 	datalen = gesb->DataLength;
   1197 	if (size < datalen) {
   1198 		if (ratelimitok) {
   1199 			device_printf(sc->sc_dev, "%s:"
   1200 			    " GESB DataLength exceeds bounds:"
   1201 			    " %zu < %"PRIu32"\n",
   1202 			    ctx, size, datalen);
   1203 		}
   1204 		datalen = size;
   1205 	}
   1206 	size -= datalen;
   1207 
   1208 	/*
   1209 	 * Report each of the Generic Error Data Entries.
   1210 	 */
   1211 	for (i = 0; i < nentries; i++) {
   1212 		size_t headerlen;
   1213 		const struct apei_cper_report *report;
   1214 		char subctx[128];
   1215 
   1216 		/*
   1217 		 * Format a subcontext to show this numbered entry of
   1218 		 * the GESB.
   1219 		 */
   1220 		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
   1221 
   1222 		/*
   1223 		 * If the remaining GESB data length isn't enough for a
   1224 		 * GEDE header, stop here.
   1225 		 */
   1226 		if (datalen < sizeof(*gede)) {
   1227 			if (ratelimitok) {
   1228 				device_printf(sc->sc_dev, "%s:"
   1229 				    " truncated GEDE: %"PRIu32" < %zu bytes\n",
   1230 				    subctx, datalen, sizeof(*gede));
   1231 			}
   1232 			break;
   1233 		}
   1234 
   1235 		/*
   1236 		 * Print the GEDE header and get the full length (may
   1237 		 * vary from revision to revision of the GEDE) and the
   1238 		 * CPER report function if possible.
   1239 		 */
   1240 		apei_gede_report_header(sc, gede, subctx, ratelimitok,
   1241 		    &headerlen, &report);
   1242 
   1243 		/*
   1244 		 * If we don't know the header length because of an
   1245 		 * unfamiliar revision, stop here.
   1246 		 */
   1247 		if (headerlen == 0) {
   1248 			if (ratelimitok) {
   1249 				device_printf(sc->sc_dev, "%s:"
   1250 				    " unknown revision: 0x%"PRIx16"\n",
   1251 				    subctx, gede->Revision);
   1252 			}
   1253 			break;
   1254 		}
   1255 
   1256 		/*
   1257 		 * Stop here if what we mapped is too small for the
   1258 		 * error data length.
   1259 		 */
   1260 		datalen -= headerlen;
   1261 		if (datalen < gede->ErrorDataLength) {
   1262 			if (ratelimitok) {
   1263 				device_printf(sc->sc_dev, "%s:"
   1264 				    " truncated GEDE payload:"
   1265 				    " %"PRIu32" < %"PRIu32" bytes\n",
   1266 				    subctx, datalen, gede->ErrorDataLength);
   1267 			}
   1268 			break;
   1269 		}
   1270 
   1271 		/*
   1272 		 * Report the Common Platform Error Record appendix to
   1273 		 * this Generic Error Data Entry.
   1274 		 */
   1275 		if (report == NULL) {
   1276 			if (ratelimitok) {
   1277 				device_printf(sc->sc_dev, "%s:"
   1278 				    " [unknown type]\n", ctx);
   1279 			}
   1280 		} else {
   1281 			/* XXX pass ratelimit through */
   1282 			(*report->func)(sc, (const char *)gede + headerlen,
   1283 			    gede->ErrorDataLength, subctx, ratelimitok);
   1284 		}
   1285 
   1286 		/*
   1287 		 * Advance past the GEDE header and CPER data to the
   1288 		 * next GEDE.
   1289 		 */
   1290 		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
   1291 		    + headerlen + gede->ErrorDataLength);
   1292 	}
   1293 
   1294 	/*
   1295 	 * Advance past the Generic Error Data Entries (GEDEs) to the
   1296 	 * raw error data.
   1297 	 *
   1298 	 * XXX Provide Max Raw Data Length as a parameter, as found in
   1299 	 * various HEST entry types.
   1300 	 */
   1301 	rawdata = (const unsigned char *)gede0 + datalen;
   1302 
   1303 	/*
   1304 	 * Verify that the raw data length fits within the size.  If
   1305 	 * not, truncate the raw data.
   1306 	 */
   1307 	rawdatalen = gesb->RawDataLength;
   1308 	if (size < rawdatalen) {
   1309 		if (ratelimitok) {
   1310 			device_printf(sc->sc_dev, "%s:"
   1311 			    " GESB RawDataLength exceeds bounds:"
   1312 			    " %zu < %"PRIu32"\n",
   1313 			    ctx, size, rawdatalen);
   1314 		}
   1315 		rawdatalen = size;
   1316 	}
   1317 	size -= rawdatalen;
   1318 
   1319 	/*
   1320 	 * Hexdump the raw data, if any.
   1321 	 */
   1322 	if (ratelimitok && rawdatalen > 0) {
   1323 		char devctx[128];
   1324 
   1325 		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
   1326 		    device_xname(sc->sc_dev), ctx);
   1327 		hexdump(printf, devctx, rawdata, rawdatalen);
   1328 	}
   1329 
   1330 	/*
   1331 	 * If there's anything left after the raw data, warn.
   1332 	 */
   1333 	if (ratelimitok && size > 0) {
   1334 		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
   1335 		    ctx, size);
   1336 	}
   1337 
   1338 	/*
   1339 	 * Return the status so the caller can ack it, and tell the
   1340 	 * caller whether this error is fatal.
   1341 	 */
   1342 out:	*fatalp = fatal;
   1343 	return status;
   1344 }
   1345 
/*
 * Declare the apei kernel module, of class MODULE_CLASS_DRIVER.  The
 * final NULL is presumably the list of required modules, i.e. none --
 * see module(9) to confirm.
 */
MODULE(MODULE_CLASS_DRIVER, apei, NULL);

/*
 * Only when _MODULE is defined, pull in ioconf.c.  It presumably
 * supplies the cfdriver_ioconf_apei, cfattach_ioconf_apei, and
 * cfdata_ioconf_apei tables that apei_modcmd registers; its contents
 * are not visible from this file.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
   1351 
   1352 static int
   1353 apei_modcmd(modcmd_t cmd, void *opaque)
   1354 {
   1355 	int error = 0;
   1356 
   1357 	switch (cmd) {
   1358 	case MODULE_CMD_INIT:
   1359 #ifdef _MODULE
   1360 		error = config_init_component(cfdriver_ioconf_apei,
   1361 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
   1362 #endif
   1363 		return error;
   1364 	case MODULE_CMD_FINI:
   1365 #ifdef _MODULE
   1366 		error = config_fini_component(cfdriver_ioconf_apei,
   1367 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
   1368 #endif
   1369 		return error;
   1370 	default:
   1371 		return ENOTTY;
   1372 	}
   1373 }
   1374