Home | History | Annotate | Line # | Download | only in acpi
apei.c revision 1.6
      1 /*	$NetBSD: apei.c,v 1.6 2024/10/27 12:14:07 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2024 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * APEI: ACPI Platform Error Interface
     31  *
     32  * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
     33  *
     34  * XXX dtrace probes
     35  *
     36  * XXX call _OSC appropriately to announce to the platform that we, the
     37  * OSPM, support APEI
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.6 2024/10/27 12:14:07 riastradh Exp $");
     42 
     43 #include <sys/param.h>
     44 #include <sys/types.h>
     45 
     46 #include <sys/atomic.h>
     47 #include <sys/device.h>
     48 #include <sys/module.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/uuid.h>
     51 
     52 #include <dev/acpi/acpireg.h>
     53 #include <dev/acpi/acpivar.h>
     54 #include <dev/acpi/apei_bertvar.h>
     55 #include <dev/acpi/apei_cper.h>
     56 #include <dev/acpi/apei_einjvar.h>
     57 #include <dev/acpi/apei_erstvar.h>
     58 #include <dev/acpi/apei_hestvar.h>
     59 #include <dev/acpi/apei_interp.h>
     60 #include <dev/acpi/apeivar.h>
     61 
/*
 * ACPICA per-file identification.
 * NOTE(review): presumably consumed by ACPICA's debug-output macros --
 * confirm against acpica headers.
 */
#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
ACPI_MODULE_NAME	("apei")

/* autoconf(9) entry points, wired up by CFATTACH_DECL_NEW below. */
static int apei_match(device_t, cfdata_t, void *);
static void apei_attach(device_t, device_t, void *);
static int apei_detach(device_t, int);

/* Acquire/release references to the BERT, EINJ, ERST, and HEST tables. */
static void apei_get_tables(struct apei_tab *);
static void apei_put_tables(struct apei_tab *);

/* Print one table's OEM and ASL compiler identification to dmesg. */
static void apei_identify(struct apei_softc *, const char *,
    const ACPI_TABLE_HEADER *);

/*
 * Attachment glue for the apei driver: softc size plus the match,
 * attach, and detach routines.  The final NULL slot is left unused.
 */
CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
    apei_match, apei_attach, apei_detach, NULL);
     77 
     78 static int
     79 apei_match(device_t parent, cfdata_t match, void *aux)
     80 {
     81 	struct apei_tab tab;
     82 	int prio = 0;
     83 
     84 	/*
     85 	 * If we have any of the APEI tables, match.
     86 	 */
     87 	apei_get_tables(&tab);
     88 	if (tab.bert || tab.einj || tab.erst || tab.hest)
     89 		prio = 1;
     90 	apei_put_tables(&tab);
     91 
     92 	return prio;
     93 }
     94 
     95 static void
     96 apei_attach(device_t parent, device_t self, void *aux)
     97 {
     98 	struct apei_softc *sc = device_private(self);
     99 	const struct sysctlnode *sysctl_hw_acpi;
    100 	int error;
    101 
    102 	aprint_naive("\n");
    103 	aprint_normal(": ACPI Platform Error Interface\n");
    104 
    105 	pmf_device_register(self, NULL, NULL);
    106 
    107 	sc->sc_dev = self;
    108 	apei_get_tables(&sc->sc_tab);
    109 
    110 	/*
    111 	 * Get the sysctl hw.acpi node.  This should already be created
    112 	 * but I don't see an easy way to get at it.  If this fails,
    113 	 * something is seriously wrong, so let's stop here.
    114 	 */
    115 	error = sysctl_createv(&sc->sc_sysctllog, 0,
    116 	    NULL, &sysctl_hw_acpi, 0,
    117 	    CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
    118 	    CTL_HW, CTL_CREATE, CTL_EOL);
    119 	if (error) {
    120 		aprint_error_dev(sc->sc_dev,
    121 		    "failed to create sysctl hw.acpi: %d\n", error);
    122 		return;
    123 	}
    124 
    125 	/*
    126 	 * Create sysctl hw.acpi.apei.
    127 	 */
    128 	error = sysctl_createv(&sc->sc_sysctllog, 0,
    129 	    &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
    130 	    CTLTYPE_NODE, "apei",
    131 	    SYSCTL_DESCR("ACPI Platform Error Interface"),
    132 	    NULL, 0, NULL, 0,
    133 	    CTL_CREATE, CTL_EOL);
    134 	if (error) {
    135 		aprint_error_dev(sc->sc_dev,
    136 		    "failed to create sysctl hw.acpi.apei: %d\n", error);
    137 		return;
    138 	}
    139 
    140 	/*
    141 	 * Set up BERT, EINJ, ERST, and HEST.
    142 	 */
    143 	if (sc->sc_tab.bert) {
    144 		apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
    145 		apei_bert_attach(sc);
    146 	}
    147 	if (sc->sc_tab.einj) {
    148 		apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
    149 		apei_einj_attach(sc);
    150 	}
    151 	if (sc->sc_tab.erst) {
    152 		apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
    153 		apei_erst_attach(sc);
    154 	}
    155 	if (sc->sc_tab.hest) {
    156 		apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
    157 		apei_hest_attach(sc);
    158 	}
    159 }
    160 
    161 static int
    162 apei_detach(device_t self, int flags)
    163 {
    164 	struct apei_softc *sc = device_private(self);
    165 	int error;
    166 
    167 	/*
    168 	 * Detach children.  We don't currently have any but this is
    169 	 * harmless without children and mandatory if we ever sprouted
    170 	 * them, so let's just leave it here for good measure.
    171 	 *
    172 	 * After this point, we are committed to detaching; failure is
    173 	 * forbidden.
    174 	 */
    175 	error = config_detach_children(self, flags);
    176 	if (error)
    177 		return error;
    178 
    179 	/*
    180 	 * Tear down all the sysctl nodes first, before the software
    181 	 * state backing them goes away.
    182 	 */
    183 	sysctl_teardown(&sc->sc_sysctllog);
    184 	sc->sc_sysctlroot = NULL;
    185 
    186 	/*
    187 	 * Detach the software state for the APEI tables.
    188 	 */
    189 	if (sc->sc_tab.hest)
    190 		apei_hest_detach(sc);
    191 	if (sc->sc_tab.erst)
    192 		apei_erst_detach(sc);
    193 	if (sc->sc_tab.einj)
    194 		apei_einj_detach(sc);
    195 	if (sc->sc_tab.bert)
    196 		apei_bert_detach(sc);
    197 
    198 	/*
    199 	 * Release the APEI tables and we're done.
    200 	 */
    201 	apei_put_tables(&sc->sc_tab);
    202 	pmf_device_deregister(self);
    203 	return 0;
    204 }
    205 
    206 /*
    207  * apei_get_tables(tab)
    208  *
    209  *	Get references to whichever APEI-related tables -- BERT, EINJ,
    210  *	ERST, HEST -- are available in the system.
    211  */
    212 static void
    213 apei_get_tables(struct apei_tab *tab)
    214 {
    215 	ACPI_STATUS rv;
    216 
    217 	/*
    218 	 * Probe the BERT -- Boot Error Record Table.
    219 	 */
    220 	rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
    221 	if (ACPI_FAILURE(rv))
    222 		tab->bert = NULL;
    223 
    224 	/*
    225 	 * Probe the EINJ -- Error Injection Table.
    226 	 */
    227 	rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
    228 	if (ACPI_FAILURE(rv))
    229 		tab->einj = NULL;
    230 
    231 	/*
    232 	 * Probe the ERST -- Error Record Serialization Table.
    233 	 */
    234 	rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
    235 	if (ACPI_FAILURE(rv))
    236 		tab->erst = NULL;
    237 
    238 	/*
    239 	 * Probe the HEST -- Hardware Error Source Table.
    240 	 */
    241 	rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
    242 	if (ACPI_FAILURE(rv))
    243 		tab->hest = NULL;
    244 }
    245 
    246 /*
    247  * apei_put_tables(tab)
    248  *
    249  *	Release the tables acquired by apei_get_tables.
    250  */
    251 static void
    252 apei_put_tables(struct apei_tab *tab)
    253 {
    254 
    255 	if (tab->bert != NULL) {
    256 		AcpiPutTable(&tab->bert->Header);
    257 		tab->bert = NULL;
    258 	}
    259 	if (tab->einj != NULL) {
    260 		AcpiPutTable(&tab->einj->Header);
    261 		tab->einj = NULL;
    262 	}
    263 	if (tab->erst != NULL) {
    264 		AcpiPutTable(&tab->erst->Header);
    265 		tab->erst = NULL;
    266 	}
    267 	if (tab->hest != NULL) {
    268 		AcpiPutTable(&tab->hest->Header);
    269 		tab->hest = NULL;
    270 	}
    271 }
    272 
    273 /*
    274  * apei_identify(sc, name, header)
    275  *
    276  *	Identify the APEI-related table header for dmesg.
    277  */
    278 static void
    279 apei_identify(struct apei_softc *sc, const char *name,
    280     const ACPI_TABLE_HEADER *h)
    281 {
    282 
    283 	aprint_normal_dev(sc->sc_dev, "%s:"
    284 	    " OemId <%6.6s,%8.8s,%08x>"
    285 	    " AslId <%4.4s,%08x>\n",
    286 	    name,
    287 	    h->OemId, h->OemTableId, h->OemRevision,
    288 	    h->AslCompilerId, h->AslCompilerRevision);
    289 }
    290 
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Decode a Common Platform Error Record UUID/GUID from an ACPI
 *	table at buf into a sys/uuid.h struct uuid.
 *
 *	buf must point to at least 16 bytes.  The decoding is delegated
 *	entirely to uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
    303 
    304 /*
    305  * apei_format_guid(uuid, s)
    306  *
    307  *	Format a UUID as a string.  This uses C initializer notation,
    308  *	not UUID notation, in order to match the text in the UEFI
    309  *	specification.
    310  */
    311 static void
    312 apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
    313 {
    314 
    315 	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
    316 	    "{0x%02x,%02x,"
    317 	    "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
    318 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
    319 	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
    320 	    uuid->node[0], uuid->node[1], uuid->node[2],
    321 	    uuid->node[3], uuid->node[4], uuid->node[5]);
    322 }
    323 
/*
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Symbolic names for the Memory Error Type field, indexed by its
 * numeric value.  The table is generated from CPER_MEMORY_ERROR_TYPES:
 * each F(LN, SN, V) item yields the entry [LN] = "SN".  Indices with
 * no entry are NULL, which apei_cper_memory_error_report checks for.
 */

static const char *const cper_memory_error_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_MEMORY_ERROR_TYPES(F)
#undef	F
};
    333 
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this
 * but are designated as being intended for Generic Error Data Entries
 * rather than Generic Error Status Blocks.
 *
 * Symbolic names for the ErrorSeverity of a Generic Error Status
 * Block, indexed by its numeric value; used by apei_gesb_report when
 * the severity is in range.
 */
static const char *const apei_gesb_severity[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "none",
};
    347 
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
 *
 * Symbolic names for the ErrorSeverity of a Generic Error Data Entry,
 * indexed by the acpica ACPI_HEST_GEN_ERROR_* constants; used by
 * apei_gede_report_header, which prints "unknown" for out-of-range
 * values.
 */
static const char *const apei_gede_severity[] = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
	[ACPI_HEST_GEN_ERROR_NONE] = "none",
};
    357 
/*
 * N.2.5. Memory Error Section
 *
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Section type GUID identifying a Memory Error Section, in decoded
 * struct uuid form.  It is referenced from the apei_cper_reports
 * table, whose entries are compared against the decoded SectionType
 * of each Generic Error Data Entry.
 */
static const struct uuid CPER_MEMORY_ERROR_SECTION =
    {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
    365 
    366 static void
    367 apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
    368     size_t len, const char *ctx, bool ratelimitok)
    369 {
    370 	const struct cper_memory_error *ME = buf;
    371 	char bitbuf[1024];
    372 
    373 	/*
    374 	 * If we've hit the rate limit, skip printing the error.
    375 	 */
    376 	if (!ratelimitok)
    377 		goto out;
    378 
    379 	snprintb(bitbuf, sizeof(bitbuf),
    380 	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
    381 	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
    382 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
    383 		/*
    384 		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
    385 		 */
    386 		/* XXX define this format somewhere */
    387 		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
    388 		    "f\010\010"	"ErrorType\0"
    389 			"=\001"		"ERR_INTERNAL\0"
    390 			"=\004"		"ERR_MEM\0"
    391 			"=\005"		"ERR_TLB\0"
    392 			"=\006"		"ERR_CACHE\0"
    393 			"=\007"		"ERR_FUNCTION\0"
    394 			"=\010"		"ERR_SELFTEST\0"
    395 			"=\011"		"ERR_FLOW\0"
    396 			"=\020"		"ERR_BUS\0"
    397 			"=\021"		"ERR_MAP\0"
    398 			"=\022"		"ERR_IMPROPER\0"
    399 			"=\023"		"ERR_UNIMPL\0"
    400 			"=\024"		"ERR_LOL\0"
    401 			"=\025"		"ERR_RESPONSE\0"
    402 			"=\026"		"ERR_PARITY\0"
    403 			"=\027"		"ERR_PROTOCOL\0"
    404 			"=\030"		"ERR_ERROR\0"
    405 			"=\031"		"ERR_TIMEOUT\0"
    406 			"=\032"		"ERR_POISONED\0"
    407 		    "b\020"	"AddressError\0"
    408 		    "b\021"	"ControlError\0"
    409 		    "b\022"	"DataError\0"
    410 		    "b\023"	"ResponderDetected\0"
    411 		    "b\024"	"RequesterDetected\0"
    412 		    "b\025"	"FirstError\0"
    413 		    "b\026"	"Overflow\0"
    414 		    "\0", ME->ErrorStatus);
    415 		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
    416 	}
    417 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
    418 		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
    419 		    ctx, ME->PhysicalAddress);
    420 	}
    421 	if (ME->ValidationBits &
    422 	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
    423 		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
    424 		    "\n", ctx, ME->PhysicalAddressMask);
    425 	}
    426 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
    427 		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
    428 		    ME->Node);
    429 	}
    430 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
    431 		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
    432 		    ME->Card);
    433 	}
    434 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
    435 		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
    436 		    ME->Module);
    437 	}
    438 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
    439 		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
    440 		    ME->Bank);
    441 	}
    442 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
    443 		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
    444 		    ME->Device);
    445 	}
    446 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
    447 		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
    448 		    ME->Row);
    449 	}
    450 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
    451 		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
    452 		    ME->Column);
    453 	}
    454 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
    455 		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
    456 		    ctx, ME->BitPosition);
    457 	}
    458 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
    459 		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
    460 		    ctx, ME->RequestorId);
    461 	}
    462 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
    463 		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
    464 		    ctx, ME->ResponderId);
    465 	}
    466 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
    467 		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
    468 		    ctx, ME->TargetId);
    469 	}
    470 	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
    471 		const uint8_t t = ME->MemoryErrorType;
    472 		const char *n = t < __arraycount(cper_memory_error_type)
    473 		    ? cper_memory_error_type[t] : NULL;
    474 
    475 		if (n) {
    476 			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
    477 			    " (%s)\n", ctx, t, n);
    478 		} else {
    479 			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
    480 			    ctx, t);
    481 		}
    482 	}
    483 
    484 out:	/*
    485 	 * XXX pass this through to uvm(9) or userland for decisions
    486 	 * like page retirement
    487 	 */
    488 	return;
    489 }
    490 
/*
 * apei_cper_reports
 *
 *	Table of known Common Platform Error Record types, symbolic
 *	names, minimum data lengths, and functions to report them.
 *
 *	The section types and corresponding section layouts are listed
 *	at:
 *
 *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
 *
 *	apei_gede_report_header searches this table by section type
 *	and hands the matching entry back to its caller.
 */
static const struct apei_cper_report {
	const char *name;		/* printed as "(<name> error)" */
	const struct uuid *type;	/* decoded section type GUID */
	size_t minlength;		/* minimum section data length */
	/* report function: (sc, section data, length, ctx, ratelimitok) */
	void (*func)(struct apei_softc *, const void *, size_t, const char *,
	    bool);
} apei_cper_reports[] = {
	{ "memory", &CPER_MEMORY_ERROR_SECTION,
	  sizeof(struct cper_memory_error),
	  apei_cper_memory_error_report },
};
    513 
    514 /*
    515  * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
    516  *
    517  *	Report the header of the ith Generic Error Data Entry in the
    518  *	given context, if ratelimitok is true.
    519  *
    520  *	Return the actual length of the header in headerlen, or 0 if
    521  *	not known because the revision isn't recognized.
    522  *
    523  *	Return the report type in report, or NULL if not known because
    524  *	the section type isn't recognized.
    525  */
    526 static void
    527 apei_gede_report_header(struct apei_softc *sc,
    528     const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
    529     size_t *headerlenp, const struct apei_cper_report **reportp)
    530 {
    531 	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
    532 	struct uuid sectype;
    533 	char guidstr[69];
    534 	char buf[128];
    535 	unsigned i;
    536 
    537 	/*
    538 	 * Print the section type as a C initializer.  It would be
    539 	 * prettier to use standard hyphenated UUID notation, but that
    540 	 * notation is slightly ambiguous here (two octets could be
    541 	 * written either way, depending on Microsoft convention --
    542 	 * which influenced ACPI and UEFI -- or internet convention),
    543 	 * and the UEFI spec writes the C initializer notation, so this
    544 	 * makes it easier to search for.
    545 	 *
    546 	 * Also print out a symbolic name, if we know it.
    547 	 */
    548 	apei_cper_guid_dec(gede->SectionType, &sectype);
    549 	apei_format_guid(&sectype, guidstr);
    550 	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
    551 		const struct apei_cper_report *const report =
    552 		    &apei_cper_reports[i];
    553 
    554 		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
    555 			continue;
    556 		if (ratelimitok) {
    557 			device_printf(sc->sc_dev, "%s:"
    558 			    " SectionType=%s (%s error)\n",
    559 			    ctx, guidstr, report->name);
    560 		}
    561 		*reportp = report;
    562 		break;
    563 	}
    564 	if (i == __arraycount(apei_cper_reports)) {
    565 		if (ratelimitok) {
    566 			device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
    567 			    guidstr);
    568 		}
    569 		*reportp = NULL;
    570 	}
    571 
    572 	/*
    573 	 * Print the numeric severity and, if we have it, a symbolic
    574 	 * name for it.
    575 	 */
    576 	if (ratelimitok) {
    577 		device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
    578 		    ctx,
    579 		    gede->ErrorSeverity,
    580 		    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
    581 			? apei_gede_severity[gede->ErrorSeverity]
    582 			: "unknown"));
    583 	}
    584 
    585 	/*
    586 	 * The Revision may not often be useful, but this is only ever
    587 	 * shown at the time of a hardware error report, not something
    588 	 * you can glean at your convenience with acpidump.  So print
    589 	 * it anyway.
    590 	 */
    591 	if (ratelimitok) {
    592 		device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
    593 		    gede->Revision);
    594 	}
    595 
    596 	/*
    597 	 * Don't touch anything past the Revision until we've
    598 	 * determined we understand it.  Return the header length to
    599 	 * the caller, or return zero -- and stop here -- if we don't
    600 	 * know what the actual header length is.
    601 	 */
    602 	if (gede->Revision < 0x0300) {
    603 		*headerlenp = sizeof(*gede);
    604 	} else if (gede->Revision < 0x0400) {
    605 		*headerlenp = sizeof(*gede_v3);
    606 	} else {
    607 		*headerlenp = 0;
    608 		return;
    609 	}
    610 
    611 	/*
    612 	 * Print the validation bits at debug level.  Only really
    613 	 * helpful if there are bits we _don't_ know about.
    614 	 */
    615 	if (ratelimitok) {
    616 		/* XXX define this format somewhere */
    617 		snprintb(buf, sizeof(buf), "\177\020"
    618 		    "b\000"	"FRU_ID\0"
    619 		    "b\001"	"FRU_TEXT\0" /* `FRU string', sometimes */
    620 		    "b\002"	"TIMESTAMP\0"
    621 		    "\0", gede->ValidationBits);
    622 		aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
    623 		    buf);
    624 	}
    625 
    626 	/*
    627 	 * Print the CPER section flags.
    628 	 */
    629 	if (ratelimitok) {
    630 		snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
    631 		    gede->Flags);
    632 		device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
    633 	}
    634 
    635 	/*
    636 	 * The ErrorDataLength is unlikely to be useful for the log, so
    637 	 * print it at debug level only.
    638 	 */
    639 	if (ratelimitok) {
    640 		aprint_debug_dev(sc->sc_dev, "%s:"
    641 		    " ErrorDataLength=0x%"PRIu32"\n",
    642 		    ctx, gede->ErrorDataLength);
    643 	}
    644 
    645 	/*
    646 	 * Print the FRU Id and text, if available.
    647 	 */
    648 	if (ratelimitok &&
    649 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
    650 		struct uuid fruid;
    651 
    652 		apei_cper_guid_dec(gede->FruId, &fruid);
    653 		apei_format_guid(&fruid, guidstr);
    654 		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
    655 	}
    656 	if (ratelimitok &&
    657 	    (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
    658 		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
    659 		    ctx, gede->FruText);
    660 	}
    661 
    662 	/*
    663 	 * Print the timestamp, if available by the revision number and
    664 	 * the validation bits.
    665 	 */
    666 	if (ratelimitok &&
    667 	    gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
    668 	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
    669 		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
    670 		const uint8_t s = t[0];
    671 		const uint8_t m = t[1];
    672 		const uint8_t h = t[2];
    673 		const uint8_t f = t[3];
    674 		const uint8_t D = t[4];
    675 		const uint8_t M = t[5];
    676 		const uint8_t Y = t[6];
    677 		const uint8_t C = t[7];
    678 
    679 		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
    680 		    " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
    681 		    ctx, gede_v3->TimeStamp,
    682 		    C,Y, M, D, h,m,s,
    683 		    f & __BIT(0) ? " (event time)" : " (collect time)");
    684 	}
    685 }
    686 
/*
 * apei_gesb_ratelimit
 *
 *	State to limit the rate of console log messages about hardware
 *	errors.  For each of the four severity levels in a Generic
 *	Error Status Block,
 *
 *	0 - Recoverable (uncorrectable),
 *	1 - Fatal (uncorrectable),
 *	2 - Corrected, and
 *	3 - None (including ill-formed errors),
 *
 *	we record the last time it happened, protected by a CPU simple
 *	lock that we only try-acquire so it is safe to use in any
 *	context, including non-maskable interrupt context.
 *
 *	We also count, per severity, the reports that were suppressed
 *	since the last one printed; see apei_gesb_ratecheck.
 */

static struct {
	/* try-acquired around the ratecheck(9) call on lasttime */
	__cpu_simple_lock_t	lock;
	/* ratecheck(9) state: when a report was last allowed */
	struct timeval		lasttime;
	/* suppressed reports; updated with atomics, without the lock */
	volatile uint32_t	suppressed;
} __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
	[ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
};
    714 
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX instead of wrapping around to zero.  Retries the
 *	compare-and-swap until it succeeds or the counter is found to
 *	be saturated already.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		/*
		 * Saturation limit is that of the counter's own type,
		 * matching the UINT32_MAX test in apei_gesb_ratecheck.
		 */
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
    727 
    728 /*
    729  * apei_gesb_ratecheck(sc, severity, suppressed)
    730  *
    731  *	Check for a rate limit on errors of the specified severity.
    732  *
    733  *	=> Return true if the error should be printed, and format into
    734  *	   the buffer suppressed a message saying how many errors were
    735  *	   previously suppressed.
    736  *
    737  *	=> Return false if the error should be suppressed because the
    738  *	   last one printed was too recent.
    739  */
    740 static bool
    741 apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
    742     char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
    743 {
    744 	/* one of each type per minute (XXX worth making configurable?) */
    745 	const struct timeval mininterval = {60, 0};
    746 	unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
    747 	bool ok = false;
    748 
    749 	/*
    750 	 * If the lock is contended, the rate limit is probably
    751 	 * exceeded, so it's not OK to print.
    752 	 *
    753 	 * Otherwise, with the lock held, ask ratecheck(9) whether it's
    754 	 * OK to print.
    755 	 */
    756 	if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
    757 		goto out;
    758 	ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
    759 	__cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
    760 
    761 out:	/*
    762 	 * If it's OK to print, report the number of errors that were
    763 	 * suppressed.  If it's not OK to print, count a suppressed
    764 	 * error.
    765 	 */
    766 	if (ok) {
    767 		const uint32_t n =
    768 		    atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
    769 
    770 		if (n == 0) {
    771 			suppressed[0] = '\0';
    772 		} else {
    773 			snprintf(suppressed,
    774 			    sizeof(" (4294967295 or more errors suppressed)"),
    775 			    " (%u%s error%s suppressed)",
    776 			    n,
    777 			    n == UINT32_MAX ? " or more" : "",
    778 			    n == 1 ? "" : "s");
    779 		}
    780 	} else {
    781 		atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
    782 		suppressed[0] = '\0';
    783 	}
    784 	return ok;
    785 }
    786 
    787 /*
    788  * apei_gesb_report(sc, gesb, size, ctx)
    789  *
    790  *	Check a Generic Error Status Block, of at most the specified
    791  *	size in bytes, and report any errors in it.  Return the 32-bit
    792  *	Block Status in case the caller needs it to acknowledge the
    793  *	report to firmware.
    794  */
    795 uint32_t
    796 apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
    797     size_t size, const char *ctx, bool *fatalp)
    798 {
    799 	uint32_t status, unknownstatus, severity, nentries, i;
    800 	uint32_t datalen, rawdatalen;
    801 	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
    802 	const unsigned char *rawdata;
    803 	bool ratelimitok = false;
    804 	char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
    805 	bool fatal = false;
    806 
    807 	/*
    808 	 * Verify the buffer is large enough for a Generic Error Status
    809 	 * Block before we try to touch anything in it.
    810 	 */
    811 	if (size < sizeof(*gesb)) {
    812 		ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
    813 		    suppressed);
    814 		if (ratelimitok) {
    815 			device_printf(sc->sc_dev,
    816 			    "%s: truncated GESB, %zu < %zu%s\n",
    817 			    ctx, size, sizeof(*gesb), suppressed);
    818 		}
    819 		status = 0;
    820 		goto out;
    821 	}
    822 	size -= sizeof(*gesb);
    823 
    824 	/*
    825 	 * Load the status.  Access ordering rules are unclear in the
    826 	 * ACPI specification; I'm guessing that load-acquire of the
    827 	 * block status is a good idea before any other access to the
    828 	 * GESB.
    829 	 */
    830 	status = atomic_load_acquire(&gesb->BlockStatus);
    831 
    832 	/*
    833 	 * If there are no status bits set, the rest of the GESB is
    834 	 * garbage, so stop here.
    835 	 */
    836 	if (status == 0) {
    837 		/* XXX dtrace */
    838 		/* XXX DPRINTF */
    839 		goto out;
    840 	}
    841 
    842 	/*
    843 	 * Read out the severity and get the number of entries in this
    844 	 * status block.
    845 	 */
    846 	severity = gesb->ErrorSeverity;
    847 	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
    848 
    849 	/*
    850 	 * Print a message to the console and dmesg about the severity
    851 	 * of the error.
    852 	 */
    853 	ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
    854 	if (ratelimitok) {
    855 		char statusbuf[128];
    856 
    857 		/* XXX define this format somewhere */
    858 		snprintb(statusbuf, sizeof(statusbuf), "\177\020"
    859 		    "b\000"	"UE\0"
    860 		    "b\001"	"CE\0"
    861 		    "b\002"	"MULTI_UE\0"
    862 		    "b\003"	"MULTI_CE\0"
    863 		    "f\004\010"	"GEDE_COUNT\0"
    864 		    "\0", status);
    865 
    866 		if (severity < __arraycount(apei_gesb_severity)) {
    867 			device_printf(sc->sc_dev, "%s"
    868 			    " reported hardware error%s:"
    869 			    " severity=%s nentries=%u status=%s\n",
    870 			    ctx, suppressed,
    871 			    apei_gesb_severity[severity], nentries, statusbuf);
    872 		} else {
    873 			device_printf(sc->sc_dev, "%s reported error%s:"
    874 			    " severity=%"PRIu32" nentries=%u status=%s\n",
    875 			    ctx, suppressed,
    876 			    severity, nentries, statusbuf);
    877 		}
    878 	}
    879 
    880 	/*
    881 	 * Make a determination about whether the error is fatal.
    882 	 *
    883 	 * XXX Currently we don't have any mechanism to recover from
    884 	 * uncorrectable but recoverable errors, so we treat those --
    885 	 * and anything else we don't recognize -- as fatal.
    886 	 */
    887 	switch (severity) {
    888 	case ACPI_HEST_GEN_ERROR_CORRECTED:
    889 	case ACPI_HEST_GEN_ERROR_NONE:
    890 		fatal = false;
    891 		break;
    892 	case ACPI_HEST_GEN_ERROR_FATAL:
    893 	case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
    894 	default:
    895 		fatal = true;
    896 		break;
    897 	}
    898 
    899 	/*
    900 	 * Clear the bits we know about to warn if there's anything
    901 	 * left we don't understand.
    902 	 */
    903 	unknownstatus = status;
    904 	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
    905 	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
    906 	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
    907 	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
    908 	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
    909 	if (ratelimitok && unknownstatus != 0) {
    910 		/* XXX dtrace */
    911 		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
    912 		    " 0x%"PRIx32"\n", ctx, unknownstatus);
    913 	}
    914 
    915 	/*
    916 	 * Advance past the Generic Error Status Block (GESB) header to
    917 	 * the Generic Error Data Entries (GEDEs).
    918 	 */
    919 	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
    920 
    921 	/*
    922 	 * Verify that the data length (GEDEs) fits within the size.
    923 	 * If not, truncate the GEDEs.
    924 	 */
    925 	datalen = gesb->DataLength;
    926 	if (size < datalen) {
    927 		if (ratelimitok) {
    928 			device_printf(sc->sc_dev, "%s:"
    929 			    " GESB DataLength exceeds bounds:"
    930 			    " %zu < %"PRIu32"\n",
    931 			    ctx, size, datalen);
    932 		}
    933 		datalen = size;
    934 	}
    935 	size -= datalen;
    936 
    937 	/*
    938 	 * Report each of the Generic Error Data Entries.
    939 	 */
    940 	for (i = 0; i < nentries; i++) {
    941 		size_t headerlen;
    942 		const struct apei_cper_report *report;
    943 		char subctx[128];
    944 
    945 		/*
    946 		 * Format a subcontext to show this numbered entry of
    947 		 * the GESB.
    948 		 */
    949 		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
    950 
    951 		/*
    952 		 * If the remaining GESB data length isn't enough for a
    953 		 * GEDE header, stop here.
    954 		 */
    955 		if (datalen < sizeof(*gede)) {
    956 			if (ratelimitok) {
    957 				device_printf(sc->sc_dev, "%s:"
    958 				    " truncated GEDE: %"PRIu32" < %zu bytes\n",
    959 				    subctx, datalen, sizeof(*gede));
    960 			}
    961 			break;
    962 		}
    963 
    964 		/*
    965 		 * Print the GEDE header and get the full length (may
    966 		 * vary from revision to revision of the GEDE) and the
    967 		 * CPER report function if possible.
    968 		 */
    969 		apei_gede_report_header(sc, gede, subctx, ratelimitok,
    970 		    &headerlen, &report);
    971 
    972 		/*
    973 		 * If we don't know the header length because of an
    974 		 * unfamiliar revision, stop here.
    975 		 */
    976 		if (headerlen == 0) {
    977 			if (ratelimitok) {
    978 				device_printf(sc->sc_dev, "%s:"
    979 				    " unknown revision: 0x%"PRIx16"\n",
    980 				    subctx, gede->Revision);
    981 			}
    982 			break;
    983 		}
    984 
    985 		/*
    986 		 * Stop here if what we mapped is too small for the
    987 		 * error data length.
    988 		 */
    989 		datalen -= headerlen;
    990 		if (datalen < gede->ErrorDataLength) {
    991 			if (ratelimitok) {
    992 				device_printf(sc->sc_dev, "%s:"
    993 				    " truncated GEDE payload:"
    994 				    " %"PRIu32" < %"PRIu32" bytes\n",
    995 				    subctx, datalen, gede->ErrorDataLength);
    996 			}
    997 			break;
    998 		}
    999 
   1000 		/*
   1001 		 * Report the Common Platform Error Record appendix to
   1002 		 * this Generic Error Data Entry.
   1003 		 */
   1004 		if (report == NULL) {
   1005 			if (ratelimitok) {
   1006 				device_printf(sc->sc_dev, "%s:"
   1007 				    " [unknown type]\n", ctx);
   1008 			}
   1009 		} else {
   1010 			/* XXX pass ratelimit through */
   1011 			(*report->func)(sc, (const char *)gede + headerlen,
   1012 			    gede->ErrorDataLength, subctx, ratelimitok);
   1013 		}
   1014 
   1015 		/*
   1016 		 * Advance past the GEDE header and CPER data to the
   1017 		 * next GEDE.
   1018 		 */
   1019 		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
   1020 		    + headerlen + gede->ErrorDataLength);
   1021 	}
   1022 
   1023 	/*
   1024 	 * Advance past the Generic Error Data Entries (GEDEs) to the
   1025 	 * raw error data.
   1026 	 *
   1027 	 * XXX Provide Max Raw Data Length as a parameter, as found in
   1028 	 * various HEST entry types.
   1029 	 */
   1030 	rawdata = (const unsigned char *)gede0 + datalen;
   1031 
   1032 	/*
   1033 	 * Verify that the raw data length fits within the size.  If
   1034 	 * not, truncate the raw data.
   1035 	 */
   1036 	rawdatalen = gesb->RawDataLength;
   1037 	if (size < rawdatalen) {
   1038 		if (ratelimitok) {
   1039 			device_printf(sc->sc_dev, "%s:"
   1040 			    " GESB RawDataLength exceeds bounds:"
   1041 			    " %zu < %"PRIu32"\n",
   1042 			    ctx, size, rawdatalen);
   1043 		}
   1044 		rawdatalen = size;
   1045 	}
   1046 	size -= rawdatalen;
   1047 
   1048 	/*
   1049 	 * Hexdump the raw data, if any.
   1050 	 */
   1051 	if (ratelimitok && rawdatalen > 0) {
   1052 		char devctx[128];
   1053 
   1054 		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
   1055 		    device_xname(sc->sc_dev), ctx);
   1056 		hexdump(printf, devctx, rawdata, rawdatalen);
   1057 	}
   1058 
   1059 	/*
   1060 	 * If there's anything left after the raw data, warn.
   1061 	 */
   1062 	if (ratelimitok && size > 0) {
   1063 		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
   1064 		    ctx, size);
   1065 	}
   1066 
   1067 	/*
   1068 	 * Return the status so the caller can ack it, and tell the
   1069 	 * caller whether this error is fatal.
   1070 	 */
   1071 out:	*fatalp = fatal;
   1072 	return status;
   1073 }
   1074 
/*
 * Declare the apei kernel module: class MODULE_CLASS_DRIVER, with NULL
 * as the third (required-modules) argument -- see module(9).
 */
MODULE(MODULE_CLASS_DRIVER, apei, NULL);

/*
 * When built as a loadable module (_MODULE defined), pull in ioconf.c.
 * apei_modcmd below hands cfdriver_ioconf_apei, cfattach_ioconf_apei,
 * and cfdata_ioconf_apei to config_init_component and
 * config_fini_component under the same #ifdef.  Presumably ioconf.c is
 * where those tables are defined -- confirm against the module build.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
   1080 
   1081 static int
   1082 apei_modcmd(modcmd_t cmd, void *opaque)
   1083 {
   1084 	int error = 0;
   1085 
   1086 	switch (cmd) {
   1087 	case MODULE_CMD_INIT:
   1088 #ifdef _MODULE
   1089 		error = config_init_component(cfdriver_ioconf_apei,
   1090 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
   1091 #endif
   1092 		return error;
   1093 	case MODULE_CMD_FINI:
   1094 #ifdef _MODULE
   1095 		error = config_fini_component(cfdriver_ioconf_apei,
   1096 		    cfattach_ioconf_apei, cfdata_ioconf_apei);
   1097 #endif
   1098 		return error;
   1099 	default:
   1100 		return ENOTTY;
   1101 	}
   1102 }
   1103