/*	$NetBSD: apei.c,v 1.6 2024/10/27 12:14:07 riastradh Exp $	*/
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * Redistribution and use in source and binary forms, with or without
8 1.1 riastrad * modification, are permitted provided that the following conditions
9 1.1 riastrad * are met:
10 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
11 1.1 riastrad * notice, this list of conditions and the following disclaimer.
12 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
14 1.1 riastrad * documentation and/or other materials provided with the distribution.
15 1.1 riastrad *
16 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
27 1.1 riastrad */
28 1.1 riastrad
29 1.1 riastrad /*
30 1.1 riastrad * APEI: ACPI Platform Error Interface
31 1.1 riastrad *
32 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 1.1 riastrad *
34 1.1 riastrad * XXX dtrace probes
35 1.1 riastrad *
36 1.1 riastrad * XXX call _OSC appropriately to announce to the platform that we, the
37 1.1 riastrad * OSPM, support APEI
38 1.1 riastrad */
39 1.1 riastrad
40 1.1 riastrad #include <sys/cdefs.h>
41 1.6 riastrad __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.6 2024/10/27 12:14:07 riastradh Exp $");
42 1.1 riastrad
43 1.1 riastrad #include <sys/param.h>
44 1.1 riastrad #include <sys/types.h>
45 1.1 riastrad
46 1.1 riastrad #include <sys/atomic.h>
47 1.1 riastrad #include <sys/device.h>
48 1.1 riastrad #include <sys/module.h>
49 1.1 riastrad #include <sys/sysctl.h>
50 1.1 riastrad #include <sys/uuid.h>
51 1.1 riastrad
52 1.1 riastrad #include <dev/acpi/acpireg.h>
53 1.1 riastrad #include <dev/acpi/acpivar.h>
54 1.1 riastrad #include <dev/acpi/apei_bertvar.h>
55 1.1 riastrad #include <dev/acpi/apei_cper.h>
56 1.1 riastrad #include <dev/acpi/apei_einjvar.h>
57 1.1 riastrad #include <dev/acpi/apei_erstvar.h>
58 1.1 riastrad #include <dev/acpi/apei_hestvar.h>
59 1.1 riastrad #include <dev/acpi/apei_interp.h>
60 1.1 riastrad #include <dev/acpi/apeivar.h>
61 1.1 riastrad
62 1.1 riastrad #define _COMPONENT ACPI_RESOURCE_COMPONENT
63 1.1 riastrad ACPI_MODULE_NAME ("apei")
64 1.1 riastrad
65 1.1 riastrad static int apei_match(device_t, cfdata_t, void *);
66 1.1 riastrad static void apei_attach(device_t, device_t, void *);
67 1.1 riastrad static int apei_detach(device_t, int);
68 1.1 riastrad
69 1.1 riastrad static void apei_get_tables(struct apei_tab *);
70 1.1 riastrad static void apei_put_tables(struct apei_tab *);
71 1.1 riastrad
72 1.1 riastrad static void apei_identify(struct apei_softc *, const char *,
73 1.1 riastrad const ACPI_TABLE_HEADER *);
74 1.1 riastrad
75 1.1 riastrad CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
76 1.1 riastrad apei_match, apei_attach, apei_detach, NULL);
77 1.1 riastrad
78 1.1 riastrad static int
79 1.1 riastrad apei_match(device_t parent, cfdata_t match, void *aux)
80 1.1 riastrad {
81 1.1 riastrad struct apei_tab tab;
82 1.1 riastrad int prio = 0;
83 1.1 riastrad
84 1.1 riastrad /*
85 1.1 riastrad * If we have any of the APEI tables, match.
86 1.1 riastrad */
87 1.1 riastrad apei_get_tables(&tab);
88 1.1 riastrad if (tab.bert || tab.einj || tab.erst || tab.hest)
89 1.1 riastrad prio = 1;
90 1.1 riastrad apei_put_tables(&tab);
91 1.1 riastrad
92 1.1 riastrad return prio;
93 1.1 riastrad }
94 1.1 riastrad
95 1.1 riastrad static void
96 1.1 riastrad apei_attach(device_t parent, device_t self, void *aux)
97 1.1 riastrad {
98 1.1 riastrad struct apei_softc *sc = device_private(self);
99 1.1 riastrad const struct sysctlnode *sysctl_hw_acpi;
100 1.1 riastrad int error;
101 1.1 riastrad
102 1.1 riastrad aprint_naive("\n");
103 1.1 riastrad aprint_normal(": ACPI Platform Error Interface\n");
104 1.1 riastrad
105 1.1 riastrad pmf_device_register(self, NULL, NULL);
106 1.1 riastrad
107 1.1 riastrad sc->sc_dev = self;
108 1.1 riastrad apei_get_tables(&sc->sc_tab);
109 1.1 riastrad
110 1.1 riastrad /*
111 1.1 riastrad * Get the sysctl hw.acpi node. This should already be created
112 1.1 riastrad * but I don't see an easy way to get at it. If this fails,
113 1.1 riastrad * something is seriously wrong, so let's stop here.
114 1.1 riastrad */
115 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
116 1.1 riastrad NULL, &sysctl_hw_acpi, 0,
117 1.1 riastrad CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
118 1.1 riastrad CTL_HW, CTL_CREATE, CTL_EOL);
119 1.1 riastrad if (error) {
120 1.1 riastrad aprint_error_dev(sc->sc_dev,
121 1.1 riastrad "failed to create sysctl hw.acpi: %d\n", error);
122 1.1 riastrad return;
123 1.1 riastrad }
124 1.1 riastrad
125 1.1 riastrad /*
126 1.1 riastrad * Create sysctl hw.acpi.apei.
127 1.1 riastrad */
128 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
129 1.1 riastrad &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
130 1.1 riastrad CTLTYPE_NODE, "apei",
131 1.1 riastrad SYSCTL_DESCR("ACPI Platform Error Interface"),
132 1.1 riastrad NULL, 0, NULL, 0,
133 1.1 riastrad CTL_CREATE, CTL_EOL);
134 1.1 riastrad if (error) {
135 1.1 riastrad aprint_error_dev(sc->sc_dev,
136 1.1 riastrad "failed to create sysctl hw.acpi.apei: %d\n", error);
137 1.1 riastrad return;
138 1.1 riastrad }
139 1.1 riastrad
140 1.1 riastrad /*
141 1.1 riastrad * Set up BERT, EINJ, ERST, and HEST.
142 1.1 riastrad */
143 1.1 riastrad if (sc->sc_tab.bert) {
144 1.1 riastrad apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
145 1.1 riastrad apei_bert_attach(sc);
146 1.1 riastrad }
147 1.1 riastrad if (sc->sc_tab.einj) {
148 1.1 riastrad apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
149 1.1 riastrad apei_einj_attach(sc);
150 1.1 riastrad }
151 1.1 riastrad if (sc->sc_tab.erst) {
152 1.1 riastrad apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
153 1.1 riastrad apei_erst_attach(sc);
154 1.1 riastrad }
155 1.1 riastrad if (sc->sc_tab.hest) {
156 1.1 riastrad apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
157 1.1 riastrad apei_hest_attach(sc);
158 1.1 riastrad }
159 1.1 riastrad }
160 1.1 riastrad
161 1.1 riastrad static int
162 1.1 riastrad apei_detach(device_t self, int flags)
163 1.1 riastrad {
164 1.1 riastrad struct apei_softc *sc = device_private(self);
165 1.1 riastrad int error;
166 1.1 riastrad
167 1.1 riastrad /*
168 1.1 riastrad * Detach children. We don't currently have any but this is
169 1.1 riastrad * harmless without children and mandatory if we ever sprouted
170 1.1 riastrad * them, so let's just leave it here for good measure.
171 1.1 riastrad *
172 1.1 riastrad * After this point, we are committed to detaching; failure is
173 1.1 riastrad * forbidden.
174 1.1 riastrad */
175 1.1 riastrad error = config_detach_children(self, flags);
176 1.1 riastrad if (error)
177 1.1 riastrad return error;
178 1.1 riastrad
179 1.1 riastrad /*
180 1.1 riastrad * Tear down all the sysctl nodes first, before the software
181 1.1 riastrad * state backing them goes away.
182 1.1 riastrad */
183 1.1 riastrad sysctl_teardown(&sc->sc_sysctllog);
184 1.1 riastrad sc->sc_sysctlroot = NULL;
185 1.1 riastrad
186 1.1 riastrad /*
187 1.1 riastrad * Detach the software state for the APEI tables.
188 1.1 riastrad */
189 1.1 riastrad if (sc->sc_tab.hest)
190 1.1 riastrad apei_hest_detach(sc);
191 1.1 riastrad if (sc->sc_tab.erst)
192 1.1 riastrad apei_erst_detach(sc);
193 1.1 riastrad if (sc->sc_tab.einj)
194 1.1 riastrad apei_einj_detach(sc);
195 1.1 riastrad if (sc->sc_tab.bert)
196 1.1 riastrad apei_bert_detach(sc);
197 1.1 riastrad
198 1.1 riastrad /*
199 1.1 riastrad * Release the APEI tables and we're done.
200 1.1 riastrad */
201 1.1 riastrad apei_put_tables(&sc->sc_tab);
202 1.1 riastrad pmf_device_deregister(self);
203 1.1 riastrad return 0;
204 1.1 riastrad }
205 1.1 riastrad
206 1.1 riastrad /*
207 1.1 riastrad * apei_get_tables(tab)
208 1.1 riastrad *
209 1.1 riastrad * Get references to whichever APEI-related tables -- BERT, EINJ,
210 1.1 riastrad * ERST, HEST -- are available in the system.
211 1.1 riastrad */
212 1.1 riastrad static void
213 1.1 riastrad apei_get_tables(struct apei_tab *tab)
214 1.1 riastrad {
215 1.1 riastrad ACPI_STATUS rv;
216 1.1 riastrad
217 1.1 riastrad /*
218 1.1 riastrad * Probe the BERT -- Boot Error Record Table.
219 1.1 riastrad */
220 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
221 1.1 riastrad if (ACPI_FAILURE(rv))
222 1.1 riastrad tab->bert = NULL;
223 1.1 riastrad
224 1.1 riastrad /*
225 1.1 riastrad * Probe the EINJ -- Error Injection Table.
226 1.1 riastrad */
227 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
228 1.1 riastrad if (ACPI_FAILURE(rv))
229 1.1 riastrad tab->einj = NULL;
230 1.1 riastrad
231 1.1 riastrad /*
232 1.1 riastrad * Probe the ERST -- Error Record Serialization Table.
233 1.1 riastrad */
234 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
235 1.1 riastrad if (ACPI_FAILURE(rv))
236 1.1 riastrad tab->erst = NULL;
237 1.1 riastrad
238 1.1 riastrad /*
239 1.1 riastrad * Probe the HEST -- Hardware Error Source Table.
240 1.1 riastrad */
241 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
242 1.1 riastrad if (ACPI_FAILURE(rv))
243 1.1 riastrad tab->hest = NULL;
244 1.1 riastrad }
245 1.1 riastrad
246 1.1 riastrad /*
247 1.1 riastrad * apei_put_tables(tab)
248 1.1 riastrad *
249 1.1 riastrad * Release the tables acquired by apei_get_tables.
250 1.1 riastrad */
251 1.1 riastrad static void
252 1.1 riastrad apei_put_tables(struct apei_tab *tab)
253 1.1 riastrad {
254 1.1 riastrad
255 1.1 riastrad if (tab->bert != NULL) {
256 1.1 riastrad AcpiPutTable(&tab->bert->Header);
257 1.1 riastrad tab->bert = NULL;
258 1.1 riastrad }
259 1.1 riastrad if (tab->einj != NULL) {
260 1.1 riastrad AcpiPutTable(&tab->einj->Header);
261 1.1 riastrad tab->einj = NULL;
262 1.1 riastrad }
263 1.1 riastrad if (tab->erst != NULL) {
264 1.1 riastrad AcpiPutTable(&tab->erst->Header);
265 1.1 riastrad tab->erst = NULL;
266 1.1 riastrad }
267 1.1 riastrad if (tab->hest != NULL) {
268 1.1 riastrad AcpiPutTable(&tab->hest->Header);
269 1.1 riastrad tab->hest = NULL;
270 1.1 riastrad }
271 1.1 riastrad }
272 1.1 riastrad
273 1.1 riastrad /*
274 1.1 riastrad * apei_identify(sc, name, header)
275 1.1 riastrad *
276 1.1 riastrad * Identify the APEI-related table header for dmesg.
277 1.1 riastrad */
278 1.1 riastrad static void
279 1.1 riastrad apei_identify(struct apei_softc *sc, const char *name,
280 1.1 riastrad const ACPI_TABLE_HEADER *h)
281 1.1 riastrad {
282 1.1 riastrad
283 1.1 riastrad aprint_normal_dev(sc->sc_dev, "%s:"
284 1.1 riastrad " OemId <%6.6s,%8.8s,%08x>"
285 1.1 riastrad " AslId <%4.4s,%08x>\n",
286 1.1 riastrad name,
287 1.1 riastrad h->OemId, h->OemTableId, h->OemRevision,
288 1.1 riastrad h->AslCompilerId, h->AslCompilerRevision);
289 1.1 riastrad }
290 1.1 riastrad
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Decode the 16-byte Common Platform Error Record UUID/GUID
 *	stored at buf, as found in an ACPI table, into the sys/uuid.h
 *	struct uuid at uuid.  The decoding is delegated to uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(&buf[0], uuid);
}
303 1.1 riastrad
304 1.1 riastrad /*
305 1.1 riastrad * apei_format_guid(uuid, s)
306 1.1 riastrad *
307 1.1 riastrad * Format a UUID as a string. This uses C initializer notation,
308 1.3 rillig * not UUID notation, in order to match the text in the UEFI
309 1.1 riastrad * specification.
310 1.1 riastrad */
311 1.1 riastrad static void
312 1.1 riastrad apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
313 1.1 riastrad {
314 1.1 riastrad
315 1.1 riastrad snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
316 1.4 riastrad "{0x%02x,%02x,"
317 1.4 riastrad "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
318 1.1 riastrad uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
319 1.4 riastrad uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
320 1.1 riastrad uuid->node[0], uuid->node[1], uuid->node[2],
321 1.1 riastrad uuid->node[3], uuid->node[4], uuid->node[5]);
322 1.1 riastrad }
323 1.1 riastrad
324 1.1 riastrad /*
325 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
326 1.1 riastrad */
327 1.1 riastrad
328 1.1 riastrad static const char *const cper_memory_error_type[] = {
329 1.1 riastrad #define F(LN, SN, V) [LN] = #SN,
330 1.1 riastrad CPER_MEMORY_ERROR_TYPES(F)
331 1.1 riastrad #undef F
332 1.1 riastrad };
333 1.1 riastrad
/*
 * apei_gesb_severity
 *
 *	Names of the Generic Error Status Block severity levels,
 *	indexed by the numeric severity 0 through 3.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with
 * these values but are designated as being intended for Generic Error
 * Data Entries rather than Generic Error Status Blocks, so plain
 * positions are used here instead.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",		/* 0 */
	"fatal",		/* 1 */
	"corrected",		/* 2 */
	"none",			/* 3 */
};
347 1.1 riastrad
348 1.1 riastrad /*
349 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
350 1.1 riastrad */
351 1.1 riastrad static const char *const apei_gede_severity[] = {
352 1.1 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
353 1.1 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
354 1.1 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
355 1.1 riastrad [ACPI_HEST_GEN_ERROR_NONE] = "none",
356 1.1 riastrad };
357 1.1 riastrad
358 1.1 riastrad /*
359 1.6 riastrad * N.2.5. Memory Error Section
360 1.6 riastrad *
361 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
362 1.1 riastrad */
363 1.1 riastrad static const struct uuid CPER_MEMORY_ERROR_SECTION =
364 1.1 riastrad {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
365 1.1 riastrad
366 1.1 riastrad static void
367 1.1 riastrad apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
368 1.5 riastrad size_t len, const char *ctx, bool ratelimitok)
369 1.1 riastrad {
370 1.1 riastrad const struct cper_memory_error *ME = buf;
371 1.1 riastrad char bitbuf[1024];
372 1.1 riastrad
373 1.5 riastrad /*
374 1.5 riastrad * If we've hit the rate limit, skip printing the error.
375 1.5 riastrad */
376 1.5 riastrad if (!ratelimitok)
377 1.5 riastrad goto out;
378 1.5 riastrad
379 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf),
380 1.1 riastrad CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
381 1.1 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
382 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
383 1.1 riastrad /*
384 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
385 1.1 riastrad */
386 1.1 riastrad /* XXX define this format somewhere */
387 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
388 1.1 riastrad "f\010\010" "ErrorType\0"
389 1.1 riastrad "=\001" "ERR_INTERNAL\0"
390 1.1 riastrad "=\004" "ERR_MEM\0"
391 1.1 riastrad "=\005" "ERR_TLB\0"
392 1.1 riastrad "=\006" "ERR_CACHE\0"
393 1.1 riastrad "=\007" "ERR_FUNCTION\0"
394 1.1 riastrad "=\010" "ERR_SELFTEST\0"
395 1.1 riastrad "=\011" "ERR_FLOW\0"
396 1.1 riastrad "=\020" "ERR_BUS\0"
397 1.1 riastrad "=\021" "ERR_MAP\0"
398 1.1 riastrad "=\022" "ERR_IMPROPER\0"
399 1.1 riastrad "=\023" "ERR_UNIMPL\0"
400 1.1 riastrad "=\024" "ERR_LOL\0"
401 1.1 riastrad "=\025" "ERR_RESPONSE\0"
402 1.1 riastrad "=\026" "ERR_PARITY\0"
403 1.1 riastrad "=\027" "ERR_PROTOCOL\0"
404 1.1 riastrad "=\030" "ERR_ERROR\0"
405 1.1 riastrad "=\031" "ERR_TIMEOUT\0"
406 1.1 riastrad "=\032" "ERR_POISONED\0"
407 1.1 riastrad "b\020" "AddressError\0"
408 1.1 riastrad "b\021" "ControlError\0"
409 1.1 riastrad "b\022" "DataError\0"
410 1.1 riastrad "b\023" "ResponderDetected\0"
411 1.1 riastrad "b\024" "RequesterDetected\0"
412 1.1 riastrad "b\025" "FirstError\0"
413 1.1 riastrad "b\026" "Overflow\0"
414 1.1 riastrad "\0", ME->ErrorStatus);
415 1.1 riastrad device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
416 1.1 riastrad }
417 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
418 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
419 1.1 riastrad ctx, ME->PhysicalAddress);
420 1.1 riastrad }
421 1.1 riastrad if (ME->ValidationBits &
422 1.1 riastrad CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
423 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
424 1.1 riastrad "\n", ctx, ME->PhysicalAddressMask);
425 1.1 riastrad }
426 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
427 1.1 riastrad device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
428 1.1 riastrad ME->Node);
429 1.1 riastrad }
430 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
431 1.1 riastrad device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
432 1.1 riastrad ME->Card);
433 1.1 riastrad }
434 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
435 1.1 riastrad device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
436 1.1 riastrad ME->Module);
437 1.1 riastrad }
438 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
439 1.1 riastrad device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
440 1.1 riastrad ME->Bank);
441 1.1 riastrad }
442 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
443 1.1 riastrad device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
444 1.1 riastrad ME->Device);
445 1.1 riastrad }
446 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
447 1.1 riastrad device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
448 1.1 riastrad ME->Row);
449 1.1 riastrad }
450 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
451 1.1 riastrad device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
452 1.1 riastrad ME->Column);
453 1.1 riastrad }
454 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
455 1.1 riastrad device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
456 1.1 riastrad ctx, ME->BitPosition);
457 1.1 riastrad }
458 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
459 1.1 riastrad device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
460 1.1 riastrad ctx, ME->RequestorId);
461 1.1 riastrad }
462 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
463 1.1 riastrad device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
464 1.1 riastrad ctx, ME->ResponderId);
465 1.1 riastrad }
466 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
467 1.1 riastrad device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
468 1.1 riastrad ctx, ME->TargetId);
469 1.1 riastrad }
470 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
471 1.1 riastrad const uint8_t t = ME->MemoryErrorType;
472 1.1 riastrad const char *n = t < __arraycount(cper_memory_error_type)
473 1.1 riastrad ? cper_memory_error_type[t] : NULL;
474 1.1 riastrad
475 1.1 riastrad if (n) {
476 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
477 1.1 riastrad " (%s)\n", ctx, t, n);
478 1.1 riastrad } else {
479 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
480 1.1 riastrad ctx, t);
481 1.1 riastrad }
482 1.1 riastrad }
483 1.5 riastrad
484 1.5 riastrad out: /*
485 1.5 riastrad * XXX pass this through to uvm(9) or userland for decisions
486 1.5 riastrad * like page retirement
487 1.5 riastrad */
488 1.5 riastrad return;
489 1.1 riastrad }
490 1.1 riastrad
491 1.1 riastrad /*
492 1.1 riastrad * apei_cper_reports
493 1.1 riastrad *
494 1.1 riastrad * Table of known Common Platform Error Record types, symbolic
495 1.1 riastrad * names, minimum data lengths, and functions to report them.
496 1.1 riastrad *
497 1.1 riastrad * The section types and corresponding section layouts are listed
498 1.1 riastrad * at:
499 1.1 riastrad *
500 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
501 1.1 riastrad */
502 1.1 riastrad static const struct apei_cper_report {
503 1.1 riastrad const char *name;
504 1.1 riastrad const struct uuid *type;
505 1.1 riastrad size_t minlength;
506 1.5 riastrad void (*func)(struct apei_softc *, const void *, size_t, const char *,
507 1.5 riastrad bool);
508 1.1 riastrad } apei_cper_reports[] = {
509 1.1 riastrad { "memory", &CPER_MEMORY_ERROR_SECTION,
510 1.1 riastrad sizeof(struct cper_memory_error),
511 1.1 riastrad apei_cper_memory_error_report },
512 1.1 riastrad };
513 1.1 riastrad
514 1.1 riastrad /*
515 1.5 riastrad * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
516 1.1 riastrad *
517 1.1 riastrad * Report the header of the ith Generic Error Data Entry in the
518 1.5 riastrad * given context, if ratelimitok is true.
519 1.1 riastrad *
520 1.1 riastrad * Return the actual length of the header in headerlen, or 0 if
521 1.1 riastrad * not known because the revision isn't recognized.
522 1.1 riastrad *
523 1.1 riastrad * Return the report type in report, or NULL if not known because
524 1.1 riastrad * the section type isn't recognized.
525 1.1 riastrad */
526 1.1 riastrad static void
527 1.1 riastrad apei_gede_report_header(struct apei_softc *sc,
528 1.5 riastrad const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
529 1.1 riastrad size_t *headerlenp, const struct apei_cper_report **reportp)
530 1.1 riastrad {
531 1.1 riastrad const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
532 1.1 riastrad struct uuid sectype;
533 1.1 riastrad char guidstr[69];
534 1.1 riastrad char buf[128];
535 1.1 riastrad unsigned i;
536 1.1 riastrad
537 1.1 riastrad /*
538 1.1 riastrad * Print the section type as a C initializer. It would be
539 1.1 riastrad * prettier to use standard hyphenated UUID notation, but that
540 1.1 riastrad * notation is slightly ambiguous here (two octets could be
541 1.1 riastrad * written either way, depending on Microsoft convention --
542 1.1 riastrad * which influenced ACPI and UEFI -- or internet convention),
543 1.1 riastrad * and the UEFI spec writes the C initializer notation, so this
544 1.1 riastrad * makes it easier to search for.
545 1.1 riastrad *
546 1.1 riastrad * Also print out a symbolic name, if we know it.
547 1.1 riastrad */
548 1.1 riastrad apei_cper_guid_dec(gede->SectionType, §ype);
549 1.1 riastrad apei_format_guid(§ype, guidstr);
550 1.1 riastrad for (i = 0; i < __arraycount(apei_cper_reports); i++) {
551 1.1 riastrad const struct apei_cper_report *const report =
552 1.1 riastrad &apei_cper_reports[i];
553 1.1 riastrad
554 1.1 riastrad if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
555 1.1 riastrad continue;
556 1.5 riastrad if (ratelimitok) {
557 1.5 riastrad device_printf(sc->sc_dev, "%s:"
558 1.5 riastrad " SectionType=%s (%s error)\n",
559 1.5 riastrad ctx, guidstr, report->name);
560 1.5 riastrad }
561 1.1 riastrad *reportp = report;
562 1.1 riastrad break;
563 1.1 riastrad }
564 1.1 riastrad if (i == __arraycount(apei_cper_reports)) {
565 1.5 riastrad if (ratelimitok) {
566 1.5 riastrad device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
567 1.5 riastrad guidstr);
568 1.5 riastrad }
569 1.1 riastrad *reportp = NULL;
570 1.1 riastrad }
571 1.1 riastrad
572 1.1 riastrad /*
573 1.1 riastrad * Print the numeric severity and, if we have it, a symbolic
574 1.1 riastrad * name for it.
575 1.1 riastrad */
576 1.5 riastrad if (ratelimitok) {
577 1.5 riastrad device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
578 1.5 riastrad ctx,
579 1.5 riastrad gede->ErrorSeverity,
580 1.5 riastrad (gede->ErrorSeverity < __arraycount(apei_gede_severity)
581 1.5 riastrad ? apei_gede_severity[gede->ErrorSeverity]
582 1.5 riastrad : "unknown"));
583 1.5 riastrad }
584 1.1 riastrad
585 1.1 riastrad /*
586 1.1 riastrad * The Revision may not often be useful, but this is only ever
587 1.1 riastrad * shown at the time of a hardware error report, not something
588 1.1 riastrad * you can glean at your convenience with acpidump. So print
589 1.1 riastrad * it anyway.
590 1.1 riastrad */
591 1.5 riastrad if (ratelimitok) {
592 1.5 riastrad device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
593 1.5 riastrad gede->Revision);
594 1.5 riastrad }
595 1.1 riastrad
596 1.1 riastrad /*
597 1.1 riastrad * Don't touch anything past the Revision until we've
598 1.1 riastrad * determined we understand it. Return the header length to
599 1.1 riastrad * the caller, or return zero -- and stop here -- if we don't
600 1.1 riastrad * know what the actual header length is.
601 1.1 riastrad */
602 1.1 riastrad if (gede->Revision < 0x0300) {
603 1.1 riastrad *headerlenp = sizeof(*gede);
604 1.1 riastrad } else if (gede->Revision < 0x0400) {
605 1.1 riastrad *headerlenp = sizeof(*gede_v3);
606 1.1 riastrad } else {
607 1.1 riastrad *headerlenp = 0;
608 1.1 riastrad return;
609 1.1 riastrad }
610 1.1 riastrad
611 1.1 riastrad /*
612 1.1 riastrad * Print the validation bits at debug level. Only really
613 1.1 riastrad * helpful if there are bits we _don't_ know about.
614 1.1 riastrad */
615 1.5 riastrad if (ratelimitok) {
616 1.5 riastrad /* XXX define this format somewhere */
617 1.5 riastrad snprintb(buf, sizeof(buf), "\177\020"
618 1.5 riastrad "b\000" "FRU_ID\0"
619 1.5 riastrad "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
620 1.5 riastrad "b\002" "TIMESTAMP\0"
621 1.5 riastrad "\0", gede->ValidationBits);
622 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
623 1.5 riastrad buf);
624 1.5 riastrad }
625 1.1 riastrad
626 1.1 riastrad /*
627 1.1 riastrad * Print the CPER section flags.
628 1.1 riastrad */
629 1.5 riastrad if (ratelimitok) {
630 1.5 riastrad snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
631 1.5 riastrad gede->Flags);
632 1.5 riastrad device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
633 1.5 riastrad }
634 1.1 riastrad
635 1.1 riastrad /*
636 1.1 riastrad * The ErrorDataLength is unlikely to be useful for the log, so
637 1.1 riastrad * print it at debug level only.
638 1.1 riastrad */
639 1.5 riastrad if (ratelimitok) {
640 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s:"
641 1.5 riastrad " ErrorDataLength=0x%"PRIu32"\n",
642 1.5 riastrad ctx, gede->ErrorDataLength);
643 1.5 riastrad }
644 1.1 riastrad
645 1.1 riastrad /*
646 1.1 riastrad * Print the FRU Id and text, if available.
647 1.1 riastrad */
648 1.5 riastrad if (ratelimitok &&
649 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
650 1.1 riastrad struct uuid fruid;
651 1.1 riastrad
652 1.1 riastrad apei_cper_guid_dec(gede->FruId, &fruid);
653 1.1 riastrad apei_format_guid(&fruid, guidstr);
654 1.1 riastrad device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
655 1.1 riastrad }
656 1.5 riastrad if (ratelimitok &&
657 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
658 1.1 riastrad device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
659 1.1 riastrad ctx, gede->FruText);
660 1.1 riastrad }
661 1.1 riastrad
662 1.1 riastrad /*
663 1.1 riastrad * Print the timestamp, if available by the revision number and
664 1.1 riastrad * the validation bits.
665 1.1 riastrad */
666 1.5 riastrad if (ratelimitok &&
667 1.5 riastrad gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
668 1.1 riastrad gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
669 1.1 riastrad const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
670 1.1 riastrad const uint8_t s = t[0];
671 1.1 riastrad const uint8_t m = t[1];
672 1.1 riastrad const uint8_t h = t[2];
673 1.1 riastrad const uint8_t f = t[3];
674 1.1 riastrad const uint8_t D = t[4];
675 1.1 riastrad const uint8_t M = t[5];
676 1.1 riastrad const uint8_t Y = t[6];
677 1.1 riastrad const uint8_t C = t[7];
678 1.1 riastrad
679 1.1 riastrad device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
680 1.1 riastrad " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
681 1.1 riastrad ctx, gede_v3->TimeStamp,
682 1.1 riastrad C,Y, M, D, h,m,s,
683 1.1 riastrad f & __BIT(0) ? " (event time)" : " (collect time)");
684 1.1 riastrad }
685 1.1 riastrad }
686 1.1 riastrad
687 1.1 riastrad /*
688 1.5 riastrad * apei_gesb_ratelimit
689 1.5 riastrad *
690 1.5 riastrad * State to limit the rate of console log messages about hardware
691 1.5 riastrad * errors. For each of the four severity levels in a Generic
692 1.5 riastrad * Error Status Block,
693 1.5 riastrad *
694 1.5 riastrad * 0 - Recoverable (uncorrectable),
695 1.5 riastrad * 1 - Fatal (uncorrectable),
696 1.5 riastrad * 2 - Corrected, and
697 1.5 riastrad * 3 - None (including ill-formed errors),
698 1.5 riastrad *
699 1.5 riastrad * we record the last time it happened, protected by a CPU simple
700 1.5 riastrad * lock that we only try-acquire so it is safe to use in any
701 1.5 riastrad * context, including non-maskable interrupt context.
702 1.5 riastrad */
703 1.5 riastrad
704 1.5 riastrad static struct {
705 1.5 riastrad __cpu_simple_lock_t lock;
706 1.5 riastrad struct timeval lasttime;
707 1.5 riastrad volatile uint32_t suppressed;
708 1.5 riastrad } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
709 1.5 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
710 1.5 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
711 1.5 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
712 1.5 riastrad [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
713 1.5 riastrad };
714 1.5 riastrad
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX: once the counter has reached UINT32_MAX it is left
 *	unchanged rather than wrapping around to zero.
 *
 *	The bound is UINT32_MAX, not UINT_MAX, so that it follows the
 *	counter's type and agrees with readers that compare the
 *	counter against UINT32_MAX to detect saturation.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
		/* Retry if someone else changed *p in the meantime. */
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
727 1.5 riastrad
728 1.5 riastrad /*
729 1.5 riastrad * apei_gesb_ratecheck(sc, severity, suppressed)
730 1.5 riastrad *
731 1.5 riastrad * Check for a rate limit on errors of the specified severity.
732 1.5 riastrad *
733 1.5 riastrad * => Return true if the error should be printed, and format into
734 1.5 riastrad * the buffer suppressed a message saying how many errors were
735 1.5 riastrad * previously suppressed.
736 1.5 riastrad *
737 1.5 riastrad * => Return false if the error should be suppressed because the
738 1.5 riastrad * last one printed was too recent.
739 1.5 riastrad */
740 1.5 riastrad static bool
741 1.5 riastrad apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
742 1.5 riastrad char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
743 1.5 riastrad {
744 1.5 riastrad /* one of each type per minute (XXX worth making configurable?) */
745 1.5 riastrad const struct timeval mininterval = {60, 0};
746 1.5 riastrad unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
747 1.5 riastrad bool ok = false;
748 1.5 riastrad
749 1.5 riastrad /*
750 1.5 riastrad * If the lock is contended, the rate limit is probably
751 1.5 riastrad * exceeded, so it's not OK to print.
752 1.5 riastrad *
753 1.5 riastrad * Otherwise, with the lock held, ask ratecheck(9) whether it's
754 1.5 riastrad * OK to print.
755 1.5 riastrad */
756 1.5 riastrad if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
757 1.5 riastrad goto out;
758 1.5 riastrad ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
759 1.5 riastrad __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
760 1.5 riastrad
761 1.5 riastrad out: /*
762 1.5 riastrad * If it's OK to print, report the number of errors that were
763 1.5 riastrad * suppressed. If it's not OK to print, count a suppressed
764 1.5 riastrad * error.
765 1.5 riastrad */
766 1.5 riastrad if (ok) {
767 1.5 riastrad const uint32_t n =
768 1.5 riastrad atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
769 1.5 riastrad
770 1.5 riastrad if (n == 0) {
771 1.5 riastrad suppressed[0] = '\0';
772 1.5 riastrad } else {
773 1.5 riastrad snprintf(suppressed,
774 1.5 riastrad sizeof(" (4294967295 or more errors suppressed)"),
775 1.5 riastrad " (%u%s error%s suppressed)",
776 1.5 riastrad n,
777 1.5 riastrad n == UINT32_MAX ? " or more" : "",
778 1.5 riastrad n == 1 ? "" : "s");
779 1.5 riastrad }
780 1.5 riastrad } else {
781 1.5 riastrad atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
782 1.5 riastrad suppressed[0] = '\0';
783 1.5 riastrad }
784 1.5 riastrad return ok;
785 1.5 riastrad }
786 1.5 riastrad
787 1.5 riastrad /*
788 1.1 riastrad * apei_gesb_report(sc, gesb, size, ctx)
789 1.1 riastrad *
790 1.1 riastrad * Check a Generic Error Status Block, of at most the specified
791 1.1 riastrad * size in bytes, and report any errors in it. Return the 32-bit
792 1.1 riastrad * Block Status in case the caller needs it to acknowledge the
793 1.1 riastrad * report to firmware.
794 1.1 riastrad */
795 1.1 riastrad uint32_t
796 1.1 riastrad apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
797 1.1 riastrad size_t size, const char *ctx, bool *fatalp)
798 1.1 riastrad {
799 1.1 riastrad uint32_t status, unknownstatus, severity, nentries, i;
800 1.1 riastrad uint32_t datalen, rawdatalen;
801 1.1 riastrad const ACPI_HEST_GENERIC_DATA *gede0, *gede;
802 1.1 riastrad const unsigned char *rawdata;
803 1.5 riastrad bool ratelimitok = false;
804 1.5 riastrad char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
805 1.1 riastrad bool fatal = false;
806 1.1 riastrad
807 1.1 riastrad /*
808 1.1 riastrad * Verify the buffer is large enough for a Generic Error Status
809 1.1 riastrad * Block before we try to touch anything in it.
810 1.1 riastrad */
811 1.1 riastrad if (size < sizeof(*gesb)) {
812 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
813 1.5 riastrad suppressed);
814 1.5 riastrad if (ratelimitok) {
815 1.5 riastrad device_printf(sc->sc_dev,
816 1.5 riastrad "%s: truncated GESB, %zu < %zu%s\n",
817 1.5 riastrad ctx, size, sizeof(*gesb), suppressed);
818 1.5 riastrad }
819 1.2 riastrad status = 0;
820 1.2 riastrad goto out;
821 1.1 riastrad }
822 1.1 riastrad size -= sizeof(*gesb);
823 1.1 riastrad
824 1.1 riastrad /*
825 1.1 riastrad * Load the status. Access ordering rules are unclear in the
826 1.1 riastrad * ACPI specification; I'm guessing that load-acquire of the
827 1.1 riastrad * block status is a good idea before any other access to the
828 1.1 riastrad * GESB.
829 1.1 riastrad */
830 1.1 riastrad status = atomic_load_acquire(&gesb->BlockStatus);
831 1.1 riastrad
832 1.1 riastrad /*
833 1.1 riastrad * If there are no status bits set, the rest of the GESB is
834 1.1 riastrad * garbage, so stop here.
835 1.1 riastrad */
836 1.1 riastrad if (status == 0) {
837 1.1 riastrad /* XXX dtrace */
838 1.1 riastrad /* XXX DPRINTF */
839 1.1 riastrad goto out;
840 1.1 riastrad }
841 1.1 riastrad
842 1.5 riastrad /*
843 1.5 riastrad * Read out the severity and get the number of entries in this
844 1.5 riastrad * status block.
845 1.5 riastrad */
846 1.5 riastrad severity = gesb->ErrorSeverity;
847 1.5 riastrad nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
848 1.1 riastrad
849 1.1 riastrad /*
850 1.1 riastrad * Print a message to the console and dmesg about the severity
851 1.1 riastrad * of the error.
852 1.1 riastrad */
853 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
854 1.5 riastrad if (ratelimitok) {
855 1.5 riastrad char statusbuf[128];
856 1.5 riastrad
857 1.5 riastrad /* XXX define this format somewhere */
858 1.5 riastrad snprintb(statusbuf, sizeof(statusbuf), "\177\020"
859 1.5 riastrad "b\000" "UE\0"
860 1.5 riastrad "b\001" "CE\0"
861 1.5 riastrad "b\002" "MULTI_UE\0"
862 1.5 riastrad "b\003" "MULTI_CE\0"
863 1.5 riastrad "f\004\010" "GEDE_COUNT\0"
864 1.5 riastrad "\0", status);
865 1.5 riastrad
866 1.5 riastrad if (severity < __arraycount(apei_gesb_severity)) {
867 1.5 riastrad device_printf(sc->sc_dev, "%s"
868 1.5 riastrad " reported hardware error%s:"
869 1.5 riastrad " severity=%s nentries=%u status=%s\n",
870 1.5 riastrad ctx, suppressed,
871 1.5 riastrad apei_gesb_severity[severity], nentries, statusbuf);
872 1.5 riastrad } else {
873 1.5 riastrad device_printf(sc->sc_dev, "%s reported error%s:"
874 1.5 riastrad " severity=%"PRIu32" nentries=%u status=%s\n",
875 1.5 riastrad ctx, suppressed,
876 1.5 riastrad severity, nentries, statusbuf);
877 1.5 riastrad }
878 1.1 riastrad }
879 1.1 riastrad
880 1.1 riastrad /*
881 1.1 riastrad * Make a determination about whether the error is fatal.
882 1.1 riastrad *
883 1.1 riastrad * XXX Currently we don't have any mechanism to recover from
884 1.1 riastrad * uncorrectable but recoverable errors, so we treat those --
885 1.1 riastrad * and anything else we don't recognize -- as fatal.
886 1.1 riastrad */
887 1.1 riastrad switch (severity) {
888 1.1 riastrad case ACPI_HEST_GEN_ERROR_CORRECTED:
889 1.1 riastrad case ACPI_HEST_GEN_ERROR_NONE:
890 1.1 riastrad fatal = false;
891 1.1 riastrad break;
892 1.1 riastrad case ACPI_HEST_GEN_ERROR_FATAL:
893 1.1 riastrad case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
894 1.1 riastrad default:
895 1.1 riastrad fatal = true;
896 1.1 riastrad break;
897 1.1 riastrad }
898 1.1 riastrad
899 1.1 riastrad /*
900 1.1 riastrad * Clear the bits we know about to warn if there's anything
901 1.1 riastrad * left we don't understand.
902 1.1 riastrad */
903 1.1 riastrad unknownstatus = status;
904 1.1 riastrad unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
905 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
906 1.1 riastrad unknownstatus &= ~ACPI_HEST_CORRECTABLE;
907 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
908 1.1 riastrad unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
909 1.5 riastrad if (ratelimitok && unknownstatus != 0) {
910 1.1 riastrad /* XXX dtrace */
911 1.1 riastrad device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
912 1.1 riastrad " 0x%"PRIx32"\n", ctx, unknownstatus);
913 1.1 riastrad }
914 1.1 riastrad
915 1.1 riastrad /*
916 1.1 riastrad * Advance past the Generic Error Status Block (GESB) header to
917 1.1 riastrad * the Generic Error Data Entries (GEDEs).
918 1.1 riastrad */
919 1.1 riastrad gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
920 1.1 riastrad
921 1.1 riastrad /*
922 1.1 riastrad * Verify that the data length (GEDEs) fits within the size.
923 1.1 riastrad * If not, truncate the GEDEs.
924 1.1 riastrad */
925 1.1 riastrad datalen = gesb->DataLength;
926 1.1 riastrad if (size < datalen) {
927 1.5 riastrad if (ratelimitok) {
928 1.5 riastrad device_printf(sc->sc_dev, "%s:"
929 1.5 riastrad " GESB DataLength exceeds bounds:"
930 1.5 riastrad " %zu < %"PRIu32"\n",
931 1.5 riastrad ctx, size, datalen);
932 1.5 riastrad }
933 1.1 riastrad datalen = size;
934 1.1 riastrad }
935 1.1 riastrad size -= datalen;
936 1.1 riastrad
937 1.1 riastrad /*
938 1.1 riastrad * Report each of the Generic Error Data Entries.
939 1.1 riastrad */
940 1.1 riastrad for (i = 0; i < nentries; i++) {
941 1.1 riastrad size_t headerlen;
942 1.1 riastrad const struct apei_cper_report *report;
943 1.1 riastrad char subctx[128];
944 1.1 riastrad
945 1.1 riastrad /*
946 1.1 riastrad * Format a subcontext to show this numbered entry of
947 1.1 riastrad * the GESB.
948 1.1 riastrad */
949 1.1 riastrad snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
950 1.1 riastrad
951 1.1 riastrad /*
952 1.1 riastrad * If the remaining GESB data length isn't enough for a
953 1.1 riastrad * GEDE header, stop here.
954 1.1 riastrad */
955 1.1 riastrad if (datalen < sizeof(*gede)) {
956 1.5 riastrad if (ratelimitok) {
957 1.5 riastrad device_printf(sc->sc_dev, "%s:"
958 1.5 riastrad " truncated GEDE: %"PRIu32" < %zu bytes\n",
959 1.5 riastrad subctx, datalen, sizeof(*gede));
960 1.5 riastrad }
961 1.1 riastrad break;
962 1.1 riastrad }
963 1.1 riastrad
964 1.1 riastrad /*
965 1.1 riastrad * Print the GEDE header and get the full length (may
966 1.1 riastrad * vary from revision to revision of the GEDE) and the
967 1.1 riastrad * CPER report function if possible.
968 1.1 riastrad */
969 1.5 riastrad apei_gede_report_header(sc, gede, subctx, ratelimitok,
970 1.1 riastrad &headerlen, &report);
971 1.1 riastrad
972 1.1 riastrad /*
973 1.1 riastrad * If we don't know the header length because of an
974 1.1 riastrad * unfamiliar revision, stop here.
975 1.1 riastrad */
976 1.1 riastrad if (headerlen == 0) {
977 1.5 riastrad if (ratelimitok) {
978 1.5 riastrad device_printf(sc->sc_dev, "%s:"
979 1.5 riastrad " unknown revision: 0x%"PRIx16"\n",
980 1.5 riastrad subctx, gede->Revision);
981 1.5 riastrad }
982 1.1 riastrad break;
983 1.1 riastrad }
984 1.1 riastrad
985 1.1 riastrad /*
986 1.1 riastrad * Stop here if what we mapped is too small for the
987 1.1 riastrad * error data length.
988 1.1 riastrad */
989 1.1 riastrad datalen -= headerlen;
990 1.1 riastrad if (datalen < gede->ErrorDataLength) {
991 1.5 riastrad if (ratelimitok) {
992 1.5 riastrad device_printf(sc->sc_dev, "%s:"
993 1.5 riastrad " truncated GEDE payload:"
994 1.5 riastrad " %"PRIu32" < %"PRIu32" bytes\n",
995 1.5 riastrad subctx, datalen, gede->ErrorDataLength);
996 1.5 riastrad }
997 1.1 riastrad break;
998 1.1 riastrad }
999 1.1 riastrad
1000 1.1 riastrad /*
1001 1.1 riastrad * Report the Common Platform Error Record appendix to
1002 1.1 riastrad * this Generic Error Data Entry.
1003 1.1 riastrad */
1004 1.1 riastrad if (report == NULL) {
1005 1.5 riastrad if (ratelimitok) {
1006 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1007 1.5 riastrad " [unknown type]\n", ctx);
1008 1.5 riastrad }
1009 1.1 riastrad } else {
1010 1.5 riastrad /* XXX pass ratelimit through */
1011 1.1 riastrad (*report->func)(sc, (const char *)gede + headerlen,
1012 1.5 riastrad gede->ErrorDataLength, subctx, ratelimitok);
1013 1.1 riastrad }
1014 1.1 riastrad
1015 1.1 riastrad /*
1016 1.1 riastrad * Advance past the GEDE header and CPER data to the
1017 1.1 riastrad * next GEDE.
1018 1.1 riastrad */
1019 1.1 riastrad gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1020 1.1 riastrad + headerlen + gede->ErrorDataLength);
1021 1.1 riastrad }
1022 1.1 riastrad
1023 1.1 riastrad /*
1024 1.1 riastrad * Advance past the Generic Error Data Entries (GEDEs) to the
1025 1.1 riastrad * raw error data.
1026 1.1 riastrad *
1027 1.1 riastrad * XXX Provide Max Raw Data Length as a parameter, as found in
1028 1.1 riastrad * various HEST entry types.
1029 1.1 riastrad */
1030 1.1 riastrad rawdata = (const unsigned char *)gede0 + datalen;
1031 1.1 riastrad
1032 1.1 riastrad /*
1033 1.1 riastrad * Verify that the raw data length fits within the size. If
1034 1.1 riastrad * not, truncate the raw data.
1035 1.1 riastrad */
1036 1.1 riastrad rawdatalen = gesb->RawDataLength;
1037 1.1 riastrad if (size < rawdatalen) {
1038 1.5 riastrad if (ratelimitok) {
1039 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1040 1.5 riastrad " GESB RawDataLength exceeds bounds:"
1041 1.5 riastrad " %zu < %"PRIu32"\n",
1042 1.5 riastrad ctx, size, rawdatalen);
1043 1.5 riastrad }
1044 1.1 riastrad rawdatalen = size;
1045 1.1 riastrad }
1046 1.1 riastrad size -= rawdatalen;
1047 1.1 riastrad
1048 1.1 riastrad /*
1049 1.1 riastrad * Hexdump the raw data, if any.
1050 1.1 riastrad */
1051 1.5 riastrad if (ratelimitok && rawdatalen > 0) {
1052 1.1 riastrad char devctx[128];
1053 1.1 riastrad
1054 1.1 riastrad snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1055 1.1 riastrad device_xname(sc->sc_dev), ctx);
1056 1.1 riastrad hexdump(printf, devctx, rawdata, rawdatalen);
1057 1.1 riastrad }
1058 1.1 riastrad
1059 1.1 riastrad /*
1060 1.1 riastrad * If there's anything left after the raw data, warn.
1061 1.1 riastrad */
1062 1.5 riastrad if (ratelimitok && size > 0) {
1063 1.1 riastrad device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1064 1.1 riastrad ctx, size);
1065 1.1 riastrad }
1066 1.1 riastrad
1067 1.1 riastrad /*
1068 1.1 riastrad * Return the status so the caller can ack it, and tell the
1069 1.1 riastrad * caller whether this error is fatal.
1070 1.1 riastrad */
1071 1.1 riastrad out: *fatalp = fatal;
1072 1.1 riastrad return status;
1073 1.1 riastrad }
1074 1.1 riastrad
1075 1.1 riastrad MODULE(MODULE_CLASS_DRIVER, apei, NULL);
1076 1.1 riastrad
1077 1.1 riastrad #ifdef _MODULE
1078 1.1 riastrad #include "ioconf.c"
1079 1.1 riastrad #endif
1080 1.1 riastrad
1081 1.1 riastrad static int
1082 1.1 riastrad apei_modcmd(modcmd_t cmd, void *opaque)
1083 1.1 riastrad {
1084 1.1 riastrad int error = 0;
1085 1.1 riastrad
1086 1.1 riastrad switch (cmd) {
1087 1.1 riastrad case MODULE_CMD_INIT:
1088 1.1 riastrad #ifdef _MODULE
1089 1.1 riastrad error = config_init_component(cfdriver_ioconf_apei,
1090 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1091 1.1 riastrad #endif
1092 1.1 riastrad return error;
1093 1.1 riastrad case MODULE_CMD_FINI:
1094 1.1 riastrad #ifdef _MODULE
1095 1.1 riastrad error = config_fini_component(cfdriver_ioconf_apei,
1096 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1097 1.1 riastrad #endif
1098 1.1 riastrad return error;
1099 1.1 riastrad default:
1100 1.1 riastrad return ENOTTY;
1101 1.1 riastrad }
1102 1.1 riastrad }
1103