apei.c revision 1.5 1 1.5 riastrad /* $NetBSD: apei.c,v 1.5 2024/10/27 12:13:42 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * Redistribution and use in source and binary forms, with or without
8 1.1 riastrad * modification, are permitted provided that the following conditions
9 1.1 riastrad * are met:
10 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
11 1.1 riastrad * notice, this list of conditions and the following disclaimer.
12 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
14 1.1 riastrad * documentation and/or other materials provided with the distribution.
15 1.1 riastrad *
16 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
27 1.1 riastrad */
28 1.1 riastrad
29 1.1 riastrad /*
30 1.1 riastrad * APEI: ACPI Platform Error Interface
31 1.1 riastrad *
32 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 1.1 riastrad *
34 1.1 riastrad * XXX dtrace probes
35 1.1 riastrad *
36 1.1 riastrad * XXX call _OSC appropriately to announce to the platform that we, the
37 1.1 riastrad * OSPM, support APEI
38 1.1 riastrad */
39 1.1 riastrad
40 1.1 riastrad #include <sys/cdefs.h>
41 1.5 riastrad __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.5 2024/10/27 12:13:42 riastradh Exp $");
42 1.1 riastrad
43 1.1 riastrad #include <sys/param.h>
44 1.1 riastrad #include <sys/types.h>
45 1.1 riastrad
46 1.1 riastrad #include <sys/atomic.h>
47 1.1 riastrad #include <sys/device.h>
48 1.1 riastrad #include <sys/module.h>
49 1.1 riastrad #include <sys/sysctl.h>
50 1.1 riastrad #include <sys/uuid.h>
51 1.1 riastrad
52 1.1 riastrad #include <dev/acpi/acpireg.h>
53 1.1 riastrad #include <dev/acpi/acpivar.h>
54 1.1 riastrad #include <dev/acpi/apei_bertvar.h>
55 1.1 riastrad #include <dev/acpi/apei_cper.h>
56 1.1 riastrad #include <dev/acpi/apei_einjvar.h>
57 1.1 riastrad #include <dev/acpi/apei_erstvar.h>
58 1.1 riastrad #include <dev/acpi/apei_hestvar.h>
59 1.1 riastrad #include <dev/acpi/apei_interp.h>
60 1.1 riastrad #include <dev/acpi/apeivar.h>
61 1.1 riastrad
62 1.1 riastrad #define _COMPONENT ACPI_RESOURCE_COMPONENT
63 1.1 riastrad ACPI_MODULE_NAME ("apei")
64 1.1 riastrad
65 1.1 riastrad static int apei_match(device_t, cfdata_t, void *);
66 1.1 riastrad static void apei_attach(device_t, device_t, void *);
67 1.1 riastrad static int apei_detach(device_t, int);
68 1.1 riastrad
69 1.1 riastrad static void apei_get_tables(struct apei_tab *);
70 1.1 riastrad static void apei_put_tables(struct apei_tab *);
71 1.1 riastrad
72 1.1 riastrad static void apei_identify(struct apei_softc *, const char *,
73 1.1 riastrad const ACPI_TABLE_HEADER *);
74 1.1 riastrad
75 1.1 riastrad CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
76 1.1 riastrad apei_match, apei_attach, apei_detach, NULL);
77 1.1 riastrad
78 1.1 riastrad static int
79 1.1 riastrad apei_match(device_t parent, cfdata_t match, void *aux)
80 1.1 riastrad {
81 1.1 riastrad struct apei_tab tab;
82 1.1 riastrad int prio = 0;
83 1.1 riastrad
84 1.1 riastrad /*
85 1.1 riastrad * If we have any of the APEI tables, match.
86 1.1 riastrad */
87 1.1 riastrad apei_get_tables(&tab);
88 1.1 riastrad if (tab.bert || tab.einj || tab.erst || tab.hest)
89 1.1 riastrad prio = 1;
90 1.1 riastrad apei_put_tables(&tab);
91 1.1 riastrad
92 1.1 riastrad return prio;
93 1.1 riastrad }
94 1.1 riastrad
95 1.1 riastrad static void
96 1.1 riastrad apei_attach(device_t parent, device_t self, void *aux)
97 1.1 riastrad {
98 1.1 riastrad struct apei_softc *sc = device_private(self);
99 1.1 riastrad const struct sysctlnode *sysctl_hw_acpi;
100 1.1 riastrad int error;
101 1.1 riastrad
102 1.1 riastrad aprint_naive("\n");
103 1.1 riastrad aprint_normal(": ACPI Platform Error Interface\n");
104 1.1 riastrad
105 1.1 riastrad pmf_device_register(self, NULL, NULL);
106 1.1 riastrad
107 1.1 riastrad sc->sc_dev = self;
108 1.1 riastrad apei_get_tables(&sc->sc_tab);
109 1.1 riastrad
110 1.1 riastrad /*
111 1.1 riastrad * Get the sysctl hw.acpi node. This should already be created
112 1.1 riastrad * but I don't see an easy way to get at it. If this fails,
113 1.1 riastrad * something is seriously wrong, so let's stop here.
114 1.1 riastrad */
115 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
116 1.1 riastrad NULL, &sysctl_hw_acpi, 0,
117 1.1 riastrad CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
118 1.1 riastrad CTL_HW, CTL_CREATE, CTL_EOL);
119 1.1 riastrad if (error) {
120 1.1 riastrad aprint_error_dev(sc->sc_dev,
121 1.1 riastrad "failed to create sysctl hw.acpi: %d\n", error);
122 1.1 riastrad return;
123 1.1 riastrad }
124 1.1 riastrad
125 1.1 riastrad /*
126 1.1 riastrad * Create sysctl hw.acpi.apei.
127 1.1 riastrad */
128 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
129 1.1 riastrad &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
130 1.1 riastrad CTLTYPE_NODE, "apei",
131 1.1 riastrad SYSCTL_DESCR("ACPI Platform Error Interface"),
132 1.1 riastrad NULL, 0, NULL, 0,
133 1.1 riastrad CTL_CREATE, CTL_EOL);
134 1.1 riastrad if (error) {
135 1.1 riastrad aprint_error_dev(sc->sc_dev,
136 1.1 riastrad "failed to create sysctl hw.acpi.apei: %d\n", error);
137 1.1 riastrad return;
138 1.1 riastrad }
139 1.1 riastrad
140 1.1 riastrad /*
141 1.1 riastrad * Set up BERT, EINJ, ERST, and HEST.
142 1.1 riastrad */
143 1.1 riastrad if (sc->sc_tab.bert) {
144 1.1 riastrad apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
145 1.1 riastrad apei_bert_attach(sc);
146 1.1 riastrad }
147 1.1 riastrad if (sc->sc_tab.einj) {
148 1.1 riastrad apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
149 1.1 riastrad apei_einj_attach(sc);
150 1.1 riastrad }
151 1.1 riastrad if (sc->sc_tab.erst) {
152 1.1 riastrad apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
153 1.1 riastrad apei_erst_attach(sc);
154 1.1 riastrad }
155 1.1 riastrad if (sc->sc_tab.hest) {
156 1.1 riastrad apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
157 1.1 riastrad apei_hest_attach(sc);
158 1.1 riastrad }
159 1.1 riastrad }
160 1.1 riastrad
161 1.1 riastrad static int
162 1.1 riastrad apei_detach(device_t self, int flags)
163 1.1 riastrad {
164 1.1 riastrad struct apei_softc *sc = device_private(self);
165 1.1 riastrad int error;
166 1.1 riastrad
167 1.1 riastrad /*
168 1.1 riastrad * Detach children. We don't currently have any but this is
169 1.1 riastrad * harmless without children and mandatory if we ever sprouted
170 1.1 riastrad * them, so let's just leave it here for good measure.
171 1.1 riastrad *
172 1.1 riastrad * After this point, we are committed to detaching; failure is
173 1.1 riastrad * forbidden.
174 1.1 riastrad */
175 1.1 riastrad error = config_detach_children(self, flags);
176 1.1 riastrad if (error)
177 1.1 riastrad return error;
178 1.1 riastrad
179 1.1 riastrad /*
180 1.1 riastrad * Tear down all the sysctl nodes first, before the software
181 1.1 riastrad * state backing them goes away.
182 1.1 riastrad */
183 1.1 riastrad sysctl_teardown(&sc->sc_sysctllog);
184 1.1 riastrad sc->sc_sysctlroot = NULL;
185 1.1 riastrad
186 1.1 riastrad /*
187 1.1 riastrad * Detach the software state for the APEI tables.
188 1.1 riastrad */
189 1.1 riastrad if (sc->sc_tab.hest)
190 1.1 riastrad apei_hest_detach(sc);
191 1.1 riastrad if (sc->sc_tab.erst)
192 1.1 riastrad apei_erst_detach(sc);
193 1.1 riastrad if (sc->sc_tab.einj)
194 1.1 riastrad apei_einj_detach(sc);
195 1.1 riastrad if (sc->sc_tab.bert)
196 1.1 riastrad apei_bert_detach(sc);
197 1.1 riastrad
198 1.1 riastrad /*
199 1.1 riastrad * Release the APEI tables and we're done.
200 1.1 riastrad */
201 1.1 riastrad apei_put_tables(&sc->sc_tab);
202 1.1 riastrad pmf_device_deregister(self);
203 1.1 riastrad return 0;
204 1.1 riastrad }
205 1.1 riastrad
206 1.1 riastrad /*
207 1.1 riastrad * apei_get_tables(tab)
208 1.1 riastrad *
209 1.1 riastrad * Get references to whichever APEI-related tables -- BERT, EINJ,
210 1.1 riastrad * ERST, HEST -- are available in the system.
211 1.1 riastrad */
212 1.1 riastrad static void
213 1.1 riastrad apei_get_tables(struct apei_tab *tab)
214 1.1 riastrad {
215 1.1 riastrad ACPI_STATUS rv;
216 1.1 riastrad
217 1.1 riastrad /*
218 1.1 riastrad * Probe the BERT -- Boot Error Record Table.
219 1.1 riastrad */
220 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
221 1.1 riastrad if (ACPI_FAILURE(rv))
222 1.1 riastrad tab->bert = NULL;
223 1.1 riastrad
224 1.1 riastrad /*
225 1.1 riastrad * Probe the EINJ -- Error Injection Table.
226 1.1 riastrad */
227 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
228 1.1 riastrad if (ACPI_FAILURE(rv))
229 1.1 riastrad tab->einj = NULL;
230 1.1 riastrad
231 1.1 riastrad /*
232 1.1 riastrad * Probe the ERST -- Error Record Serialization Table.
233 1.1 riastrad */
234 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
235 1.1 riastrad if (ACPI_FAILURE(rv))
236 1.1 riastrad tab->erst = NULL;
237 1.1 riastrad
238 1.1 riastrad /*
239 1.1 riastrad * Probe the HEST -- Hardware Error Source Table.
240 1.1 riastrad */
241 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
242 1.1 riastrad if (ACPI_FAILURE(rv))
243 1.1 riastrad tab->hest = NULL;
244 1.1 riastrad }
245 1.1 riastrad
246 1.1 riastrad /*
247 1.1 riastrad * apei_put_tables(tab)
248 1.1 riastrad *
249 1.1 riastrad * Release the tables acquired by apei_get_tables.
250 1.1 riastrad */
251 1.1 riastrad static void
252 1.1 riastrad apei_put_tables(struct apei_tab *tab)
253 1.1 riastrad {
254 1.1 riastrad
255 1.1 riastrad if (tab->bert != NULL) {
256 1.1 riastrad AcpiPutTable(&tab->bert->Header);
257 1.1 riastrad tab->bert = NULL;
258 1.1 riastrad }
259 1.1 riastrad if (tab->einj != NULL) {
260 1.1 riastrad AcpiPutTable(&tab->einj->Header);
261 1.1 riastrad tab->einj = NULL;
262 1.1 riastrad }
263 1.1 riastrad if (tab->erst != NULL) {
264 1.1 riastrad AcpiPutTable(&tab->erst->Header);
265 1.1 riastrad tab->erst = NULL;
266 1.1 riastrad }
267 1.1 riastrad if (tab->hest != NULL) {
268 1.1 riastrad AcpiPutTable(&tab->hest->Header);
269 1.1 riastrad tab->hest = NULL;
270 1.1 riastrad }
271 1.1 riastrad }
272 1.1 riastrad
273 1.1 riastrad /*
274 1.1 riastrad * apei_identify(sc, name, header)
275 1.1 riastrad *
276 1.1 riastrad * Identify the APEI-related table header for dmesg.
277 1.1 riastrad */
278 1.1 riastrad static void
279 1.1 riastrad apei_identify(struct apei_softc *sc, const char *name,
280 1.1 riastrad const ACPI_TABLE_HEADER *h)
281 1.1 riastrad {
282 1.1 riastrad
283 1.1 riastrad aprint_normal_dev(sc->sc_dev, "%s:"
284 1.1 riastrad " OemId <%6.6s,%8.8s,%08x>"
285 1.1 riastrad " AslId <%4.4s,%08x>\n",
286 1.1 riastrad name,
287 1.1 riastrad h->OemId, h->OemTableId, h->OemRevision,
288 1.1 riastrad h->AslCompilerId, h->AslCompilerRevision);
289 1.1 riastrad }
290 1.1 riastrad
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record GUID found at
 *	buf in an ACPI table into a struct uuid (sys/uuid.h), using
 *	uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
303 1.1 riastrad
304 1.1 riastrad /*
305 1.1 riastrad * apei_format_guid(uuid, s)
306 1.1 riastrad *
307 1.1 riastrad * Format a UUID as a string. This uses C initializer notation,
308 1.3 rillig * not UUID notation, in order to match the text in the UEFI
309 1.1 riastrad * specification.
310 1.1 riastrad */
311 1.1 riastrad static void
312 1.1 riastrad apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
313 1.1 riastrad {
314 1.1 riastrad
315 1.1 riastrad snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
316 1.4 riastrad "{0x%02x,%02x,"
317 1.4 riastrad "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
318 1.1 riastrad uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
319 1.4 riastrad uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
320 1.1 riastrad uuid->node[0], uuid->node[1], uuid->node[2],
321 1.1 riastrad uuid->node[3], uuid->node[4], uuid->node[5]);
322 1.1 riastrad }
323 1.1 riastrad
324 1.1 riastrad /*
325 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
326 1.1 riastrad */
327 1.1 riastrad
328 1.1 riastrad static const char *const cper_memory_error_type[] = {
329 1.1 riastrad #define F(LN, SN, V) [LN] = #SN,
330 1.1 riastrad CPER_MEMORY_ERROR_TYPES(F)
331 1.1 riastrad #undef F
332 1.1 riastrad };
333 1.1 riastrad
/*
 * apei_gesb_severity
 *
 *	Names for the severity values 0-3 of a Generic Error Status
 *	Block.
 *
 *	https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 *	The indices are written as plain numbers on purpose: the
 *	acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with
 *	these values, but are designated as being intended for Generic
 *	Error Data Entries rather than Generic Error Status Blocks.
 */
static const char *const apei_gesb_severity[] = {
	[0]	= "recoverable",
	[1]	= "fatal",
	[2]	= "corrected",
	[3]	= "none",
};
347 1.1 riastrad
348 1.1 riastrad /*
349 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
350 1.1 riastrad */
351 1.1 riastrad static const char *const apei_gede_severity[] = {
352 1.1 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
353 1.1 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
354 1.1 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
355 1.1 riastrad [ACPI_HEST_GEN_ERROR_NONE] = "none",
356 1.1 riastrad };
357 1.1 riastrad
358 1.1 riastrad /*
359 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
360 1.1 riastrad */
361 1.1 riastrad static const struct uuid CPER_MEMORY_ERROR_SECTION =
362 1.1 riastrad {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
363 1.1 riastrad
364 1.1 riastrad static void
365 1.1 riastrad apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
366 1.5 riastrad size_t len, const char *ctx, bool ratelimitok)
367 1.1 riastrad {
368 1.1 riastrad const struct cper_memory_error *ME = buf;
369 1.1 riastrad char bitbuf[1024];
370 1.1 riastrad
371 1.5 riastrad /*
372 1.5 riastrad * If we've hit the rate limit, skip printing the error.
373 1.5 riastrad */
374 1.5 riastrad if (!ratelimitok)
375 1.5 riastrad goto out;
376 1.5 riastrad
377 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf),
378 1.1 riastrad CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
379 1.1 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
380 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
381 1.1 riastrad /*
382 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
383 1.1 riastrad */
384 1.1 riastrad /* XXX define this format somewhere */
385 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
386 1.1 riastrad "f\010\010" "ErrorType\0"
387 1.1 riastrad "=\001" "ERR_INTERNAL\0"
388 1.1 riastrad "=\004" "ERR_MEM\0"
389 1.1 riastrad "=\005" "ERR_TLB\0"
390 1.1 riastrad "=\006" "ERR_CACHE\0"
391 1.1 riastrad "=\007" "ERR_FUNCTION\0"
392 1.1 riastrad "=\010" "ERR_SELFTEST\0"
393 1.1 riastrad "=\011" "ERR_FLOW\0"
394 1.1 riastrad "=\020" "ERR_BUS\0"
395 1.1 riastrad "=\021" "ERR_MAP\0"
396 1.1 riastrad "=\022" "ERR_IMPROPER\0"
397 1.1 riastrad "=\023" "ERR_UNIMPL\0"
398 1.1 riastrad "=\024" "ERR_LOL\0"
399 1.1 riastrad "=\025" "ERR_RESPONSE\0"
400 1.1 riastrad "=\026" "ERR_PARITY\0"
401 1.1 riastrad "=\027" "ERR_PROTOCOL\0"
402 1.1 riastrad "=\030" "ERR_ERROR\0"
403 1.1 riastrad "=\031" "ERR_TIMEOUT\0"
404 1.1 riastrad "=\032" "ERR_POISONED\0"
405 1.1 riastrad "b\020" "AddressError\0"
406 1.1 riastrad "b\021" "ControlError\0"
407 1.1 riastrad "b\022" "DataError\0"
408 1.1 riastrad "b\023" "ResponderDetected\0"
409 1.1 riastrad "b\024" "RequesterDetected\0"
410 1.1 riastrad "b\025" "FirstError\0"
411 1.1 riastrad "b\026" "Overflow\0"
412 1.1 riastrad "\0", ME->ErrorStatus);
413 1.1 riastrad device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
414 1.1 riastrad }
415 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
416 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
417 1.1 riastrad ctx, ME->PhysicalAddress);
418 1.1 riastrad }
419 1.1 riastrad if (ME->ValidationBits &
420 1.1 riastrad CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
421 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
422 1.1 riastrad "\n", ctx, ME->PhysicalAddressMask);
423 1.1 riastrad }
424 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
425 1.1 riastrad device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
426 1.1 riastrad ME->Node);
427 1.1 riastrad }
428 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
429 1.1 riastrad device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
430 1.1 riastrad ME->Card);
431 1.1 riastrad }
432 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
433 1.1 riastrad device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
434 1.1 riastrad ME->Module);
435 1.1 riastrad }
436 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
437 1.1 riastrad device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
438 1.1 riastrad ME->Bank);
439 1.1 riastrad }
440 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
441 1.1 riastrad device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
442 1.1 riastrad ME->Device);
443 1.1 riastrad }
444 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
445 1.1 riastrad device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
446 1.1 riastrad ME->Row);
447 1.1 riastrad }
448 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
449 1.1 riastrad device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
450 1.1 riastrad ME->Column);
451 1.1 riastrad }
452 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
453 1.1 riastrad device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
454 1.1 riastrad ctx, ME->BitPosition);
455 1.1 riastrad }
456 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
457 1.1 riastrad device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
458 1.1 riastrad ctx, ME->RequestorId);
459 1.1 riastrad }
460 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
461 1.1 riastrad device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
462 1.1 riastrad ctx, ME->ResponderId);
463 1.1 riastrad }
464 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
465 1.1 riastrad device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
466 1.1 riastrad ctx, ME->TargetId);
467 1.1 riastrad }
468 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
469 1.1 riastrad const uint8_t t = ME->MemoryErrorType;
470 1.1 riastrad const char *n = t < __arraycount(cper_memory_error_type)
471 1.1 riastrad ? cper_memory_error_type[t] : NULL;
472 1.1 riastrad
473 1.1 riastrad if (n) {
474 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
475 1.1 riastrad " (%s)\n", ctx, t, n);
476 1.1 riastrad } else {
477 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
478 1.1 riastrad ctx, t);
479 1.1 riastrad }
480 1.1 riastrad }
481 1.5 riastrad
482 1.5 riastrad out: /*
483 1.5 riastrad * XXX pass this through to uvm(9) or userland for decisions
484 1.5 riastrad * like page retirement
485 1.5 riastrad */
486 1.5 riastrad return;
487 1.1 riastrad }
488 1.1 riastrad
489 1.1 riastrad /*
490 1.1 riastrad * apei_cper_reports
491 1.1 riastrad *
492 1.1 riastrad * Table of known Common Platform Error Record types, symbolic
493 1.1 riastrad * names, minimum data lengths, and functions to report them.
494 1.1 riastrad *
495 1.1 riastrad * The section types and corresponding section layouts are listed
496 1.1 riastrad * at:
497 1.1 riastrad *
498 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
499 1.1 riastrad */
500 1.1 riastrad static const struct apei_cper_report {
501 1.1 riastrad const char *name;
502 1.1 riastrad const struct uuid *type;
503 1.1 riastrad size_t minlength;
504 1.5 riastrad void (*func)(struct apei_softc *, const void *, size_t, const char *,
505 1.5 riastrad bool);
506 1.1 riastrad } apei_cper_reports[] = {
507 1.1 riastrad { "memory", &CPER_MEMORY_ERROR_SECTION,
508 1.1 riastrad sizeof(struct cper_memory_error),
509 1.1 riastrad apei_cper_memory_error_report },
510 1.1 riastrad };
511 1.1 riastrad
512 1.1 riastrad /*
513 1.5 riastrad * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
514 1.1 riastrad *
515 1.1 riastrad * Report the header of the ith Generic Error Data Entry in the
516 1.5 riastrad * given context, if ratelimitok is true.
517 1.1 riastrad *
518 1.1 riastrad * Return the actual length of the header in headerlen, or 0 if
519 1.1 riastrad * not known because the revision isn't recognized.
520 1.1 riastrad *
521 1.1 riastrad * Return the report type in report, or NULL if not known because
522 1.1 riastrad * the section type isn't recognized.
523 1.1 riastrad */
524 1.1 riastrad static void
525 1.1 riastrad apei_gede_report_header(struct apei_softc *sc,
526 1.5 riastrad const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
527 1.1 riastrad size_t *headerlenp, const struct apei_cper_report **reportp)
528 1.1 riastrad {
529 1.1 riastrad const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
530 1.1 riastrad struct uuid sectype;
531 1.1 riastrad char guidstr[69];
532 1.1 riastrad char buf[128];
533 1.1 riastrad unsigned i;
534 1.1 riastrad
535 1.1 riastrad /*
536 1.1 riastrad * Print the section type as a C initializer. It would be
537 1.1 riastrad * prettier to use standard hyphenated UUID notation, but that
538 1.1 riastrad * notation is slightly ambiguous here (two octets could be
539 1.1 riastrad * written either way, depending on Microsoft convention --
540 1.1 riastrad * which influenced ACPI and UEFI -- or internet convention),
541 1.1 riastrad * and the UEFI spec writes the C initializer notation, so this
542 1.1 riastrad * makes it easier to search for.
543 1.1 riastrad *
544 1.1 riastrad * Also print out a symbolic name, if we know it.
545 1.1 riastrad */
546 1.1 riastrad apei_cper_guid_dec(gede->SectionType, §ype);
547 1.1 riastrad apei_format_guid(§ype, guidstr);
548 1.1 riastrad for (i = 0; i < __arraycount(apei_cper_reports); i++) {
549 1.1 riastrad const struct apei_cper_report *const report =
550 1.1 riastrad &apei_cper_reports[i];
551 1.1 riastrad
552 1.1 riastrad if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
553 1.1 riastrad continue;
554 1.5 riastrad if (ratelimitok) {
555 1.5 riastrad device_printf(sc->sc_dev, "%s:"
556 1.5 riastrad " SectionType=%s (%s error)\n",
557 1.5 riastrad ctx, guidstr, report->name);
558 1.5 riastrad }
559 1.1 riastrad *reportp = report;
560 1.1 riastrad break;
561 1.1 riastrad }
562 1.1 riastrad if (i == __arraycount(apei_cper_reports)) {
563 1.5 riastrad if (ratelimitok) {
564 1.5 riastrad device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
565 1.5 riastrad guidstr);
566 1.5 riastrad }
567 1.1 riastrad *reportp = NULL;
568 1.1 riastrad }
569 1.1 riastrad
570 1.1 riastrad /*
571 1.1 riastrad * Print the numeric severity and, if we have it, a symbolic
572 1.1 riastrad * name for it.
573 1.1 riastrad */
574 1.5 riastrad if (ratelimitok) {
575 1.5 riastrad device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
576 1.5 riastrad ctx,
577 1.5 riastrad gede->ErrorSeverity,
578 1.5 riastrad (gede->ErrorSeverity < __arraycount(apei_gede_severity)
579 1.5 riastrad ? apei_gede_severity[gede->ErrorSeverity]
580 1.5 riastrad : "unknown"));
581 1.5 riastrad }
582 1.1 riastrad
583 1.1 riastrad /*
584 1.1 riastrad * The Revision may not often be useful, but this is only ever
585 1.1 riastrad * shown at the time of a hardware error report, not something
586 1.1 riastrad * you can glean at your convenience with acpidump. So print
587 1.1 riastrad * it anyway.
588 1.1 riastrad */
589 1.5 riastrad if (ratelimitok) {
590 1.5 riastrad device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
591 1.5 riastrad gede->Revision);
592 1.5 riastrad }
593 1.1 riastrad
594 1.1 riastrad /*
595 1.1 riastrad * Don't touch anything past the Revision until we've
596 1.1 riastrad * determined we understand it. Return the header length to
597 1.1 riastrad * the caller, or return zero -- and stop here -- if we don't
598 1.1 riastrad * know what the actual header length is.
599 1.1 riastrad */
600 1.1 riastrad if (gede->Revision < 0x0300) {
601 1.1 riastrad *headerlenp = sizeof(*gede);
602 1.1 riastrad } else if (gede->Revision < 0x0400) {
603 1.1 riastrad *headerlenp = sizeof(*gede_v3);
604 1.1 riastrad } else {
605 1.1 riastrad *headerlenp = 0;
606 1.1 riastrad return;
607 1.1 riastrad }
608 1.1 riastrad
609 1.1 riastrad /*
610 1.1 riastrad * Print the validation bits at debug level. Only really
611 1.1 riastrad * helpful if there are bits we _don't_ know about.
612 1.1 riastrad */
613 1.5 riastrad if (ratelimitok) {
614 1.5 riastrad /* XXX define this format somewhere */
615 1.5 riastrad snprintb(buf, sizeof(buf), "\177\020"
616 1.5 riastrad "b\000" "FRU_ID\0"
617 1.5 riastrad "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
618 1.5 riastrad "b\002" "TIMESTAMP\0"
619 1.5 riastrad "\0", gede->ValidationBits);
620 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
621 1.5 riastrad buf);
622 1.5 riastrad }
623 1.1 riastrad
624 1.1 riastrad /*
625 1.1 riastrad * Print the CPER section flags.
626 1.1 riastrad */
627 1.5 riastrad if (ratelimitok) {
628 1.5 riastrad snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
629 1.5 riastrad gede->Flags);
630 1.5 riastrad device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
631 1.5 riastrad }
632 1.1 riastrad
633 1.1 riastrad /*
634 1.1 riastrad * The ErrorDataLength is unlikely to be useful for the log, so
635 1.1 riastrad * print it at debug level only.
636 1.1 riastrad */
637 1.5 riastrad if (ratelimitok) {
638 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s:"
639 1.5 riastrad " ErrorDataLength=0x%"PRIu32"\n",
640 1.5 riastrad ctx, gede->ErrorDataLength);
641 1.5 riastrad }
642 1.1 riastrad
643 1.1 riastrad /*
644 1.1 riastrad * Print the FRU Id and text, if available.
645 1.1 riastrad */
646 1.5 riastrad if (ratelimitok &&
647 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
648 1.1 riastrad struct uuid fruid;
649 1.1 riastrad
650 1.1 riastrad apei_cper_guid_dec(gede->FruId, &fruid);
651 1.1 riastrad apei_format_guid(&fruid, guidstr);
652 1.1 riastrad device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
653 1.1 riastrad }
654 1.5 riastrad if (ratelimitok &&
655 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
656 1.1 riastrad device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
657 1.1 riastrad ctx, gede->FruText);
658 1.1 riastrad }
659 1.1 riastrad
660 1.1 riastrad /*
661 1.1 riastrad * Print the timestamp, if available by the revision number and
662 1.1 riastrad * the validation bits.
663 1.1 riastrad */
664 1.5 riastrad if (ratelimitok &&
665 1.5 riastrad gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
666 1.1 riastrad gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
667 1.1 riastrad const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
668 1.1 riastrad const uint8_t s = t[0];
669 1.1 riastrad const uint8_t m = t[1];
670 1.1 riastrad const uint8_t h = t[2];
671 1.1 riastrad const uint8_t f = t[3];
672 1.1 riastrad const uint8_t D = t[4];
673 1.1 riastrad const uint8_t M = t[5];
674 1.1 riastrad const uint8_t Y = t[6];
675 1.1 riastrad const uint8_t C = t[7];
676 1.1 riastrad
677 1.1 riastrad device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
678 1.1 riastrad " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
679 1.1 riastrad ctx, gede_v3->TimeStamp,
680 1.1 riastrad C,Y, M, D, h,m,s,
681 1.1 riastrad f & __BIT(0) ? " (event time)" : " (collect time)");
682 1.1 riastrad }
683 1.1 riastrad }
684 1.1 riastrad
685 1.1 riastrad /*
686 1.5 riastrad * apei_gesb_ratelimit
687 1.5 riastrad *
688 1.5 riastrad * State to limit the rate of console log messages about hardware
689 1.5 riastrad * errors. For each of the four severity levels in a Generic
690 1.5 riastrad * Error Status Block,
691 1.5 riastrad *
692 1.5 riastrad * 0 - Recoverable (uncorrectable),
693 1.5 riastrad * 1 - Fatal (uncorrectable),
694 1.5 riastrad * 2 - Corrected, and
695 1.5 riastrad * 3 - None (including ill-formed errors),
696 1.5 riastrad *
697 1.5 riastrad * we record the last time it happened, protected by a CPU simple
698 1.5 riastrad * lock that we only try-acquire so it is safe to use in any
699 1.5 riastrad * context, including non-maskable interrupt context.
700 1.5 riastrad */
701 1.5 riastrad
702 1.5 riastrad static struct {
703 1.5 riastrad __cpu_simple_lock_t lock;
704 1.5 riastrad struct timeval lasttime;
705 1.5 riastrad volatile uint32_t suppressed;
706 1.5 riastrad } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
707 1.5 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
708 1.5 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
709 1.5 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
710 1.5 riastrad [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
711 1.5 riastrad };
712 1.5 riastrad
/*
 * atomic_incsat_32(p)
 *
 *	Saturating atomic increment: add one to *p, unless *p is
 *	already UINT32_MAX, in which case leave it unchanged.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	/*
	 * Compare-and-swap loop: reload and try again whenever *p
	 * changed between the load and the swap.
	 */
	do {
		o = atomic_load_relaxed(p);
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
725 1.5 riastrad
726 1.5 riastrad /*
727 1.5 riastrad * apei_gesb_ratecheck(sc, severity, suppressed)
728 1.5 riastrad *
729 1.5 riastrad * Check for a rate limit on errors of the specified severity.
730 1.5 riastrad *
731 1.5 riastrad * => Return true if the error should be printed, and format into
732 1.5 riastrad * the buffer suppressed a message saying how many errors were
733 1.5 riastrad * previously suppressed.
734 1.5 riastrad *
735 1.5 riastrad * => Return false if the error should be suppressed because the
736 1.5 riastrad * last one printed was too recent.
737 1.5 riastrad */
738 1.5 riastrad static bool
739 1.5 riastrad apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
740 1.5 riastrad char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
741 1.5 riastrad {
742 1.5 riastrad /* one of each type per minute (XXX worth making configurable?) */
743 1.5 riastrad const struct timeval mininterval = {60, 0};
744 1.5 riastrad unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
745 1.5 riastrad bool ok = false;
746 1.5 riastrad
747 1.5 riastrad /*
748 1.5 riastrad * If the lock is contended, the rate limit is probably
749 1.5 riastrad * exceeded, so it's not OK to print.
750 1.5 riastrad *
751 1.5 riastrad * Otherwise, with the lock held, ask ratecheck(9) whether it's
752 1.5 riastrad * OK to print.
753 1.5 riastrad */
754 1.5 riastrad if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
755 1.5 riastrad goto out;
756 1.5 riastrad ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
757 1.5 riastrad __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
758 1.5 riastrad
759 1.5 riastrad out: /*
760 1.5 riastrad * If it's OK to print, report the number of errors that were
761 1.5 riastrad * suppressed. If it's not OK to print, count a suppressed
762 1.5 riastrad * error.
763 1.5 riastrad */
764 1.5 riastrad if (ok) {
765 1.5 riastrad const uint32_t n =
766 1.5 riastrad atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
767 1.5 riastrad
768 1.5 riastrad if (n == 0) {
769 1.5 riastrad suppressed[0] = '\0';
770 1.5 riastrad } else {
771 1.5 riastrad snprintf(suppressed,
772 1.5 riastrad sizeof(" (4294967295 or more errors suppressed)"),
773 1.5 riastrad " (%u%s error%s suppressed)",
774 1.5 riastrad n,
775 1.5 riastrad n == UINT32_MAX ? " or more" : "",
776 1.5 riastrad n == 1 ? "" : "s");
777 1.5 riastrad }
778 1.5 riastrad } else {
779 1.5 riastrad atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
780 1.5 riastrad suppressed[0] = '\0';
781 1.5 riastrad }
782 1.5 riastrad return ok;
783 1.5 riastrad }
784 1.5 riastrad
785 1.5 riastrad /*
786 1.1 riastrad * apei_gesb_report(sc, gesb, size, ctx)
787 1.1 riastrad *
788 1.1 riastrad * Check a Generic Error Status Block, of at most the specified
789 1.1 riastrad * size in bytes, and report any errors in it. Return the 32-bit
790 1.1 riastrad * Block Status in case the caller needs it to acknowledge the
791 1.1 riastrad * report to firmware.
792 1.1 riastrad */
793 1.1 riastrad uint32_t
794 1.1 riastrad apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
795 1.1 riastrad size_t size, const char *ctx, bool *fatalp)
796 1.1 riastrad {
797 1.1 riastrad uint32_t status, unknownstatus, severity, nentries, i;
798 1.1 riastrad uint32_t datalen, rawdatalen;
799 1.1 riastrad const ACPI_HEST_GENERIC_DATA *gede0, *gede;
800 1.1 riastrad const unsigned char *rawdata;
801 1.5 riastrad bool ratelimitok = false;
802 1.5 riastrad char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
803 1.1 riastrad bool fatal = false;
804 1.1 riastrad
805 1.1 riastrad /*
806 1.1 riastrad * Verify the buffer is large enough for a Generic Error Status
807 1.1 riastrad * Block before we try to touch anything in it.
808 1.1 riastrad */
809 1.1 riastrad if (size < sizeof(*gesb)) {
810 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
811 1.5 riastrad suppressed);
812 1.5 riastrad if (ratelimitok) {
813 1.5 riastrad device_printf(sc->sc_dev,
814 1.5 riastrad "%s: truncated GESB, %zu < %zu%s\n",
815 1.5 riastrad ctx, size, sizeof(*gesb), suppressed);
816 1.5 riastrad }
817 1.2 riastrad status = 0;
818 1.2 riastrad goto out;
819 1.1 riastrad }
820 1.1 riastrad size -= sizeof(*gesb);
821 1.1 riastrad
822 1.1 riastrad /*
823 1.1 riastrad * Load the status. Access ordering rules are unclear in the
824 1.1 riastrad * ACPI specification; I'm guessing that load-acquire of the
825 1.1 riastrad * block status is a good idea before any other access to the
826 1.1 riastrad * GESB.
827 1.1 riastrad */
828 1.1 riastrad status = atomic_load_acquire(&gesb->BlockStatus);
829 1.1 riastrad
830 1.1 riastrad /*
831 1.1 riastrad * If there are no status bits set, the rest of the GESB is
832 1.1 riastrad * garbage, so stop here.
833 1.1 riastrad */
834 1.1 riastrad if (status == 0) {
835 1.1 riastrad /* XXX dtrace */
836 1.1 riastrad /* XXX DPRINTF */
837 1.1 riastrad goto out;
838 1.1 riastrad }
839 1.1 riastrad
840 1.5 riastrad /*
841 1.5 riastrad * Read out the severity and get the number of entries in this
842 1.5 riastrad * status block.
843 1.5 riastrad */
844 1.5 riastrad severity = gesb->ErrorSeverity;
845 1.5 riastrad nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
846 1.1 riastrad
847 1.1 riastrad /*
848 1.1 riastrad * Print a message to the console and dmesg about the severity
849 1.1 riastrad * of the error.
850 1.1 riastrad */
851 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
852 1.5 riastrad if (ratelimitok) {
853 1.5 riastrad char statusbuf[128];
854 1.5 riastrad
855 1.5 riastrad /* XXX define this format somewhere */
856 1.5 riastrad snprintb(statusbuf, sizeof(statusbuf), "\177\020"
857 1.5 riastrad "b\000" "UE\0"
858 1.5 riastrad "b\001" "CE\0"
859 1.5 riastrad "b\002" "MULTI_UE\0"
860 1.5 riastrad "b\003" "MULTI_CE\0"
861 1.5 riastrad "f\004\010" "GEDE_COUNT\0"
862 1.5 riastrad "\0", status);
863 1.5 riastrad
864 1.5 riastrad if (severity < __arraycount(apei_gesb_severity)) {
865 1.5 riastrad device_printf(sc->sc_dev, "%s"
866 1.5 riastrad " reported hardware error%s:"
867 1.5 riastrad " severity=%s nentries=%u status=%s\n",
868 1.5 riastrad ctx, suppressed,
869 1.5 riastrad apei_gesb_severity[severity], nentries, statusbuf);
870 1.5 riastrad } else {
871 1.5 riastrad device_printf(sc->sc_dev, "%s reported error%s:"
872 1.5 riastrad " severity=%"PRIu32" nentries=%u status=%s\n",
873 1.5 riastrad ctx, suppressed,
874 1.5 riastrad severity, nentries, statusbuf);
875 1.5 riastrad }
876 1.1 riastrad }
877 1.1 riastrad
878 1.1 riastrad /*
879 1.1 riastrad * Make a determination about whether the error is fatal.
880 1.1 riastrad *
881 1.1 riastrad * XXX Currently we don't have any mechanism to recover from
882 1.1 riastrad * uncorrectable but recoverable errors, so we treat those --
883 1.1 riastrad * and anything else we don't recognize -- as fatal.
884 1.1 riastrad */
885 1.1 riastrad switch (severity) {
886 1.1 riastrad case ACPI_HEST_GEN_ERROR_CORRECTED:
887 1.1 riastrad case ACPI_HEST_GEN_ERROR_NONE:
888 1.1 riastrad fatal = false;
889 1.1 riastrad break;
890 1.1 riastrad case ACPI_HEST_GEN_ERROR_FATAL:
891 1.1 riastrad case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
892 1.1 riastrad default:
893 1.1 riastrad fatal = true;
894 1.1 riastrad break;
895 1.1 riastrad }
896 1.1 riastrad
897 1.1 riastrad /*
898 1.1 riastrad * Clear the bits we know about to warn if there's anything
899 1.1 riastrad * left we don't understand.
900 1.1 riastrad */
901 1.1 riastrad unknownstatus = status;
902 1.1 riastrad unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
903 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
904 1.1 riastrad unknownstatus &= ~ACPI_HEST_CORRECTABLE;
905 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
906 1.1 riastrad unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
907 1.5 riastrad if (ratelimitok && unknownstatus != 0) {
908 1.1 riastrad /* XXX dtrace */
909 1.1 riastrad device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
910 1.1 riastrad " 0x%"PRIx32"\n", ctx, unknownstatus);
911 1.1 riastrad }
912 1.1 riastrad
913 1.1 riastrad /*
914 1.1 riastrad * Advance past the Generic Error Status Block (GESB) header to
915 1.1 riastrad * the Generic Error Data Entries (GEDEs).
916 1.1 riastrad */
917 1.1 riastrad gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
918 1.1 riastrad
919 1.1 riastrad /*
920 1.1 riastrad * Verify that the data length (GEDEs) fits within the size.
921 1.1 riastrad * If not, truncate the GEDEs.
922 1.1 riastrad */
923 1.1 riastrad datalen = gesb->DataLength;
924 1.1 riastrad if (size < datalen) {
925 1.5 riastrad if (ratelimitok) {
926 1.5 riastrad device_printf(sc->sc_dev, "%s:"
927 1.5 riastrad " GESB DataLength exceeds bounds:"
928 1.5 riastrad " %zu < %"PRIu32"\n",
929 1.5 riastrad ctx, size, datalen);
930 1.5 riastrad }
931 1.1 riastrad datalen = size;
932 1.1 riastrad }
933 1.1 riastrad size -= datalen;
934 1.1 riastrad
935 1.1 riastrad /*
936 1.1 riastrad * Report each of the Generic Error Data Entries.
937 1.1 riastrad */
938 1.1 riastrad for (i = 0; i < nentries; i++) {
939 1.1 riastrad size_t headerlen;
940 1.1 riastrad const struct apei_cper_report *report;
941 1.1 riastrad char subctx[128];
942 1.1 riastrad
943 1.1 riastrad /*
944 1.1 riastrad * Format a subcontext to show this numbered entry of
945 1.1 riastrad * the GESB.
946 1.1 riastrad */
947 1.1 riastrad snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
948 1.1 riastrad
949 1.1 riastrad /*
950 1.1 riastrad * If the remaining GESB data length isn't enough for a
951 1.1 riastrad * GEDE header, stop here.
952 1.1 riastrad */
953 1.1 riastrad if (datalen < sizeof(*gede)) {
954 1.5 riastrad if (ratelimitok) {
955 1.5 riastrad device_printf(sc->sc_dev, "%s:"
956 1.5 riastrad " truncated GEDE: %"PRIu32" < %zu bytes\n",
957 1.5 riastrad subctx, datalen, sizeof(*gede));
958 1.5 riastrad }
959 1.1 riastrad break;
960 1.1 riastrad }
961 1.1 riastrad
962 1.1 riastrad /*
963 1.1 riastrad * Print the GEDE header and get the full length (may
964 1.1 riastrad * vary from revision to revision of the GEDE) and the
965 1.1 riastrad * CPER report function if possible.
966 1.1 riastrad */
967 1.5 riastrad apei_gede_report_header(sc, gede, subctx, ratelimitok,
968 1.1 riastrad &headerlen, &report);
969 1.1 riastrad
970 1.1 riastrad /*
971 1.1 riastrad * If we don't know the header length because of an
972 1.1 riastrad * unfamiliar revision, stop here.
973 1.1 riastrad */
974 1.1 riastrad if (headerlen == 0) {
975 1.5 riastrad if (ratelimitok) {
976 1.5 riastrad device_printf(sc->sc_dev, "%s:"
977 1.5 riastrad " unknown revision: 0x%"PRIx16"\n",
978 1.5 riastrad subctx, gede->Revision);
979 1.5 riastrad }
980 1.1 riastrad break;
981 1.1 riastrad }
982 1.1 riastrad
983 1.1 riastrad /*
984 1.1 riastrad * Stop here if what we mapped is too small for the
985 1.1 riastrad * error data length.
986 1.1 riastrad */
987 1.1 riastrad datalen -= headerlen;
988 1.1 riastrad if (datalen < gede->ErrorDataLength) {
989 1.5 riastrad if (ratelimitok) {
990 1.5 riastrad device_printf(sc->sc_dev, "%s:"
991 1.5 riastrad " truncated GEDE payload:"
992 1.5 riastrad " %"PRIu32" < %"PRIu32" bytes\n",
993 1.5 riastrad subctx, datalen, gede->ErrorDataLength);
994 1.5 riastrad }
995 1.1 riastrad break;
996 1.1 riastrad }
997 1.1 riastrad
998 1.1 riastrad /*
999 1.1 riastrad * Report the Common Platform Error Record appendix to
1000 1.1 riastrad * this Generic Error Data Entry.
1001 1.1 riastrad */
1002 1.1 riastrad if (report == NULL) {
1003 1.5 riastrad if (ratelimitok) {
1004 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1005 1.5 riastrad " [unknown type]\n", ctx);
1006 1.5 riastrad }
1007 1.1 riastrad } else {
1008 1.5 riastrad /* XXX pass ratelimit through */
1009 1.1 riastrad (*report->func)(sc, (const char *)gede + headerlen,
1010 1.5 riastrad gede->ErrorDataLength, subctx, ratelimitok);
1011 1.1 riastrad }
1012 1.1 riastrad
1013 1.1 riastrad /*
1014 1.1 riastrad * Advance past the GEDE header and CPER data to the
1015 1.1 riastrad * next GEDE.
1016 1.1 riastrad */
1017 1.1 riastrad gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1018 1.1 riastrad + headerlen + gede->ErrorDataLength);
1019 1.1 riastrad }
1020 1.1 riastrad
1021 1.1 riastrad /*
1022 1.1 riastrad * Advance past the Generic Error Data Entries (GEDEs) to the
1023 1.1 riastrad * raw error data.
1024 1.1 riastrad *
1025 1.1 riastrad * XXX Provide Max Raw Data Length as a parameter, as found in
1026 1.1 riastrad * various HEST entry types.
1027 1.1 riastrad */
1028 1.1 riastrad rawdata = (const unsigned char *)gede0 + datalen;
1029 1.1 riastrad
1030 1.1 riastrad /*
1031 1.1 riastrad * Verify that the raw data length fits within the size. If
1032 1.1 riastrad * not, truncate the raw data.
1033 1.1 riastrad */
1034 1.1 riastrad rawdatalen = gesb->RawDataLength;
1035 1.1 riastrad if (size < rawdatalen) {
1036 1.5 riastrad if (ratelimitok) {
1037 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1038 1.5 riastrad " GESB RawDataLength exceeds bounds:"
1039 1.5 riastrad " %zu < %"PRIu32"\n",
1040 1.5 riastrad ctx, size, rawdatalen);
1041 1.5 riastrad }
1042 1.1 riastrad rawdatalen = size;
1043 1.1 riastrad }
1044 1.1 riastrad size -= rawdatalen;
1045 1.1 riastrad
1046 1.1 riastrad /*
1047 1.1 riastrad * Hexdump the raw data, if any.
1048 1.1 riastrad */
1049 1.5 riastrad if (ratelimitok && rawdatalen > 0) {
1050 1.1 riastrad char devctx[128];
1051 1.1 riastrad
1052 1.1 riastrad snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1053 1.1 riastrad device_xname(sc->sc_dev), ctx);
1054 1.1 riastrad hexdump(printf, devctx, rawdata, rawdatalen);
1055 1.1 riastrad }
1056 1.1 riastrad
1057 1.1 riastrad /*
1058 1.1 riastrad * If there's anything left after the raw data, warn.
1059 1.1 riastrad */
1060 1.5 riastrad if (ratelimitok && size > 0) {
1061 1.1 riastrad device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1062 1.1 riastrad ctx, size);
1063 1.1 riastrad }
1064 1.1 riastrad
1065 1.1 riastrad /*
1066 1.1 riastrad * Return the status so the caller can ack it, and tell the
1067 1.1 riastrad * caller whether this error is fatal.
1068 1.1 riastrad */
1069 1.1 riastrad out: *fatalp = fatal;
1070 1.1 riastrad return status;
1071 1.1 riastrad }
1072 1.1 riastrad
1073 1.1 riastrad MODULE(MODULE_CLASS_DRIVER, apei, NULL);
1074 1.1 riastrad
1075 1.1 riastrad #ifdef _MODULE
1076 1.1 riastrad #include "ioconf.c"
1077 1.1 riastrad #endif
1078 1.1 riastrad
1079 1.1 riastrad static int
1080 1.1 riastrad apei_modcmd(modcmd_t cmd, void *opaque)
1081 1.1 riastrad {
1082 1.1 riastrad int error = 0;
1083 1.1 riastrad
1084 1.1 riastrad switch (cmd) {
1085 1.1 riastrad case MODULE_CMD_INIT:
1086 1.1 riastrad #ifdef _MODULE
1087 1.1 riastrad error = config_init_component(cfdriver_ioconf_apei,
1088 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1089 1.1 riastrad #endif
1090 1.1 riastrad return error;
1091 1.1 riastrad case MODULE_CMD_FINI:
1092 1.1 riastrad #ifdef _MODULE
1093 1.1 riastrad error = config_fini_component(cfdriver_ioconf_apei,
1094 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1095 1.1 riastrad #endif
1096 1.1 riastrad return error;
1097 1.1 riastrad default:
1098 1.1 riastrad return ENOTTY;
1099 1.1 riastrad }
1100 1.1 riastrad }
1101