apei.c revision 1.8 1 1.8 riastrad /* $NetBSD: apei.c,v 1.8 2024/10/27 17:27:11 riastradh Exp $ */
2 1.1 riastrad
3 1.1 riastrad /*-
4 1.1 riastrad * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 1.1 riastrad * All rights reserved.
6 1.1 riastrad *
7 1.1 riastrad * Redistribution and use in source and binary forms, with or without
8 1.1 riastrad * modification, are permitted provided that the following conditions
9 1.1 riastrad * are met:
10 1.1 riastrad * 1. Redistributions of source code must retain the above copyright
11 1.1 riastrad * notice, this list of conditions and the following disclaimer.
12 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 riastrad * notice, this list of conditions and the following disclaimer in the
14 1.1 riastrad * documentation and/or other materials provided with the distribution.
15 1.1 riastrad *
16 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE.
27 1.1 riastrad */
28 1.1 riastrad
29 1.1 riastrad /*
30 1.1 riastrad * APEI: ACPI Platform Error Interface
31 1.1 riastrad *
32 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 1.1 riastrad *
34 1.1 riastrad * XXX dtrace probes
35 1.1 riastrad *
36 1.1 riastrad * XXX call _OSC appropriately to announce to the platform that we, the
37 1.1 riastrad * OSPM, support APEI
38 1.1 riastrad */
39 1.1 riastrad
40 1.1 riastrad #include <sys/cdefs.h>
41 1.8 riastrad __KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.8 2024/10/27 17:27:11 riastradh Exp $");
42 1.1 riastrad
43 1.1 riastrad #include <sys/param.h>
44 1.1 riastrad #include <sys/types.h>
45 1.1 riastrad
46 1.1 riastrad #include <sys/atomic.h>
47 1.7 riastrad #include <sys/endian.h>
48 1.1 riastrad #include <sys/device.h>
49 1.1 riastrad #include <sys/module.h>
50 1.1 riastrad #include <sys/sysctl.h>
51 1.1 riastrad #include <sys/uuid.h>
52 1.1 riastrad
53 1.1 riastrad #include <dev/acpi/acpireg.h>
54 1.1 riastrad #include <dev/acpi/acpivar.h>
55 1.1 riastrad #include <dev/acpi/apei_bertvar.h>
56 1.1 riastrad #include <dev/acpi/apei_cper.h>
57 1.1 riastrad #include <dev/acpi/apei_einjvar.h>
58 1.1 riastrad #include <dev/acpi/apei_erstvar.h>
59 1.1 riastrad #include <dev/acpi/apei_hestvar.h>
60 1.1 riastrad #include <dev/acpi/apei_interp.h>
61 1.1 riastrad #include <dev/acpi/apeivar.h>
62 1.7 riastrad #include <dev/pci/pcireg.h>
63 1.1 riastrad
64 1.1 riastrad #define _COMPONENT ACPI_RESOURCE_COMPONENT
65 1.1 riastrad ACPI_MODULE_NAME ("apei")
66 1.1 riastrad
67 1.1 riastrad static int apei_match(device_t, cfdata_t, void *);
68 1.1 riastrad static void apei_attach(device_t, device_t, void *);
69 1.1 riastrad static int apei_detach(device_t, int);
70 1.1 riastrad
71 1.1 riastrad static void apei_get_tables(struct apei_tab *);
72 1.1 riastrad static void apei_put_tables(struct apei_tab *);
73 1.1 riastrad
74 1.1 riastrad static void apei_identify(struct apei_softc *, const char *,
75 1.1 riastrad const ACPI_TABLE_HEADER *);
76 1.1 riastrad
77 1.1 riastrad CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
78 1.1 riastrad apei_match, apei_attach, apei_detach, NULL);
79 1.1 riastrad
80 1.1 riastrad static int
81 1.1 riastrad apei_match(device_t parent, cfdata_t match, void *aux)
82 1.1 riastrad {
83 1.1 riastrad struct apei_tab tab;
84 1.1 riastrad int prio = 0;
85 1.1 riastrad
86 1.1 riastrad /*
87 1.1 riastrad * If we have any of the APEI tables, match.
88 1.1 riastrad */
89 1.1 riastrad apei_get_tables(&tab);
90 1.1 riastrad if (tab.bert || tab.einj || tab.erst || tab.hest)
91 1.1 riastrad prio = 1;
92 1.1 riastrad apei_put_tables(&tab);
93 1.1 riastrad
94 1.1 riastrad return prio;
95 1.1 riastrad }
96 1.1 riastrad
97 1.1 riastrad static void
98 1.1 riastrad apei_attach(device_t parent, device_t self, void *aux)
99 1.1 riastrad {
100 1.1 riastrad struct apei_softc *sc = device_private(self);
101 1.1 riastrad const struct sysctlnode *sysctl_hw_acpi;
102 1.1 riastrad int error;
103 1.1 riastrad
104 1.1 riastrad aprint_naive("\n");
105 1.1 riastrad aprint_normal(": ACPI Platform Error Interface\n");
106 1.1 riastrad
107 1.1 riastrad pmf_device_register(self, NULL, NULL);
108 1.1 riastrad
109 1.1 riastrad sc->sc_dev = self;
110 1.1 riastrad apei_get_tables(&sc->sc_tab);
111 1.1 riastrad
112 1.1 riastrad /*
113 1.1 riastrad * Get the sysctl hw.acpi node. This should already be created
114 1.1 riastrad * but I don't see an easy way to get at it. If this fails,
115 1.1 riastrad * something is seriously wrong, so let's stop here.
116 1.1 riastrad */
117 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
118 1.1 riastrad NULL, &sysctl_hw_acpi, 0,
119 1.1 riastrad CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
120 1.1 riastrad CTL_HW, CTL_CREATE, CTL_EOL);
121 1.1 riastrad if (error) {
122 1.1 riastrad aprint_error_dev(sc->sc_dev,
123 1.1 riastrad "failed to create sysctl hw.acpi: %d\n", error);
124 1.1 riastrad return;
125 1.1 riastrad }
126 1.1 riastrad
127 1.1 riastrad /*
128 1.1 riastrad * Create sysctl hw.acpi.apei.
129 1.1 riastrad */
130 1.1 riastrad error = sysctl_createv(&sc->sc_sysctllog, 0,
131 1.1 riastrad &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
132 1.1 riastrad CTLTYPE_NODE, "apei",
133 1.1 riastrad SYSCTL_DESCR("ACPI Platform Error Interface"),
134 1.1 riastrad NULL, 0, NULL, 0,
135 1.1 riastrad CTL_CREATE, CTL_EOL);
136 1.1 riastrad if (error) {
137 1.1 riastrad aprint_error_dev(sc->sc_dev,
138 1.1 riastrad "failed to create sysctl hw.acpi.apei: %d\n", error);
139 1.1 riastrad return;
140 1.1 riastrad }
141 1.1 riastrad
142 1.1 riastrad /*
143 1.1 riastrad * Set up BERT, EINJ, ERST, and HEST.
144 1.1 riastrad */
145 1.1 riastrad if (sc->sc_tab.bert) {
146 1.1 riastrad apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
147 1.1 riastrad apei_bert_attach(sc);
148 1.1 riastrad }
149 1.1 riastrad if (sc->sc_tab.einj) {
150 1.1 riastrad apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
151 1.1 riastrad apei_einj_attach(sc);
152 1.1 riastrad }
153 1.1 riastrad if (sc->sc_tab.erst) {
154 1.1 riastrad apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
155 1.1 riastrad apei_erst_attach(sc);
156 1.1 riastrad }
157 1.1 riastrad if (sc->sc_tab.hest) {
158 1.1 riastrad apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
159 1.1 riastrad apei_hest_attach(sc);
160 1.1 riastrad }
161 1.1 riastrad }
162 1.1 riastrad
163 1.1 riastrad static int
164 1.1 riastrad apei_detach(device_t self, int flags)
165 1.1 riastrad {
166 1.1 riastrad struct apei_softc *sc = device_private(self);
167 1.1 riastrad int error;
168 1.1 riastrad
169 1.1 riastrad /*
170 1.1 riastrad * Detach children. We don't currently have any but this is
171 1.1 riastrad * harmless without children and mandatory if we ever sprouted
172 1.1 riastrad * them, so let's just leave it here for good measure.
173 1.1 riastrad *
174 1.1 riastrad * After this point, we are committed to detaching; failure is
175 1.1 riastrad * forbidden.
176 1.1 riastrad */
177 1.1 riastrad error = config_detach_children(self, flags);
178 1.1 riastrad if (error)
179 1.1 riastrad return error;
180 1.1 riastrad
181 1.1 riastrad /*
182 1.1 riastrad * Tear down all the sysctl nodes first, before the software
183 1.1 riastrad * state backing them goes away.
184 1.1 riastrad */
185 1.1 riastrad sysctl_teardown(&sc->sc_sysctllog);
186 1.1 riastrad sc->sc_sysctlroot = NULL;
187 1.1 riastrad
188 1.1 riastrad /*
189 1.1 riastrad * Detach the software state for the APEI tables.
190 1.1 riastrad */
191 1.1 riastrad if (sc->sc_tab.hest)
192 1.1 riastrad apei_hest_detach(sc);
193 1.1 riastrad if (sc->sc_tab.erst)
194 1.1 riastrad apei_erst_detach(sc);
195 1.1 riastrad if (sc->sc_tab.einj)
196 1.1 riastrad apei_einj_detach(sc);
197 1.1 riastrad if (sc->sc_tab.bert)
198 1.1 riastrad apei_bert_detach(sc);
199 1.1 riastrad
200 1.1 riastrad /*
201 1.1 riastrad * Release the APEI tables and we're done.
202 1.1 riastrad */
203 1.1 riastrad apei_put_tables(&sc->sc_tab);
204 1.1 riastrad pmf_device_deregister(self);
205 1.1 riastrad return 0;
206 1.1 riastrad }
207 1.1 riastrad
208 1.1 riastrad /*
209 1.1 riastrad * apei_get_tables(tab)
210 1.1 riastrad *
211 1.1 riastrad * Get references to whichever APEI-related tables -- BERT, EINJ,
212 1.1 riastrad * ERST, HEST -- are available in the system.
213 1.1 riastrad */
214 1.1 riastrad static void
215 1.1 riastrad apei_get_tables(struct apei_tab *tab)
216 1.1 riastrad {
217 1.1 riastrad ACPI_STATUS rv;
218 1.1 riastrad
219 1.1 riastrad /*
220 1.1 riastrad * Probe the BERT -- Boot Error Record Table.
221 1.1 riastrad */
222 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
223 1.1 riastrad if (ACPI_FAILURE(rv))
224 1.1 riastrad tab->bert = NULL;
225 1.1 riastrad
226 1.1 riastrad /*
227 1.1 riastrad * Probe the EINJ -- Error Injection Table.
228 1.1 riastrad */
229 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
230 1.1 riastrad if (ACPI_FAILURE(rv))
231 1.1 riastrad tab->einj = NULL;
232 1.1 riastrad
233 1.1 riastrad /*
234 1.1 riastrad * Probe the ERST -- Error Record Serialization Table.
235 1.1 riastrad */
236 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
237 1.1 riastrad if (ACPI_FAILURE(rv))
238 1.1 riastrad tab->erst = NULL;
239 1.1 riastrad
240 1.1 riastrad /*
241 1.1 riastrad * Probe the HEST -- Hardware Error Source Table.
242 1.1 riastrad */
243 1.1 riastrad rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
244 1.1 riastrad if (ACPI_FAILURE(rv))
245 1.1 riastrad tab->hest = NULL;
246 1.1 riastrad }
247 1.1 riastrad
248 1.1 riastrad /*
249 1.1 riastrad * apei_put_tables(tab)
250 1.1 riastrad *
251 1.1 riastrad * Release the tables acquired by apei_get_tables.
252 1.1 riastrad */
253 1.1 riastrad static void
254 1.1 riastrad apei_put_tables(struct apei_tab *tab)
255 1.1 riastrad {
256 1.1 riastrad
257 1.1 riastrad if (tab->bert != NULL) {
258 1.1 riastrad AcpiPutTable(&tab->bert->Header);
259 1.1 riastrad tab->bert = NULL;
260 1.1 riastrad }
261 1.1 riastrad if (tab->einj != NULL) {
262 1.1 riastrad AcpiPutTable(&tab->einj->Header);
263 1.1 riastrad tab->einj = NULL;
264 1.1 riastrad }
265 1.1 riastrad if (tab->erst != NULL) {
266 1.1 riastrad AcpiPutTable(&tab->erst->Header);
267 1.1 riastrad tab->erst = NULL;
268 1.1 riastrad }
269 1.1 riastrad if (tab->hest != NULL) {
270 1.1 riastrad AcpiPutTable(&tab->hest->Header);
271 1.1 riastrad tab->hest = NULL;
272 1.1 riastrad }
273 1.1 riastrad }
274 1.1 riastrad
275 1.1 riastrad /*
276 1.1 riastrad * apei_identify(sc, name, header)
277 1.1 riastrad *
278 1.1 riastrad * Identify the APEI-related table header for dmesg.
279 1.1 riastrad */
280 1.1 riastrad static void
281 1.1 riastrad apei_identify(struct apei_softc *sc, const char *name,
282 1.1 riastrad const ACPI_TABLE_HEADER *h)
283 1.1 riastrad {
284 1.1 riastrad
285 1.1 riastrad aprint_normal_dev(sc->sc_dev, "%s:"
286 1.1 riastrad " OemId <%6.6s,%8.8s,%08x>"
287 1.1 riastrad " AslId <%4.4s,%08x>\n",
288 1.1 riastrad name,
289 1.1 riastrad h->OemId, h->OemTableId, h->OemRevision,
290 1.1 riastrad h->AslCompilerId, h->AslCompilerRevision);
291 1.1 riastrad }
292 1.1 riastrad
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record UUID/GUID
 *	found at buf in an ACPI table into the sys/uuid.h struct uuid
 *	at uuid, using uuid_dec_le.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
305 1.1 riastrad
306 1.1 riastrad /*
307 1.1 riastrad * apei_format_guid(uuid, s)
308 1.1 riastrad *
309 1.1 riastrad * Format a UUID as a string. This uses C initializer notation,
310 1.3 rillig * not UUID notation, in order to match the text in the UEFI
311 1.1 riastrad * specification.
312 1.1 riastrad */
313 1.1 riastrad static void
314 1.1 riastrad apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
315 1.1 riastrad {
316 1.1 riastrad
317 1.1 riastrad snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
318 1.4 riastrad "{0x%02x,%02x,"
319 1.4 riastrad "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
320 1.1 riastrad uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
321 1.4 riastrad uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
322 1.1 riastrad uuid->node[0], uuid->node[1], uuid->node[2],
323 1.1 riastrad uuid->node[3], uuid->node[4], uuid->node[5]);
324 1.1 riastrad }
325 1.1 riastrad
326 1.1 riastrad /*
327 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
328 1.1 riastrad */
329 1.1 riastrad
330 1.1 riastrad static const char *const cper_memory_error_type[] = {
331 1.1 riastrad #define F(LN, SN, V) [LN] = #SN,
332 1.1 riastrad CPER_MEMORY_ERROR_TYPES(F)
333 1.1 riastrad #undef F
334 1.1 riastrad };
335 1.1 riastrad
/*
 * Severity names for a Generic Error Status Block, indexed by the
 * numeric severity 0..3.
 *
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica ACPI_HEST_GEN_ERROR_* constants seem to have the same
 * values, but acpica designates them for Generic Error Data Entries
 * rather than Generic Error Status Blocks, so plain numbers are used
 * here.
 */
static const char *const apei_gesb_severity[] = {
	"recoverable",		/* 0 */
	"fatal",		/* 1 */
	"corrected",		/* 2 */
	"none",			/* 3 */
};
349 1.1 riastrad
350 1.1 riastrad /*
351 1.1 riastrad * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
352 1.1 riastrad */
353 1.1 riastrad static const char *const apei_gede_severity[] = {
354 1.1 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
355 1.1 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
356 1.1 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
357 1.1 riastrad [ACPI_HEST_GEN_ERROR_NONE] = "none",
358 1.1 riastrad };
359 1.1 riastrad
360 1.1 riastrad /*
361 1.6 riastrad * N.2.5. Memory Error Section
362 1.6 riastrad *
363 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
364 1.1 riastrad */
365 1.1 riastrad static const struct uuid CPER_MEMORY_ERROR_SECTION =
366 1.1 riastrad {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
367 1.1 riastrad
368 1.1 riastrad static void
369 1.1 riastrad apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
370 1.5 riastrad size_t len, const char *ctx, bool ratelimitok)
371 1.1 riastrad {
372 1.1 riastrad const struct cper_memory_error *ME = buf;
373 1.1 riastrad char bitbuf[1024];
374 1.1 riastrad
375 1.5 riastrad /*
376 1.5 riastrad * If we've hit the rate limit, skip printing the error.
377 1.5 riastrad */
378 1.5 riastrad if (!ratelimitok)
379 1.5 riastrad goto out;
380 1.5 riastrad
381 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf),
382 1.1 riastrad CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
383 1.1 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
384 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
385 1.1 riastrad /*
386 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
387 1.1 riastrad */
388 1.1 riastrad /* XXX define this format somewhere */
389 1.1 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
390 1.1 riastrad "f\010\010" "ErrorType\0"
391 1.1 riastrad "=\001" "ERR_INTERNAL\0"
392 1.1 riastrad "=\004" "ERR_MEM\0"
393 1.1 riastrad "=\005" "ERR_TLB\0"
394 1.1 riastrad "=\006" "ERR_CACHE\0"
395 1.1 riastrad "=\007" "ERR_FUNCTION\0"
396 1.1 riastrad "=\010" "ERR_SELFTEST\0"
397 1.1 riastrad "=\011" "ERR_FLOW\0"
398 1.1 riastrad "=\020" "ERR_BUS\0"
399 1.1 riastrad "=\021" "ERR_MAP\0"
400 1.1 riastrad "=\022" "ERR_IMPROPER\0"
401 1.1 riastrad "=\023" "ERR_UNIMPL\0"
402 1.1 riastrad "=\024" "ERR_LOL\0"
403 1.1 riastrad "=\025" "ERR_RESPONSE\0"
404 1.1 riastrad "=\026" "ERR_PARITY\0"
405 1.1 riastrad "=\027" "ERR_PROTOCOL\0"
406 1.1 riastrad "=\030" "ERR_ERROR\0"
407 1.1 riastrad "=\031" "ERR_TIMEOUT\0"
408 1.1 riastrad "=\032" "ERR_POISONED\0"
409 1.1 riastrad "b\020" "AddressError\0"
410 1.1 riastrad "b\021" "ControlError\0"
411 1.1 riastrad "b\022" "DataError\0"
412 1.1 riastrad "b\023" "ResponderDetected\0"
413 1.1 riastrad "b\024" "RequesterDetected\0"
414 1.1 riastrad "b\025" "FirstError\0"
415 1.1 riastrad "b\026" "Overflow\0"
416 1.1 riastrad "\0", ME->ErrorStatus);
417 1.1 riastrad device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
418 1.1 riastrad }
419 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
420 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
421 1.1 riastrad ctx, ME->PhysicalAddress);
422 1.1 riastrad }
423 1.1 riastrad if (ME->ValidationBits &
424 1.1 riastrad CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
425 1.1 riastrad device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
426 1.1 riastrad "\n", ctx, ME->PhysicalAddressMask);
427 1.1 riastrad }
428 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
429 1.1 riastrad device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
430 1.1 riastrad ME->Node);
431 1.1 riastrad }
432 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
433 1.1 riastrad device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
434 1.1 riastrad ME->Card);
435 1.1 riastrad }
436 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
437 1.1 riastrad device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
438 1.1 riastrad ME->Module);
439 1.1 riastrad }
440 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
441 1.1 riastrad device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
442 1.1 riastrad ME->Bank);
443 1.1 riastrad }
444 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
445 1.1 riastrad device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
446 1.1 riastrad ME->Device);
447 1.1 riastrad }
448 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
449 1.1 riastrad device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
450 1.1 riastrad ME->Row);
451 1.1 riastrad }
452 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
453 1.1 riastrad device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
454 1.1 riastrad ME->Column);
455 1.1 riastrad }
456 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
457 1.1 riastrad device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
458 1.1 riastrad ctx, ME->BitPosition);
459 1.1 riastrad }
460 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
461 1.1 riastrad device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
462 1.1 riastrad ctx, ME->RequestorId);
463 1.1 riastrad }
464 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
465 1.1 riastrad device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
466 1.1 riastrad ctx, ME->ResponderId);
467 1.1 riastrad }
468 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
469 1.1 riastrad device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
470 1.1 riastrad ctx, ME->TargetId);
471 1.1 riastrad }
472 1.1 riastrad if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
473 1.1 riastrad const uint8_t t = ME->MemoryErrorType;
474 1.1 riastrad const char *n = t < __arraycount(cper_memory_error_type)
475 1.1 riastrad ? cper_memory_error_type[t] : NULL;
476 1.1 riastrad
477 1.1 riastrad if (n) {
478 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
479 1.1 riastrad " (%s)\n", ctx, t, n);
480 1.1 riastrad } else {
481 1.1 riastrad device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
482 1.1 riastrad ctx, t);
483 1.1 riastrad }
484 1.1 riastrad }
485 1.5 riastrad
486 1.5 riastrad out: /*
487 1.5 riastrad * XXX pass this through to uvm(9) or userland for decisions
488 1.5 riastrad * like page retirement
489 1.5 riastrad */
490 1.5 riastrad return;
491 1.1 riastrad }
492 1.1 riastrad
493 1.1 riastrad /*
494 1.7 riastrad * N.2.7. PCI Express Error Section
495 1.7 riastrad *
496 1.7 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
497 1.7 riastrad */
498 1.7 riastrad static const struct uuid CPER_PCIE_ERROR_SECTION =
499 1.7 riastrad {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
500 1.7 riastrad
501 1.7 riastrad static const char *const cper_pcie_error_port_type[] = {
502 1.7 riastrad #define F(LN, SN, V) [LN] = #SN,
503 1.7 riastrad CPER_PCIE_ERROR_PORT_TYPES(F)
504 1.7 riastrad #undef F
505 1.7 riastrad };
506 1.7 riastrad
507 1.7 riastrad static void
508 1.7 riastrad apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
509 1.7 riastrad const char *ctx, bool ratelimitok)
510 1.7 riastrad {
511 1.7 riastrad const struct cper_pcie_error *PE = buf;
512 1.7 riastrad char bitbuf[1024];
513 1.7 riastrad
514 1.7 riastrad /*
515 1.7 riastrad * If we've hit the rate limit, skip printing the error.
516 1.7 riastrad */
517 1.7 riastrad if (!ratelimitok)
518 1.7 riastrad goto out;
519 1.7 riastrad
520 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf),
521 1.7 riastrad CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
522 1.7 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
523 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
524 1.7 riastrad const uint32_t t = PE->PortType;
525 1.7 riastrad const char *n = t < __arraycount(cper_pcie_error_port_type)
526 1.7 riastrad ? cper_pcie_error_port_type[t] : NULL;
527 1.7 riastrad
528 1.7 riastrad if (n) {
529 1.7 riastrad device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
530 1.7 riastrad " (%s)\n", ctx, t, n);
531 1.7 riastrad } else {
532 1.7 riastrad device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
533 1.7 riastrad ctx, t);
534 1.7 riastrad }
535 1.7 riastrad }
536 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
537 1.7 riastrad /* XXX BCD */
538 1.7 riastrad device_printf(sc->sc_dev, "%s: Version=0x08%"PRIx32"\n",
539 1.7 riastrad ctx, PE->Version);
540 1.7 riastrad }
541 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
542 1.7 riastrad /* XXX move me to pcireg.h */
543 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
544 1.7 riastrad /* command */
545 1.7 riastrad "b\000" "IO_ENABLE\0"
546 1.7 riastrad "b\001" "MEM_ENABLE\0"
547 1.7 riastrad "b\002" "MASTER_ENABLE\0"
548 1.7 riastrad "b\003" "SPECIAL_ENABLE\0"
549 1.7 riastrad "b\004" "INVALIDATE_ENABLE\0"
550 1.7 riastrad "b\005" "PALETTE_ENABLE\0"
551 1.7 riastrad "b\006" "PARITY_ENABLE\0"
552 1.7 riastrad "b\007" "STEPPING_ENABLE\0"
553 1.7 riastrad "b\010" "SERR_ENABLE\0"
554 1.7 riastrad "b\011" "BACKTOBACK_ENABLE\0"
555 1.7 riastrad "b\012" "INTERRUPT_DISABLE\0"
556 1.7 riastrad /* status */
557 1.7 riastrad "b\023" "INT_STATUS\0"
558 1.7 riastrad "b\024" "CAPLIST_SUPPORT\0"
559 1.7 riastrad "b\025" "66MHZ_SUPPORT\0"
560 1.7 riastrad "b\026" "UDF_SUPPORT\0"
561 1.7 riastrad "b\027" "BACKTOBACK_SUPPORT\0"
562 1.7 riastrad "b\030" "PARITY_ERROR\0"
563 1.7 riastrad "f\031\002" "DEVSEL\0"
564 1.7 riastrad "=\000" "FAST\0"
565 1.7 riastrad "=\001" "MEDIUM\0"
566 1.7 riastrad "=\002" "SLOW\0"
567 1.7 riastrad "b\033" "TARGET_TARGET_ABORT\0"
568 1.7 riastrad "b\034" "MASTER_TARGET_ABORT\0"
569 1.7 riastrad "b\035" "MASTER_ABORT\0"
570 1.7 riastrad "b\036" "SPECIAL_ERROR\0"
571 1.7 riastrad "b\037" "PARITY_DETECT\0"
572 1.7 riastrad "\0", PE->CommandStatus);
573 1.7 riastrad device_printf(sc->sc_dev, "%s: CommandStatus=%s\n",
574 1.7 riastrad ctx, bitbuf);
575 1.7 riastrad }
576 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
577 1.7 riastrad device_printf(sc->sc_dev, "%s: DeviceID:"
578 1.7 riastrad " VendorID=0x%04"PRIx16
579 1.7 riastrad " DeviceID=0x%04"PRIx16
580 1.7 riastrad " ClassCode=0x%06"PRIx32
581 1.7 riastrad " Function=%"PRIu8
582 1.7 riastrad " Device=%"PRIu8
583 1.7 riastrad " Segment=%"PRIu16
584 1.7 riastrad " Bus=%"PRIu8
585 1.7 riastrad " SecondaryBus=%"PRIu8
586 1.7 riastrad " Slot=0x%04"PRIx16
587 1.7 riastrad " Reserved0=0x%02"PRIx8
588 1.7 riastrad "\n",
589 1.7 riastrad ctx,
590 1.7 riastrad le16dec(PE->DeviceID.VendorID),
591 1.7 riastrad le16dec(PE->DeviceID.DeviceID),
592 1.7 riastrad (PE->DeviceID.ClassCode[0] | /* le24dec */
593 1.7 riastrad ((uint32_t)PE->DeviceID.ClassCode[1] << 8) |
594 1.7 riastrad ((uint32_t)PE->DeviceID.ClassCode[2] << 16)),
595 1.7 riastrad PE->DeviceID.Function, PE->DeviceID.Device,
596 1.7 riastrad le16dec(PE->DeviceID.Segment), PE->DeviceID.Bus,
597 1.7 riastrad PE->DeviceID.SecondaryBus, le16dec(PE->DeviceID.Slot),
598 1.7 riastrad PE->DeviceID.Reserved0);
599 1.7 riastrad }
600 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
601 1.7 riastrad device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
602 1.7 riastrad ctx, PE->DeviceSerial);
603 1.7 riastrad }
604 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
605 1.7 riastrad /* XXX snprintb */
606 1.7 riastrad device_printf(sc->sc_dev, "%s: BridgeControlStatus=%"PRIx32
607 1.7 riastrad "\n", ctx, PE->BridgeControlStatus);
608 1.7 riastrad }
609 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
610 1.7 riastrad uint32_t dcsr, dsr;
611 1.8 riastrad char hex[9*sizeof(PE->CapabilityStructure)/4];
612 1.7 riastrad unsigned i;
613 1.7 riastrad
614 1.8 riastrad /*
615 1.8 riastrad * Display a hex dump of each 32-bit register in the
616 1.8 riastrad * PCIe capability structure.
617 1.8 riastrad */
618 1.8 riastrad __CTASSERT(sizeof(PE->CapabilityStructure) % 4 == 0);
619 1.8 riastrad for (i = 0; i < sizeof(PE->CapabilityStructure)/4; i++) {
620 1.8 riastrad snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
621 1.8 riastrad le32dec(&PE->CapabilityStructure[4*i]));
622 1.7 riastrad }
623 1.8 riastrad hex[sizeof(hex) - 1] = '\0';
624 1.7 riastrad device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
625 1.7 riastrad ctx, hex);
626 1.7 riastrad
627 1.8 riastrad /*
628 1.8 riastrad * If the Device Status Register has any bits set,
629 1.8 riastrad * highlight it in particular -- these are probably
630 1.8 riastrad * error bits.
631 1.8 riastrad */
632 1.7 riastrad dcsr = le32dec(&PE->CapabilityStructure[PCIE_DCSR]);
633 1.7 riastrad dsr = __SHIFTOUT(dcsr, __BITS(31,16));
634 1.7 riastrad if (dsr != 0) {
635 1.7 riastrad /*
636 1.7 riastrad * XXX move me to pcireg.h; note: high
637 1.7 riastrad * half of DCSR
638 1.7 riastrad */
639 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
640 1.7 riastrad "b\000" "CORRECTABLE_ERROR\0"
641 1.7 riastrad "b\001" "NONFATAL_UNCORRECTABLE_ERROR\0"
642 1.7 riastrad "b\002" "FATAL_ERROR\0"
643 1.7 riastrad "b\003" "UNSUPPORTED_REQUEST\0"
644 1.7 riastrad "b\004" "AUX_POWER\0"
645 1.7 riastrad "b\005" "TRANSACTIONS_PENDING\0"
646 1.7 riastrad "\0", dsr);
647 1.7 riastrad device_printf(sc->sc_dev, "%s: PCIe Device Status:"
648 1.7 riastrad " %s\n",
649 1.7 riastrad ctx, bitbuf);
650 1.7 riastrad }
651 1.7 riastrad }
652 1.7 riastrad if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
653 1.7 riastrad uint32_t uc_status, uc_sev;
654 1.7 riastrad uint32_t cor_status;
655 1.7 riastrad uint32_t control;
656 1.8 riastrad char hex[9*sizeof(PE->AERInfo)/4];
657 1.7 riastrad unsigned i;
658 1.7 riastrad
659 1.8 riastrad /*
660 1.8 riastrad * Display a hex dump of each 32-bit register in the
661 1.8 riastrad * PCIe Advanced Error Reporting extended capability
662 1.8 riastrad * structure.
663 1.8 riastrad */
664 1.8 riastrad __CTASSERT(sizeof(PE->AERInfo) % 4 == 0);
665 1.8 riastrad for (i = 0; i < sizeof(PE->AERInfo)/4; i++) {
666 1.8 riastrad snprintf(hex + 9*i, sizeof(hex) - 9*i, "%08"PRIx32" ",
667 1.8 riastrad le32dec(&PE->AERInfo[4*i]));
668 1.7 riastrad }
669 1.8 riastrad hex[sizeof(hex) - 1] = '\0';
670 1.7 riastrad device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
671 1.7 riastrad
672 1.7 riastrad /* XXX move me to pcireg.h */
673 1.7 riastrad #define PCI_AER_UC_STATUS_FMT "\177\020" \
674 1.7 riastrad "b\000" "UNDEFINED\0" \
675 1.7 riastrad "b\004" "DL_PROTOCOL_ERROR\0" \
676 1.7 riastrad "b\005" "SURPRISE_DOWN_ERROR\0" \
677 1.7 riastrad "b\014" "POISONED_TLP\0" \
678 1.7 riastrad "b\015" "FC_PROTOCOL_ERROR\0" \
679 1.7 riastrad "b\016" "COMPLETION_TIMEOUT\0" \
680 1.7 riastrad "b\017" "COMPLETION_ABORT\0" \
681 1.7 riastrad "b\020" "UNEXPECTED_COMPLETION\0" \
682 1.7 riastrad "b\021" "RECEIVER_OVERFLOW\0" \
683 1.7 riastrad "b\022" "MALFORMED_TLP\0" \
684 1.7 riastrad "b\023" "ECRC_ERROR\0" \
685 1.7 riastrad "b\024" "UNSUPPORTED_REQUEST_ERROR\0" \
686 1.7 riastrad "b\025" "ACS_VIOLATION\0" \
687 1.7 riastrad "b\026" "INTERNAL_ERROR\0" \
688 1.7 riastrad "b\027" "MC_BLOCKED_TLP\0" \
689 1.7 riastrad "b\030" "ATOMIC_OP_EGRESS_BLOCKED\0" \
690 1.7 riastrad "b\031" "TLP_PREFIX_BLOCKED_ERROR\0" \
691 1.7 riastrad "b\032" "POISONTLP_EGRESS_BLOCKED\0" \
692 1.7 riastrad "\0"
693 1.7 riastrad
694 1.8 riastrad /*
695 1.8 riastrad * If there are any hardware error status bits set,
696 1.8 riastrad * highlight them in particular, in three groups:
697 1.8 riastrad *
698 1.8 riastrad * - uncorrectable fatal (UC_STATUS and UC_SEVERITY)
699 1.8 riastrad * - uncorrectable nonfatal (UC_STATUS but not UC_SEVERITY)
700 1.8 riastrad * - corrected (COR_STATUS)
701 1.8 riastrad *
702 1.8 riastrad * And if there are any uncorrectable errors, show
703 1.8 riastrad * which one was reported first, according to
704 1.8 riastrad * CAP_CONTROL.
705 1.8 riastrad */
706 1.7 riastrad uc_status = le32dec(&PE->AERInfo[PCI_AER_UC_STATUS]);
707 1.7 riastrad uc_sev = le32dec(&PE->AERInfo[PCI_AER_UC_SEVERITY]);
708 1.7 riastrad cor_status = le32dec(&PE->AERInfo[PCI_AER_COR_STATUS]);
709 1.7 riastrad control = le32dec(&PE->AERInfo[PCI_AER_CAP_CONTROL]);
710 1.7 riastrad
711 1.7 riastrad if (uc_status & uc_sev) {
712 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
713 1.7 riastrad uc_status & uc_sev);
714 1.7 riastrad device_printf(sc->sc_dev, "%s:"
715 1.7 riastrad " AER hardware fatal uncorrectable errors: %s\n",
716 1.7 riastrad ctx, bitbuf);
717 1.7 riastrad }
718 1.7 riastrad if (uc_status & ~uc_sev) {
719 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
720 1.7 riastrad uc_status & uc_sev);
721 1.7 riastrad device_printf(sc->sc_dev, "%s:"
722 1.7 riastrad " AER hardware fatal uncorrectable errors: %s\n",
723 1.7 riastrad ctx, bitbuf);
724 1.7 riastrad }
725 1.7 riastrad if (uc_status) {
726 1.7 riastrad unsigned first = __SHIFTOUT(control,
727 1.7 riastrad PCI_AER_FIRST_ERROR_PTR);
728 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
729 1.7 riastrad (uint32_t)1 << first);
730 1.7 riastrad device_printf(sc->sc_dev, "%s:"
731 1.7 riastrad " AER hardware first uncorrectable error: %s\n",
732 1.7 riastrad ctx, bitbuf);
733 1.7 riastrad }
734 1.7 riastrad if (cor_status) {
735 1.7 riastrad /* XXX move me to pcireg.h */
736 1.7 riastrad snprintb(bitbuf, sizeof(bitbuf), "\177\020"
737 1.7 riastrad "b\000" "RECEIVER_ERROR\0"
738 1.7 riastrad "b\006" "BAD_TLP\0"
739 1.7 riastrad "b\007" "BAD_DLLP\0"
740 1.7 riastrad "b\010" "REPLAY_NUM_ROLLOVER\0"
741 1.7 riastrad "b\014" "REPLAY_TIMER_TIMEOUT\0"
742 1.7 riastrad "b\015" "ADVISORY_NF_ERROR\0"
743 1.7 riastrad "b\016" "INTERNAL_ERROR\0"
744 1.7 riastrad "b\017" "HEADER_LOG_OVERFLOW\0"
745 1.7 riastrad "\0", cor_status);
746 1.7 riastrad device_printf(sc->sc_dev, "%s:"
747 1.7 riastrad " AER hardware corrected error: %s\n",
748 1.7 riastrad ctx, bitbuf);
749 1.7 riastrad }
750 1.7 riastrad }
751 1.7 riastrad
752 1.7 riastrad out: /*
753 1.7 riastrad * XXX pass this on to the PCI subsystem to handle
754 1.7 riastrad */
755 1.7 riastrad return;
756 1.7 riastrad }
757 1.7 riastrad
758 1.7 riastrad /*
759 1.1 riastrad * apei_cper_reports
760 1.1 riastrad *
761 1.1 riastrad * Table of known Common Platform Error Record types, symbolic
762 1.1 riastrad * names, minimum data lengths, and functions to report them.
763 1.1 riastrad *
764 1.1 riastrad * The section types and corresponding section layouts are listed
765 1.1 riastrad * at:
766 1.1 riastrad *
767 1.1 riastrad * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
768 1.1 riastrad */
769 1.1 riastrad static const struct apei_cper_report {
770 1.1 riastrad const char *name;
771 1.1 riastrad const struct uuid *type;
772 1.1 riastrad size_t minlength;
773 1.5 riastrad void (*func)(struct apei_softc *, const void *, size_t, const char *,
774 1.5 riastrad bool);
775 1.1 riastrad } apei_cper_reports[] = {
776 1.1 riastrad { "memory", &CPER_MEMORY_ERROR_SECTION,
777 1.1 riastrad sizeof(struct cper_memory_error),
778 1.1 riastrad apei_cper_memory_error_report },
779 1.7 riastrad { "PCIe", &CPER_PCIE_ERROR_SECTION,
780 1.7 riastrad sizeof(struct cper_pcie_error),
781 1.7 riastrad apei_cper_pcie_error_report },
782 1.1 riastrad };
783 1.1 riastrad
784 1.1 riastrad /*
785 1.5 riastrad * apei_gede_report_header(sc, gede, ctx, ratelimitok, &headerlen, &report)
786 1.1 riastrad *
787 1.1 riastrad * Report the header of the ith Generic Error Data Entry in the
788 1.5 riastrad * given context, if ratelimitok is true.
789 1.1 riastrad *
790 1.1 riastrad * Return the actual length of the header in headerlen, or 0 if
791 1.1 riastrad * not known because the revision isn't recognized.
792 1.1 riastrad *
793 1.1 riastrad * Return the report type in report, or NULL if not known because
794 1.1 riastrad * the section type isn't recognized.
795 1.1 riastrad */
796 1.1 riastrad static void
797 1.1 riastrad apei_gede_report_header(struct apei_softc *sc,
798 1.5 riastrad const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, bool ratelimitok,
799 1.1 riastrad size_t *headerlenp, const struct apei_cper_report **reportp)
800 1.1 riastrad {
801 1.1 riastrad const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
802 1.1 riastrad struct uuid sectype;
803 1.1 riastrad char guidstr[69];
804 1.1 riastrad char buf[128];
805 1.1 riastrad unsigned i;
806 1.1 riastrad
807 1.1 riastrad /*
808 1.1 riastrad * Print the section type as a C initializer. It would be
809 1.1 riastrad * prettier to use standard hyphenated UUID notation, but that
810 1.1 riastrad * notation is slightly ambiguous here (two octets could be
811 1.1 riastrad * written either way, depending on Microsoft convention --
812 1.1 riastrad * which influenced ACPI and UEFI -- or internet convention),
813 1.1 riastrad * and the UEFI spec writes the C initializer notation, so this
814 1.1 riastrad * makes it easier to search for.
815 1.1 riastrad *
816 1.1 riastrad * Also print out a symbolic name, if we know it.
817 1.1 riastrad */
818 1.1 riastrad apei_cper_guid_dec(gede->SectionType, §ype);
819 1.1 riastrad apei_format_guid(§ype, guidstr);
820 1.1 riastrad for (i = 0; i < __arraycount(apei_cper_reports); i++) {
821 1.1 riastrad const struct apei_cper_report *const report =
822 1.1 riastrad &apei_cper_reports[i];
823 1.1 riastrad
824 1.1 riastrad if (memcmp(§ype, report->type, sizeof(sectype)) != 0)
825 1.1 riastrad continue;
826 1.5 riastrad if (ratelimitok) {
827 1.5 riastrad device_printf(sc->sc_dev, "%s:"
828 1.5 riastrad " SectionType=%s (%s error)\n",
829 1.5 riastrad ctx, guidstr, report->name);
830 1.5 riastrad }
831 1.1 riastrad *reportp = report;
832 1.1 riastrad break;
833 1.1 riastrad }
834 1.1 riastrad if (i == __arraycount(apei_cper_reports)) {
835 1.5 riastrad if (ratelimitok) {
836 1.5 riastrad device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
837 1.5 riastrad guidstr);
838 1.5 riastrad }
839 1.1 riastrad *reportp = NULL;
840 1.1 riastrad }
841 1.1 riastrad
842 1.1 riastrad /*
843 1.1 riastrad * Print the numeric severity and, if we have it, a symbolic
844 1.1 riastrad * name for it.
845 1.1 riastrad */
846 1.5 riastrad if (ratelimitok) {
847 1.5 riastrad device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n",
848 1.5 riastrad ctx,
849 1.5 riastrad gede->ErrorSeverity,
850 1.5 riastrad (gede->ErrorSeverity < __arraycount(apei_gede_severity)
851 1.5 riastrad ? apei_gede_severity[gede->ErrorSeverity]
852 1.5 riastrad : "unknown"));
853 1.5 riastrad }
854 1.1 riastrad
855 1.1 riastrad /*
856 1.1 riastrad * The Revision may not often be useful, but this is only ever
857 1.1 riastrad * shown at the time of a hardware error report, not something
858 1.1 riastrad * you can glean at your convenience with acpidump. So print
859 1.1 riastrad * it anyway.
860 1.1 riastrad */
861 1.5 riastrad if (ratelimitok) {
862 1.5 riastrad device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
863 1.5 riastrad gede->Revision);
864 1.5 riastrad }
865 1.1 riastrad
866 1.1 riastrad /*
867 1.1 riastrad * Don't touch anything past the Revision until we've
868 1.1 riastrad * determined we understand it. Return the header length to
869 1.1 riastrad * the caller, or return zero -- and stop here -- if we don't
870 1.1 riastrad * know what the actual header length is.
871 1.1 riastrad */
872 1.1 riastrad if (gede->Revision < 0x0300) {
873 1.1 riastrad *headerlenp = sizeof(*gede);
874 1.1 riastrad } else if (gede->Revision < 0x0400) {
875 1.1 riastrad *headerlenp = sizeof(*gede_v3);
876 1.1 riastrad } else {
877 1.1 riastrad *headerlenp = 0;
878 1.1 riastrad return;
879 1.1 riastrad }
880 1.1 riastrad
881 1.1 riastrad /*
882 1.1 riastrad * Print the validation bits at debug level. Only really
883 1.1 riastrad * helpful if there are bits we _don't_ know about.
884 1.1 riastrad */
885 1.5 riastrad if (ratelimitok) {
886 1.5 riastrad /* XXX define this format somewhere */
887 1.5 riastrad snprintb(buf, sizeof(buf), "\177\020"
888 1.5 riastrad "b\000" "FRU_ID\0"
889 1.5 riastrad "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
890 1.5 riastrad "b\002" "TIMESTAMP\0"
891 1.5 riastrad "\0", gede->ValidationBits);
892 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx,
893 1.5 riastrad buf);
894 1.5 riastrad }
895 1.1 riastrad
896 1.1 riastrad /*
897 1.1 riastrad * Print the CPER section flags.
898 1.1 riastrad */
899 1.5 riastrad if (ratelimitok) {
900 1.5 riastrad snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT,
901 1.5 riastrad gede->Flags);
902 1.5 riastrad device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
903 1.5 riastrad }
904 1.1 riastrad
905 1.1 riastrad /*
906 1.1 riastrad * The ErrorDataLength is unlikely to be useful for the log, so
907 1.1 riastrad * print it at debug level only.
908 1.1 riastrad */
909 1.5 riastrad if (ratelimitok) {
910 1.5 riastrad aprint_debug_dev(sc->sc_dev, "%s:"
911 1.5 riastrad " ErrorDataLength=0x%"PRIu32"\n",
912 1.5 riastrad ctx, gede->ErrorDataLength);
913 1.5 riastrad }
914 1.1 riastrad
915 1.1 riastrad /*
916 1.1 riastrad * Print the FRU Id and text, if available.
917 1.1 riastrad */
918 1.5 riastrad if (ratelimitok &&
919 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) != 0) {
920 1.1 riastrad struct uuid fruid;
921 1.1 riastrad
922 1.1 riastrad apei_cper_guid_dec(gede->FruId, &fruid);
923 1.1 riastrad apei_format_guid(&fruid, guidstr);
924 1.1 riastrad device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
925 1.1 riastrad }
926 1.5 riastrad if (ratelimitok &&
927 1.5 riastrad (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) != 0) {
928 1.1 riastrad device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
929 1.1 riastrad ctx, gede->FruText);
930 1.1 riastrad }
931 1.1 riastrad
932 1.1 riastrad /*
933 1.1 riastrad * Print the timestamp, if available by the revision number and
934 1.1 riastrad * the validation bits.
935 1.1 riastrad */
936 1.5 riastrad if (ratelimitok &&
937 1.5 riastrad gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
938 1.1 riastrad gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
939 1.1 riastrad const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
940 1.1 riastrad const uint8_t s = t[0];
941 1.1 riastrad const uint8_t m = t[1];
942 1.1 riastrad const uint8_t h = t[2];
943 1.1 riastrad const uint8_t f = t[3];
944 1.1 riastrad const uint8_t D = t[4];
945 1.1 riastrad const uint8_t M = t[5];
946 1.1 riastrad const uint8_t Y = t[6];
947 1.1 riastrad const uint8_t C = t[7];
948 1.1 riastrad
949 1.1 riastrad device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
950 1.1 riastrad " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
951 1.1 riastrad ctx, gede_v3->TimeStamp,
952 1.1 riastrad C,Y, M, D, h,m,s,
953 1.1 riastrad f & __BIT(0) ? " (event time)" : " (collect time)");
954 1.1 riastrad }
955 1.1 riastrad }
956 1.1 riastrad
957 1.1 riastrad /*
958 1.5 riastrad * apei_gesb_ratelimit
959 1.5 riastrad *
960 1.5 riastrad * State to limit the rate of console log messages about hardware
961 1.5 riastrad * errors. For each of the four severity levels in a Generic
962 1.5 riastrad * Error Status Block,
963 1.5 riastrad *
964 1.5 riastrad * 0 - Recoverable (uncorrectable),
965 1.5 riastrad * 1 - Fatal (uncorrectable),
966 1.5 riastrad * 2 - Corrected, and
967 1.5 riastrad * 3 - None (including ill-formed errors),
968 1.5 riastrad *
969 1.5 riastrad * we record the last time it happened, protected by a CPU simple
970 1.5 riastrad * lock that we only try-acquire so it is safe to use in any
971 1.5 riastrad * context, including non-maskable interrupt context.
972 1.5 riastrad */
973 1.5 riastrad
974 1.5 riastrad static struct {
975 1.5 riastrad __cpu_simple_lock_t lock;
976 1.5 riastrad struct timeval lasttime;
977 1.5 riastrad volatile uint32_t suppressed;
978 1.5 riastrad } __aligned(COHERENCY_UNIT) apei_gesb_ratelimit[4] __cacheline_aligned = {
979 1.5 riastrad [ACPI_HEST_GEN_ERROR_RECOVERABLE] = { .lock = __SIMPLELOCK_UNLOCKED },
980 1.5 riastrad [ACPI_HEST_GEN_ERROR_FATAL] = { .lock = __SIMPLELOCK_UNLOCKED },
981 1.5 riastrad [ACPI_HEST_GEN_ERROR_CORRECTED] = { .lock = __SIMPLELOCK_UNLOCKED },
982 1.5 riastrad [ACPI_HEST_GEN_ERROR_NONE] = { .lock = __SIMPLELOCK_UNLOCKED },
983 1.5 riastrad };
984 1.5 riastrad
/*
 * atomic_incsat_32(p)
 *
 *	Atomically increment the 32-bit counter at p, saturating at
 *	UINT32_MAX instead of wrapping around to zero.  Implemented as
 *	a compare-and-swap loop that retries if another CPU changed
 *	the counter between the load and the swap.
 */
static void
atomic_incsat_32(volatile uint32_t *p)
{
	uint32_t o, n;

	do {
		o = atomic_load_relaxed(p);
		/* Already saturated: leave the counter pinned at max. */
		if (__predict_false(o == UINT32_MAX))
			return;
		n = o + 1;
	} while (__predict_false(atomic_cas_32(p, o, n) != o));
}
997 1.5 riastrad
998 1.5 riastrad /*
999 1.5 riastrad * apei_gesb_ratecheck(sc, severity, suppressed)
1000 1.5 riastrad *
1001 1.5 riastrad * Check for a rate limit on errors of the specified severity.
1002 1.5 riastrad *
1003 1.5 riastrad * => Return true if the error should be printed, and format into
1004 1.5 riastrad * the buffer suppressed a message saying how many errors were
1005 1.5 riastrad * previously suppressed.
1006 1.5 riastrad *
1007 1.5 riastrad * => Return false if the error should be suppressed because the
1008 1.5 riastrad * last one printed was too recent.
1009 1.5 riastrad */
1010 1.5 riastrad static bool
1011 1.5 riastrad apei_gesb_ratecheck(struct apei_softc *sc, uint32_t severity,
1012 1.5 riastrad char suppressed[static sizeof(" (4294967295 or more errors suppressed)")])
1013 1.5 riastrad {
1014 1.5 riastrad /* one of each type per minute (XXX worth making configurable?) */
1015 1.5 riastrad const struct timeval mininterval = {60, 0};
1016 1.5 riastrad unsigned i = MIN(severity, ACPI_HEST_GEN_ERROR_NONE); /* paranoia */
1017 1.5 riastrad bool ok = false;
1018 1.5 riastrad
1019 1.5 riastrad /*
1020 1.5 riastrad * If the lock is contended, the rate limit is probably
1021 1.5 riastrad * exceeded, so it's not OK to print.
1022 1.5 riastrad *
1023 1.5 riastrad * Otherwise, with the lock held, ask ratecheck(9) whether it's
1024 1.5 riastrad * OK to print.
1025 1.5 riastrad */
1026 1.5 riastrad if (!__cpu_simple_lock_try(&apei_gesb_ratelimit[i].lock))
1027 1.5 riastrad goto out;
1028 1.5 riastrad ok = ratecheck(&apei_gesb_ratelimit[i].lasttime, &mininterval);
1029 1.5 riastrad __cpu_simple_unlock(&apei_gesb_ratelimit[i].lock);
1030 1.5 riastrad
1031 1.5 riastrad out: /*
1032 1.5 riastrad * If it's OK to print, report the number of errors that were
1033 1.5 riastrad * suppressed. If it's not OK to print, count a suppressed
1034 1.5 riastrad * error.
1035 1.5 riastrad */
1036 1.5 riastrad if (ok) {
1037 1.5 riastrad const uint32_t n =
1038 1.5 riastrad atomic_swap_32(&apei_gesb_ratelimit[i].suppressed, 0);
1039 1.5 riastrad
1040 1.5 riastrad if (n == 0) {
1041 1.5 riastrad suppressed[0] = '\0';
1042 1.5 riastrad } else {
1043 1.5 riastrad snprintf(suppressed,
1044 1.5 riastrad sizeof(" (4294967295 or more errors suppressed)"),
1045 1.5 riastrad " (%u%s error%s suppressed)",
1046 1.5 riastrad n,
1047 1.5 riastrad n == UINT32_MAX ? " or more" : "",
1048 1.5 riastrad n == 1 ? "" : "s");
1049 1.5 riastrad }
1050 1.5 riastrad } else {
1051 1.5 riastrad atomic_incsat_32(&apei_gesb_ratelimit[i].suppressed);
1052 1.5 riastrad suppressed[0] = '\0';
1053 1.5 riastrad }
1054 1.5 riastrad return ok;
1055 1.5 riastrad }
1056 1.5 riastrad
1057 1.5 riastrad /*
1058 1.1 riastrad * apei_gesb_report(sc, gesb, size, ctx)
1059 1.1 riastrad *
1060 1.1 riastrad * Check a Generic Error Status Block, of at most the specified
1061 1.1 riastrad * size in bytes, and report any errors in it. Return the 32-bit
1062 1.1 riastrad * Block Status in case the caller needs it to acknowledge the
1063 1.1 riastrad * report to firmware.
1064 1.1 riastrad */
1065 1.1 riastrad uint32_t
1066 1.1 riastrad apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
1067 1.1 riastrad size_t size, const char *ctx, bool *fatalp)
1068 1.1 riastrad {
1069 1.1 riastrad uint32_t status, unknownstatus, severity, nentries, i;
1070 1.1 riastrad uint32_t datalen, rawdatalen;
1071 1.1 riastrad const ACPI_HEST_GENERIC_DATA *gede0, *gede;
1072 1.1 riastrad const unsigned char *rawdata;
1073 1.5 riastrad bool ratelimitok = false;
1074 1.5 riastrad char suppressed[sizeof(" (4294967295 or more errors suppressed)")];
1075 1.1 riastrad bool fatal = false;
1076 1.1 riastrad
1077 1.1 riastrad /*
1078 1.1 riastrad * Verify the buffer is large enough for a Generic Error Status
1079 1.1 riastrad * Block before we try to touch anything in it.
1080 1.1 riastrad */
1081 1.1 riastrad if (size < sizeof(*gesb)) {
1082 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, ACPI_HEST_GEN_ERROR_NONE,
1083 1.5 riastrad suppressed);
1084 1.5 riastrad if (ratelimitok) {
1085 1.5 riastrad device_printf(sc->sc_dev,
1086 1.5 riastrad "%s: truncated GESB, %zu < %zu%s\n",
1087 1.5 riastrad ctx, size, sizeof(*gesb), suppressed);
1088 1.5 riastrad }
1089 1.2 riastrad status = 0;
1090 1.2 riastrad goto out;
1091 1.1 riastrad }
1092 1.1 riastrad size -= sizeof(*gesb);
1093 1.1 riastrad
1094 1.1 riastrad /*
1095 1.1 riastrad * Load the status. Access ordering rules are unclear in the
1096 1.1 riastrad * ACPI specification; I'm guessing that load-acquire of the
1097 1.1 riastrad * block status is a good idea before any other access to the
1098 1.1 riastrad * GESB.
1099 1.1 riastrad */
1100 1.1 riastrad status = atomic_load_acquire(&gesb->BlockStatus);
1101 1.1 riastrad
1102 1.1 riastrad /*
1103 1.1 riastrad * If there are no status bits set, the rest of the GESB is
1104 1.1 riastrad * garbage, so stop here.
1105 1.1 riastrad */
1106 1.1 riastrad if (status == 0) {
1107 1.1 riastrad /* XXX dtrace */
1108 1.1 riastrad /* XXX DPRINTF */
1109 1.1 riastrad goto out;
1110 1.1 riastrad }
1111 1.1 riastrad
1112 1.5 riastrad /*
1113 1.5 riastrad * Read out the severity and get the number of entries in this
1114 1.5 riastrad * status block.
1115 1.5 riastrad */
1116 1.5 riastrad severity = gesb->ErrorSeverity;
1117 1.5 riastrad nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
1118 1.1 riastrad
1119 1.1 riastrad /*
1120 1.1 riastrad * Print a message to the console and dmesg about the severity
1121 1.1 riastrad * of the error.
1122 1.1 riastrad */
1123 1.5 riastrad ratelimitok = apei_gesb_ratecheck(sc, severity, suppressed);
1124 1.5 riastrad if (ratelimitok) {
1125 1.5 riastrad char statusbuf[128];
1126 1.5 riastrad
1127 1.5 riastrad /* XXX define this format somewhere */
1128 1.5 riastrad snprintb(statusbuf, sizeof(statusbuf), "\177\020"
1129 1.5 riastrad "b\000" "UE\0"
1130 1.5 riastrad "b\001" "CE\0"
1131 1.5 riastrad "b\002" "MULTI_UE\0"
1132 1.5 riastrad "b\003" "MULTI_CE\0"
1133 1.5 riastrad "f\004\010" "GEDE_COUNT\0"
1134 1.5 riastrad "\0", status);
1135 1.5 riastrad
1136 1.5 riastrad if (severity < __arraycount(apei_gesb_severity)) {
1137 1.5 riastrad device_printf(sc->sc_dev, "%s"
1138 1.5 riastrad " reported hardware error%s:"
1139 1.5 riastrad " severity=%s nentries=%u status=%s\n",
1140 1.5 riastrad ctx, suppressed,
1141 1.5 riastrad apei_gesb_severity[severity], nentries, statusbuf);
1142 1.5 riastrad } else {
1143 1.5 riastrad device_printf(sc->sc_dev, "%s reported error%s:"
1144 1.5 riastrad " severity=%"PRIu32" nentries=%u status=%s\n",
1145 1.5 riastrad ctx, suppressed,
1146 1.5 riastrad severity, nentries, statusbuf);
1147 1.5 riastrad }
1148 1.1 riastrad }
1149 1.1 riastrad
1150 1.1 riastrad /*
1151 1.1 riastrad * Make a determination about whether the error is fatal.
1152 1.1 riastrad *
1153 1.1 riastrad * XXX Currently we don't have any mechanism to recover from
1154 1.1 riastrad * uncorrectable but recoverable errors, so we treat those --
1155 1.1 riastrad * and anything else we don't recognize -- as fatal.
1156 1.1 riastrad */
1157 1.1 riastrad switch (severity) {
1158 1.1 riastrad case ACPI_HEST_GEN_ERROR_CORRECTED:
1159 1.1 riastrad case ACPI_HEST_GEN_ERROR_NONE:
1160 1.1 riastrad fatal = false;
1161 1.1 riastrad break;
1162 1.1 riastrad case ACPI_HEST_GEN_ERROR_FATAL:
1163 1.1 riastrad case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
1164 1.1 riastrad default:
1165 1.1 riastrad fatal = true;
1166 1.1 riastrad break;
1167 1.1 riastrad }
1168 1.1 riastrad
1169 1.1 riastrad /*
1170 1.1 riastrad * Clear the bits we know about to warn if there's anything
1171 1.1 riastrad * left we don't understand.
1172 1.1 riastrad */
1173 1.1 riastrad unknownstatus = status;
1174 1.1 riastrad unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
1175 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
1176 1.1 riastrad unknownstatus &= ~ACPI_HEST_CORRECTABLE;
1177 1.1 riastrad unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
1178 1.1 riastrad unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
1179 1.5 riastrad if (ratelimitok && unknownstatus != 0) {
1180 1.1 riastrad /* XXX dtrace */
1181 1.1 riastrad device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
1182 1.1 riastrad " 0x%"PRIx32"\n", ctx, unknownstatus);
1183 1.1 riastrad }
1184 1.1 riastrad
1185 1.1 riastrad /*
1186 1.1 riastrad * Advance past the Generic Error Status Block (GESB) header to
1187 1.1 riastrad * the Generic Error Data Entries (GEDEs).
1188 1.1 riastrad */
1189 1.1 riastrad gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
1190 1.1 riastrad
1191 1.1 riastrad /*
1192 1.1 riastrad * Verify that the data length (GEDEs) fits within the size.
1193 1.1 riastrad * If not, truncate the GEDEs.
1194 1.1 riastrad */
1195 1.1 riastrad datalen = gesb->DataLength;
1196 1.1 riastrad if (size < datalen) {
1197 1.5 riastrad if (ratelimitok) {
1198 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1199 1.5 riastrad " GESB DataLength exceeds bounds:"
1200 1.5 riastrad " %zu < %"PRIu32"\n",
1201 1.5 riastrad ctx, size, datalen);
1202 1.5 riastrad }
1203 1.1 riastrad datalen = size;
1204 1.1 riastrad }
1205 1.1 riastrad size -= datalen;
1206 1.1 riastrad
1207 1.1 riastrad /*
1208 1.1 riastrad * Report each of the Generic Error Data Entries.
1209 1.1 riastrad */
1210 1.1 riastrad for (i = 0; i < nentries; i++) {
1211 1.1 riastrad size_t headerlen;
1212 1.1 riastrad const struct apei_cper_report *report;
1213 1.1 riastrad char subctx[128];
1214 1.1 riastrad
1215 1.1 riastrad /*
1216 1.1 riastrad * Format a subcontext to show this numbered entry of
1217 1.1 riastrad * the GESB.
1218 1.1 riastrad */
1219 1.1 riastrad snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
1220 1.1 riastrad
1221 1.1 riastrad /*
1222 1.1 riastrad * If the remaining GESB data length isn't enough for a
1223 1.1 riastrad * GEDE header, stop here.
1224 1.1 riastrad */
1225 1.1 riastrad if (datalen < sizeof(*gede)) {
1226 1.5 riastrad if (ratelimitok) {
1227 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1228 1.5 riastrad " truncated GEDE: %"PRIu32" < %zu bytes\n",
1229 1.5 riastrad subctx, datalen, sizeof(*gede));
1230 1.5 riastrad }
1231 1.1 riastrad break;
1232 1.1 riastrad }
1233 1.1 riastrad
1234 1.1 riastrad /*
1235 1.1 riastrad * Print the GEDE header and get the full length (may
1236 1.1 riastrad * vary from revision to revision of the GEDE) and the
1237 1.1 riastrad * CPER report function if possible.
1238 1.1 riastrad */
1239 1.5 riastrad apei_gede_report_header(sc, gede, subctx, ratelimitok,
1240 1.1 riastrad &headerlen, &report);
1241 1.1 riastrad
1242 1.1 riastrad /*
1243 1.1 riastrad * If we don't know the header length because of an
1244 1.1 riastrad * unfamiliar revision, stop here.
1245 1.1 riastrad */
1246 1.1 riastrad if (headerlen == 0) {
1247 1.5 riastrad if (ratelimitok) {
1248 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1249 1.5 riastrad " unknown revision: 0x%"PRIx16"\n",
1250 1.5 riastrad subctx, gede->Revision);
1251 1.5 riastrad }
1252 1.1 riastrad break;
1253 1.1 riastrad }
1254 1.1 riastrad
1255 1.1 riastrad /*
1256 1.1 riastrad * Stop here if what we mapped is too small for the
1257 1.1 riastrad * error data length.
1258 1.1 riastrad */
1259 1.1 riastrad datalen -= headerlen;
1260 1.1 riastrad if (datalen < gede->ErrorDataLength) {
1261 1.5 riastrad if (ratelimitok) {
1262 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1263 1.5 riastrad " truncated GEDE payload:"
1264 1.5 riastrad " %"PRIu32" < %"PRIu32" bytes\n",
1265 1.5 riastrad subctx, datalen, gede->ErrorDataLength);
1266 1.5 riastrad }
1267 1.1 riastrad break;
1268 1.1 riastrad }
1269 1.1 riastrad
1270 1.1 riastrad /*
1271 1.1 riastrad * Report the Common Platform Error Record appendix to
1272 1.1 riastrad * this Generic Error Data Entry.
1273 1.1 riastrad */
1274 1.1 riastrad if (report == NULL) {
1275 1.5 riastrad if (ratelimitok) {
1276 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1277 1.5 riastrad " [unknown type]\n", ctx);
1278 1.5 riastrad }
1279 1.1 riastrad } else {
1280 1.5 riastrad /* XXX pass ratelimit through */
1281 1.1 riastrad (*report->func)(sc, (const char *)gede + headerlen,
1282 1.5 riastrad gede->ErrorDataLength, subctx, ratelimitok);
1283 1.1 riastrad }
1284 1.1 riastrad
1285 1.1 riastrad /*
1286 1.1 riastrad * Advance past the GEDE header and CPER data to the
1287 1.1 riastrad * next GEDE.
1288 1.1 riastrad */
1289 1.1 riastrad gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
1290 1.1 riastrad + headerlen + gede->ErrorDataLength);
1291 1.1 riastrad }
1292 1.1 riastrad
1293 1.1 riastrad /*
1294 1.1 riastrad * Advance past the Generic Error Data Entries (GEDEs) to the
1295 1.1 riastrad * raw error data.
1296 1.1 riastrad *
1297 1.1 riastrad * XXX Provide Max Raw Data Length as a parameter, as found in
1298 1.1 riastrad * various HEST entry types.
1299 1.1 riastrad */
1300 1.1 riastrad rawdata = (const unsigned char *)gede0 + datalen;
1301 1.1 riastrad
1302 1.1 riastrad /*
1303 1.1 riastrad * Verify that the raw data length fits within the size. If
1304 1.1 riastrad * not, truncate the raw data.
1305 1.1 riastrad */
1306 1.1 riastrad rawdatalen = gesb->RawDataLength;
1307 1.1 riastrad if (size < rawdatalen) {
1308 1.5 riastrad if (ratelimitok) {
1309 1.5 riastrad device_printf(sc->sc_dev, "%s:"
1310 1.5 riastrad " GESB RawDataLength exceeds bounds:"
1311 1.5 riastrad " %zu < %"PRIu32"\n",
1312 1.5 riastrad ctx, size, rawdatalen);
1313 1.5 riastrad }
1314 1.1 riastrad rawdatalen = size;
1315 1.1 riastrad }
1316 1.1 riastrad size -= rawdatalen;
1317 1.1 riastrad
1318 1.1 riastrad /*
1319 1.1 riastrad * Hexdump the raw data, if any.
1320 1.1 riastrad */
1321 1.5 riastrad if (ratelimitok && rawdatalen > 0) {
1322 1.1 riastrad char devctx[128];
1323 1.1 riastrad
1324 1.1 riastrad snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
1325 1.1 riastrad device_xname(sc->sc_dev), ctx);
1326 1.1 riastrad hexdump(printf, devctx, rawdata, rawdatalen);
1327 1.1 riastrad }
1328 1.1 riastrad
1329 1.1 riastrad /*
1330 1.1 riastrad * If there's anything left after the raw data, warn.
1331 1.1 riastrad */
1332 1.5 riastrad if (ratelimitok && size > 0) {
1333 1.1 riastrad device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
1334 1.1 riastrad ctx, size);
1335 1.1 riastrad }
1336 1.1 riastrad
1337 1.1 riastrad /*
1338 1.1 riastrad * Return the status so the caller can ack it, and tell the
1339 1.1 riastrad * caller whether this error is fatal.
1340 1.1 riastrad */
1341 1.1 riastrad out: *fatalp = fatal;
1342 1.1 riastrad return status;
1343 1.1 riastrad }
1344 1.1 riastrad
1345 1.1 riastrad MODULE(MODULE_CLASS_DRIVER, apei, NULL);
1346 1.1 riastrad
1347 1.1 riastrad #ifdef _MODULE
1348 1.1 riastrad #include "ioconf.c"
1349 1.1 riastrad #endif
1350 1.1 riastrad
1351 1.1 riastrad static int
1352 1.1 riastrad apei_modcmd(modcmd_t cmd, void *opaque)
1353 1.1 riastrad {
1354 1.1 riastrad int error = 0;
1355 1.1 riastrad
1356 1.1 riastrad switch (cmd) {
1357 1.1 riastrad case MODULE_CMD_INIT:
1358 1.1 riastrad #ifdef _MODULE
1359 1.1 riastrad error = config_init_component(cfdriver_ioconf_apei,
1360 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1361 1.1 riastrad #endif
1362 1.1 riastrad return error;
1363 1.1 riastrad case MODULE_CMD_FINI:
1364 1.1 riastrad #ifdef _MODULE
1365 1.1 riastrad error = config_fini_component(cfdriver_ioconf_apei,
1366 1.1 riastrad cfattach_ioconf_apei, cfdata_ioconf_apei);
1367 1.1 riastrad #endif
1368 1.1 riastrad return error;
1369 1.1 riastrad default:
1370 1.1 riastrad return ENOTTY;
1371 1.1 riastrad }
1372 1.1 riastrad }
1373