acpi_srat.c revision 1.5.4.1 1 /* $NetBSD: acpi_srat.c,v 1.5.4.1 2020/04/13 08:04:18 martin Exp $ */
2
3 /*
4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christoph Egger.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.5.4.1 2020/04/13 08:04:18 martin Exp $");
34
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41
42 #include <uvm/uvm_extern.h>
43
44 static ACPI_TABLE_SRAT *srat;
45
46 static uint32_t nnodes; /* Number of NUMA nodes */
47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
48 static uint32_t ncpus; /* Number of CPUs */
49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
50 static uint32_t nmems; /* Number of Memory ranges */
51 static struct acpisrat_mem *mem_array;
52
53 struct cpulist {
54 struct acpisrat_cpu cpu;
55 TAILQ_ENTRY(cpulist) entry;
56 };
57
58 static TAILQ_HEAD(, cpulist) cpulisthead;
59
60 #define CPU_INIT() TAILQ_INIT(&cpulisthead);
61 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry)
62 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
63 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry)
64 #define CPU_FIRST() TAILQ_FIRST(&cpulisthead)
65
66 struct memlist {
67 struct acpisrat_mem mem;
68 TAILQ_ENTRY(memlist) entry;
69 };
70
71 static TAILQ_HEAD(, memlist) memlisthead;
72
73 #define MEM_INIT() TAILQ_INIT(&memlisthead)
74 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry)
75 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
76 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry)
77 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry)
78 #define MEM_FIRST() TAILQ_FIRST(&memlisthead)
79
80
81 static struct cpulist *
82 cpu_alloc(void)
83 {
84 return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
85 }
86
87 static void
88 cpu_free(struct cpulist *c)
89 {
90 kmem_free(c, sizeof(struct cpulist));
91 }
92
93 static struct memlist *
94 mem_alloc(void)
95 {
96 return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
97 }
98
99 static void
100 mem_free(struct memlist *m)
101 {
102 kmem_free(m, sizeof(struct memlist));
103 }
104
105 static struct memlist *
106 mem_get(acpisrat_nodeid_t nodeid)
107 {
108 struct memlist *tmp;
109
110 MEM_FOREACH(tmp) {
111 if (tmp->mem.nodeid == nodeid)
112 return tmp;
113 }
114
115 return NULL;
116 }
117
118 /*
119 * Returns true if ACPI SRAT table is available. If table does not exist, all
120 * functions below have undefined behaviour.
121 */
122 bool
123 acpisrat_exist(void)
124 {
125 ACPI_TABLE_HEADER *table;
126 ACPI_STATUS rv;
127
128 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
129 if (ACPI_FAILURE(rv))
130 return false;
131
132 /* Check if header is valid */
133 if (table == NULL)
134 return false;
135
136 if (table->Length == 0xffffffff)
137 return false;
138
139 srat = (ACPI_TABLE_SRAT *)table;
140
141 return true;
142 }
143
144 static int
145 acpisrat_parse(void)
146 {
147 ACPI_SUBTABLE_HEADER *subtable;
148 ACPI_SRAT_CPU_AFFINITY *srat_cpu;
149 ACPI_SRAT_MEM_AFFINITY *srat_mem;
150 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
151
152 acpisrat_nodeid_t nodeid;
153 struct cpulist *cpuentry = NULL;
154 struct memlist *mementry;
155 uint32_t srat_pos;
156 bool ignore_cpu_affinity = false;
157
158 KASSERT(srat != NULL);
159
160 /* Content starts right after the header */
161 srat_pos = sizeof(ACPI_TABLE_SRAT);
162
163 while (srat_pos < srat->Header.Length) {
164 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
165 srat_pos += subtable->Length;
166
167 switch (subtable->Type) {
168 case ACPI_SRAT_TYPE_CPU_AFFINITY:
169 if (ignore_cpu_affinity)
170 continue;
171
172 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
173 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
174 break;
175 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
176 (srat_cpu->ProximityDomainHi[1] << 16) |
177 (srat_cpu->ProximityDomainHi[0] << 8) |
178 (srat_cpu->ProximityDomainLo);
179
180 cpuentry = cpu_alloc();
181 if (cpuentry == NULL)
182 return ENOMEM;
183 CPU_ADD(cpuentry);
184
185 cpuentry->cpu.nodeid = nodeid;
186 cpuentry->cpu.apicid = srat_cpu->ApicId;
187 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
188 cpuentry->cpu.flags = srat_cpu->Flags;
189 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
190 break;
191
192 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
193 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
194 nodeid = srat_mem->ProximityDomain;
195 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
196 break;
197
198 mementry = mem_alloc();
199 if (mementry == NULL)
200 return ENOMEM;
201 MEM_ADD(mementry);
202
203 mementry->mem.nodeid = nodeid;
204 mementry->mem.baseaddress = srat_mem->BaseAddress;
205 mementry->mem.length = srat_mem->Length;
206 mementry->mem.flags = srat_mem->Flags;
207 break;
208
209 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
210 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
211 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
212 break;
213 nodeid = srat_x2apic->ProximityDomain;
214
215 /*
216 * This table entry overrides
217 * ACPI_SRAT_TYPE_CPU_AFFINITY.
218 */
219 if (!ignore_cpu_affinity) {
220 struct cpulist *citer;
221 while ((citer = CPU_FIRST()) != NULL) {
222 CPU_REM(citer);
223 cpu_free(citer);
224 }
225 ignore_cpu_affinity = true;
226 }
227
228 cpuentry = cpu_alloc();
229 if (cpuentry == NULL)
230 return ENOMEM;
231 CPU_ADD(cpuentry);
232
233 cpuentry->cpu.nodeid = nodeid;
234 cpuentry->cpu.apicid = srat_x2apic->ApicId;
235 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
236 cpuentry->cpu.flags = srat_x2apic->Flags;
237 break;
238
239 case ACPI_SRAT_TYPE_RESERVED:
240 printf("ACPI SRAT subtable reserved, length: 0x%x\n",
241 subtable->Length);
242 break;
243 }
244 }
245
246 return 0;
247 }
248
249 static int
250 acpisrat_quirks(void)
251 {
252 struct cpulist *citer;
253 struct memlist *mem, *miter;
254
255 /* Some sanity checks. */
256
257 /*
258 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
259 * nodes which don't have any memory modules plugged in. This behaviour
260 * has been observed on AMD machines.
261 *
262 * Do that by searching for CPUs in NUMA nodes which don't exist in the
263 * memory and then insert a zero memory range for the missing node.
264 */
265 CPU_FOREACH(citer) {
266 mem = mem_get(citer->cpu.nodeid);
267 if (mem != NULL)
268 continue;
269 mem = mem_alloc();
270 if (mem == NULL)
271 return ENOMEM;
272 mem->mem.nodeid = citer->cpu.nodeid;
273 /* all other fields are already zero filled */
274
275 MEM_FOREACH(miter) {
276 if (miter->mem.nodeid < citer->cpu.nodeid)
277 continue;
278 MEM_ADD_BEFORE(mem, miter);
279 break;
280 }
281 }
282
283 return 0;
284 }
285
/*
 * Initializes the parser.  Must be the first function called once the table
 * is available.
 *
 * Returns EEXIST when acpisrat_exist() reports no usable table, otherwise
 * the result of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
297
298 /*
299 * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
300 */
301 int
302 acpisrat_refresh(void)
303 {
304 int rc, i, j, k;
305 struct cpulist *citer;
306 struct memlist *miter;
307 uint32_t cnodes = 0, mnodes = 0;
308
309 CPU_INIT();
310 MEM_INIT();
311
312 rc = acpisrat_parse();
313 if (rc)
314 return rc;
315
316 rc = acpisrat_quirks();
317 if (rc)
318 return rc;
319
320 /* cleanup resources */
321 rc = acpisrat_exit();
322 if (rc)
323 return rc;
324
325 ncpus = 0;
326 CPU_FOREACH(citer) {
327 cnodes = MAX(citer->cpu.nodeid, cnodes);
328 ncpus++;
329 }
330
331 nmems = 0;
332 MEM_FOREACH(miter) {
333 mnodes = MAX(miter->mem.nodeid, mnodes);
334 nmems++;
335 }
336
337 nnodes = MAX(cnodes, mnodes) + 1;
338
339 if (nnodes == 0 || nmems == 0 || ncpus == 0) {
340 rc = ENOENT;
341 goto fail;
342 }
343
344 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
345 KM_SLEEP);
346 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
347 KM_SLEEP);
348 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
349 KM_SLEEP);
350
351 i = 0;
352 CPU_FOREACH(citer) {
353 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
354 i++;
355 node_array[citer->cpu.nodeid].ncpus++;
356 }
357
358 i = 0;
359 MEM_FOREACH(miter) {
360 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
361 i++;
362 node_array[miter->mem.nodeid].nmems++;
363 }
364
365 for (i = 0; i < nnodes; i++) {
366 node_array[i].nodeid = i;
367
368 if (node_array[i].ncpus != 0) {
369 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
370 sizeof(struct acpisrat_cpu *), KM_SLEEP);
371 }
372 if (node_array[i].nmems != 0) {
373 node_array[i].mem = kmem_zalloc(node_array[i].nmems *
374 sizeof(struct acpisrat_mem *), KM_SLEEP);
375 }
376
377 k = 0;
378 for (j = 0; j < ncpus; j++) {
379 if (cpu_array[j].nodeid != i)
380 continue;
381 KASSERT(node_array[i].cpu != NULL);
382 node_array[i].cpu[k] = &cpu_array[j];
383 k++;
384 }
385
386 k = 0;
387 for (j = 0; j < nmems; j++) {
388 if (mem_array[j].nodeid != i)
389 continue;
390 KASSERT(node_array[i].mem != NULL);
391 node_array[i].mem[k] = &mem_array[j];
392 k++;
393 }
394 }
395
396 fail:
397 while ((citer = CPU_FIRST()) != NULL) {
398 CPU_REM(citer);
399 cpu_free(citer);
400 }
401
402 while ((miter = MEM_FIRST()) != NULL) {
403 MEM_REM(miter);
404 mem_free(miter);
405 }
406
407 return rc;
408 }
409
410 /*
411 * Free allocated memory. Should be called when acpisrat is no longer of any
412 * use.
413 */
414 int
415 acpisrat_exit(void)
416 {
417 int i;
418
419 if (node_array) {
420 for (i = 0; i < nnodes; i++) {
421 if (node_array[i].cpu)
422 kmem_free(node_array[i].cpu,
423 node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
424 if (node_array[i].mem)
425 kmem_free(node_array[i].mem,
426 node_array[i].nmems * sizeof(struct acpisrat_mem *));
427 }
428 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
429 }
430 node_array = NULL;
431
432 if (cpu_array)
433 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
434 cpu_array = NULL;
435
436 if (mem_array)
437 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
438 mem_array = NULL;
439
440 nnodes = 0;
441 ncpus = 0;
442 nmems = 0;
443
444 return 0;
445 }
446
447 void
448 acpisrat_dump(void)
449 {
450 uint32_t i, j, nn, nc, nm;
451 struct acpisrat_cpu c;
452 struct acpisrat_mem m;
453
454 nn = acpisrat_nodes();
455 aprint_debug("SRAT: %u NUMA nodes\n", nn);
456 for (i = 0; i < nn; i++) {
457 nc = acpisrat_node_cpus(i);
458 for (j = 0; j < nc; j++) {
459 acpisrat_cpu(i, j, &c);
460 aprint_debug("SRAT: node %u cpu %u "
461 "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
462 c.nodeid, j, c.apicid, c.sapiceid, c.flags,
463 c.clockdomain);
464 }
465
466 nm = acpisrat_node_memoryranges(i);
467 for (j = 0; j < nm; j++) {
468 acpisrat_mem(i, j, &m);
469 aprint_debug("SRAT: node %u memory range %u (0x%"
470 PRIx64" - 0x%"PRIx64" flags %u)\n",
471 m.nodeid, j, m.baseaddress,
472 m.baseaddress + m.length, m.flags);
473 }
474 }
475 }
476
477 void
478 acpisrat_load_uvm(void)
479 {
480 uint32_t i, j, nn, nm;
481 struct acpisrat_mem m;
482
483 nn = acpisrat_nodes();
484 aprint_debug("SRAT: %u NUMA nodes\n", nn);
485 for (i = 0; i < nn; i++) {
486 nm = acpisrat_node_memoryranges(i);
487 for (j = 0; j < nm; j++) {
488 acpisrat_mem(i, j, &m);
489 aprint_debug("SRAT: node %u memory range %u (0x%"
490 PRIx64" - 0x%"PRIx64" flags %u)\n",
491 m.nodeid, j, m.baseaddress,
492 m.baseaddress + m.length, m.flags);
493 uvm_page_numa_load(trunc_page(m.baseaddress),
494 trunc_page(m.length), m.nodeid);
495 }
496 }
497 }
498
499 /*
500 * Get number of NUMA nodes.
501 */
502 uint32_t
503 acpisrat_nodes(void)
504 {
505 return nnodes;
506 }
507
508 /*
509 * Get number of cpus in the node. 0 means, this is a cpu-less node.
510 */
511 uint32_t
512 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
513 {
514 return node_array[nodeid].ncpus;
515 }
516
517 /*
518 * Get number of memory ranges in the node 0 means, this node has no RAM.
519 */
520 uint32_t
521 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
522 {
523 return node_array[nodeid].nmems;
524 }
525
526 void
527 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
528 struct acpisrat_cpu *c)
529 {
530 memcpy(c, node_array[nodeid].cpu[cpunum],
531 sizeof(struct acpisrat_cpu));
532 }
533
534 void
535 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
536 struct acpisrat_mem *mem)
537 {
538 memcpy(mem, node_array[nodeid].mem[memrange],
539 sizeof(struct acpisrat_mem));
540 }
541
542 /*
543 * Get a node from an APIC id (belonging to a cpu).
544 */
545 struct acpisrat_node *
546 acpisrat_get_node(uint32_t apicid)
547 {
548 struct acpisrat_node *node;
549 struct acpisrat_cpu *cpu;
550 size_t i, n;
551
552 for (i = 0; i < nnodes; i++) {
553 node = &node_array[i];
554
555 for (n = 0; n < node->ncpus; n++) {
556 cpu = node->cpu[n];
557 if (cpu->apicid == apicid) {
558 return node;
559 }
560 }
561 }
562
563 return NULL;
564 }
565