Home | History | Annotate | Line # | Download | only in acpi
      1 /* $NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Christoph Egger.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/kmem.h>
     37 #include <sys/systm.h>
     38 
     39 #include <dev/acpi/acpivar.h>
     40 #include <dev/acpi/acpi_srat.h>
     41 
     42 #include <uvm/uvm_extern.h>
     43 
     44 static ACPI_TABLE_SRAT *srat;
     45 
     46 static uint32_t nnodes; /* Number of NUMA nodes */
     47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
     48 static uint32_t ncpus; /* Number of CPUs */
     49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
     50 static uint32_t nmems; /* Number of Memory ranges */
     51 static struct acpisrat_mem *mem_array;
     52 
     53 struct cpulist {
     54 	struct acpisrat_cpu cpu;
     55 	TAILQ_ENTRY(cpulist) entry;
     56 };
     57 
     58 static TAILQ_HEAD(, cpulist) cpulisthead;
     59 
/*
 * Accessors for the CPU work list (cpulisthead).
 *
 * None of the macros ends in a semicolon; the caller supplies it.  This
 * keeps them usable as the sole statement of an unbraced if/else and makes
 * CPU_INIT() consistent with MEM_INIT().
 */
#define CPU_INIT()		TAILQ_INIT(&cpulisthead)
#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
#define CPU_FIRST()		TAILQ_FIRST(&cpulisthead)
     65 
     66 struct memlist {
     67 	struct acpisrat_mem mem;
     68 	TAILQ_ENTRY(memlist) entry;
     69 };
     70 
     71 static TAILQ_HEAD(, memlist) memlisthead;
     72 
     73 #define MEM_INIT()		TAILQ_INIT(&memlisthead)
     74 #define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
     75 #define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
     76 #define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
     77 #define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
     78 #define MEM_FIRST()		TAILQ_FIRST(&memlisthead)
     79 
     80 
     81 static struct cpulist *
     82 cpu_alloc(void)
     83 {
     84 	return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
     85 }
     86 
     87 static void
     88 cpu_free(struct cpulist *c)
     89 {
     90 	kmem_free(c, sizeof(struct cpulist));
     91 }
     92 
     93 static struct memlist *
     94 mem_alloc(void)
     95 {
     96 	return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
     97 }
     98 
     99 static void
    100 mem_free(struct memlist *m)
    101 {
    102 	kmem_free(m, sizeof(struct memlist));
    103 }
    104 
    105 static struct memlist *
    106 mem_get(acpisrat_nodeid_t nodeid)
    107 {
    108 	struct memlist *tmp;
    109 
    110 	MEM_FOREACH(tmp) {
    111 		if (tmp->mem.nodeid == nodeid)
    112 			return tmp;
    113 	}
    114 
    115 	return NULL;
    116 }
    117 
    118 /*
    119  * Returns true if ACPI SRAT table is available. If table does not exist, all
    120  * functions below have undefined behaviour.
    121  */
    122 bool
    123 acpisrat_exist(void)
    124 {
    125 	ACPI_TABLE_HEADER *table;
    126 	ACPI_STATUS rv;
    127 
    128 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
    129 	if (ACPI_FAILURE(rv))
    130 		return false;
    131 
    132 	/* Check if header is valid */
    133 	if (table == NULL)
    134 		return false;
    135 
    136 	if (table->Length == 0xffffffff)
    137 		return false;
    138 
    139 	srat = (ACPI_TABLE_SRAT *)table;
    140 
    141 	return true;
    142 }
    143 
    144 static int
    145 acpisrat_parse(void)
    146 {
    147 	ACPI_SUBTABLE_HEADER *subtable;
    148 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
    149 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
    150 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
    151 	ACPI_SRAT_GICC_AFFINITY *srat_gicc;
    152 
    153 	acpisrat_nodeid_t nodeid;
    154 	struct cpulist *cpuentry = NULL;
    155 	struct memlist *mementry;
    156 	uint32_t srat_pos;
    157 	bool ignore_cpu_affinity = false;
    158 
    159 	KASSERT(srat != NULL);
    160 
    161 	/* Content starts right after the header */
    162 	srat_pos = sizeof(ACPI_TABLE_SRAT);
    163 
    164 	while (srat_pos < srat->Header.Length) {
    165 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
    166 		srat_pos += subtable->Length;
    167 
    168 		switch (subtable->Type) {
    169 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
    170 			if (ignore_cpu_affinity)
    171 				continue;
    172 
    173 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
    174 			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
    175 				break;
    176 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
    177 			    (srat_cpu->ProximityDomainHi[1] << 16) |
    178 			    (srat_cpu->ProximityDomainHi[0] << 8) |
    179 			    (srat_cpu->ProximityDomainLo);
    180 
    181 			cpuentry = cpu_alloc();
    182 			if (cpuentry == NULL)
    183 				return ENOMEM;
    184 			CPU_ADD(cpuentry);
    185 
    186 			cpuentry->cpu.nodeid = nodeid;
    187 			cpuentry->cpu.apicid = srat_cpu->ApicId;
    188 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
    189 			cpuentry->cpu.flags = srat_cpu->Flags;
    190 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
    191 			break;
    192 
    193 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
    194 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
    195 			nodeid = srat_mem->ProximityDomain;
    196 			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
    197 				break;
    198 
    199 			mementry = mem_alloc();
    200 			if (mementry == NULL)
    201 				return ENOMEM;
    202 			MEM_ADD(mementry);
    203 
    204 			mementry->mem.nodeid = nodeid;
    205 			mementry->mem.baseaddress = srat_mem->BaseAddress;
    206 			mementry->mem.length = srat_mem->Length;
    207 			mementry->mem.flags = srat_mem->Flags;
    208 			break;
    209 
    210 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
    211 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
    212 			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
    213 				break;
    214 			nodeid = srat_x2apic->ProximityDomain;
    215 
    216 			/*
    217 			 * This table entry overrides
    218 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
    219 			 */
    220 			if (!ignore_cpu_affinity) {
    221 				struct cpulist *citer;
    222 				while ((citer = CPU_FIRST()) != NULL) {
    223 					CPU_REM(citer);
    224 					cpu_free(citer);
    225 				}
    226 				ignore_cpu_affinity = true;
    227 			}
    228 
    229 			cpuentry = cpu_alloc();
    230 			if (cpuentry == NULL)
    231 				return ENOMEM;
    232 			CPU_ADD(cpuentry);
    233 
    234 			cpuentry->cpu.nodeid = nodeid;
    235 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
    236 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
    237 			cpuentry->cpu.flags = srat_x2apic->Flags;
    238 			break;
    239 
    240 		case ACPI_SRAT_TYPE_GICC_AFFINITY:
    241 			srat_gicc = (ACPI_SRAT_GICC_AFFINITY *)subtable;
    242 			if ((srat_gicc->Flags & ACPI_SRAT_GICC_ENABLED) == 0)
    243 				break;
    244 			nodeid = srat_gicc->ProximityDomain;
    245 
    246 			/*
    247 			 * This table entry overrides
    248 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
    249 			 */
    250 			if (!ignore_cpu_affinity) {
    251 				struct cpulist *citer;
    252 				while ((citer = CPU_FIRST()) != NULL) {
    253 					CPU_REM(citer);
    254 					cpu_free(citer);
    255 				}
    256 				ignore_cpu_affinity = true;
    257 			}
    258 
    259 			cpuentry = cpu_alloc();
    260 			if (cpuentry == NULL)
    261 				return ENOMEM;
    262 			CPU_ADD(cpuentry);
    263 
    264 			cpuentry->cpu.nodeid = nodeid;
    265 			cpuentry->cpu.apicid = srat_gicc->AcpiProcessorUid;
    266 			cpuentry->cpu.clockdomain = srat_gicc->ClockDomain;
    267 			cpuentry->cpu.flags = srat_gicc->Flags;
    268 			break;
    269 
    270 		case ACPI_SRAT_TYPE_RESERVED:
    271 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
    272 				subtable->Length);
    273 			break;
    274 		}
    275 	}
    276 
    277 	return 0;
    278 }
    279 
    280 static int
    281 acpisrat_quirks(void)
    282 {
    283 	struct cpulist *citer;
    284 	struct memlist *mem, *miter;
    285 
    286 	/* Some sanity checks. */
    287 
    288 	/*
    289 	 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
    290 	 * nodes which don't have any memory modules plugged in. This behaviour
    291 	 * has been observed on AMD machines.
    292 	 *
    293 	 * Do that by searching for CPUs in NUMA nodes which don't exist in the
    294 	 * memory and then insert a zero memory range for the missing node.
    295 	 */
    296 	CPU_FOREACH(citer) {
    297 		mem = mem_get(citer->cpu.nodeid);
    298 		if (mem != NULL)
    299 			continue;
    300 		mem = mem_alloc();
    301 		if (mem == NULL)
    302 			return ENOMEM;
    303 		mem->mem.nodeid = citer->cpu.nodeid;
    304 		/* all other fields are already zero filled */
    305 
    306 		MEM_FOREACH(miter) {
    307 			if (miter->mem.nodeid < citer->cpu.nodeid)
    308 				continue;
    309 			MEM_ADD_BEFORE(mem, miter);
    310 			break;
    311 		}
    312 	}
    313 
    314 	return 0;
    315 }
    316 
/*
 * Initializes parser. Must be the first function being called when table is
 * available.
 *
 * Returns EEXIST when acpisrat_exist() reports no table, otherwise the
 * result of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
    328 
    329 /*
    330  * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
    331  */
    332 int
    333 acpisrat_refresh(void)
    334 {
    335 	int rc, i, j, k;
    336 	struct cpulist *citer;
    337 	struct memlist *miter;
    338 	uint32_t cnodes = 0, mnodes = 0;
    339 
    340 	CPU_INIT();
    341 	MEM_INIT();
    342 
    343 	rc = acpisrat_parse();
    344 	if (rc)
    345 		return rc;
    346 
    347 	rc = acpisrat_quirks();
    348 	if (rc)
    349 		return rc;
    350 
    351 	/* cleanup resources */
    352 	rc = acpisrat_exit();
    353 	if (rc)
    354 		return rc;
    355 
    356 	ncpus = 0;
    357 	CPU_FOREACH(citer) {
    358 		cnodes = MAX(citer->cpu.nodeid, cnodes);
    359 		ncpus++;
    360 	}
    361 
    362 	nmems = 0;
    363 	MEM_FOREACH(miter) {
    364 		mnodes = MAX(miter->mem.nodeid, mnodes);
    365 		nmems++;
    366 	}
    367 
    368 	nnodes = MAX(cnodes, mnodes) + 1;
    369 
    370 	if (nnodes == 0 || nmems == 0 || ncpus == 0) {
    371 		rc = ENOENT;
    372 		goto fail;
    373 	}
    374 
    375 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
    376 	    KM_SLEEP);
    377 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
    378 	    KM_SLEEP);
    379 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
    380 	    KM_SLEEP);
    381 
    382 	i = 0;
    383 	CPU_FOREACH(citer) {
    384 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
    385 		i++;
    386 		node_array[citer->cpu.nodeid].ncpus++;
    387 	}
    388 
    389 	i = 0;
    390 	MEM_FOREACH(miter) {
    391 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
    392 		i++;
    393 		node_array[miter->mem.nodeid].nmems++;
    394 	}
    395 
    396 	for (i = 0; i < nnodes; i++) {
    397 		node_array[i].nodeid = i;
    398 
    399 		if (node_array[i].ncpus != 0) {
    400 			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
    401 			    sizeof(struct acpisrat_cpu *), KM_SLEEP);
    402 		}
    403 		if (node_array[i].nmems != 0) {
    404 			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
    405 			    sizeof(struct acpisrat_mem *), KM_SLEEP);
    406 		}
    407 
    408 		k = 0;
    409 		for (j = 0; j < ncpus; j++) {
    410 			if (cpu_array[j].nodeid != i)
    411 				continue;
    412 			KASSERT(node_array[i].cpu != NULL);
    413 			node_array[i].cpu[k] = &cpu_array[j];
    414 			k++;
    415 		}
    416 
    417 		k = 0;
    418 		for (j = 0; j < nmems; j++) {
    419 			if (mem_array[j].nodeid != i)
    420 				continue;
    421 			KASSERT(node_array[i].mem != NULL);
    422 			node_array[i].mem[k] = &mem_array[j];
    423 			k++;
    424 		}
    425 	}
    426 
    427  fail:
    428 	while ((citer = CPU_FIRST()) != NULL) {
    429 		CPU_REM(citer);
    430 		cpu_free(citer);
    431 	}
    432 
    433 	while ((miter = MEM_FIRST()) != NULL) {
    434 		MEM_REM(miter);
    435 		mem_free(miter);
    436 	}
    437 
    438 	return rc;
    439 }
    440 
    441 /*
    442  * Free allocated memory. Should be called when acpisrat is no longer of any
    443  * use.
    444  */
    445 int
    446 acpisrat_exit(void)
    447 {
    448 	int i;
    449 
    450 	if (node_array) {
    451 		for (i = 0; i < nnodes; i++) {
    452 			if (node_array[i].cpu)
    453 				kmem_free(node_array[i].cpu,
    454 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
    455 			if (node_array[i].mem)
    456 				kmem_free(node_array[i].mem,
    457 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
    458 		}
    459 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
    460 	}
    461 	node_array = NULL;
    462 
    463 	if (cpu_array)
    464 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
    465 	cpu_array = NULL;
    466 
    467 	if (mem_array)
    468 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
    469 	mem_array = NULL;
    470 
    471 	nnodes = 0;
    472 	ncpus = 0;
    473 	nmems = 0;
    474 
    475 	return 0;
    476 }
    477 
    478 void
    479 acpisrat_dump(void)
    480 {
    481 	uint32_t i, j, nn, nc, nm;
    482 	struct acpisrat_cpu c;
    483 	struct acpisrat_mem m;
    484 
    485 	nn = acpisrat_nodes();
    486 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
    487 	for (i = 0; i < nn; i++) {
    488 		nc = acpisrat_node_cpus(i);
    489 		for (j = 0; j < nc; j++) {
    490 			acpisrat_cpu(i, j, &c);
    491 			aprint_debug("SRAT: node %u cpu %u "
    492 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
    493 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
    494 			    c.clockdomain);
    495 		}
    496 
    497 		nm = acpisrat_node_memoryranges(i);
    498 		for (j = 0; j < nm; j++) {
    499 			acpisrat_mem(i, j, &m);
    500 			aprint_debug("SRAT: node %u memory range %u (0x%"
    501 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
    502 			    m.nodeid, j, m.baseaddress,
    503 			    m.baseaddress + m.length, m.flags);
    504 		}
    505 	}
    506 }
    507 
    508 void
    509 acpisrat_load_uvm(void)
    510 {
    511 	uint32_t i, j, nn, nm;
    512 	struct acpisrat_mem m;
    513 
    514 	nn = acpisrat_nodes();
    515 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
    516 	for (i = 0; i < nn; i++) {
    517 		nm = acpisrat_node_memoryranges(i);
    518 		for (j = 0; j < nm; j++) {
    519 			acpisrat_mem(i, j, &m);
    520 			aprint_debug("SRAT: node %u memory range %u (0x%"
    521 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
    522 			    m.nodeid, j, m.baseaddress,
    523 			    m.baseaddress + m.length, m.flags);
    524 			uvm_page_numa_load(trunc_page(m.baseaddress),
    525 			    trunc_page(m.length), m.nodeid);
    526 		}
    527 	}
    528 }
    529 
    530 /*
    531  * Get number of NUMA nodes.
    532  */
    533 uint32_t
    534 acpisrat_nodes(void)
    535 {
    536 	return nnodes;
    537 }
    538 
    539 /*
    540  * Get number of cpus in the node. 0 means, this is a cpu-less node.
    541  */
    542 uint32_t
    543 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
    544 {
    545 	return node_array[nodeid].ncpus;
    546 }
    547 
    548 /*
    549  * Get number of memory ranges in the node 0 means, this node has no RAM.
    550  */
    551 uint32_t
    552 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
    553 {
    554 	return node_array[nodeid].nmems;
    555 }
    556 
    557 void
    558 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
    559     struct acpisrat_cpu *c)
    560 {
    561 	memcpy(c, node_array[nodeid].cpu[cpunum],
    562 	    sizeof(struct acpisrat_cpu));
    563 }
    564 
    565 void
    566 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
    567     struct acpisrat_mem *mem)
    568 {
    569 	memcpy(mem, node_array[nodeid].mem[memrange],
    570 	    sizeof(struct acpisrat_mem));
    571 }
    572 
    573 /*
    574  * Get a node from an APIC id (belonging to a cpu).
    575  */
    576 struct acpisrat_node *
    577 acpisrat_get_node(uint32_t apicid)
    578 {
    579 	struct acpisrat_node *node;
    580 	struct acpisrat_cpu *cpu;
    581 	size_t i, n;
    582 
    583 	for (i = 0; i < nnodes; i++) {
    584 		node = &node_array[i];
    585 
    586 		for (n = 0; n < node->ncpus; n++) {
    587 			cpu = node->cpu[n];
    588 			if (cpu->apicid == apicid) {
    589 				return node;
    590 			}
    591 		}
    592 	}
    593 
    594 	return NULL;
    595 }
    596