Home | History | Annotate | Line # | Download | only in acpi
acpi_srat.c revision 1.5.4.1
      1 /* $NetBSD: acpi_srat.c,v 1.5.4.1 2020/04/13 08:04:18 martin Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Christoph Egger.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.5.4.1 2020/04/13 08:04:18 martin Exp $");
     34 
     35 #include <sys/param.h>
     36 #include <sys/kmem.h>
     37 #include <sys/systm.h>
     38 
     39 #include <dev/acpi/acpivar.h>
     40 #include <dev/acpi/acpi_srat.h>
     41 
     42 #include <uvm/uvm_extern.h>
     43 
     44 static ACPI_TABLE_SRAT *srat;
     45 
     46 static uint32_t nnodes; /* Number of NUMA nodes */
     47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
     48 static uint32_t ncpus; /* Number of CPUs */
     49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
     50 static uint32_t nmems; /* Number of Memory ranges */
     51 static struct acpisrat_mem *mem_array;
     52 
     53 struct cpulist {
     54 	struct acpisrat_cpu cpu;
     55 	TAILQ_ENTRY(cpulist) entry;
     56 };
     57 
     58 static TAILQ_HEAD(, cpulist) cpulisthead;
     59 
     60 #define CPU_INIT()		TAILQ_INIT(&cpulisthead);
     61 #define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
     62 #define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
     63 #define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
     64 #define CPU_FIRST()		TAILQ_FIRST(&cpulisthead)
     65 
     66 struct memlist {
     67 	struct acpisrat_mem mem;
     68 	TAILQ_ENTRY(memlist) entry;
     69 };
     70 
     71 static TAILQ_HEAD(, memlist) memlisthead;
     72 
     73 #define MEM_INIT()		TAILQ_INIT(&memlisthead)
     74 #define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
     75 #define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
     76 #define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
     77 #define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
     78 #define MEM_FIRST()		TAILQ_FIRST(&memlisthead)
     79 
     80 
     81 static struct cpulist *
     82 cpu_alloc(void)
     83 {
     84 	return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
     85 }
     86 
     87 static void
     88 cpu_free(struct cpulist *c)
     89 {
     90 	kmem_free(c, sizeof(struct cpulist));
     91 }
     92 
     93 static struct memlist *
     94 mem_alloc(void)
     95 {
     96 	return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
     97 }
     98 
     99 static void
    100 mem_free(struct memlist *m)
    101 {
    102 	kmem_free(m, sizeof(struct memlist));
    103 }
    104 
    105 static struct memlist *
    106 mem_get(acpisrat_nodeid_t nodeid)
    107 {
    108 	struct memlist *tmp;
    109 
    110 	MEM_FOREACH(tmp) {
    111 		if (tmp->mem.nodeid == nodeid)
    112 			return tmp;
    113 	}
    114 
    115 	return NULL;
    116 }
    117 
    118 /*
    119  * Returns true if ACPI SRAT table is available. If table does not exist, all
    120  * functions below have undefined behaviour.
    121  */
    122 bool
    123 acpisrat_exist(void)
    124 {
    125 	ACPI_TABLE_HEADER *table;
    126 	ACPI_STATUS rv;
    127 
    128 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
    129 	if (ACPI_FAILURE(rv))
    130 		return false;
    131 
    132 	/* Check if header is valid */
    133 	if (table == NULL)
    134 		return false;
    135 
    136 	if (table->Length == 0xffffffff)
    137 		return false;
    138 
    139 	srat = (ACPI_TABLE_SRAT *)table;
    140 
    141 	return true;
    142 }
    143 
    144 static int
    145 acpisrat_parse(void)
    146 {
    147 	ACPI_SUBTABLE_HEADER *subtable;
    148 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
    149 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
    150 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
    151 
    152 	acpisrat_nodeid_t nodeid;
    153 	struct cpulist *cpuentry = NULL;
    154 	struct memlist *mementry;
    155 	uint32_t srat_pos;
    156 	bool ignore_cpu_affinity = false;
    157 
    158 	KASSERT(srat != NULL);
    159 
    160 	/* Content starts right after the header */
    161 	srat_pos = sizeof(ACPI_TABLE_SRAT);
    162 
    163 	while (srat_pos < srat->Header.Length) {
    164 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
    165 		srat_pos += subtable->Length;
    166 
    167 		switch (subtable->Type) {
    168 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
    169 			if (ignore_cpu_affinity)
    170 				continue;
    171 
    172 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
    173 			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
    174 				break;
    175 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
    176 			    (srat_cpu->ProximityDomainHi[1] << 16) |
    177 			    (srat_cpu->ProximityDomainHi[0] << 8) |
    178 			    (srat_cpu->ProximityDomainLo);
    179 
    180 			cpuentry = cpu_alloc();
    181 			if (cpuentry == NULL)
    182 				return ENOMEM;
    183 			CPU_ADD(cpuentry);
    184 
    185 			cpuentry->cpu.nodeid = nodeid;
    186 			cpuentry->cpu.apicid = srat_cpu->ApicId;
    187 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
    188 			cpuentry->cpu.flags = srat_cpu->Flags;
    189 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
    190 			break;
    191 
    192 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
    193 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
    194 			nodeid = srat_mem->ProximityDomain;
    195 			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
    196 				break;
    197 
    198 			mementry = mem_alloc();
    199 			if (mementry == NULL)
    200 				return ENOMEM;
    201 			MEM_ADD(mementry);
    202 
    203 			mementry->mem.nodeid = nodeid;
    204 			mementry->mem.baseaddress = srat_mem->BaseAddress;
    205 			mementry->mem.length = srat_mem->Length;
    206 			mementry->mem.flags = srat_mem->Flags;
    207 			break;
    208 
    209 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
    210 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
    211 			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
    212 				break;
    213 			nodeid = srat_x2apic->ProximityDomain;
    214 
    215 			/*
    216 			 * This table entry overrides
    217 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
    218 			 */
    219 			if (!ignore_cpu_affinity) {
    220 				struct cpulist *citer;
    221 				while ((citer = CPU_FIRST()) != NULL) {
    222 					CPU_REM(citer);
    223 					cpu_free(citer);
    224 				}
    225 				ignore_cpu_affinity = true;
    226 			}
    227 
    228 			cpuentry = cpu_alloc();
    229 			if (cpuentry == NULL)
    230 				return ENOMEM;
    231 			CPU_ADD(cpuentry);
    232 
    233 			cpuentry->cpu.nodeid = nodeid;
    234 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
    235 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
    236 			cpuentry->cpu.flags = srat_x2apic->Flags;
    237 			break;
    238 
    239 		case ACPI_SRAT_TYPE_RESERVED:
    240 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
    241 				subtable->Length);
    242 			break;
    243 		}
    244 	}
    245 
    246 	return 0;
    247 }
    248 
    249 static int
    250 acpisrat_quirks(void)
    251 {
    252 	struct cpulist *citer;
    253 	struct memlist *mem, *miter;
    254 
    255 	/* Some sanity checks. */
    256 
    257 	/*
    258 	 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
    259 	 * nodes which don't have any memory modules plugged in. This behaviour
    260 	 * has been observed on AMD machines.
    261 	 *
    262 	 * Do that by searching for CPUs in NUMA nodes which don't exist in the
    263 	 * memory and then insert a zero memory range for the missing node.
    264 	 */
    265 	CPU_FOREACH(citer) {
    266 		mem = mem_get(citer->cpu.nodeid);
    267 		if (mem != NULL)
    268 			continue;
    269 		mem = mem_alloc();
    270 		if (mem == NULL)
    271 			return ENOMEM;
    272 		mem->mem.nodeid = citer->cpu.nodeid;
    273 		/* all other fields are already zero filled */
    274 
    275 		MEM_FOREACH(miter) {
    276 			if (miter->mem.nodeid < citer->cpu.nodeid)
    277 				continue;
    278 			MEM_ADD_BEFORE(mem, miter);
    279 			break;
    280 		}
    281 	}
    282 
    283 	return 0;
    284 }
    285 
/*
 * Set up the parser.  This has to be the first function called once the
 * table is known to be available.  Returns EEXIST when acpisrat_exist()
 * reports no table, otherwise the result of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	return acpisrat_exist() ? acpisrat_refresh() : EEXIST;
}
    297 
    298 /*
    299  * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
    300  */
    301 int
    302 acpisrat_refresh(void)
    303 {
    304 	int rc, i, j, k;
    305 	struct cpulist *citer;
    306 	struct memlist *miter;
    307 	uint32_t cnodes = 0, mnodes = 0;
    308 
    309 	CPU_INIT();
    310 	MEM_INIT();
    311 
    312 	rc = acpisrat_parse();
    313 	if (rc)
    314 		return rc;
    315 
    316 	rc = acpisrat_quirks();
    317 	if (rc)
    318 		return rc;
    319 
    320 	/* cleanup resources */
    321 	rc = acpisrat_exit();
    322 	if (rc)
    323 		return rc;
    324 
    325 	ncpus = 0;
    326 	CPU_FOREACH(citer) {
    327 		cnodes = MAX(citer->cpu.nodeid, cnodes);
    328 		ncpus++;
    329 	}
    330 
    331 	nmems = 0;
    332 	MEM_FOREACH(miter) {
    333 		mnodes = MAX(miter->mem.nodeid, mnodes);
    334 		nmems++;
    335 	}
    336 
    337 	nnodes = MAX(cnodes, mnodes) + 1;
    338 
    339 	if (nnodes == 0 || nmems == 0 || ncpus == 0) {
    340 		rc = ENOENT;
    341 		goto fail;
    342 	}
    343 
    344 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
    345 	    KM_SLEEP);
    346 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
    347 	    KM_SLEEP);
    348 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
    349 	    KM_SLEEP);
    350 
    351 	i = 0;
    352 	CPU_FOREACH(citer) {
    353 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
    354 		i++;
    355 		node_array[citer->cpu.nodeid].ncpus++;
    356 	}
    357 
    358 	i = 0;
    359 	MEM_FOREACH(miter) {
    360 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
    361 		i++;
    362 		node_array[miter->mem.nodeid].nmems++;
    363 	}
    364 
    365 	for (i = 0; i < nnodes; i++) {
    366 		node_array[i].nodeid = i;
    367 
    368 		if (node_array[i].ncpus != 0) {
    369 			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
    370 			    sizeof(struct acpisrat_cpu *), KM_SLEEP);
    371 		}
    372 		if (node_array[i].nmems != 0) {
    373 			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
    374 			    sizeof(struct acpisrat_mem *), KM_SLEEP);
    375 		}
    376 
    377 		k = 0;
    378 		for (j = 0; j < ncpus; j++) {
    379 			if (cpu_array[j].nodeid != i)
    380 				continue;
    381 			KASSERT(node_array[i].cpu != NULL);
    382 			node_array[i].cpu[k] = &cpu_array[j];
    383 			k++;
    384 		}
    385 
    386 		k = 0;
    387 		for (j = 0; j < nmems; j++) {
    388 			if (mem_array[j].nodeid != i)
    389 				continue;
    390 			KASSERT(node_array[i].mem != NULL);
    391 			node_array[i].mem[k] = &mem_array[j];
    392 			k++;
    393 		}
    394 	}
    395 
    396  fail:
    397 	while ((citer = CPU_FIRST()) != NULL) {
    398 		CPU_REM(citer);
    399 		cpu_free(citer);
    400 	}
    401 
    402 	while ((miter = MEM_FIRST()) != NULL) {
    403 		MEM_REM(miter);
    404 		mem_free(miter);
    405 	}
    406 
    407 	return rc;
    408 }
    409 
    410 /*
    411  * Free allocated memory. Should be called when acpisrat is no longer of any
    412  * use.
    413  */
    414 int
    415 acpisrat_exit(void)
    416 {
    417 	int i;
    418 
    419 	if (node_array) {
    420 		for (i = 0; i < nnodes; i++) {
    421 			if (node_array[i].cpu)
    422 				kmem_free(node_array[i].cpu,
    423 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
    424 			if (node_array[i].mem)
    425 				kmem_free(node_array[i].mem,
    426 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
    427 		}
    428 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
    429 	}
    430 	node_array = NULL;
    431 
    432 	if (cpu_array)
    433 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
    434 	cpu_array = NULL;
    435 
    436 	if (mem_array)
    437 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
    438 	mem_array = NULL;
    439 
    440 	nnodes = 0;
    441 	ncpus = 0;
    442 	nmems = 0;
    443 
    444 	return 0;
    445 }
    446 
    447 void
    448 acpisrat_dump(void)
    449 {
    450 	uint32_t i, j, nn, nc, nm;
    451 	struct acpisrat_cpu c;
    452 	struct acpisrat_mem m;
    453 
    454 	nn = acpisrat_nodes();
    455 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
    456 	for (i = 0; i < nn; i++) {
    457 		nc = acpisrat_node_cpus(i);
    458 		for (j = 0; j < nc; j++) {
    459 			acpisrat_cpu(i, j, &c);
    460 			aprint_debug("SRAT: node %u cpu %u "
    461 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
    462 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
    463 			    c.clockdomain);
    464 		}
    465 
    466 		nm = acpisrat_node_memoryranges(i);
    467 		for (j = 0; j < nm; j++) {
    468 			acpisrat_mem(i, j, &m);
    469 			aprint_debug("SRAT: node %u memory range %u (0x%"
    470 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
    471 			    m.nodeid, j, m.baseaddress,
    472 			    m.baseaddress + m.length, m.flags);
    473 		}
    474 	}
    475 }
    476 
    477 void
    478 acpisrat_load_uvm(void)
    479 {
    480 	uint32_t i, j, nn, nm;
    481 	struct acpisrat_mem m;
    482 
    483 	nn = acpisrat_nodes();
    484 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
    485 	for (i = 0; i < nn; i++) {
    486 		nm = acpisrat_node_memoryranges(i);
    487 		for (j = 0; j < nm; j++) {
    488 			acpisrat_mem(i, j, &m);
    489 			aprint_debug("SRAT: node %u memory range %u (0x%"
    490 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
    491 			    m.nodeid, j, m.baseaddress,
    492 			    m.baseaddress + m.length, m.flags);
    493 			uvm_page_numa_load(trunc_page(m.baseaddress),
    494 			    trunc_page(m.length), m.nodeid);
    495 		}
    496 	}
    497 }
    498 
    499 /*
    500  * Get number of NUMA nodes.
    501  */
    502 uint32_t
    503 acpisrat_nodes(void)
    504 {
    505 	return nnodes;
    506 }
    507 
    508 /*
    509  * Get number of cpus in the node. 0 means, this is a cpu-less node.
    510  */
    511 uint32_t
    512 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
    513 {
    514 	return node_array[nodeid].ncpus;
    515 }
    516 
    517 /*
    518  * Get number of memory ranges in the node 0 means, this node has no RAM.
    519  */
    520 uint32_t
    521 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
    522 {
    523 	return node_array[nodeid].nmems;
    524 }
    525 
    526 void
    527 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
    528     struct acpisrat_cpu *c)
    529 {
    530 	memcpy(c, node_array[nodeid].cpu[cpunum],
    531 	    sizeof(struct acpisrat_cpu));
    532 }
    533 
    534 void
    535 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
    536     struct acpisrat_mem *mem)
    537 {
    538 	memcpy(mem, node_array[nodeid].mem[memrange],
    539 	    sizeof(struct acpisrat_mem));
    540 }
    541 
    542 /*
    543  * Get a node from an APIC id (belonging to a cpu).
    544  */
    545 struct acpisrat_node *
    546 acpisrat_get_node(uint32_t apicid)
    547 {
    548 	struct acpisrat_node *node;
    549 	struct acpisrat_cpu *cpu;
    550 	size_t i, n;
    551 
    552 	for (i = 0; i < nnodes; i++) {
    553 		node = &node_array[i];
    554 
    555 		for (n = 0; n < node->ncpus; n++) {
    556 			cpu = node->cpu[n];
    557 			if (cpu->apicid == apicid) {
    558 				return node;
    559 			}
    560 		}
    561 	}
    562 
    563 	return NULL;
    564 }
    565