/*	$NetBSD: subr_cpu.c,v 1.19 2023/07/08 13:59:05 riastradh Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.19 2023/07/08 13:59:05 riastradh Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void	cpu_topology_fake1(struct cpu_info *);

kmutex_t	cpu_lock		__cacheline_aligned;
int		ncpu			__read_mostly;
int		ncpuonline		__read_mostly;
bool		mp_online		__read_mostly;
static bool	cpu_topology_present	__read_mostly;
static bool	cpu_topology_haveslow	__read_mostly;
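/* System-wide counter totals, refreshed by cpu_count_sync(). */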
int64_t		cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos		__read_mostly;

/* Note: set on mi_cpu_attach() and idle_loop(). */
kcpuset_t *	kcpuset_attached	__read_mostly	= NULL;
kcpuset_t *	kcpuset_running		__read_mostly	= NULL;

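/* CPU model string, set with cpu_setmodel() and returned by cpu_getmodel(). */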
static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU-related structures.
 *
 * Note: this may not block, and the memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
	struct cpu_info *ci;

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);
	kcpuset_set(kcpuset_running, 0);

	ci = curcpu();
	cpu_topology_fake1(ci);
}

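/*
 * cpu_setmodel: set the CPU model string, printf-style.  Returns the
 * length from vsnprintf(), which may exceed the buffer size if the
 * string was truncated.
 */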
int
cpu_setmodel(const char *fmt, ...)
{
	int len;
	va_list ap;

	va_start(ap, fmt);
	len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
	va_end(ap);
	return len;
}

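/*
 * cpu_getmodel: return the CPU model string set by cpu_setmodel().
 */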
const char *
cpu_getmodel(void)
{
	return cpu_model;
}

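/*
 * cpu_softintr_p: return true if the calling LWP is a soft interrupt
 * handler (LP_INTR is set).
 */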
bool
cpu_softintr_p(void)
{

	return (curlwp->l_pflag & LP_INTR) != 0;
}

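/*
 * curcpu_stable: return true if the current LWP cannot migrate to another
 * CPU, i.e. its curcpu() reference will remain stable.
 */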
bool
curcpu_stable(void)
{
	struct lwp *const l = curlwp;
	const int pflag = l->l_pflag;
	const int nopreempt = l->l_nopreempt;

	/*
	 * - Softints (LP_INTR) never migrate between CPUs.
	 * - Bound lwps (LP_BOUND), either kthreads created bound to
	 *   a CPU or any lwps bound with curlwp_bind, never migrate.
	 * - If kpreemption is disabled, the lwp can't migrate.
	 * - If we're in interrupt context, preemption is blocked.
	 *
	 * We combine the LP_INTR, LP_BOUND, and l_nopreempt test into
	 * a single predicted-true branch so this is cheap to assert in
	 * most contexts where it will be used, then fall back to
	 * calling the full kpreempt_disabled() and cpu_intr_p() as
	 * subroutines.
	 *
	 * XXX Is cpu_intr_p redundant with kpreempt_disabled?
	 */
	return __predict_true(((pflag & (LP_INTR|LP_BOUND)) | nopreempt)
		!= 0) ||
	    kpreempt_disabled() ||
	    cpu_intr_p();
}

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id)
{
	enum cpu_rel rel;

	cpu_topology_present = true;
	ci->ci_package_id = package_id;
	ci->ci_core_id = core_id;
	ci->ci_smt_id = smt_id;
	ci->ci_numa_id = numa_id;
	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
}

/*
 * Collect CPU relative speed
 */
void
cpu_topology_setspeed(struct cpu_info *ci, bool slow)
{

	cpu_topology_haveslow |= slow;
	ci->ci_is_slow = slow;
}

/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
	struct cpu_info *ci3;

	/* Walk to the end of the existing circular list and append. */
	for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
		ci3->ci_nsibling[rel]++;
		if (ci3->ci_sibling[rel] == ci2) {
			break;
		}
	}
	ci->ci_sibling[rel] = ci2;
	ci3->ci_sibling[rel] = ci;
	ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}

/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
#ifdef DEBUG
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *ci2;
	const char *names[] = { "core", "pkg", "1st" };
	enum cpu_rel rel;
	int i;

	CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
	if (ncpu == 1) {
		return;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpu_topology_haveslow)
			printf("%s ", ci->ci_is_slow ? "slow" : "fast");
		for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
			printf("%s has %d %s siblings:", cpu_name(ci),
			    ci->ci_nsibling[rel], names[rel]);
			ci2 = ci->ci_sibling[rel];
			i = 0;
			do {
				printf(" %s", cpu_name(ci2));
				ci2 = ci2->ci_sibling[rel];
			} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
			if (i == 64) {
				printf(" GAVE UP");
			}
			printf("\n");
		}
		printf("%s first in package: %s\n", cpu_name(ci),
		    cpu_name(ci->ci_package1st));
	}
#endif	/* DEBUG */
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
	enum cpu_rel rel;

	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
	if (!cpu_topology_present) {
		ci->ci_package_id = cpu_index(ci);
	}
	ci->ci_schedstate.spc_flags |=
	    (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	ci->ci_package1st = ci;
	if (!cpu_topology_haveslow) {
		ci->ci_is_slow = false;
	}
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc, if cpu_topology_present is set.
 * MD code also uses these.
 */
static void
cpu_topology_fake(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpu_topology_fake1(ci);
		/* Undo the early-boot flag settings so everything links OK. */
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	}
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
 */
void
cpu_topology_init(void)
{
	CPU_INFO_ITERATOR cii, cii2;
	struct cpu_info *ci, *ci2, *ci3;
	u_int minsmt, mincore;

	if (!cpu_topology_present) {
		cpu_topology_fake();
		goto linkit;
	}

	/* Find siblings in same core and package. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			/* Two distinct CPUs reporting identical IDs means the info is bogus. */
			if (ci2->ci_package_id == ci->ci_package_id &&
			    ci2->ci_core_id == ci->ci_core_id &&
			    ci2->ci_smt_id == ci->ci_smt_id &&
			    ci2 != ci) {
#ifdef DEBUG
				printf("cpu%u %p pkg %u core %u smt %u same as "
				       "cpu%u %p pkg %u core %u smt %u\n",
				       cpu_index(ci), ci, ci->ci_package_id,
				       ci->ci_core_id, ci->ci_smt_id,
				       cpu_index(ci2), ci2, ci2->ci_package_id,
				       ci2->ci_core_id, ci2->ci_smt_id);
#endif
				printf("cpu_topology_init: info bogus, "
				    "faking it\n");
				cpu_topology_fake();
				goto linkit;
			}
			if (ci2 == ci ||
			    ci2->ci_package_id != ci->ci_package_id) {
				continue;
			}
			/* Find CPUs in the same core. */
			if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
			    ci->ci_core_id == ci2->ci_core_id) {
				cpu_topology_link(ci, ci2, CPUREL_CORE);
			}
			/* Find CPUs in the same package. */
			if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
				cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
			}
			if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
			    ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
				break;
			}
		}
	}

 linkit:
	/* Identify lowest numbered SMT in each core. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci2 = ci3 = ci;
		minsmt = ci->ci_smt_id;
		do {
			if (ci2->ci_smt_id < minsmt) {
				ci3 = ci2;
				minsmt = ci2->ci_smt_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_CORE];
		} while (ci2 != ci);
		ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
	}

	/* Identify lowest numbered SMT in each package. */
	ci3 = NULL;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
			continue;
		}
		ci2 = ci3 = ci;
		mincore = ci->ci_core_id;
		do {
			if ((ci2->ci_schedstate.spc_flags &
			    SPCF_CORE1ST) != 0 &&
			    ci2->ci_core_id < mincore) {
				ci3 = ci2;
				mincore = ci2->ci_core_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);

		if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
			/* Already identified - nothing more to do. */
			continue;
		}
		ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

		/* Walk through all CPUs in package and point to first. */
		ci2 = ci3;
		do {
			ci2->ci_package1st = ci3;
			ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci3);

		/* Now look for somebody else to link to. */
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
			    != 0 && ci2 != ci3) {
				cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
				break;
			}
		}
	}

	/* Walk through all packages, starting with value of ci3 from above. */
	KASSERT(ci3 != NULL);
	ci = ci3;
	do {
		/* Walk through CPUs in the package and copy in PACKAGE1ST. */
		ci2 = ci;
		do {
			ci2->ci_sibling[CPUREL_PACKAGE1ST] =
			    ci->ci_sibling[CPUREL_PACKAGE1ST];
			ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
			    ci->ci_nsibling[CPUREL_PACKAGE1ST];
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);
		ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
	} while (ci != ci3);

	if (cpu_topology_haveslow) {
		/*
		 * For asymmetric systems where some CPUs are slower than
		 * others, mark first class CPUs for the scheduler.  This
		 * conflicts with SMT right now so whinge if observed.
		 */
		if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
			printf("cpu_topology_init: asymmetric & SMT??\n");
		}
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (!ci->ci_is_slow) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	} else {
		/*
		 * For any other configuration mark the 1st CPU in each
		 * core as a first class CPU.
		 */
		for (CPU_INFO_FOREACH(cii, ci)) {
			if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	}

	cpu_topology_dump();
}

/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline because of the preemption
 * handling needed around the update.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
	lwp_t *l = curlwp;
	KPREEMPT_DISABLE(l);
	l->l_cpu->ci_counts[idx] += delta;
	KPREEMPT_ENABLE(l);
}

/*
 * Fetch a fresh sum total for all counts.  Expensive - don't call often.
 *
 * If poll is true, the caller is okay with less recent values (but
 * no more than 1/hz seconds old).  Callers that invoke this very often
 * should pass true.
 *
 * This should be reasonably quick so that the values collected don't get
 * totally out of whack, and it can also be called from interrupt context,
 * so go to splvm() while summing the counters.  It's tempting to use a spin
 * mutex here but this routine is called from DDB.
 */
void
cpu_count_sync(bool poll)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum[CPU_COUNT_MAX], *ptr;
	static int lasttick;
	int curtick, s;
	enum cpu_count i;

	KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

	if (__predict_false(!mp_online)) {
		memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
		return;
	}

	s = splvm();
	curtick = getticks();
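	/*
	 * If polling and a sync already happened during this tick, the
	 * cached totals are fresh enough - return without resumming.
	 */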
	if (poll && atomic_load_acquire(&lasttick) == curtick) {
		splx(s);
		return;
	}
	memset(sum, 0, sizeof(sum));
	curcpu()->ci_counts[CPU_COUNT_SYNC]++;
	for (CPU_INFO_FOREACH(cii, ci)) {
		ptr = ci->ci_counts;
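		/*
		 * Sum eight counters per iteration; this relies on
		 * CPU_COUNT_MAX being a multiple of 8 (see the KASSERT
		 * below).
		 */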
		for (i = 0; i < CPU_COUNT_MAX; i += 8) {
			sum[i+0] += ptr[i+0];
			sum[i+1] += ptr[i+1];
			sum[i+2] += ptr[i+2];
			sum[i+3] += ptr[i+3];
			sum[i+4] += ptr[i+4];
			sum[i+5] += ptr[i+5];
			sum[i+6] += ptr[i+6];
			sum[i+7] += ptr[i+7];
		}
		KASSERT(i == CPU_COUNT_MAX);
	}
	memcpy(cpu_counts, sum, sizeof(cpu_counts));
	atomic_store_release(&lasttick, curtick);
	splx(s);
}