/*	$NetBSD: subr_cpu.c,v 1.11 2020/01/13 20:30:08 ad Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.11 2020/01/13 20:30:08 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void	cpu_topology_fake1(struct cpu_info *);

kmutex_t	cpu_lock		__cacheline_aligned;
int		ncpu			__read_mostly;
int		ncpuonline		__read_mostly;
bool		mp_online		__read_mostly;
static bool	cpu_topology_present	__read_mostly;
static bool	cpu_topology_haveslow	__read_mostly;
int64_t		cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos		__read_mostly;

/* Note: set in mi_cpu_attach() and idle_loop(). */
kcpuset_t *	kcpuset_attached	__read_mostly	= NULL;
kcpuset_t *	kcpuset_running		__read_mostly	= NULL;

static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU-related structures.
 *
 * Note: may not block; the memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
	struct cpu_info *ci;

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);
	kcpuset_set(kcpuset_running, 0);

	ci = curcpu();
	cpu_topology_fake1(ci);
}

int
cpu_setmodel(const char *fmt, ...)
{
	int len;
	va_list ap;

	va_start(ap, fmt);
	len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
	va_end(ap);
	return len;
}

const char *
cpu_getmodel(void)
{
	return cpu_model;
}

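/*
 * Illustrative use of the model string interface (names hypothetical):
 * MD identification code typically does something along the lines of
 *
 *	cpu_setmodel("%s %s", vendor, brandstring);
 *
 * and MI consumers later fetch the result with cpu_getmodel().  The
 * string is silently truncated to fit cpu_model[].
 */
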
bool
cpu_softintr_p(void)
{

	return (curlwp->l_pflag & LP_INTR) != 0;
}

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id, bool slow)
{
	enum cpu_rel rel;

	cpu_topology_present = true;
	cpu_topology_haveslow |= slow;
	ci->ci_package_id = package_id;
	ci->ci_core_id = core_id;
	ci->ci_smt_id = smt_id;
	ci->ci_numa_id = numa_id;
	ci->ci_is_slow = slow;
	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
}

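/*
 * Illustrative only: on a hypothetical machine with two packages, two
 * cores per package and two SMT threads per core, MD attach code might
 * describe one of the CPUs as
 *
 *	cpu_topology_set(ci, 1, 0, 1, 0, false);
 *
 * i.e. package 1, core 0, SMT 1, NUMA node 0, and not a slow CPU.  Until
 * cpu_topology_init() runs, each CPU's sibling lists remain the
 * self-links set up above.
 */
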
/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
	struct cpu_info *ci3;

	/* Walk to the end of the existing circular list and append. */
	for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
		ci3->ci_nsibling[rel]++;
		if (ci3->ci_sibling[rel] == ci2) {
			break;
		}
	}
	ci->ci_sibling[rel] = ci2;
	ci3->ci_sibling[rel] = ci;
	ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}

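/*
 * Sketch of the effect (illustrative): calling
 * cpu_topology_link(ci, cpu0, rel) on a ring that already holds cpu0
 * and cpu1 turns
 *
 *	cpu0 -> cpu1 -> cpu0		(ci_nsibling == 2 on both)
 *
 * into
 *
 *	cpu0 -> cpu1 -> ci -> cpu0	(ci_nsibling == 3 on all three)
 *
 * The new CPU is spliced in just before ci2, every existing member's
 * count is bumped during the walk, and the new member inherits the
 * final count.
 */
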
/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
#ifdef DEBUG
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *ci2;
	const char *names[] = { "core", "pkg", "1st" };
	enum cpu_rel rel;
	int i;

	CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpu_topology_haveslow)
			printf("%s ", ci->ci_is_slow ? "slow" : "fast");
		for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
			printf("%s has %d %s siblings:", cpu_name(ci),
			    ci->ci_nsibling[rel], names[rel]);
			ci2 = ci->ci_sibling[rel];
			i = 0;
			do {
				printf(" %s", cpu_name(ci2));
				ci2 = ci2->ci_sibling[rel];
			} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
			if (i == 64) {
				printf(" GAVE UP");
			}
			printf("\n");
		}
		printf("%s first in package: %s\n", cpu_name(ci),
		    cpu_name(ci->ci_package1st));
	}
#endif	/* DEBUG */
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
	enum cpu_rel rel;

	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
	if (!cpu_topology_present) {
		ci->ci_package_id = cpu_index(ci);
	}
	ci->ci_schedstate.spc_flags |=
	    (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	ci->ci_package1st = ci;
	ci->ci_is_slow = false;
	cpu_topology_haveslow = false;
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc., if cpu_topology_present is set,
 * since MD code also uses those values.
 */
static void
cpu_topology_fake(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpu_topology_fake1(ci);
		/* Undo (early boot) flag set so everything links OK. */
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	}
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
 */
void
cpu_topology_init(void)
{
	CPU_INFO_ITERATOR cii, cii2;
	struct cpu_info *ci, *ci2, *ci3;
	u_int minsmt, mincore;

	if (!cpu_topology_present) {
		cpu_topology_fake();
		goto linkit;
	}

	/* Find siblings in same core and package. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			/* Catch bogus info: distinct CPUs must not share pkg/core/SMT. */
			if (ci2->ci_package_id == ci->ci_package_id &&
			    ci2->ci_core_id == ci->ci_core_id &&
			    ci2->ci_smt_id == ci->ci_smt_id &&
			    ci2 != ci) {
#ifdef DEBUG
				printf("cpu%u %p pkg %u core %u smt %u same as "
				       "cpu%u %p pkg %u core %u smt %u\n",
				       cpu_index(ci), ci, ci->ci_package_id,
				       ci->ci_core_id, ci->ci_smt_id,
				       cpu_index(ci2), ci2, ci2->ci_package_id,
				       ci2->ci_core_id, ci2->ci_smt_id);
#endif
			    	printf("cpu_topology_init: info bogus, "
			    	    "faking it\n");
			    	cpu_topology_fake();
			    	goto linkit;
			}
			if (ci2 == ci ||
			    ci2->ci_package_id != ci->ci_package_id) {
				continue;
			}
			/* Find CPUs in the same core. */
			if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
			    ci->ci_core_id == ci2->ci_core_id) {
			    	cpu_topology_link(ci, ci2, CPUREL_CORE);
			}
			/* Find CPUs in the same package. */
			if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
			    	cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
			}
			if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
			    ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
				break;
			}
		}
	}

 linkit:
	/* Identify lowest numbered SMT in each core. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci2 = ci3 = ci;
		minsmt = ci->ci_smt_id;
		do {
			if (ci2->ci_smt_id < minsmt) {
				ci3 = ci2;
				minsmt = ci2->ci_smt_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_CORE];
		} while (ci2 != ci);
		ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
	}

	/* Identify lowest numbered core (its first SMT) in each package. */
	ci3 = NULL;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
			continue;
		}
		ci2 = ci3 = ci;
		mincore = ci->ci_core_id;
		do {
			if ((ci2->ci_schedstate.spc_flags &
			    SPCF_CORE1ST) != 0 &&
			    ci2->ci_core_id < mincore) {
				ci3 = ci2;
				mincore = ci2->ci_core_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);

		if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
			/* Already identified - nothing more to do. */
			continue;
		}
		ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

		/* Walk through all CPUs in package and point to first. */
		ci2 = ci3;
		do {
			ci2->ci_package1st = ci3;
			ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci3);

		/* Now look for somebody else to link to. */
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
			    != 0 && ci2 != ci3) {
			    	cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
			    	break;
			}
		}
	}

	/* Walk through all packages, starting with value of ci3 from above. */
	KASSERT(ci3 != NULL);
	ci = ci3;
	do {
		/* Walk through CPUs in the package and copy in PACKAGE1ST. */
		ci2 = ci;
		do {
			ci2->ci_sibling[CPUREL_PACKAGE1ST] =
			    ci->ci_sibling[CPUREL_PACKAGE1ST];
			ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
			    ci->ci_nsibling[CPUREL_PACKAGE1ST];
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);
		ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
	} while (ci != ci3);

	if (cpu_topology_haveslow) {
		/*
		 * For asymmetric systems where some CPUs are slower than
		 * others, mark first class CPUs for the scheduler.  This
		 * conflicts with SMT right now so whinge if observed.
		 */
		if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
			printf("cpu_topology_init: asymmetric & SMT??\n");
		}
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (!ci->ci_is_slow) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	} else {
		/*
		 * For any other configuration mark the 1st CPU in each
		 * core as a first class CPU.
		 */
		for (CPU_INFO_FOREACH(cii, ci)) {
			if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	}

	cpu_topology_dump();
}

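/*
 * Worked example (illustrative, not taken from real hardware): on a
 * machine with two packages, two cores per package and two SMT threads
 * per core, attached as cpu0..cpu7 in package/core/SMT order, the code
 * above builds
 *
 *	CPUREL_CORE:       cpu0-cpu1, cpu2-cpu3, cpu4-cpu5, cpu6-cpu7
 *	CPUREL_PACKAGE:    cpu0-cpu1-cpu2-cpu3, cpu4-cpu5-cpu6-cpu7
 *	CPUREL_PACKAGE1ST: cpu0-cpu4, mirrored onto every CPU
 *
 * with SPCF_CORE1ST on cpu0/cpu2/cpu4/cpu6, SPCF_PACKAGE1ST on
 * cpu0/cpu4, ci_package1st pointing at cpu0 or cpu4 as appropriate and,
 * with no slow CPUs, SPCF_1STCLASS on the SPCF_CORE1ST set.
 */
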
/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline, given the preemption
 * handling required.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
	lwp_t *l = curlwp;
	KPREEMPT_DISABLE(l);
	l->l_cpu->ci_counts[idx] += delta;
	KPREEMPT_ENABLE(l);
}

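/*
 * Illustrative call (the counter chosen is just an example): statistics
 * maintained only from thread context are bumped with e.g.
 *
 *	cpu_count(CPU_COUNT_FORKS, 1);
 *
 * Each CPU accumulates into its own ci_counts[]; the per-CPU values are
 * only folded together by the sync routines below.
 */
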
/*
 * Fetch fresh sum total for all counts.  Expensive - don't call often.
 */
void
cpu_count_sync_all(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum[CPU_COUNT_MAX], *ptr;
	enum cpu_count i;
	int s;

	KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

	if (__predict_true(mp_online)) {
		memset(sum, 0, sizeof(sum));
		/*
		 * We want this to be reasonably quick, so that the snapshot
		 * we take is not wildly inconsistent; don't let the current
		 * LWP be preempted while summing.
		 */
		s = splvm();
		curcpu()->ci_counts[CPU_COUNT_SYNC_ALL]++;
		for (CPU_INFO_FOREACH(cii, ci)) {
			ptr = ci->ci_counts;
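			/*
			 * Hand-unrolled by eight: this relies on
			 * CPU_COUNT_MAX being a multiple of 8, which the
			 * KASSERT below effectively checks.
			 */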
			for (i = 0; i < CPU_COUNT_MAX; i += 8) {
				sum[i+0] += ptr[i+0];
				sum[i+1] += ptr[i+1];
				sum[i+2] += ptr[i+2];
				sum[i+3] += ptr[i+3];
				sum[i+4] += ptr[i+4];
				sum[i+5] += ptr[i+5];
				sum[i+6] += ptr[i+6];
				sum[i+7] += ptr[i+7];
			}
			KASSERT(i == CPU_COUNT_MAX);
		}
		memcpy(cpu_counts, sum, sizeof(cpu_counts));
		splx(s);
	} else {
		memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
	}
}

/*
 * Fetch a fresh sum total for one single count.  Expensive - don't call often.
 */
int64_t
cpu_count_sync(enum cpu_count count)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum;
	int s;

	if (__predict_true(mp_online)) {
		s = splvm();
		curcpu()->ci_counts[CPU_COUNT_SYNC_ONE]++;
		sum = 0;
		for (CPU_INFO_FOREACH(cii, ci)) {
			sum += ci->ci_counts[count];
		}
		splx(s);
	} else {
		/* XXX Early boot, iterator might not be available. */
		sum = curcpu()->ci_counts[count];
	}
	return cpu_counts[count] = sum;
}

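/*
 * Note that both sync routines also refresh the global cpu_counts[]
 * cache, so a caller that can tolerate a slightly stale value may read
 * cpu_counts[] directly rather than paying for a fresh sum.
 */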