1 /* $NetBSD: subr_kcpuset.c,v 1.22 2026/01/04 03:19:17 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2011, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Kernel CPU set implementation. 34 * 35 * Interface can be used by kernel subsystems as a unified dynamic CPU 36 * bitset implementation handling many CPUs. Facility also supports early 37 * use by MD code on boot, as it fixups bitsets on further boot. 38 * 39 * TODO: 40 * - Handle "reverse" bitset on fixup/grow. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_kcpuset.c,v 1.22 2026/01/04 03:19:17 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/intr.h>
#include <sys/kcpuset.h>
#include <sys/kmem.h>
#include <sys/sched.h>
#include <sys/sdt.h>

/* Number of CPUs to support: MAXCPUS rounded up to a whole 32-bit word. */
#define KC_MAXCPUS              roundup2(MAXCPUS, 32)

/*
 * Structure of dynamic CPU set in the kernel.  Callers only ever hold a
 * pointer to this variable-sized array of 32-bit words.
 */
struct kcpuset {
        uint32_t                bits[0];
};

typedef struct kcpuset_impl {
        /* Reference count. */
        u_int                   kc_refcnt;
        /* Next to free, if non-NULL (used when multiple references). */
        struct kcpuset *        kc_next;
        /* Actual variable-sized field of bits. */
        struct kcpuset          kc_field;
} kcpuset_impl_t;

/*
 * Offset of the bit field within the implementation structure, and the
 * conversions from the public kcpuset_t pointer back to the private header.
 */
#define KC_BITS_OFF             (offsetof(struct kcpuset_impl, kc_field))
#define KC_GETSTRUCT(b)         ((kcpuset_impl_t *)((char *)(b) - KC_BITS_OFF))
#define KC_GETCSTRUCT(b)        ((const kcpuset_impl_t *)((const char *)(b) - KC_BITS_OFF))

/* Sizes of a single bitset word: 32 bits, so shift by 5 and mask low 5 bits. */
#define KC_SHIFT                5
#define KC_MASK                 31

/* An array of noted early kcpuset creations and data. */
#define KC_SAVE_NITEMS          8

/* Structures for early boot mechanism (must be statically initialised). */
static kcpuset_t **             kc_noted_early[KC_SAVE_NITEMS];
static uint32_t                 kc_bits_early[KC_SAVE_NITEMS];
static int                      kc_last_idx = 0;
static bool                     kc_initialised = false;

#define KC_BITSIZE_EARLY        sizeof(kc_bits_early[0])
#define KC_NFIELDS_EARLY        1

/*
 * The size of whole bitset fields and amount of fields.
 * The whole size must statically initialise for early case.
 */
static size_t kc_bitsize __read_mostly = KC_BITSIZE_EARLY;
static size_t kc_nfields __read_mostly = KC_NFIELDS_EARLY;
static size_t kc_memsize __read_mostly;

static kcpuset_t *      kcpuset_create_raw(bool);

/*
 * kcpuset_sysinit: initialize the subsystem, transfer early boot cases
 * to dynamically allocated sets.
 */
void
kcpuset_sysinit(void)
{
        kcpuset_t *kc_dynamic[KC_SAVE_NITEMS], *kcp;
        int i, s;

        /* Set a kcpuset_t sizes (switch from the 1-word early sizes). */
        kc_nfields = (KC_MAXCPUS >> KC_SHIFT);
        kc_bitsize = sizeof(uint32_t) * kc_nfields;
        kc_memsize = sizeof(kcpuset_impl_t) + kc_bitsize;
        KASSERT(kc_nfields != 0);
        KASSERT(kc_bitsize != 0);

        /* First, pre-allocate kcpuset entries, one per early-noted user. */
        for (i = 0; i < kc_last_idx; i++) {
                kcp = kcpuset_create_raw(true);
                kc_dynamic[i] = kcp;
        }

        /*
         * Prepare to convert all early noted kcpuset uses to dynamic sets.
         * All processors, except the one we are currently running (primary),
         * must not be spinned yet.  Since MD facilities can use kcpuset,
         * raise the IPL to high.
         */
        KASSERT(mp_online == false);

        s = splhigh();
        for (i = 0; i < kc_last_idx; i++) {
                /*
                 * Transfer the bits from early static storage to the kcpuset.
                 * Only the single early word is copied; create_raw(true)
                 * zeroed the rest of the field.
                 */
                KASSERT(kc_bitsize >= KC_BITSIZE_EARLY);
                memcpy(kc_dynamic[i], &kc_bits_early[i], KC_BITSIZE_EARLY);

                /*
                 * Store the new pointer, pointing to the allocated kcpuset.
                 * Note: we are not in an interrupt context and it is the only
                 * CPU running - thus store is safe (e.g. no need for pointer
                 * variable to be volatile).
                 */
                *kc_noted_early[i] = kc_dynamic[i];
        }
        kc_initialised = true;
        kc_last_idx = 0;
        splx(s);
}

/*
 * kcpuset_early_ptr: note an early boot use by saving the pointer and
 * returning a pointer to a static, temporary bit field.
 */
static kcpuset_t *
kcpuset_early_ptr(kcpuset_t **kcptr)
{
        kcpuset_t *kcp;
        int s;

        s = splhigh();
        if (kc_last_idx < KC_SAVE_NITEMS) {
                /*
                 * Save the pointer, return pointer to static early field.
                 * Need to zero it out.
                 */
                kc_noted_early[kc_last_idx] = kcptr;
                kcp = (kcpuset_t *)&kc_bits_early[kc_last_idx];
                kc_last_idx++;
                memset(kcp, 0, KC_BITSIZE_EARLY);
                KASSERT(kc_bitsize == KC_BITSIZE_EARLY);
        } else {
                /* panic(9) does not return, so "kcp" is always set here. */
                panic("kcpuset(9): all early-use entries exhausted; "
                    "increase KC_SAVE_NITEMS\n");
        }
        splx(s);

        return kcp;
}

/*
 * Routines to create or destroy the CPU set.
 * Early boot case is handled.
 */

static kcpuset_t *
kcpuset_create_raw(bool zero)
{
        kcpuset_impl_t *kc;

        kc = kmem_alloc(kc_memsize, KM_SLEEP);
        kc->kc_refcnt = 1;
        kc->kc_next = NULL;

        if (zero) {
                memset(&kc->kc_field, 0, kc_bitsize);
        }

        /* Note: return pointer to the actual field of bits. */
        KASSERT((uint8_t *)kc + KC_BITS_OFF == (uint8_t *)&kc->kc_field);
        return &kc->kc_field;
}

void
kcpuset_create(kcpuset_t **retkcp, bool zero)
{
        if (__predict_false(!kc_initialised)) {
                /* Early boot use - special case. */
                *retkcp = kcpuset_early_ptr(retkcp);
                return;
        }
        *retkcp = kcpuset_create_raw(zero);
}

void
kcpuset_clone(kcpuset_t **retkcp, const kcpuset_t *kcp)
{
        /* No need to pre-zero: the memcpy overwrites the whole field. */
        kcpuset_create(retkcp, false);
        memcpy(*retkcp, kcp, kc_bitsize);
}

void
kcpuset_destroy(kcpuset_t *kcp)
{
        const size_t size = kc_memsize;
        kcpuset_impl_t *kc;

        KASSERT(kc_initialised);
        KASSERT(kcp != NULL);

        /* Free this set and any deferred sets chained via kc_next. */
        do {
                kc = KC_GETSTRUCT(kcp);
                kcp = kc->kc_next;
                kmem_free(kc, size);
        } while (kcp);
}

/*
 * Routines to reference/unreference the CPU set.
 * Note: early boot case is not supported by these routines.
 */

void
kcpuset_use(kcpuset_t *kcp)
{
        kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);

        KASSERT(kc_initialised);
        atomic_inc_uint(&kc->kc_refcnt);
}

void
kcpuset_unuse(kcpuset_t *kcp, kcpuset_t **lst)
{
        kcpuset_impl_t *kc = KC_GETSTRUCT(kcp);

        KASSERT(kc_initialised);
        KASSERT(kc->kc_refcnt > 0);

        /*
         * Release/acquire pair around the reference drop, so the thread
         * that takes the count to zero observes all prior stores made by
         * other users of the set before freeing it.
         */
        membar_release();
        if (atomic_dec_uint_nv(&kc->kc_refcnt) != 0) {
                return;
        }
        membar_acquire();
        KASSERT(kc->kc_next == NULL);
        if (lst == NULL) {
                kcpuset_destroy(kcp);
                return;
        }
        /* Defer the free: chain the set onto the caller-supplied list. */
        kc->kc_next = *lst;
        *lst = kcp;
}

/*
 * Routines to transfer the CPU set from / to userspace.
 * Note: early boot case is not supported by these routines.
 */

int
kcpuset_copyin(const cpuset_t *ucp, kcpuset_t *kcp, size_t len)
{
        kcpuset_impl_t *kc __diagused = KC_GETSTRUCT(kcp);

        KASSERT(kc_initialised);
        KASSERT(kc->kc_refcnt > 0);
        KASSERT(kc->kc_next == NULL);

        if (len > kc_bitsize) { /* XXX */
                return SET_ERROR(EINVAL);
        }
        return copyin(ucp, kcp, len);
}

int
kcpuset_copyout(kcpuset_t *kcp, cpuset_t *ucp, size_t len)
{
        kcpuset_impl_t *kc __diagused = KC_GETSTRUCT(kcp);

        KASSERT(kc_initialised);
        KASSERT(kc->kc_refcnt > 0);
        KASSERT(kc->kc_next == NULL);

        if (len > kc_bitsize) { /* XXX */
                return SET_ERROR(EINVAL);
        }
        return copyout(kcp, ucp, len);
}

/*
 * kcpuset_export_u32: copy out the raw bit field into a caller-supplied
 * array of 32-bit words, truncated to at most "len" bytes.
 */
void
kcpuset_export_u32(const kcpuset_t *kcp, uint32_t *bitfield, size_t len)
{
        size_t rlen = MIN(kc_bitsize, len);

        KASSERT(kcp != NULL);
        memcpy(bitfield, kcp->bits, rlen);
}

/*
 * Routines to change bit field - zero, fill, copy, set, unset, etc.
 */

void
kcpuset_zero(kcpuset_t *kcp)
{

        /* Once initialised, the set must be live and not on a free list. */
        KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_refcnt > 0);
        KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
        memset(kcp, 0, kc_bitsize);
}

void
kcpuset_fill(kcpuset_t *kcp)
{

        KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_refcnt > 0);
        KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
        memset(kcp, ~0, kc_bitsize);
}

void
kcpuset_copy(kcpuset_t *dkcp, const kcpuset_t *skcp)
{

        KASSERT(!kc_initialised || KC_GETSTRUCT(dkcp)->kc_refcnt > 0);
        KASSERT(!kc_initialised || KC_GETSTRUCT(dkcp)->kc_next == NULL);
        memcpy(dkcp, skcp, kc_bitsize);
}

void
kcpuset_set(kcpuset_t *kcp, cpuid_t i)
{
        /* Word index: CPU id divided by 32; bit within the word: id mod 32. */
        const size_t j = i >> KC_SHIFT;

        KASSERT(!kc_initialised || KC_GETSTRUCT(kcp)->kc_next == NULL);
        KASSERT(j < kc_nfields);

        kcp->bits[j] |= __BIT(i & KC_MASK);
}

void
kcpuset_clear(kcpuset_t *kcp, cpuid_t i)
{
        const size_t j = i >> KC_SHIFT;

        KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_next == NULL);
        KASSERT(j < kc_nfields);

        kcp->bits[j] &= ~(__BIT(i & KC_MASK));
}

bool
kcpuset_isset(const kcpuset_t *kcp, cpuid_t i)
{
        const size_t j = i >> KC_SHIFT;

        KASSERT(kcp != NULL);
        KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_refcnt > 0);
        KASSERT(!kc_initialised || KC_GETCSTRUCT(kcp)->kc_next == NULL);
        KASSERT(j < kc_nfields);

        return ((__BIT(i & KC_MASK)) & kcp->bits[j]) != 0;
}

/*
 * kcpuset_isotherset: true if any CPU other than "i" is present in the set.
 * A word other than i's must be non-zero, or i's word must have a bit set
 * outside i's own position.
 */
bool
kcpuset_isotherset(const kcpuset_t *kcp, cpuid_t i)
{
        const size_t j2 = i >> KC_SHIFT;
        const uint32_t mask = ~(__BIT(i & KC_MASK));

        for (size_t j = 0; j < kc_nfields; j++) {
                const uint32_t bits = kcp->bits[j];
                if (bits && (j != j2 || (bits & mask) != 0)) {
                        return true;
                }
        }
        return false;
}

bool
kcpuset_iszero(const kcpuset_t
*kcp) 407 { 408 409 for (size_t j = 0; j < kc_nfields; j++) { 410 if (kcp->bits[j] != 0) { 411 return false; 412 } 413 } 414 return true; 415 } 416 417 bool 418 kcpuset_match(const kcpuset_t *kcp1, const kcpuset_t *kcp2) 419 { 420 421 return memcmp(kcp1, kcp2, kc_bitsize) == 0; 422 } 423 424 bool 425 kcpuset_intersecting_p(const kcpuset_t *kcp1, const kcpuset_t *kcp2) 426 { 427 428 for (size_t j = 0; j < kc_nfields; j++) { 429 if (kcp1->bits[j] & kcp2->bits[j]) 430 return true; 431 } 432 return false; 433 } 434 435 cpuid_t 436 kcpuset_ffs(const kcpuset_t *kcp) 437 { 438 439 for (size_t j = 0; j < kc_nfields; j++) { 440 if (kcp->bits[j]) 441 return 32 * j + ffs(kcp->bits[j]); 442 } 443 return 0; 444 } 445 446 cpuid_t 447 kcpuset_ffs_intersecting(const kcpuset_t *kcp1, const kcpuset_t *kcp2) 448 { 449 450 for (size_t j = 0; j < kc_nfields; j++) { 451 uint32_t bits = kcp1->bits[j] & kcp2->bits[j]; 452 if (bits) 453 return 32 * j + ffs(bits); 454 } 455 return 0; 456 } 457 458 void 459 kcpuset_merge(kcpuset_t *kcp1, const kcpuset_t *kcp2) 460 { 461 462 for (size_t j = 0; j < kc_nfields; j++) { 463 kcp1->bits[j] |= kcp2->bits[j]; 464 } 465 } 466 467 void 468 kcpuset_intersect(kcpuset_t *kcp1, const kcpuset_t *kcp2) 469 { 470 471 for (size_t j = 0; j < kc_nfields; j++) { 472 kcp1->bits[j] &= kcp2->bits[j]; 473 } 474 } 475 476 void 477 kcpuset_remove(kcpuset_t *kcp1, const kcpuset_t *kcp2) 478 { 479 480 for (size_t j = 0; j < kc_nfields; j++) { 481 kcp1->bits[j] &= ~kcp2->bits[j]; 482 } 483 } 484 485 int 486 kcpuset_countset(const kcpuset_t *kcp) 487 { 488 int count = 0; 489 490 for (size_t j = 0; j < kc_nfields; j++) { 491 count += popcount32(kcp->bits[j]); 492 } 493 return count; 494 } 495 496 /* 497 * Routines to set/clear the flags atomically. 
 */

void
kcpuset_atomic_set(kcpuset_t *kcp, cpuid_t i)
{
        const size_t j = i >> KC_SHIFT;

        KASSERT(j < kc_nfields);
        atomic_or_32(&kcp->bits[j], __BIT(i & KC_MASK));
}

void
kcpuset_atomic_clear(kcpuset_t *kcp, cpuid_t i)
{
        const size_t j = i >> KC_SHIFT;

        KASSERT(j < kc_nfields);
        atomic_and_32(&kcp->bits[j], ~(__BIT(i & KC_MASK)));
}

void
kcpuset_atomicly_intersect(kcpuset_t *kcp1, const kcpuset_t *kcp2)
{

        for (size_t j = 0; j < kc_nfields; j++) {
                /*
                 * NOTE(review): when kcp2->bits[j] == 0 the AND is skipped,
                 * which leaves kcp1's word unchanged instead of clearing it;
                 * this is not a strict intersection for all-zero source
                 * words.  Unlike the guards in merge/remove below, this one
                 * changes the result -- verify callers rely on (or tolerate)
                 * this behavior before "fixing" it.
                 */
                if (kcp2->bits[j])
                        atomic_and_32(&kcp1->bits[j], kcp2->bits[j]);
        }
}

void
kcpuset_atomicly_merge(kcpuset_t *kcp1, const kcpuset_t *kcp2)
{

        for (size_t j = 0; j < kc_nfields; j++) {
                /* OR with zero is a no-op: skip the atomic RMW entirely. */
                if (kcp2->bits[j])
                        atomic_or_32(&kcp1->bits[j], kcp2->bits[j]);
        }
}

void
kcpuset_atomicly_remove(kcpuset_t *kcp1, const kcpuset_t *kcp2)
{

        for (size_t j = 0; j < kc_nfields; j++) {
                /* AND with ~0 is a no-op: skip the atomic RMW entirely. */
                if (kcp2->bits[j])
                        atomic_and_32(&kcp1->bits[j], ~kcp2->bits[j]);
        }
}