/*	$NetBSD: linux_sched.c,v 1.85 2025/09/19 19:35:15 kre Exp $	*/

/*-
 * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.85 2025/09/19 19:35:15 kre Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>
#include <sys/atomic.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_machdep.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

static int linux_clone_nptl(struct lwp *, const struct linux_sys_clone_args *,
    register_t *);

/* Unlike Linux, dynamically calculate CPU mask size */
#define	LINUX_CPU_MASK_SIZE (sizeof(long) * ((ncpu + LONG_BIT - 1) / LONG_BIT))
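
/*
 * Illustration of the mask-size arithmetic (numbers are an example, not
 * taken from this file): on an LP64 machine LONG_BIT is 64 and
 * sizeof(long) is 8, so with ncpu == 4 the size is
 * 8 * ((4 + 63) / 64) = 8 bytes, and with ncpu == 72 it is
 * 8 * ((72 + 63) / 64) = 16 bytes -- one long per 64 CPUs, rounded up.
 */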

#if DEBUG_LINUX
#define DPRINTF(x, ...) uprintf(x, __VA_ARGS__)
#else
#define DPRINTF(x, ...)
#endif

static void
linux_child_return(void *arg)
{
	struct lwp *l = arg;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = l->l_emuldata;
	void *ctp = led->led_child_tidptr;
	int error;

	if (ctp) {
		if ((error = copyout(&p->p_pid, ctp, sizeof(p->p_pid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, ctp, p->p_pid, error);
	}
	child_return(arg);
}

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct linux_emuldata *led;
	int flags, sig, error;

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return EINVAL;

	/*
	 * A thread group implies shared signals, and shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return EINVAL;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return EINVAL;

	/*
	 * The thread group flavor is implemented totally differently.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD)
		return linux_clone_nptl(l, uap, retval);

	flags = 0;
	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return EINVAL;
	sig = linux_to_native_signo[sig];

	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) {
		led = l->l_emuldata;
		led->led_child_tidptr = SCARG(uap, child_tidptr);
	}

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know whether the stack grows
	 * up or down.  We therefore pass a stack size of 0, so that the
	 * code that would adjust the stack pointer is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    linux_child_return, NULL, retval)) != 0) {
		DPRINTF("%s: fork1: error %d\n", __func__, error);
		return error;
	}

	return 0;
}
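
/*
 * For reference (a typical caller, not code from this file): glibc's
 * pthread_create() is known to issue clone() with roughly
 *
 *	CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
 *	CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
 *	CLONE_CHILD_CLEARTID
 *
 * so such calls carry LINUX_CLONE_THREAD and take the linux_clone_nptl()
 * path below, while a plain fork() maps to clone() with only SIGCHLD in
 * the flags and goes through fork1() above.
 */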

int
linux_sys_clone3(struct lwp *l, const struct linux_sys_clone3_args *uap,
    register_t *retval)
{
	struct linux_user_clone3_args cl_args;
	struct linux_sys_clone_args clone_args;
	int error;

	if (SCARG(uap, size) != sizeof(cl_args)) {
		DPRINTF("%s: invalid clone3 args size %zu (expected %zu)\n",
		    __func__, (size_t)SCARG(uap, size), sizeof(cl_args));
		return EINVAL;
	}

	error = copyin(SCARG(uap, cl_args), &cl_args, SCARG(uap, size));
	if (error) {
		DPRINTF("%s: copyin failed: %d\n", __func__, error);
		return error;
	}

	DPRINTF("%s: Flags: %#jx\n", __func__, (uintmax_t)cl_args.flags);

	/* Reject flags we know about but do not implement. */
	if (cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS) {
		DPRINTF("%s: Unsupported flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS);
		return EOPNOTSUPP;
	}
	/* Reject anything outside the allowed set. */
	if (cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS) {
		DPRINTF("%s: Disallowed flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS);
		return EINVAL;
	}

#if 0
	// XXX: this is wrong, exit_signal is the signal to deliver to the
	// process upon exit.
	if ((cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL) != 0) {
		DPRINTF("%s: Disallowed exit_signal for clone3: %#" PRIx64 "\n",
		    __func__,
		    cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL);
		return EINVAL;
	}
#endif

	if (cl_args.stack == 0 && cl_args.stack_size != 0) {
		DPRINTF("%s: Stack is NULL but stack size is not 0\n",
		    __func__);
		return EINVAL;
	}
	if (cl_args.stack != 0 && cl_args.stack_size == 0) {
		DPRINTF("%s: Stack is not NULL but stack size is 0\n",
		    __func__);
		return EINVAL;
	}

	int flags = cl_args.flags & LINUX_CLONE_ALLOWED_FLAGS;
#if 0
	int sig = cl_args.exit_signal & LINUX_CLONE_CSIGNAL;
#endif
	// XXX: Pidfd member handling
	// XXX: we don't have cgroups
	// XXX: what to do with tid_set and tid_set_size
	// XXX: clone3 has stacksize, instead implement clone as a clone3
	// wrapper.
	SCARG(&clone_args, flags) = flags;
#ifdef __MACHINE_STACK_GROWS_UP
	SCARG(&clone_args, stack) = (void *)(uintptr_t)cl_args.stack;
#else
	SCARG(&clone_args, stack) =
	    (void *)(uintptr_t)(cl_args.stack + cl_args.stack_size);
#endif
	SCARG(&clone_args, parent_tidptr) =
	    (void *)(intptr_t)cl_args.parent_tid;
	SCARG(&clone_args, tls) =
	    (void *)(intptr_t)cl_args.tls;
	SCARG(&clone_args, child_tidptr) =
	    (void *)(intptr_t)cl_args.child_tid;

	return linux_sys_clone(l, &clone_args, retval);
}
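
/*
 * A note on the stack conversion above (illustrative, with a
 * hypothetical layout): clone3 passes the *lowest* address and the size
 * of the stack area, whereas clone() takes the initial stack pointer.
 * On a machine whose stack grows down, a caller that allocated
 *
 *	stack = 0x7f0000000000, stack_size = 0x10000
 *
 * gets an initial SP of 0x7f0000010000, i.e. stack + stack_size; on a
 * __MACHINE_STACK_GROWS_UP machine the low address is already the
 * starting SP.
 */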

static int
linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct proc *p;
	struct lwp *l2;
	struct linux_emuldata *led;
	void *parent_tidptr, *tls, *child_tidptr;
	vaddr_t uaddr;
	lwpid_t lid;
	int flags, error;

	p = l->l_proc;
	flags = SCARG(uap, flags);
	parent_tidptr = SCARG(uap, parent_tidptr);
	tls = SCARG(uap, tls);
	child_tidptr = SCARG(uap, child_tidptr);

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		return ENOMEM;
	}

	error = lwp_create(l, p, uaddr, LWP_DETACHED,
	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
	    &l->l_sigmask, &l->l_sigstk);
	if (__predict_false(error)) {
		DPRINTF("%s: lwp_create error=%d\n", __func__, error);
		uvm_uarea_free(uaddr);
		return error;
	}
	lid = l2->l_lid;

	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
	if (flags & LINUX_CLONE_CHILD_CLEARTID) {
		led = l2->l_emuldata;
		led->led_clear_tid = child_tidptr;
	}

	/* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
	if (flags & LINUX_CLONE_PARENT_SETTID) {
		if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_PARENT_SETTID "
			    "failed (parent_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, parent_tidptr, lid, error);
	}

	/* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory */
	if (flags & LINUX_CLONE_CHILD_SETTID) {
		if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, child_tidptr, lid, error);
	}

	if (flags & LINUX_CLONE_SETTLS) {
		error = LINUX_LWP_SETPRIVATE(l2, tls);
		if (error) {
			DPRINTF("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
			    error);
			lwp_exit(l2);
			return error;
		}
	}

	/* Set the new LWP running. */
	lwp_start(l2, 0);

	retval[0] = lid;
	retval[1] = 0;
	return 0;
}
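
/*
 * Background on the TID handshake above (general NPTL behaviour, not
 * specific to this file): the TID stored via LINUX_CLONE_PARENT_SETTID /
 * LINUX_CLONE_CHILD_SETTID is what userland pthread bookkeeping uses,
 * and the address registered via LINUX_CLONE_CHILD_CLEARTID is zeroed
 * and futex-woken when the thread exits, which is how pthread_join()
 * learns that the thread is gone.  Here the NetBSD LWP id doubles as
 * the Linux TID.
 */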

/*
 * Linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		memset(linux_params, 0, sizeof(*linux_params));

		DPRINTF("%s: native: policy %d, priority %d\n",
		    __func__, native_policy, prio);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
		DPRINTF("%s: linux: policy %d, priority %d\n", __func__,
		    linux_policy != NULL ? *linux_policy : -1,
		    linux_params->sched_priority);
	}

	return 0;
}
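
/*
 * Worked example for the mappings above, assuming NetBSD's usual
 * SCHED_PRI_MIN == 0 and SCHED_PRI_MAX == 63: Linux rtprio 1 maps to
 * native priority (1 - 1) * 63 / 98 + 0 = 0, rtprio 99 maps to
 * 98 * 63 / 98 = 63, and rtprio 50 maps to 49 * 63 / 98 = 31 (integer
 * division).  The reverse mapping is the same linear interpolation, so
 * the endpoints round-trip exactly while midrange values may shift by
 * one due to truncation.
 */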

int
linux_sys_sched_setparam(struct lwp *l,
    const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l,
    const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, policy, lp.sched_priority);

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l,
    const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, SCARG(uap, policy), lp.sched_priority);

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l,
    const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

 out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l,
    const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l,
    const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}
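
/*
 * End-to-end illustration of the calls above (a hypothetical userland
 * sequence, with the native range assumed in the example above): a
 * Linux binary doing
 *
 *	struct sched_param sp = { .sched_priority = 50 };
 *	sched_setscheduler(0, SCHED_FIFO, &sp);
 *	sched_getparam(0, &sp);
 *
 * stores native priority 31 via sched_linux2native() and then reads
 * back 49 rather than 50, since the round trip through the coarser
 * native range truncates; sched_get_priority_min/max still report the
 * Linux values 1 and 99.
 */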

int
linux_sys_exit(struct lwp *l, const struct linux_sys_exit_args *uap,
    register_t *retval)
{

	lwp_exit(l);
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l,
    const struct linux_sys_exit_group_args *uap, register_t *retval)
{

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

int
linux_sys_set_tid_address(struct lwp *l,
    const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_emuldata;
	led->led_clear_tid = SCARG(uap, tid);
	*retval = l->l_lid;

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

/*
 * The affinity syscalls assume that the layout of our cpu kcpuset is
 * the same as Linux's: a linear bitmask.
 */
int
linux_sys_sched_getaffinity(struct lwp *l,
    const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct proc *p;
	struct lwp *t;
	kcpuset_t *kcset;
	size_t size;
	cpuid_t i;
	int error;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	if (SCARG(uap, pid) == 0) {
		p = curproc;
		mutex_enter(p->p_lock);
		t = curlwp;
	} else {
		t = lwp_find2(-1, SCARG(uap, pid));
		if (__predict_false(t == NULL)) {
			return ESRCH;
		}
		p = t->l_proc;
		KASSERT(mutex_owned(p->p_lock));
	}

	/* Check the permission. */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, p, NULL, NULL, NULL)) {
		mutex_exit(p->p_lock);
		return EPERM;
	}

	kcpuset_create(&kcset, true);
	lwp_lock(t);
	if (t->l_affinity != NULL)
		kcpuset_copy(kcset, t->l_affinity);
	else {
		/*
		 * All available CPUs should be masked when affinity has not
		 * been set.
		 */
		kcpuset_zero(kcset);
		for (i = 0; i < ncpu; i++)
			kcpuset_set(kcset, i);
	}
	lwp_unlock(t);
	mutex_exit(p->p_lock);
	error = kcpuset_copyout(kcset, (cpuset_t *)SCARG(uap, mask), size);
	kcpuset_unuse(kcset, NULL);
	*retval = size;
	return error;
}
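
/*
 * Layout illustration for the two affinity calls (an example, not
 * additional code): the mask is a linear bitmask of longs, so on LP64
 * CPU 0 is bit 0 of mask[0], CPU 63 is bit 63 of mask[0], and CPU 64 is
 * bit 0 of mask[1]; e.g. an affinity set of {0, 2} is mask[0] == 0x5.
 */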

int
linux_sys_sched_setaffinity(struct lwp *l,
    const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct sys__sched_setaffinity_args ssa;
	size_t size;
	pid_t pid;
	lwpid_t lid;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	lid = SCARG(uap, pid);
	if (lid != 0) {
		/* Get the canonical PID for the process. */
		mutex_enter(&proc_lock);
		struct proc *p = proc_find_lwpid(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		pid = p->p_pid;
		mutex_exit(&proc_lock);
	} else {
		pid = curproc->p_pid;
		lid = curlwp->l_lid;
	}

	SCARG(&ssa, pid) = pid;
	SCARG(&ssa, lid) = lid;
	SCARG(&ssa, size) = size;
	SCARG(&ssa, cpuset) = (cpuset_t *)SCARG(uap, mask);

	return sys__sched_setaffinity(l, &ssa, retval);
}