/*	$NetBSD: linux_sched.c,v 1.87 2025/11/10 15:41:38 christos Exp $	*/

/*-
 * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.87 2025/11/10 15:41:38 christos Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>
#include <sys/atomic.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_machdep.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

#include <compat/linux/common/linux_prctl.h>

static int linux_clone_nptl(struct lwp *, const struct linux_sys_clone_args *,
    register_t *);

/* Unlike Linux, dynamically calculate the CPU mask size. */
#define LINUX_CPU_MASK_SIZE (sizeof(long) * ((ncpu + LONG_BIT - 1) / LONG_BIT))
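/*
 * For illustration: with LONG_BIT == 64, a machine where ncpu == 4 gets
 * sizeof(long) * ((4 + 63) / 64) == 8 bytes, i.e. a single 64-bit word.
 * Linux instead sizes the mask from a compile-time CPU limit (glibc's
 * cpu_set_t is commonly 1024 bits, i.e. 128 bytes).
 */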
#if DEBUG_LINUX
#define DPRINTF(x, ...) uprintf(x, __VA_ARGS__)
#else
#define DPRINTF(x, ...)
#endif

static void
linux_child_return(void *arg)
{
	struct lwp *l = arg;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = l->l_emuldata;
	void *ctp = led->led_child_tidptr;
	int error;

	if (ctp) {
		if ((error = copyout(&p->p_pid, ctp, sizeof(p->p_pid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, ctp, p->p_pid, error);
	}
	child_return(arg);
}

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct linux_emuldata *led;
	int flags, sig, error;

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return EINVAL;

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return EINVAL;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return EINVAL;

	/*
	 * The thread group flavor is implemented totally differently.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD)
		return linux_clone_nptl(l, uap, retval);

	flags = 0;
	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return EINVAL;
	sig = linux_to_native_signo[sig];

	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) {
		led = l->l_emuldata;
		led->led_child_tidptr = SCARG(uap, child_tidptr);
	}

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know whether the stack grows up
	 * or down.  So we pass a stack size of 0, which makes the code
	 * that would adjust the stack pointer a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    linux_child_return, NULL, retval)) != 0) {
		DPRINTF("%s: fork1: error %d\n", __func__, error);
		return error;
	}

	return 0;
}
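/*
 * Flag translation example (illustrative, not from this file): Linux
 * implements vfork(2) as clone(CLONE_VFORK|CLONE_VM|SIGCHLD, 0), which
 * the mapping above turns into fork1(..., FORK_SHAREVM|FORK_PPWAIT,
 * SIGCHLD, ...), with the exit signal converted through
 * linux_to_native_signo[].
 */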
int
linux_sys_clone3(struct lwp *l, const struct linux_sys_clone3_args *uap,
    register_t *retval)
{
	struct linux_user_clone3_args cl_args;
	struct linux_sys_clone_args clone_args;
	int error;

	if (SCARG(uap, size) != sizeof(cl_args)) {
		DPRINTF("%s: Invalid size\n", __func__);
		return EINVAL;
	}

	error = copyin(SCARG(uap, cl_args), &cl_args, SCARG(uap, size));
	if (error) {
		DPRINTF("%s: Copyin failed: %d\n", __func__, error);
		return error;
	}

	DPRINTF("%s: Flags: %#jx\n", __func__, (intmax_t)cl_args.flags);

	/* Define allowed flags */
	if (cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS) {
		DPRINTF("%s: Unsupported flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS);
		return EOPNOTSUPP;
	}
	if (cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS) {
		DPRINTF("%s: Disallowed flags for clone3: %#" PRIx64 "\n",
		    __func__, cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS);
		return EINVAL;
	}

#if 0
	// XXX: this is wrong, exit_signal is the signal to deliver to the
	// process upon exit.
	if ((cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL) != 0) {
		DPRINTF("%s: Disallowed exit_signal for clone3: %#" PRIx64
		    "\n", __func__,
		    cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL);
		return EINVAL;
	}
#endif

	if (cl_args.stack == 0 && cl_args.stack_size != 0) {
		DPRINTF("%s: Stack is NULL but stack size is not 0\n",
		    __func__);
		return EINVAL;
	}
	if (cl_args.stack != 0 && cl_args.stack_size == 0) {
		DPRINTF("%s: Stack is not NULL but stack size is 0\n",
		    __func__);
		return EINVAL;
	}

	int flags = cl_args.flags & LINUX_CLONE_ALLOWED_FLAGS;
#if 0
	int sig = cl_args.exit_signal & LINUX_CLONE_CSIGNAL;
#endif
	// XXX: Pidfd member handling
	// XXX: we don't have cgroups
	// XXX: what to do with tid_set and tid_set_size
	// XXX: clone3 has stacksize, instead implement clone as a clone3
	// wrapper.
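	/*
	 * Stack handling sketch: clone3() receives the lowest address and
	 * the size of the stack, while clone() expects the initial stack
	 * pointer.  For illustration, on a stack-grows-down machine a
	 * caller passing stack == 0x7f0000000000 and stack_size == 0x10000
	 * has its child start with SP == 0x7f0000010000, which is what the
	 * #ifdef below computes.
	 */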
	SCARG(&clone_args, flags) = flags;
#ifdef __MACHINE_STACK_GROWS_UP
	SCARG(&clone_args, stack) = (void *)(uintptr_t)cl_args.stack;
#else
	SCARG(&clone_args, stack) =
	    (void *)(uintptr_t)((uintptr_t)cl_args.stack + cl_args.stack_size);
#endif
	SCARG(&clone_args, parent_tidptr) =
	    (void *)(intptr_t)cl_args.parent_tid;
	SCARG(&clone_args, tls) =
	    (void *)(intptr_t)cl_args.tls;
	SCARG(&clone_args, child_tidptr) =
	    (void *)(intptr_t)cl_args.child_tid;

	return linux_sys_clone(l, &clone_args, retval);
}

static int
linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct proc *p;
	struct lwp *l2;
	struct linux_emuldata *led;
	void *parent_tidptr, *tls, *child_tidptr;
	vaddr_t uaddr;
	lwpid_t lid;
	int flags, error;

	p = l->l_proc;
	flags = SCARG(uap, flags);
	parent_tidptr = SCARG(uap, parent_tidptr);
	tls = SCARG(uap, tls);
	child_tidptr = SCARG(uap, child_tidptr);

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		return ENOMEM;
	}

	error = lwp_create(l, p, uaddr, LWP_DETACHED,
	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
	    &l->l_sigmask, &l->l_sigstk);
	if (__predict_false(error)) {
		DPRINTF("%s: lwp_create error=%d\n", __func__, error);
		uvm_uarea_free(uaddr);
		return error;
	}
	lid = l2->l_lid;

	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
	if (flags & LINUX_CLONE_CHILD_CLEARTID) {
		led = l2->l_emuldata;
		led->led_clear_tid = child_tidptr;
	}

	/* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
	if (flags & LINUX_CLONE_PARENT_SETTID) {
		if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_PARENT_SETTID "
			    "failed (parent_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, parent_tidptr, lid, error);
	}

	/* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory */
	if (flags & LINUX_CLONE_CHILD_SETTID) {
		if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, child_tidptr, lid, error);
	}

	if (flags & LINUX_CLONE_SETTLS) {
		error = LINUX_LWP_SETPRIVATE(l2, tls);
		if (error) {
			DPRINTF("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
			    error);
			lwp_exit(l2);
			return error;
		}
	}

	/* Set the new LWP running. */
	lwp_start(l2, 0);

	retval[0] = lid;
	retval[1] = 0;
	return 0;
}
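/*
 * Thread-creation sketch (illustrative): glibc's NPTL pthread_create()
 * typically issues clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|
 * CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|
 * CLONE_CHILD_CLEARTID, ...), so a new pthread arrives here with the TLS
 * and *_TID bookkeeping handled by linux_clone_nptl() above.
 */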
/*
 * Linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   In particular, sched_param::sched_priority is always 0.
 */

#define LINUX_SCHED_RTPRIO_MIN 1
#define LINUX_SCHED_RTPRIO_MAX 99

static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		memset(linux_params, 0, sizeof(*linux_params));

		DPRINTF("%s: native: policy %d, priority %d\n",
		    __func__, native_policy, prio);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
		DPRINTF("%s: linux: policy %d, priority %d\n",
		    __func__, linux_policy != NULL ? *linux_policy : -1,
		    linux_params->sched_priority);
	}

	return 0;
}
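/*
 * Worked example of the scaling above, assuming SCHED_PRI_MIN == 0 and
 * SCHED_PRI_MAX == 63: Linux rtprio 1 -> native 0, 50 -> 31, 99 -> 63;
 * going back, native 63 -> Linux 99.  The mapping is lossy (64 native
 * levels spread over 99 Linux levels), so a round trip through both
 * conversions is not always the identity.
 */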
int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, policy, lp.sched_priority);

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, SCARG(uap, policy), lp.sched_priority);

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

 out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}
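/*
 * Illustrative (hypothetical) caller: Linux applications often derive a
 * priority from the reported range rather than hard-coding it, e.g.
 *
 *	param.sched_priority =
 *	    (sched_get_priority_max(SCHED_FIFO) +
 *	     sched_get_priority_min(SCHED_FIFO)) / 2;
 *
 * which with the values above yields (99 + 1) / 2 == 50.
 */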
int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_exit(struct lwp *l, const struct linux_sys_exit_args *uap, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_emuldata;
	led->led_clear_tid = SCARG(uap, tid);
	*retval = l->l_lid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

/*
 * The affinity syscalls assume that the layout of our cpu kcpuset is
 * the same as Linux's: a linear bitmask.
 */
int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct proc *p;
	struct lwp *t;
	kcpuset_t *kcset;
	size_t size;
	cpuid_t i;
	int error;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	if (SCARG(uap, pid) == 0) {
		p = curproc;
		mutex_enter(p->p_lock);
		t = curlwp;
	} else {
		t = lwp_find2(-1, SCARG(uap, pid));
		if (__predict_false(t == NULL)) {
			return ESRCH;
		}
		p = t->l_proc;
		KASSERT(mutex_owned(p->p_lock));
	}

	/* Check the permission. */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, p, NULL, NULL, NULL)) {
		mutex_exit(p->p_lock);
		return EPERM;
	}

	kcpuset_create(&kcset, true);
	lwp_lock(t);
	if (t->l_affinity != NULL) {
		kcpuset_copy(kcset, t->l_affinity);
	} else {
		/*
		 * All available CPUs should be masked when affinity has not
		 * been set.
		 */
		kcpuset_zero(kcset);
		for (i = 0; i < ncpu; i++)
			kcpuset_set(kcset, i);
	}
	lwp_unlock(t);
	mutex_exit(p->p_lock);
	error = kcpuset_copyout(kcset, (cpuset_t *)SCARG(uap, mask), size);
	kcpuset_unuse(kcset, NULL);
	*retval = size;
	return error;
}
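/*
 * Mask layout example: in the linear bitmask, CPU n is bit (n % LONG_BIT)
 * of word (n / LONG_BIT).  A mask selecting CPUs 0 and 2 therefore copies
 * out with its first long equal to 0x5.
 */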
int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct sys__sched_setaffinity_args ssa;
	size_t size;
	pid_t pid;
	lwpid_t lid;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	lid = SCARG(uap, pid);
	if (lid != 0) {
		/* Get the canonical PID for the process. */
		mutex_enter(&proc_lock);
		struct proc *p = proc_find_lwpid(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		pid = p->p_pid;
		mutex_exit(&proc_lock);
	} else {
		pid = curproc->p_pid;
		lid = curlwp->l_lid;
	}

	SCARG(&ssa, pid) = pid;
	SCARG(&ssa, lid) = lid;
	SCARG(&ssa, size) = size;
	SCARG(&ssa, cpuset) = (cpuset_t *)SCARG(uap, mask);

	return sys__sched_setaffinity(l, &ssa, retval);
}

int
linux_sys___prctl(struct lwp *l, const struct linux_sys___prctl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) code;
		syscallarg(void *) args[LINUX_SYS_MAXSYSARGS];
	} */
	unsigned int c = SCARG(uap, code);

	/* TODO: add other commonly used prctl codes */
	switch (c) {
	case LINUX_PR_SET_NAME: {
		struct sys__lwp_setname_args sls;
		SCARG(&sls, name) = (char *)SCARG(uap, args[0]);
		return sys__lwp_setname(l, &sls, retval);
	}

	case LINUX_PR_GET_NAME: {
		struct sys__lwp_getname_args slg;
		SCARG(&slg, name) = (char *)SCARG(uap, args[0]);
		SCARG(&slg, len) = MAXCOMLEN;
		return sys__lwp_getname(l, &slg, retval);
	}

	default:
		printf("Unimplemented linux prctl code: %u\n", c);
		return ENOSYS;
	}
}