/*	$NetBSD: linux_sched.c,v 1.81 2024/09/30 01:26:47 kre Exp $	*/

/*-
 * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.81 2024/09/30 01:26:47 kre Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>
#include <sys/atomic.h>

#include <sys/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_machdep.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

static int linux_clone_nptl(struct lwp *, const struct linux_sys_clone_args *,
    register_t *);

/* Unlike Linux, dynamically calculate CPU mask size */
#define LINUX_CPU_MASK_SIZE (sizeof(long) * ((ncpu + LONG_BIT - 1) / LONG_BIT))
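/*
 * For example, with ncpu = 8 and LONG_BIT = 64 (8-byte longs) this
 * evaluates to sizeof(long) * 1 = 8 bytes; it grows to 16 bytes once
 * ncpu exceeds 64.
 */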

#if DEBUG_LINUX
#define DPRINTF(x, ...) uprintf(x, __VA_ARGS__)
#else
#define DPRINTF(x, ...)
#endif

static void
linux_child_return(void *arg)
{
	struct lwp *l = arg;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = l->l_emuldata;
	void *ctp = led->led_child_tidptr;
	int error;

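	/*
	 * LINUX_CLONE_CHILD_SETTID: publish the child's PID at the
	 * address the parent supplied.  A failure here can only be
	 * logged; the clone() has already succeeded and the child is
	 * about to run.
	 */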
	if (ctp) {
		if ((error = copyout(&p->p_pid, ctp, sizeof(p->p_pid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d, error = %d)\n",
			    __func__, ctp, p->p_pid, error);
	}
	child_return(arg);
}

int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct linux_emuldata *led;
	int flags, sig, error;

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return EINVAL;

	/*
	 * A thread group implies shared signal handlers, and shared
	 * signal handlers imply a shared VM.  This matches what the
	 * Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return EINVAL;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return EINVAL;

	/*
	 * The thread group flavor is implemented totally differently.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD)
		return linux_clone_nptl(l, uap, retval);

	flags = 0;
	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

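	/*
	 * The low byte of the Linux clone flags holds the signal to
	 * post to the parent when the child exits (SIGCHLD for a
	 * plain fork).
	 */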
	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return EINVAL;
	sig = linux_to_native_signo[sig];

	if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) {
		led = l->l_emuldata;
		led->led_child_tidptr = SCARG(uap, child_tidptr);
	}

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know whether the stack grows
	 * up or down.  We therefore pass a stack size of 0 so that the
	 * code that would adjust the stack pointer is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    linux_child_return, NULL, retval)) != 0) {
		DPRINTF("%s: fork1: error %d\n", __func__, error);
		return error;
	}

	return 0;
}


int
linux_sys_clone3(struct lwp *l, const struct linux_sys_clone3_args *uap, register_t *retval)
{
	struct linux_user_clone3_args cl_args;
	struct linux_sys_clone_args clone_args;
	int error;

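	/*
	 * Newer Linux kernels accept a size larger than their own
	 * struct clone_args as long as the extra tail is zero-filled;
	 * here only an exact match with our copy of the structure is
	 * accepted.
	 */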
	if (SCARG(uap, size) != sizeof(cl_args)) {
		DPRINTF("%s: Invalid clone_args size %zu\n", __func__,
		    (size_t)SCARG(uap, size));
		return EINVAL;
	}

	error = copyin(SCARG(uap, cl_args), &cl_args, SCARG(uap, size));
	if (error) {
		DPRINTF("%s: Copyin failed: %d\n", __func__, error);
		return error;
	}

	DPRINTF("%s: Flags: %#jx\n", __func__, (intmax_t)cl_args.flags);

	/* Reject unimplemented or disallowed flags. */
	if (cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS) {
		DPRINTF("%s: Unsupported flags for clone3: %#jx\n", __func__,
		    (uintmax_t)(cl_args.flags & LINUX_CLONE_UNIMPLEMENTED_FLAGS));
		return EOPNOTSUPP;
	}
	if (cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS) {
		DPRINTF("%s: Disallowed flags for clone3: %#jx\n", __func__,
		    (uintmax_t)(cl_args.flags & ~LINUX_CLONE_ALLOWED_FLAGS));
		return EINVAL;
	}

#if 0
	// XXX: this is wrong, exit_signal is the signal to deliver to the
	// process upon exit.
	if ((cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL) != 0) {
		DPRINTF("%s: Disallowed exit signal for clone3: %#jx\n",
		    __func__,
		    (uintmax_t)(cl_args.exit_signal & ~(uint64_t)LINUX_CLONE_CSIGNAL));
		return EINVAL;
	}
#endif

	if (cl_args.stack == 0 && cl_args.stack_size != 0) {
		DPRINTF("%s: Stack is NULL but stack size is not 0\n",
		    __func__);
		return EINVAL;
	}
	if (cl_args.stack != 0 && cl_args.stack_size == 0) {
		DPRINTF("%s: Stack is not NULL but stack size is 0\n",
		    __func__);
		return EINVAL;
	}

	int flags = cl_args.flags & LINUX_CLONE_ALLOWED_FLAGS;
#if 0
	int sig = cl_args.exit_signal & LINUX_CLONE_CSIGNAL;
#endif
	// XXX: Pidfd member handling
	// XXX: we don't have cgroups
	// XXX: what to do with tid_set and tid_set_size
	// XXX: clone3 has stack_size, so instead implement clone as a
	// clone3 wrapper.
	SCARG(&clone_args, flags) = flags;
	SCARG(&clone_args, stack) = (void *)(uintptr_t)cl_args.stack;
	SCARG(&clone_args, parent_tidptr) =
	    (void *)(intptr_t)cl_args.parent_tid;
	SCARG(&clone_args, tls) =
	    (void *)(intptr_t)cl_args.tls;
	SCARG(&clone_args, child_tidptr) =
	    (void *)(intptr_t)cl_args.child_tid;

	return linux_sys_clone(l, &clone_args, retval);
}

static int
linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct proc *p;
	struct lwp *l2;
	struct linux_emuldata *led;
	void *parent_tidptr, *tls, *child_tidptr;
	vaddr_t uaddr;
	lwpid_t lid;
	int flags, error;

	p = l->l_proc;
	flags = SCARG(uap, flags);
	parent_tidptr = SCARG(uap, parent_tidptr);
	tls = SCARG(uap, tls);
	child_tidptr = SCARG(uap, child_tidptr);

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		return ENOMEM;
	}

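	/*
	 * NPTL threads map 1:1 onto native LWPs: the new LWP shares
	 * the whole process and is created detached, since Linux
	 * threads are reaped via CLONE_CHILD_CLEARTID rather than
	 * joined through wait().
	 */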
	error = lwp_create(l, p, uaddr, LWP_DETACHED,
	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
	    &l->l_sigmask, &l->l_sigstk);
	if (__predict_false(error)) {
		DPRINTF("%s: lwp_create error=%d\n", __func__, error);
		uvm_uarea_free(uaddr);
		return error;
	}
	lid = l2->l_lid;

	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
	if (flags & LINUX_CLONE_CHILD_CLEARTID) {
		led = l2->l_emuldata;
		led->led_clear_tid = child_tidptr;
	}

	/* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
	if (flags & LINUX_CLONE_PARENT_SETTID) {
		if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_PARENT_SETTID "
			    "failed (parent_tidptr = %p tid = %d error=%d)\n",
			    __func__, parent_tidptr, lid, error);
	}

	/* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory */
	if (flags & LINUX_CLONE_CHILD_SETTID) {
		if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d error=%d)\n",
			    __func__, child_tidptr, lid, error);
	}

	if (flags & LINUX_CLONE_SETTLS) {
		error = LINUX_LWP_SETPRIVATE(l2, tls);
		if (error) {
			DPRINTF("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
			    error);
			lwp_exit(l2);
			return error;
		}
	}

	/* Set the new LWP running. */
	lwp_start(l2, 0);

	retval[0] = lid;
	retval[1] = 0;
	return 0;
}

/*
 * linux realtime priority
 *
 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
 *
 * - SCHED_OTHER tasks don't have realtime priorities.
 *   in particular, sched_param::sched_priority is always 0.
 */

#define	LINUX_SCHED_RTPRIO_MIN	1
#define	LINUX_SCHED_RTPRIO_MAX	99

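/*
 * The two conversion routines below scale linearly between the Linux
 * RT range [1, 99] and the native [SCHED_PRI_MIN, SCHED_PRI_MAX]
 * range.  For example, assuming a native range of [0, 63], Linux
 * priority 1 maps to 0, 99 maps to 63, and 50 maps to 31.
 */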
static int
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    int *native_policy, struct sched_param *native_params)
{

	switch (linux_policy) {
	case LINUX_SCHED_OTHER:
		if (native_policy != NULL) {
			*native_policy = SCHED_OTHER;
		}
		break;

	case LINUX_SCHED_FIFO:
		if (native_policy != NULL) {
			*native_policy = SCHED_FIFO;
		}
		break;

	case LINUX_SCHED_RR:
		if (native_policy != NULL) {
			*native_policy = SCHED_RR;
		}
		break;

	default:
		return EINVAL;
	}

	if (linux_params != NULL) {
		int prio = linux_params->sched_priority;

		KASSERT(native_params != NULL);

		if (linux_policy == LINUX_SCHED_OTHER) {
			if (prio != 0) {
				return EINVAL;
			}
			native_params->sched_priority = PRI_NONE; /* XXX */
		} else {
			if (prio < LINUX_SCHED_RTPRIO_MIN ||
			    prio > LINUX_SCHED_RTPRIO_MAX) {
				return EINVAL;
			}
			native_params->sched_priority =
			    (prio - LINUX_SCHED_RTPRIO_MIN)
			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    + SCHED_PRI_MIN;
		}
	}

	return 0;
}

static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);

		memset(linux_params, 0, sizeof(*linux_params));

		DPRINTF("%s: native: policy %d, priority %d\n",
		    __func__, native_policy, prio);

		if (native_policy == SCHED_OTHER) {
			linux_params->sched_priority = 0;
		} else {
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
		DPRINTF("%s: linux: policy %d, priority %d\n", __func__,
		    linux_policy != NULL ? *linux_policy : -1,
		    linux_params->sched_priority);
	}

	return 0;
}

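/*
 * Note that round trips through these conversions are not exact:
 * both directions truncate in integer division, so a native priority
 * can come back as a neighbouring value.
 */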
int
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;

	/* We need the current policy in Linux terms. */
	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;
	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	error = sched_linux2native(policy, &lp, &policy, &sp);
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */
	struct linux_sched_param lp;
	struct sched_param sp;
	int error, policy;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = sched_native2linux(policy, &sp, NULL, &lp);
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, policy, lp.sched_priority);

	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
	DPRINTF("%s: linux: policy %d, priority %d\n",
	    __func__, SCARG(uap, policy), lp.sched_priority);

	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
	DPRINTF("%s: native: policy %d, priority %d\n",
	    __func__, policy, sp.sched_priority);

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

 out:
	return error;
}

int
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
	} */
	int error, policy;

	*retval = -1;

	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
	if (error)
		goto out;

	error = sched_native2linux(policy, NULL, &policy, NULL);
	if (error)
		goto out;

	*retval = policy;

 out:
	return error;
}

int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MAX;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

int
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) policy;
	} */

	switch (SCARG(uap, policy)) {
	case LINUX_SCHED_OTHER:
		*retval = 0;
		break;
	case LINUX_SCHED_FIFO:
	case LINUX_SCHED_RR:
		*retval = LINUX_SCHED_RTPRIO_MIN;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

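/*
 * Linux exit(2) terminates only the calling thread; exit_group(2)
 * below terminates the whole process, which is what native exit()
 * does.
 */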
int
linux_sys_exit(struct lwp *l, const struct linux_sys_exit_args *uap, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{

	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */

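/*
 * set_tid_address(2) registers a location that the kernel clears when
 * the thread exits (Linux also does a futex wake on it); glibc relies
 * on this to implement pthread_join().  The caller's TID is returned.
 */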
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_emuldata;
	led->led_clear_tid = SCARG(uap, tid);
	*retval = l->l_lid;

	return 0;
}

/* ARGSUSED1 */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

/*
 * The affinity syscalls assume that the layout of our cpu kcpuset is
 * the same as linux's: a linear bitmask.
 */
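/*
 * For illustration: a typical caller of sched_getaffinity() passes
 * sizeof(cpu_set_t) (128 bytes) as len, which comfortably exceeds
 * LINUX_CPU_MASK_SIZE on most systems, and receives the number of
 * mask bytes actually filled in as the return value.
 */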
int
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct proc *p;
	struct lwp *t;
	kcpuset_t *kcset;
	size_t size;
	cpuid_t i;
	int error;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	if (SCARG(uap, pid) == 0) {
		p = curproc;
		mutex_enter(p->p_lock);
		t = curlwp;
	} else {
		t = lwp_find2(-1, SCARG(uap, pid));
		if (__predict_false(t == NULL)) {
			return ESRCH;
		}
		p = t->l_proc;
		KASSERT(mutex_owned(p->p_lock));
	}

	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, p, NULL, NULL, NULL)) {
		mutex_exit(p->p_lock);
		return EPERM;
	}

	kcpuset_create(&kcset, true);
	lwp_lock(t);
	if (t->l_affinity != NULL)
		kcpuset_copy(kcset, t->l_affinity);
	else {
		/*
		 * All available CPUs should be masked when affinity has not
		 * been set.
		 */
		kcpuset_zero(kcset);
		for (i = 0; i < ncpu; i++)
			kcpuset_set(kcset, i);
	}
	lwp_unlock(t);
	mutex_exit(p->p_lock);
	error = kcpuset_copyout(kcset, (cpuset_t *)SCARG(uap, mask), size);
	kcpuset_unuse(kcset, NULL);
	*retval = size;
	return error;
}

int
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */
	struct sys__sched_setaffinity_args ssa;
	size_t size;
	pid_t pid;
	lwpid_t lid;

	size = LINUX_CPU_MASK_SIZE;
	if (SCARG(uap, len) < size)
		return EINVAL;

	lid = SCARG(uap, pid);
	if (lid != 0) {
		/* Get the canonical PID for the process. */
		mutex_enter(&proc_lock);
		struct proc *p = proc_find_lwpid(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
		pid = p->p_pid;
		mutex_exit(&proc_lock);
	} else {
		pid = curproc->p_pid;
		lid = curlwp->l_lid;
	}

	SCARG(&ssa, pid) = pid;
	SCARG(&ssa, lid) = lid;
	SCARG(&ssa, size) = size;
	SCARG(&ssa, cpuset) = (cpuset_t *)SCARG(uap, mask);

	return sys__sched_setaffinity(l, &ssa, retval);
}