linux_sched.c revision 1.48.6.2 1 /* $NetBSD: linux_sched.c,v 1.48.6.2 2008/06/02 13:23:03 mjf Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Linux compatibility module. Try to deal with scheduler related syscalls.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.48.6.2 2008/06/02 13:23:03 mjf Exp $");
39
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/syscallargs.h>
47 #include <sys/wait.h>
48 #include <sys/kauth.h>
49 #include <sys/ptrace.h>
50
51 #include <sys/cpu.h>
52
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
56 #include <compat/linux/common/linux_emuldata.h>
57 #include <compat/linux/common/linux_ipc.h>
58 #include <compat/linux/common/linux_sem.h>
59
60 #include <compat/linux/linux_syscallargs.h>
61
62 #include <compat/linux/common/linux_sched.h>
63
64 int
65 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
66 {
67 /* {
68 syscallarg(int) flags;
69 syscallarg(void *) stack;
70 #ifdef LINUX_NPTL
71 syscallarg(void *) parent_tidptr;
72 syscallarg(void *) child_tidptr;
73 #endif
74 } */
75 int flags, sig;
76 int error;
77 #ifdef LINUX_NPTL
78 struct linux_emuldata *led;
79 #endif
80
81 /*
82 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
83 */
84 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
85 return (EINVAL);
86
87 /*
88 * Thread group implies shared signals. Shared signals
89 * imply shared VM. This matches what Linux kernel does.
90 */
91 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
92 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
93 return (EINVAL);
94 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
95 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
96 return (EINVAL);
97
98 flags = 0;
99
100 if (SCARG(uap, flags) & LINUX_CLONE_VM)
101 flags |= FORK_SHAREVM;
102 if (SCARG(uap, flags) & LINUX_CLONE_FS)
103 flags |= FORK_SHARECWD;
104 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
105 flags |= FORK_SHAREFILES;
106 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
107 flags |= FORK_SHARESIGS;
108 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
109 flags |= FORK_PPWAIT;
110
111 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
112 if (sig < 0 || sig >= LINUX__NSIG)
113 return (EINVAL);
114 sig = linux_to_native_signo[sig];
115
116 #ifdef LINUX_NPTL
117 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
118
119 led->parent_tidptr = SCARG(uap, parent_tidptr);
120 led->child_tidptr = SCARG(uap, child_tidptr);
121 led->clone_flags = SCARG(uap, flags);
122 #endif /* LINUX_NPTL */
123
124 /*
125 * Note that Linux does not provide a portable way of specifying
126 * the stack area; the caller must know if the stack grows up
127 * or down. So, we pass a stack size of 0, so that the code
128 * that makes this adjustment is a noop.
129 */
130 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
131 NULL, NULL, retval, NULL)) != 0)
132 return error;
133
134 return 0;
135 }
136
137 /*
138 * linux realtime priority
139 *
140 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
141 *
142 * - SCHED_OTHER tasks don't have realtime priorities.
143 * in particular, sched_param::sched_priority is always 0.
144 */
145
146 #define LINUX_SCHED_RTPRIO_MIN 1
147 #define LINUX_SCHED_RTPRIO_MAX 99
148
149 static int
150 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
151 int *native_policy, struct sched_param *native_params)
152 {
153
154 switch (linux_policy) {
155 case LINUX_SCHED_OTHER:
156 if (native_policy != NULL) {
157 *native_policy = SCHED_OTHER;
158 }
159 break;
160
161 case LINUX_SCHED_FIFO:
162 if (native_policy != NULL) {
163 *native_policy = SCHED_FIFO;
164 }
165 break;
166
167 case LINUX_SCHED_RR:
168 if (native_policy != NULL) {
169 *native_policy = SCHED_RR;
170 }
171 break;
172
173 default:
174 return EINVAL;
175 }
176
177 if (linux_params != NULL) {
178 int prio = linux_params->sched_priority;
179
180 KASSERT(native_params != NULL);
181
182 if (linux_policy == LINUX_SCHED_OTHER) {
183 if (prio != 0) {
184 return EINVAL;
185 }
186 native_params->sched_priority = PRI_NONE; /* XXX */
187 } else {
188 if (prio < LINUX_SCHED_RTPRIO_MIN ||
189 prio > LINUX_SCHED_RTPRIO_MAX) {
190 return EINVAL;
191 }
192 native_params->sched_priority =
193 (prio - LINUX_SCHED_RTPRIO_MIN)
194 * (SCHED_PRI_MAX - SCHED_PRI_MIN)
195 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
196 + SCHED_PRI_MIN;
197 }
198 }
199
200 return 0;
201 }
202
203 static int
204 sched_native2linux(int native_policy, struct sched_param *native_params,
205 int *linux_policy, struct linux_sched_param *linux_params)
206 {
207
208 switch (native_policy) {
209 case SCHED_OTHER:
210 if (linux_policy != NULL) {
211 *linux_policy = LINUX_SCHED_OTHER;
212 }
213 break;
214
215 case SCHED_FIFO:
216 if (linux_policy != NULL) {
217 *linux_policy = LINUX_SCHED_FIFO;
218 }
219 break;
220
221 case SCHED_RR:
222 if (linux_policy != NULL) {
223 *linux_policy = LINUX_SCHED_RR;
224 }
225 break;
226
227 default:
228 panic("%s: unknown policy %d\n", __func__, native_policy);
229 }
230
231 if (native_params != NULL) {
232 int prio = native_params->sched_priority;
233
234 KASSERT(prio >= SCHED_PRI_MIN);
235 KASSERT(prio <= SCHED_PRI_MAX);
236 KASSERT(linux_params != NULL);
237
238 #ifdef DEBUG_LINUX
239 printf("native2linux: native: policy %d, priority %d\n",
240 native_policy, prio);
241 #endif
242
243 if (native_policy == SCHED_OTHER) {
244 linux_params->sched_priority = 0;
245 } else {
246 linux_params->sched_priority =
247 (prio - SCHED_PRI_MIN)
248 * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
249 / (SCHED_PRI_MAX - SCHED_PRI_MIN)
250 + LINUX_SCHED_RTPRIO_MIN;
251 }
252 #ifdef DEBUG_LINUX
253 printf("native2linux: linux: policy %d, priority %d\n",
254 -1, linux_params->sched_priority);
255 #endif
256 }
257
258 return 0;
259 }
260
261 int
262 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
263 {
264 /* {
265 syscallarg(linux_pid_t) pid;
266 syscallarg(const struct linux_sched_param *) sp;
267 } */
268 int error, policy;
269 struct linux_sched_param lp;
270 struct sched_param sp;
271
272 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
273 error = EINVAL;
274 goto out;
275 }
276
277 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
278 if (error)
279 goto out;
280
281 /* We need the current policy in Linux terms. */
282 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
283 if (error)
284 goto out;
285 error = sched_native2linux(policy, NULL, &policy, NULL);
286 if (error)
287 goto out;
288
289 error = sched_linux2native(policy, &lp, &policy, &sp);
290 if (error)
291 goto out;
292
293 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
294 if (error)
295 goto out;
296
297 out:
298 return error;
299 }
300
301 int
302 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
303 {
304 /* {
305 syscallarg(linux_pid_t) pid;
306 syscallarg(struct linux_sched_param *) sp;
307 } */
308 struct linux_sched_param lp;
309 struct sched_param sp;
310 int error, policy;
311
312 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
313 error = EINVAL;
314 goto out;
315 }
316
317 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
318 if (error)
319 goto out;
320 #ifdef DEBUG_LINUX
321 printf("getparam: native: policy %d, priority %d\n",
322 policy, sp.sched_priority);
323 #endif
324
325 error = sched_native2linux(policy, &sp, NULL, &lp);
326 if (error)
327 goto out;
328 #ifdef DEBUG_LINUX
329 printf("getparam: linux: policy %d, priority %d\n",
330 policy, lp.sched_priority);
331 #endif
332
333 error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
334 if (error)
335 goto out;
336
337 out:
338 return error;
339 }
340
341 int
342 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
343 {
344 /* {
345 syscallarg(linux_pid_t) pid;
346 syscallarg(int) policy;
347 syscallarg(cont struct linux_sched_scheduler *) sp;
348 } */
349 int error, policy;
350 struct linux_sched_param lp;
351 struct sched_param sp;
352
353 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
354 error = EINVAL;
355 goto out;
356 }
357
358 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
359 if (error)
360 goto out;
361 #ifdef DEBUG_LINUX
362 printf("setscheduler: linux: policy %d, priority %d\n",
363 SCARG(uap, policy), lp.sched_priority);
364 #endif
365
366 error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
367 if (error)
368 goto out;
369 #ifdef DEBUG_LINUX
370 printf("setscheduler: native: policy %d, priority %d\n",
371 policy, sp.sched_priority);
372 #endif
373
374 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
375 if (error)
376 goto out;
377
378 out:
379 return error;
380 }
381
382 int
383 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
384 {
385 /* {
386 syscallarg(linux_pid_t) pid;
387 } */
388 int error, policy;
389
390 *retval = -1;
391
392 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
393 if (error)
394 goto out;
395
396 error = sched_native2linux(policy, NULL, &policy, NULL);
397 if (error)
398 goto out;
399
400 *retval = policy;
401
402 out:
403 return error;
404 }
405
406 int
407 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
408 {
409
410 yield();
411 return 0;
412 }
413
414 int
415 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
416 {
417 /* {
418 syscallarg(int) policy;
419 } */
420
421 switch (SCARG(uap, policy)) {
422 case LINUX_SCHED_OTHER:
423 *retval = 0;
424 break;
425 case LINUX_SCHED_FIFO:
426 case LINUX_SCHED_RR:
427 *retval = LINUX_SCHED_RTPRIO_MAX;
428 break;
429 default:
430 return EINVAL;
431 }
432
433 return 0;
434 }
435
436 int
437 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
438 {
439 /* {
440 syscallarg(int) policy;
441 } */
442
443 switch (SCARG(uap, policy)) {
444 case LINUX_SCHED_OTHER:
445 *retval = 0;
446 break;
447 case LINUX_SCHED_FIFO:
448 case LINUX_SCHED_RR:
449 *retval = LINUX_SCHED_RTPRIO_MIN;
450 break;
451 default:
452 return EINVAL;
453 }
454
455 return 0;
456 }
457
458 #ifndef __m68k__
459 /* Present on everything but m68k */
460 int
461 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
462 {
463 #ifdef LINUX_NPTL
464 /* {
465 syscallarg(int) error_code;
466 } */
467 struct proc *p = l->l_proc;
468 struct linux_emuldata *led = p->p_emuldata;
469 struct linux_emuldata *e;
470
471 if (led->s->flags & LINUX_LES_USE_NPTL) {
472
473 #ifdef DEBUG_LINUX
474 printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
475 led->s->refs);
476 #endif
477
478 /*
479 * The calling thread is supposed to kill all threads
480 * in the same thread group (i.e. all threads created
481 * via clone(2) with CLONE_THREAD flag set).
482 *
483 * If there is only one thread, things are quite simple
484 */
485 if (led->s->refs == 1)
486 return sys_exit(l, (const void *)uap, retval);
487
488 #ifdef DEBUG_LINUX
489 printf("%s:%d\n", __func__, __LINE__);
490 #endif
491
492 mutex_enter(proc_lock);
493 led->s->flags |= LINUX_LES_INEXITGROUP;
494 led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
495
496 /*
497 * Kill all threads in the group. The emulation exit hook takes
498 * care of hiding the zombies and reporting the exit code
499 * properly.
500 */
501 LIST_FOREACH(e, &led->s->threads, threads) {
502 if (e->proc == p)
503 continue;
504
505 #ifdef DEBUG_LINUX
506 printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
507 #endif
508 psignal(e->proc, SIGKILL);
509 }
510
511 /* Now, kill ourselves */
512 psignal(p, SIGKILL);
513 mutex_exit(proc_lock);
514
515 return 0;
516
517 }
518 #endif /* LINUX_NPTL */
519
520 return sys_exit(l, (const void *)uap, retval);
521 }
522 #endif /* !__m68k__ */
523
524 #ifdef LINUX_NPTL
525 int
526 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
527 {
528 /* {
529 syscallarg(int *) tidptr;
530 } */
531 struct linux_emuldata *led;
532
533 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
534 led->clear_tid = SCARG(uap, tid);
535
536 led->s->flags |= LINUX_LES_USE_NPTL;
537
538 *retval = l->l_proc->p_pid;
539
540 return 0;
541 }
542
543 /* ARGUSED1 */
544 int
545 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
546 {
547 /* The Linux kernel does it exactly that way */
548 *retval = l->l_proc->p_pid;
549 return 0;
550 }
551
552 #ifdef LINUX_NPTL
553 /* ARGUSED1 */
554 int
555 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
556 {
557 struct linux_emuldata *led = l->l_proc->p_emuldata;
558
559 if (led->s->flags & LINUX_LES_USE_NPTL) {
560 /* The Linux kernel does it exactly that way */
561 *retval = led->s->group_pid;
562 } else {
563 *retval = l->l_proc->p_pid;
564 }
565
566 return 0;
567 }
568
569 /* ARGUSED1 */
570 int
571 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
572 {
573 struct proc *p = l->l_proc;
574 struct linux_emuldata *led = p->p_emuldata;
575 struct proc *glp;
576 struct proc *pp;
577
578 mutex_enter(proc_lock);
579 if (led->s->flags & LINUX_LES_USE_NPTL) {
580
581 /* Find the thread group leader's parent */
582 if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
583 /* Maybe panic... */
584 printf("linux_sys_getppid: missing group leader PID"
585 " %d\n", led->s->group_pid);
586 mutex_exit(proc_lock);
587 return -1;
588 }
589 pp = glp->p_pptr;
590
591 /* If this is a Linux process too, return thread group PID */
592 if (pp->p_emul == p->p_emul) {
593 struct linux_emuldata *pled;
594
595 pled = pp->p_emuldata;
596 *retval = pled->s->group_pid;
597 } else {
598 *retval = pp->p_pid;
599 }
600
601 } else {
602 *retval = p->p_pptr->p_pid;
603 }
604 mutex_exit(proc_lock);
605
606 return 0;
607 }
608 #endif /* LINUX_NPTL */
609
610 int
611 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
612 {
613 /* {
614 syscallarg(pid_t) pid;
615 syscallarg(unsigned int) len;
616 syscallarg(unsigned long *) mask;
617 } */
618 int error;
619 int ret;
620 char *data;
621 int *retp;
622
623 if (SCARG(uap, mask) == NULL)
624 return EINVAL;
625
626 if (SCARG(uap, len) < sizeof(int))
627 return EINVAL;
628
629 if (pfind(SCARG(uap, pid)) == NULL)
630 return ESRCH;
631
632 /*
633 * return the actual number of CPU, tag all of them as available
634 * The result is a mask, the first CPU being in the least significant
635 * bit.
636 */
637 ret = (1 << ncpu) - 1;
638 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
639 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
640 *retp = ret;
641
642 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
643 return error;
644
645 free(data, M_TEMP);
646
647 return 0;
648
649 }
650
651 int
652 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
653 {
654 /* {
655 syscallarg(pid_t) pid;
656 syscallarg(unsigned int) len;
657 syscallarg(unsigned long *) mask;
658 } */
659
660 if (pfind(SCARG(uap, pid)) == NULL)
661 return ESRCH;
662
663 /* Let's ignore it */
664 #ifdef DEBUG_LINUX
665 printf("linux_sys_sched_setaffinity\n");
666 #endif
667 return 0;
668 };
669 #endif /* LINUX_NPTL */
670