/*	$NetBSD: linux_sched.c,v 1.57.2.1 2008/05/10 23:48:56 wrstuden Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Linux compatibility module. Try to deal with scheduler related syscalls.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.57.2.1 2008/05/10 23:48:56 wrstuden Exp $");
39
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/sa.h>
47 #include <sys/syscallargs.h>
48 #include <sys/wait.h>
49 #include <sys/kauth.h>
50 #include <sys/ptrace.h>
51
52 #include <sys/cpu.h>
53
54 #include <compat/linux/common/linux_types.h>
55 #include <compat/linux/common/linux_signal.h>
56 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
57 #include <compat/linux/common/linux_emuldata.h>
58 #include <compat/linux/common/linux_ipc.h>
59 #include <compat/linux/common/linux_sem.h>
60
61 #include <compat/linux/linux_syscallargs.h>
62
63 #include <compat/linux/common/linux_sched.h>
64
65 int
66 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
67 {
68 /* {
69 syscallarg(int) flags;
70 syscallarg(void *) stack;
71 #ifdef LINUX_NPTL
72 syscallarg(void *) parent_tidptr;
73 syscallarg(void *) child_tidptr;
74 #endif
75 } */
76 int flags, sig;
77 int error;
78 #ifdef LINUX_NPTL
79 struct linux_emuldata *led;
80 #endif
81
82 /*
83 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
84 */
85 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
86 return (EINVAL);
87
88 /*
89 * Thread group implies shared signals. Shared signals
90 * imply shared VM. This matches what Linux kernel does.
91 */
92 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
93 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
94 return (EINVAL);
95 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
96 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
97 return (EINVAL);
98
99 flags = 0;
100
101 if (SCARG(uap, flags) & LINUX_CLONE_VM)
102 flags |= FORK_SHAREVM;
103 if (SCARG(uap, flags) & LINUX_CLONE_FS)
104 flags |= FORK_SHARECWD;
105 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
106 flags |= FORK_SHAREFILES;
107 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
108 flags |= FORK_SHARESIGS;
109 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
110 flags |= FORK_PPWAIT;
111
112 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
113 if (sig < 0 || sig >= LINUX__NSIG)
114 return (EINVAL);
115 sig = linux_to_native_signo[sig];
116
117 #ifdef LINUX_NPTL
118 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
119
120 led->parent_tidptr = SCARG(uap, parent_tidptr);
121 led->child_tidptr = SCARG(uap, child_tidptr);
122 led->clone_flags = SCARG(uap, flags);
123 #endif /* LINUX_NPTL */
124
125 /*
126 * Note that Linux does not provide a portable way of specifying
127 * the stack area; the caller must know if the stack grows up
128 * or down. So, we pass a stack size of 0, so that the code
129 * that makes this adjustment is a noop.
130 */
131 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
132 NULL, NULL, retval, NULL)) != 0)
133 return error;
134
135 return 0;
136 }
137
138 /*
139 * linux realtime priority
140 *
141 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
142 *
143 * - SCHED_OTHER tasks don't have realtime priorities.
144 * in particular, sched_param::sched_priority is always 0.
145 */
146
147 #define LINUX_SCHED_RTPRIO_MIN 1
148 #define LINUX_SCHED_RTPRIO_MAX 99
149
150 static int
151 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
152 int *native_policy, struct sched_param *native_params)
153 {
154
155 switch (linux_policy) {
156 case LINUX_SCHED_OTHER:
157 if (native_policy != NULL) {
158 *native_policy = SCHED_OTHER;
159 }
160 break;
161
162 case LINUX_SCHED_FIFO:
163 if (native_policy != NULL) {
164 *native_policy = SCHED_FIFO;
165 }
166 break;
167
168 case LINUX_SCHED_RR:
169 if (native_policy != NULL) {
170 *native_policy = SCHED_RR;
171 }
172 break;
173
174 default:
175 return EINVAL;
176 }
177
178 if (linux_params != NULL) {
179 int prio = linux_params->sched_priority;
180
181 KASSERT(native_params != NULL);
182
183 if (linux_policy == LINUX_SCHED_OTHER) {
184 if (prio != 0) {
185 return EINVAL;
186 }
187 native_params->sched_priority = PRI_NONE; /* XXX */
188 } else {
189 if (prio < LINUX_SCHED_RTPRIO_MIN ||
190 prio > LINUX_SCHED_RTPRIO_MAX) {
191 return EINVAL;
192 }
193 native_params->sched_priority =
194 (prio - LINUX_SCHED_RTPRIO_MIN)
195 * (SCHED_PRI_MAX - SCHED_PRI_MIN)
196 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
197 + SCHED_PRI_MIN;
198 }
199 }
200
201 return 0;
202 }
203
204 static int
205 sched_native2linux(int native_policy, struct sched_param *native_params,
206 int *linux_policy, struct linux_sched_param *linux_params)
207 {
208
209 switch (native_policy) {
210 case SCHED_OTHER:
211 if (linux_policy != NULL) {
212 *linux_policy = LINUX_SCHED_OTHER;
213 }
214 break;
215
216 case SCHED_FIFO:
217 if (linux_policy != NULL) {
218 *linux_policy = LINUX_SCHED_FIFO;
219 }
220 break;
221
222 case SCHED_RR:
223 if (linux_policy != NULL) {
224 *linux_policy = LINUX_SCHED_RR;
225 }
226 break;
227
228 default:
229 panic("%s: unknown policy %d\n", __func__, native_policy);
230 }
231
232 if (native_params != NULL) {
233 int prio = native_params->sched_priority;
234
235 KASSERT(prio >= SCHED_PRI_MIN);
236 KASSERT(prio <= SCHED_PRI_MAX);
237 KASSERT(linux_params != NULL);
238
239 #ifdef DEBUG_LINUX
240 printf("native2linux: native: policy %d, priority %d\n",
241 native_policy, prio);
242 #endif
243
244 if (native_policy == SCHED_OTHER) {
245 linux_params->sched_priority = 0;
246 } else {
247 linux_params->sched_priority =
248 (prio - SCHED_PRI_MIN)
249 * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
250 / (SCHED_PRI_MAX - SCHED_PRI_MIN)
251 + LINUX_SCHED_RTPRIO_MIN;
252 }
253 #ifdef DEBUG_LINUX
254 printf("native2linux: linux: policy %d, priority %d\n",
255 -1, linux_params->sched_priority);
256 #endif
257 }
258
259 return 0;
260 }
261
262 int
263 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
264 {
265 /* {
266 syscallarg(linux_pid_t) pid;
267 syscallarg(const struct linux_sched_param *) sp;
268 } */
269 int error, policy;
270 struct linux_sched_param lp;
271 struct sched_param sp;
272
273 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
274 error = EINVAL;
275 goto out;
276 }
277
278 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
279 if (error)
280 goto out;
281
282 /* We need the current policy in Linux terms. */
283 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
284 if (error)
285 goto out;
286 error = sched_native2linux(policy, NULL, &policy, NULL);
287 if (error)
288 goto out;
289
290 error = sched_linux2native(policy, &lp, &policy, &sp);
291 if (error)
292 goto out;
293
294 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
295 if (error)
296 goto out;
297
298 out:
299 return error;
300 }
301
302 int
303 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
304 {
305 /* {
306 syscallarg(linux_pid_t) pid;
307 syscallarg(struct linux_sched_param *) sp;
308 } */
309 struct linux_sched_param lp;
310 struct sched_param sp;
311 int error, policy;
312
313 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
314 error = EINVAL;
315 goto out;
316 }
317
318 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
319 if (error)
320 goto out;
321 #ifdef DEBUG_LINUX
322 printf("getparam: native: policy %d, priority %d\n",
323 policy, sp.sched_priority);
324 #endif
325
326 error = sched_native2linux(policy, &sp, NULL, &lp);
327 if (error)
328 goto out;
329 #ifdef DEBUG_LINUX
330 printf("getparam: linux: policy %d, priority %d\n",
331 policy, lp.sched_priority);
332 #endif
333
334 error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
335 if (error)
336 goto out;
337
338 out:
339 return error;
340 }
341
342 int
343 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
344 {
345 /* {
346 syscallarg(linux_pid_t) pid;
347 syscallarg(int) policy;
348 syscallarg(cont struct linux_sched_scheduler *) sp;
349 } */
350 int error, policy;
351 struct linux_sched_param lp;
352 struct sched_param sp;
353
354 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
355 error = EINVAL;
356 goto out;
357 }
358
359 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
360 if (error)
361 goto out;
362 #ifdef DEBUG_LINUX
363 printf("setscheduler: linux: policy %d, priority %d\n",
364 SCARG(uap, policy), lp.sched_priority);
365 #endif
366
367 error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
368 if (error)
369 goto out;
370 #ifdef DEBUG_LINUX
371 printf("setscheduler: native: policy %d, priority %d\n",
372 policy, sp.sched_priority);
373 #endif
374
375 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
376 if (error)
377 goto out;
378
379 out:
380 return error;
381 }
382
383 int
384 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
385 {
386 /* {
387 syscallarg(linux_pid_t) pid;
388 } */
389 int error, policy;
390
391 *retval = -1;
392
393 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
394 if (error)
395 goto out;
396
397 error = sched_native2linux(policy, NULL, &policy, NULL);
398 if (error)
399 goto out;
400
401 *retval = policy;
402
403 out:
404 return error;
405 }
406
/*
 * sched_yield(2): voluntarily give up the CPU.  Maps directly onto
 * the native yield() primitive.
 */
int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}
414
415 int
416 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
417 {
418 /* {
419 syscallarg(int) policy;
420 } */
421
422 switch (SCARG(uap, policy)) {
423 case LINUX_SCHED_OTHER:
424 *retval = 0;
425 break;
426 case LINUX_SCHED_FIFO:
427 case LINUX_SCHED_RR:
428 *retval = LINUX_SCHED_RTPRIO_MAX;
429 break;
430 default:
431 return EINVAL;
432 }
433
434 return 0;
435 }
436
437 int
438 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
439 {
440 /* {
441 syscallarg(int) policy;
442 } */
443
444 switch (SCARG(uap, policy)) {
445 case LINUX_SCHED_OTHER:
446 *retval = 0;
447 break;
448 case LINUX_SCHED_FIFO:
449 case LINUX_SCHED_RR:
450 *retval = LINUX_SCHED_RTPRIO_MIN;
451 break;
452 default:
453 return EINVAL;
454 }
455
456 return 0;
457 }
458
#ifndef __m68k__
/* Present on everything but m68k */
/*
 * exit_group(2): terminate every thread in the caller's thread group
 * (all processes created via clone(2) with CLONE_THREAD), then the
 * caller itself.  For non-NPTL processes this degenerates to exit(2).
 */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		/*
		 * Record the group exit status under proc_lock so the
		 * emulation exit hook can report it for every thread.
		 */
		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group. The emulation exit hook takes
		 * care of hiding the zombies and reporting the exit code
		 * properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		return 0;

	}
#endif /* LINUX_NPTL */

	/* Single-threaded (or non-NPTL) process: a plain exit(2) suffices. */
	return sys_exit(l, (const void *)uap, retval);
}
#endif /* !__m68k__ */
524
525 #ifdef LINUX_NPTL
526 int
527 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
528 {
529 /* {
530 syscallarg(int *) tidptr;
531 } */
532 struct linux_emuldata *led;
533
534 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
535 led->clear_tid = SCARG(uap, tid);
536
537 led->s->flags |= LINUX_LES_USE_NPTL;
538
539 *retval = l->l_proc->p_pid;
540
541 return 0;
542 }
543
/* ARGUSED1 */
/*
 * gettid(2): return the caller's thread ID; here that is the native
 * PID, which is what the Linux kernel reports for a group leader.
 */
int
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
{
	/* The Linux kernel does it exactly that way */
	*retval = l->l_proc->p_pid;
	return 0;
}
552
553 #ifdef LINUX_NPTL
554 /* ARGUSED1 */
555 int
556 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
557 {
558 struct linux_emuldata *led = l->l_proc->p_emuldata;
559
560 if (led->s->flags & LINUX_LES_USE_NPTL) {
561 /* The Linux kernel does it exactly that way */
562 *retval = led->s->group_pid;
563 } else {
564 *retval = l->l_proc->p_pid;
565 }
566
567 return 0;
568 }
569
570 /* ARGUSED1 */
571 int
572 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
573 {
574 struct proc *p = l->l_proc;
575 struct linux_emuldata *led = p->p_emuldata;
576 struct proc *glp;
577 struct proc *pp;
578
579 mutex_enter(proc_lock);
580 if (led->s->flags & LINUX_LES_USE_NPTL) {
581
582 /* Find the thread group leader's parent */
583 if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
584 /* Maybe panic... */
585 printf("linux_sys_getppid: missing group leader PID"
586 " %d\n", led->s->group_pid);
587 mutex_exit(proc_lock);
588 return -1;
589 }
590 pp = glp->p_pptr;
591
592 /* If this is a Linux process too, return thread group PID */
593 if (pp->p_emul == p->p_emul) {
594 struct linux_emuldata *pled;
595
596 pled = pp->p_emuldata;
597 *retval = pled->s->group_pid;
598 } else {
599 *retval = pp->p_pid;
600 }
601
602 } else {
603 *retval = p->p_pptr->p_pid;
604 }
605 mutex_exit(proc_lock);
606
607 return 0;
608 }
609 #endif /* LINUX_NPTL */
610
611 int
612 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
613 {
614 /* {
615 syscallarg(pid_t) pid;
616 syscallarg(unsigned int) len;
617 syscallarg(unsigned long *) mask;
618 } */
619 int error;
620 int ret;
621 char *data;
622 int *retp;
623
624 if (SCARG(uap, mask) == NULL)
625 return EINVAL;
626
627 if (SCARG(uap, len) < sizeof(int))
628 return EINVAL;
629
630 if (pfind(SCARG(uap, pid)) == NULL)
631 return ESRCH;
632
633 /*
634 * return the actual number of CPU, tag all of them as available
635 * The result is a mask, the first CPU being in the least significant
636 * bit.
637 */
638 ret = (1 << ncpu) - 1;
639 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
640 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
641 *retp = ret;
642
643 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
644 return error;
645
646 free(data, M_TEMP);
647
648 return 0;
649
650 }
651
652 int
653 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
654 {
655 /* {
656 syscallarg(pid_t) pid;
657 syscallarg(unsigned int) len;
658 syscallarg(unsigned long *) mask;
659 } */
660
661 if (pfind(SCARG(uap, pid)) == NULL)
662 return ESRCH;
663
664 /* Let's ignore it */
665 #ifdef DEBUG_LINUX
666 printf("linux_sys_sched_setaffinity\n");
667 #endif
668 return 0;
669 };
670 #endif /* LINUX_NPTL */
671