linux_sched.c revision 1.19.2.8 1 /* $NetBSD: linux_sched.c,v 1.19.2.8 2008/03/17 09:14:36 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Linux compatibility module. Try to deal with scheduler related syscalls.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.19.2.8 2008/03/17 09:14:36 yamt Exp $");
46
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/syscallargs.h>
54 #include <sys/wait.h>
55 #include <sys/kauth.h>
56 #include <sys/ptrace.h>
57
58 #include <sys/cpu.h>
59
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_signal.h>
62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
63 #include <compat/linux/common/linux_emuldata.h>
64 #include <compat/linux/common/linux_ipc.h>
65 #include <compat/linux/common/linux_sem.h>
66
67 #include <compat/linux/linux_syscallargs.h>
68
69 #include <compat/linux/common/linux_sched.h>
70
71 int
72 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
73 {
74 /* {
75 syscallarg(int) flags;
76 syscallarg(void *) stack;
77 #ifdef LINUX_NPTL
78 syscallarg(void *) parent_tidptr;
79 syscallarg(void *) child_tidptr;
80 #endif
81 } */
82 int flags, sig;
83 int error;
84 #ifdef LINUX_NPTL
85 struct linux_emuldata *led;
86 #endif
87
88 /*
89 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
90 */
91 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
92 return (EINVAL);
93
94 /*
95 * Thread group implies shared signals. Shared signals
96 * imply shared VM. This matches what Linux kernel does.
97 */
98 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
99 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
100 return (EINVAL);
101 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
102 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
103 return (EINVAL);
104
105 flags = 0;
106
107 if (SCARG(uap, flags) & LINUX_CLONE_VM)
108 flags |= FORK_SHAREVM;
109 if (SCARG(uap, flags) & LINUX_CLONE_FS)
110 flags |= FORK_SHARECWD;
111 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
112 flags |= FORK_SHAREFILES;
113 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
114 flags |= FORK_SHARESIGS;
115 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
116 flags |= FORK_PPWAIT;
117
118 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
119 if (sig < 0 || sig >= LINUX__NSIG)
120 return (EINVAL);
121 sig = linux_to_native_signo[sig];
122
123 #ifdef LINUX_NPTL
124 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
125
126 led->parent_tidptr = SCARG(uap, parent_tidptr);
127 led->child_tidptr = SCARG(uap, child_tidptr);
128 led->clone_flags = SCARG(uap, flags);
129 #endif /* LINUX_NPTL */
130
131 /*
132 * Note that Linux does not provide a portable way of specifying
133 * the stack area; the caller must know if the stack grows up
134 * or down. So, we pass a stack size of 0, so that the code
135 * that makes this adjustment is a noop.
136 */
137 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
138 NULL, NULL, retval, NULL)) != 0)
139 return error;
140
141 return 0;
142 }
143
144 /*
145 * linux realtime priority
146 *
147 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
148 *
149 * - SCHED_OTHER tasks don't have realtime priorities.
150 * in particular, sched_param::sched_priority is always 0.
151 */
152
153 #define LINUX_SCHED_RTPRIO_MIN 1
154 #define LINUX_SCHED_RTPRIO_MAX 99
155
156 static int
157 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
158 int *native_policy, struct sched_param *native_params)
159 {
160
161 switch (linux_policy) {
162 case LINUX_SCHED_OTHER:
163 if (native_policy != NULL) {
164 *native_policy = SCHED_OTHER;
165 }
166 break;
167
168 case LINUX_SCHED_FIFO:
169 if (native_policy != NULL) {
170 *native_policy = SCHED_FIFO;
171 }
172 break;
173
174 case LINUX_SCHED_RR:
175 if (native_policy != NULL) {
176 *native_policy = SCHED_RR;
177 }
178 break;
179
180 default:
181 return EINVAL;
182 }
183
184 if (linux_params != NULL) {
185 int prio = linux_params->sched_priority;
186
187 KASSERT(native_params != NULL);
188
189 if (linux_policy == LINUX_SCHED_OTHER) {
190 if (prio != 0) {
191 return EINVAL;
192 }
193 native_params->sched_priority = PRI_NONE; /* XXX */
194 } else {
195 if (prio < LINUX_SCHED_RTPRIO_MIN ||
196 prio > LINUX_SCHED_RTPRIO_MAX) {
197 return EINVAL;
198 }
199 native_params->sched_priority =
200 (prio - LINUX_SCHED_RTPRIO_MIN)
201 * (SCHED_PRI_MAX - SCHED_PRI_MIN)
202 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
203 + SCHED_PRI_MIN;
204 }
205 }
206
207 return 0;
208 }
209
210 static int
211 sched_native2linux(int native_policy, struct sched_param *native_params,
212 int *linux_policy, struct linux_sched_param *linux_params)
213 {
214
215 switch (native_policy) {
216 case SCHED_OTHER:
217 if (linux_policy != NULL) {
218 *linux_policy = LINUX_SCHED_OTHER;
219 }
220 break;
221
222 case SCHED_FIFO:
223 if (linux_policy != NULL) {
224 *linux_policy = LINUX_SCHED_FIFO;
225 }
226 break;
227
228 case SCHED_RR:
229 if (linux_policy != NULL) {
230 *linux_policy = LINUX_SCHED_RR;
231 }
232 break;
233
234 default:
235 panic("%s: unknown policy %d\n", __func__, native_policy);
236 }
237
238 if (native_params != NULL) {
239 int prio = native_params->sched_priority;
240
241 KASSERT(prio >= SCHED_PRI_MIN);
242 KASSERT(prio <= SCHED_PRI_MAX);
243 KASSERT(linux_params != NULL);
244
245 if (native_policy == SCHED_OTHER) {
246 linux_params->sched_priority = 0;
247 } else {
248 linux_params->sched_priority =
249 (prio - SCHED_PRI_MIN)
250 * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
251 / (SCHED_PRI_MAX - SCHED_PRI_MIN)
252 + LINUX_SCHED_RTPRIO_MIN;
253 }
254 }
255
256 return 0;
257 }
258
259 int
260 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
261 {
262 /* {
263 syscallarg(linux_pid_t) pid;
264 syscallarg(const struct linux_sched_param *) sp;
265 } */
266 int error, policy;
267 struct linux_sched_param lp;
268 struct sched_param sp;
269
270 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
271 error = EINVAL;
272 goto out;
273 }
274
275 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
276 if (error)
277 goto out;
278
279 /* We need the current policy in Linux terms. */
280 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
281 if (error)
282 goto out;
283 error = sched_native2linux(policy, NULL, &policy, NULL);
284 if (error)
285 goto out;
286
287 error = sched_linux2native(policy, &lp, &policy, &sp);
288 if (error)
289 goto out;
290
291 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
292 if (error)
293 goto out;
294
295 out:
296 return error;
297 }
298
299 int
300 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
301 {
302 /* {
303 syscallarg(linux_pid_t) pid;
304 syscallarg(struct linux_sched_param *) sp;
305 } */
306 struct linux_sched_param lp;
307 struct sched_param sp;
308 int error, policy;
309
310 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
311 error = EINVAL;
312 goto out;
313 }
314
315 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
316 if (error)
317 goto out;
318
319 error = sched_native2linux(policy, &sp, NULL, &lp);
320 if (error)
321 goto out;
322
323 error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
324 if (error)
325 goto out;
326
327 out:
328 return error;
329 }
330
331 int
332 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
333 {
334 /* {
335 syscallarg(linux_pid_t) pid;
336 syscallarg(int) policy;
337 syscallarg(cont struct linux_sched_scheduler *) sp;
338 } */
339 int error, policy;
340 struct linux_sched_param lp;
341 struct sched_param sp;
342
343 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
344 error = EINVAL;
345 goto out;
346 }
347
348 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
349 if (error)
350 goto out;
351
352 error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
353 if (error)
354 goto out;
355
356 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
357 if (error)
358 goto out;
359
360 out:
361 return error;
362 }
363
364 int
365 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
366 {
367 /* {
368 syscallarg(linux_pid_t) pid;
369 } */
370 int error, policy;
371
372 *retval = -1;
373
374 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
375 if (error)
376 goto out;
377
378 error = sched_native2linux(policy, NULL, &policy, NULL);
379 if (error)
380 goto out;
381
382 *retval = policy;
383
384 out:
385 return error;
386 }
387
388 int
389 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
390 {
391
392 yield();
393 return 0;
394 }
395
396 int
397 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
398 {
399 /* {
400 syscallarg(int) policy;
401 } */
402
403 /*
404 * We can't emulate anything put the default scheduling policy.
405 */
406 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
407 *retval = -1;
408 return EINVAL;
409 }
410
411 *retval = 0;
412 return 0;
413 }
414
415 int
416 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
417 {
418 /* {
419 syscallarg(int) policy;
420 } */
421
422 /*
423 * We can't emulate anything put the default scheduling policy.
424 */
425 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
426 *retval = -1;
427 return EINVAL;
428 }
429
430 *retval = 0;
431 return 0;
432 }
433
434 #ifndef __m68k__
435 /* Present on everything but m68k */
436 int
437 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
438 {
439 #ifdef LINUX_NPTL
440 /* {
441 syscallarg(int) error_code;
442 } */
443 struct proc *p = l->l_proc;
444 struct linux_emuldata *led = p->p_emuldata;
445 struct linux_emuldata *e;
446
447 if (led->s->flags & LINUX_LES_USE_NPTL) {
448
449 #ifdef DEBUG_LINUX
450 printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
451 led->s->refs);
452 #endif
453
454 /*
455 * The calling thread is supposed to kill all threads
456 * in the same thread group (i.e. all threads created
457 * via clone(2) with CLONE_THREAD flag set).
458 *
459 * If there is only one thread, things are quite simple
460 */
461 if (led->s->refs == 1)
462 return sys_exit(l, (const void *)uap, retval);
463
464 #ifdef DEBUG_LINUX
465 printf("%s:%d\n", __func__, __LINE__);
466 #endif
467
468 led->s->flags |= LINUX_LES_INEXITGROUP;
469 led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
470
471 /*
472 * Kill all threads in the group. The emulation exit hook takes
473 * care of hiding the zombies and reporting the exit code
474 * properly.
475 */
476 mutex_enter(&proclist_mutex);
477 LIST_FOREACH(e, &led->s->threads, threads) {
478 if (e->proc == p)
479 continue;
480
481 #ifdef DEBUG_LINUX
482 printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
483 #endif
484 psignal(e->proc, SIGKILL);
485 }
486
487 /* Now, kill ourselves */
488 psignal(p, SIGKILL);
489 mutex_exit(&proclist_mutex);
490
491 return 0;
492
493 }
494 #endif /* LINUX_NPTL */
495
496 return sys_exit(l, (const void *)uap, retval);
497 }
498 #endif /* !__m68k__ */
499
500 #ifdef LINUX_NPTL
501 int
502 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
503 {
504 /* {
505 syscallarg(int *) tidptr;
506 } */
507 struct linux_emuldata *led;
508
509 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
510 led->clear_tid = SCARG(uap, tid);
511
512 led->s->flags |= LINUX_LES_USE_NPTL;
513
514 *retval = l->l_proc->p_pid;
515
516 return 0;
517 }
518
519 /* ARGUSED1 */
520 int
521 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
522 {
523 /* The Linux kernel does it exactly that way */
524 *retval = l->l_proc->p_pid;
525 return 0;
526 }
527
528 #ifdef LINUX_NPTL
529 /* ARGUSED1 */
530 int
531 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
532 {
533 struct linux_emuldata *led = l->l_proc->p_emuldata;
534
535 if (led->s->flags & LINUX_LES_USE_NPTL) {
536 /* The Linux kernel does it exactly that way */
537 *retval = led->s->group_pid;
538 } else {
539 *retval = l->l_proc->p_pid;
540 }
541
542 return 0;
543 }
544
545 /* ARGUSED1 */
546 int
547 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
548 {
549 struct proc *p = l->l_proc;
550 struct linux_emuldata *led = p->p_emuldata;
551 struct proc *glp;
552 struct proc *pp;
553
554 if (led->s->flags & LINUX_LES_USE_NPTL) {
555
556 /* Find the thread group leader's parent */
557 if ((glp = pfind(led->s->group_pid)) == NULL) {
558 /* Maybe panic... */
559 printf("linux_sys_getppid: missing group leader PID"
560 " %d\n", led->s->group_pid);
561 return -1;
562 }
563 pp = glp->p_pptr;
564
565 /* If this is a Linux process too, return thread group PID */
566 if (pp->p_emul == p->p_emul) {
567 struct linux_emuldata *pled;
568
569 pled = pp->p_emuldata;
570 *retval = pled->s->group_pid;
571 } else {
572 *retval = pp->p_pid;
573 }
574
575 } else {
576 *retval = p->p_pptr->p_pid;
577 }
578
579 return 0;
580 }
581 #endif /* LINUX_NPTL */
582
583 int
584 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
585 {
586 /* {
587 syscallarg(pid_t) pid;
588 syscallarg(unsigned int) len;
589 syscallarg(unsigned long *) mask;
590 } */
591 int error;
592 int ret;
593 char *data;
594 int *retp;
595
596 if (SCARG(uap, mask) == NULL)
597 return EINVAL;
598
599 if (SCARG(uap, len) < sizeof(int))
600 return EINVAL;
601
602 if (pfind(SCARG(uap, pid)) == NULL)
603 return ESRCH;
604
605 /*
606 * return the actual number of CPU, tag all of them as available
607 * The result is a mask, the first CPU being in the least significant
608 * bit.
609 */
610 ret = (1 << ncpu) - 1;
611 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
612 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
613 *retp = ret;
614
615 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
616 return error;
617
618 free(data, M_TEMP);
619
620 return 0;
621
622 }
623
624 int
625 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
626 {
627 /* {
628 syscallarg(pid_t) pid;
629 syscallarg(unsigned int) len;
630 syscallarg(unsigned long *) mask;
631 } */
632
633 if (pfind(SCARG(uap, pid)) == NULL)
634 return ESRCH;
635
636 /* Let's ignore it */
637 #ifdef DEBUG_LINUX
638 printf("linux_sys_sched_setaffinity\n");
639 #endif
640 return 0;
641 };
642 #endif /* LINUX_NPTL */
643