linux_sched.c revision 1.56 1 /* $NetBSD: linux_sched.c,v 1.56 2008/05/05 02:29:31 jmcneill Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Linux compatibility module. Try to deal with scheduler related syscalls.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.56 2008/05/05 02:29:31 jmcneill Exp $");
39
40 #include <sys/param.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/sysctl.h>
45 #include <sys/malloc.h>
46 #include <sys/syscallargs.h>
47 #include <sys/wait.h>
48 #include <sys/kauth.h>
49 #include <sys/ptrace.h>
50
51 #include <sys/cpu.h>
52
53 #include <compat/linux/common/linux_types.h>
54 #include <compat/linux/common/linux_signal.h>
55 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
56 #include <compat/linux/common/linux_emuldata.h>
57 #include <compat/linux/common/linux_ipc.h>
58 #include <compat/linux/common/linux_sem.h>
59
60 #include <compat/linux/linux_syscallargs.h>
61
62 #include <compat/linux/common/linux_sched.h>
63
/*
 * clone(2) emulation: create a new process or thread.
 *
 * Translates the Linux CLONE_* sharing flags into native FORK_* flags
 * and dispatches to fork1().  The low byte of the flags argument
 * (LINUX_CLONE_CSIGNAL) selects the signal sent to the parent when the
 * child exits; it is converted to a native signal number first.
 */
int
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);

	/*
	 * Thread group implies shared signals.  Shared signals
	 * imply shared VM.  This matches what the Linux kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	/* Map each Linux sharing flag onto its native fork1() flag. */
	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	/* The child-exit signal lives in the low byte of the clone flags. */
	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	/*
	 * Stash the NPTL TID pointers and the raw clone flags in the
	 * per-process emulation data.  They are not read again in this
	 * function; presumably the fork hooks consume them -- confirm
	 * against linux_emuldata users.
	 */
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */

	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  So, we pass a stack size of 0, so that the code
	 * that makes this adjustment is a noop.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}
136
137 /*
138 * linux realtime priority
139 *
140 * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
141 *
142 * - SCHED_OTHER tasks don't have realtime priorities.
143 * in particular, sched_param::sched_priority is always 0.
144 */
145
146 #define LINUX_SCHED_RTPRIO_MIN 1
147 #define LINUX_SCHED_RTPRIO_MAX 99
148
149 static int
150 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
151 int *native_policy, struct sched_param *native_params)
152 {
153
154 switch (linux_policy) {
155 case LINUX_SCHED_OTHER:
156 if (native_policy != NULL) {
157 *native_policy = SCHED_OTHER;
158 }
159 break;
160
161 case LINUX_SCHED_FIFO:
162 if (native_policy != NULL) {
163 *native_policy = SCHED_FIFO;
164 }
165 break;
166
167 case LINUX_SCHED_RR:
168 if (native_policy != NULL) {
169 *native_policy = SCHED_RR;
170 }
171 break;
172
173 default:
174 return EINVAL;
175 }
176
177 if (linux_params != NULL) {
178 int prio = linux_params->sched_priority;
179
180 KASSERT(native_params != NULL);
181
182 if (linux_policy == LINUX_SCHED_OTHER) {
183 if (prio != 0) {
184 return EINVAL;
185 }
186 native_params->sched_priority = PRI_NONE; /* XXX */
187 } else {
188 if (prio < LINUX_SCHED_RTPRIO_MIN ||
189 prio > LINUX_SCHED_RTPRIO_MAX) {
190 return EINVAL;
191 }
192 native_params->sched_priority =
193 (prio - LINUX_SCHED_RTPRIO_MIN)
194 * (SCHED_PRI_MAX - SCHED_PRI_MIN)
195 / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
196 + SCHED_PRI_MIN;
197 }
198 }
199
200 return 0;
201 }
202
/*
 * Convert a native scheduling policy and (optionally) a native
 * sched_param into their Linux equivalents.
 *
 * native_params/linux_params may be NULL when only the policy is of
 * interest.  NOTE(review): when native_params is non-NULL this code
 * dereferences linux_params unconditionally -- the KASSERT that would
 * document this is compiled out below.  Callers must pass both or
 * neither.
 *
 * Returns 0; panics on an unknown native policy, since a policy
 * obtained from the kernel should always be one of the three values.
 */
static int
sched_native2linux(int native_policy, struct sched_param *native_params,
    int *linux_policy, struct linux_sched_param *linux_params)
{

	switch (native_policy) {
	case SCHED_OTHER:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_OTHER;
		}
		break;

	case SCHED_FIFO:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_FIFO;
		}
		break;

	case SCHED_RR:
		if (linux_policy != NULL) {
			*linux_policy = LINUX_SCHED_RR;
		}
		break;

	default:
		panic("%s: unknown policy %d\n", __func__, native_policy);
	}

	if (native_params != NULL) {
		int prio = native_params->sched_priority;

#if 0
		KASSERT(prio >= SCHED_PRI_MIN);
		KASSERT(prio <= SCHED_PRI_MAX);
		KASSERT(linux_params != NULL);
#endif

#ifdef DEBUG_LINUX
		printf("native2linux: native: policy %d, priority %d\n",
		    native_policy, prio);
#endif

		if (native_policy == SCHED_OTHER) {
			/* SCHED_OTHER tasks always report priority 0. */
			linux_params->sched_priority = 0;
		} else {
			/*
			 * Scale the native realtime priority range
			 * linearly onto the Linux [1, 99] range.
			 */
			linux_params->sched_priority =
			    (prio - SCHED_PRI_MIN)
			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
			    + LINUX_SCHED_RTPRIO_MIN;
		}
#ifdef DEBUG_LINUX
		/* -1: the Linux policy was not computed in this branch. */
		printf("native2linux: linux: policy %d, priority %d\n",
		    -1, linux_params->sched_priority);
#endif
	}

	return 0;
}
262
263 int
264 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
265 {
266 /* {
267 syscallarg(linux_pid_t) pid;
268 syscallarg(const struct linux_sched_param *) sp;
269 } */
270 int error, policy;
271 struct linux_sched_param lp;
272 struct sched_param sp;
273
274 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
275 error = EINVAL;
276 goto out;
277 }
278
279 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
280 if (error)
281 goto out;
282
283 /* We need the current policy in Linux terms. */
284 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
285 if (error)
286 goto out;
287 error = sched_native2linux(policy, NULL, &policy, NULL);
288 if (error)
289 goto out;
290
291 error = sched_linux2native(policy, &lp, &policy, &sp);
292 if (error)
293 goto out;
294
295 error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
296 if (error)
297 goto out;
298
299 out:
300 return error;
301 }
302
303 int
304 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
305 {
306 /* {
307 syscallarg(linux_pid_t) pid;
308 syscallarg(struct linux_sched_param *) sp;
309 } */
310 struct linux_sched_param lp;
311 struct sched_param sp;
312 int error, policy;
313
314 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
315 error = EINVAL;
316 goto out;
317 }
318
319 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
320 if (error)
321 goto out;
322 #ifdef DEBUG_LINUX
323 printf("getparam: native: policy %d, priority %d\n",
324 policy, sp.sched_priority);
325 #endif
326
327 error = sched_native2linux(policy, &sp, NULL, &lp);
328 if (error)
329 goto out;
330 #ifdef DEBUG_LINUX
331 printf("getparam: linux: policy %d, priority %d\n",
332 policy, lp.sched_priority);
333 #endif
334
335 error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
336 if (error)
337 goto out;
338
339 out:
340 return error;
341 }
342
/*
 * sched_setscheduler(2): set both the scheduling policy and the
 * priority of a process.
 */
int
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
{
	/* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */
	int error, policy;
	struct linux_sched_param lp;
	struct sched_param sp;

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
		error = EINVAL;
		goto out;
	}

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: linux: policy %d, priority %d\n",
	    SCARG(uap, policy), lp.sched_priority);
#endif

	/* Validate the requested policy/priority and convert to native. */
	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
	if (error)
		goto out;
#ifdef DEBUG_LINUX
	printf("setscheduler: native: policy %d, priority %d\n",
	    policy, sp.sched_priority);
#endif

	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
	if (error)
		goto out;

out:
	return error;
}
383
384 int
385 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
386 {
387 /* {
388 syscallarg(linux_pid_t) pid;
389 } */
390 int error, policy;
391
392 *retval = -1;
393
394 error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
395 if (error)
396 goto out;
397
398 error = sched_native2linux(policy, NULL, &policy, NULL);
399 if (error)
400 goto out;
401
402 *retval = policy;
403
404 out:
405 return error;
406 }
407
/*
 * sched_yield(2): voluntarily give up the CPU; maps directly onto the
 * native yield().
 */
int
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
	return 0;
}
415
416 int
417 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
418 {
419 /* {
420 syscallarg(int) policy;
421 } */
422
423 switch (SCARG(uap, policy)) {
424 case LINUX_SCHED_OTHER:
425 *retval = 0;
426 break;
427 case LINUX_SCHED_FIFO:
428 case LINUX_SCHED_RR:
429 *retval = LINUX_SCHED_RTPRIO_MAX;
430 break;
431 default:
432 return EINVAL;
433 }
434
435 return 0;
436 }
437
438 int
439 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
440 {
441 /* {
442 syscallarg(int) policy;
443 } */
444
445 switch (SCARG(uap, policy)) {
446 case LINUX_SCHED_OTHER:
447 *retval = 0;
448 break;
449 case LINUX_SCHED_FIFO:
450 case LINUX_SCHED_RR:
451 *retval = LINUX_SCHED_RTPRIO_MIN;
452 break;
453 default:
454 return EINVAL;
455 }
456
457 return 0;
458 }
459
460 #ifndef __m68k__
461 /* Present on everything but m68k */
/*
 * exit_group(2): terminate every thread in the calling thread group.
 *
 * Without LINUX_NPTL, or when the process never enabled NPTL
 * threading, this degenerates to plain exit(2).
 */
int
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
{
#ifdef LINUX_NPTL
	/* {
		syscallarg(int) error_code;
	} */
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple
		 */
		if (led->s->refs == 1)
			return sys_exit(l, (const void *)uap, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		/*
		 * Record the group exit status in the shared emulation
		 * data and flag that an exit_group is in progress, all
		 * under proc_lock.
		 */
		mutex_enter(proc_lock);
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group. The emulation exit hook takes
		 * care of hiding the zombies and reporting the exit code
		 * properly.
		 */
		LIST_FOREACH(e, &led->s->threads, threads) {
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves */
		psignal(p, SIGKILL);
		mutex_exit(proc_lock);

		/* Termination happens via the SIGKILL just posted. */
		return 0;

	}
#endif /* LINUX_NPTL */

	return sys_exit(l, (const void *)uap, retval);
}
524 #endif /* !__m68k__ */
525
526 #ifdef LINUX_NPTL
/*
 * set_tid_address(2): record the user-space address stashed in
 * clear_tid (presumably acted upon at thread exit by the emulation
 * hooks -- not visible here) and mark the process as using NPTL.
 *
 * Returns the caller's PID, as Linux does.
 */
int
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) tid;
	} */
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	/* From now on, treat this process group as an NPTL one. */
	led->s->flags |= LINUX_LES_USE_NPTL;

	*retval = l->l_proc->p_pid;

	return 0;
}
544
545 /* ARGUSED1 */
546 int
547 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
548 {
549 /* The Linux kernel does it exactly that way */
550 *retval = l->l_proc->p_pid;
551 return 0;
552 }
553
554 #ifdef LINUX_NPTL
555 /* ARGUSED1 */
556 int
557 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
558 {
559 struct linux_emuldata *led = l->l_proc->p_emuldata;
560
561 if (led->s->flags & LINUX_LES_USE_NPTL) {
562 /* The Linux kernel does it exactly that way */
563 *retval = led->s->group_pid;
564 } else {
565 *retval = l->l_proc->p_pid;
566 }
567
568 return 0;
569 }
570
571 /* ARGUSED1 */
/* ARGSUSED1 */
/*
 * getppid(2): return the parent PID.
 *
 * For NPTL processes the parent of the thread group leader is looked
 * up; if that parent is itself a Linux process, its thread group PID
 * is reported instead of its native PID.
 */
int
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;	/* thread group leader */
	struct proc *pp;	/* leader's parent */

	mutex_enter(proc_lock);
	if (led->s->flags & LINUX_LES_USE_NPTL) {

		/* Find the thread group leader's parent */
		if ((glp = p_find(led->s->group_pid, PFIND_LOCKED)) == NULL) {
			/* Maybe panic... */
			printf("linux_sys_getppid: missing group leader PID"
			    " %d\n", led->s->group_pid);
			mutex_exit(proc_lock);
			/* NOTE(review): -1 is not a valid errno value. */
			return -1;
		}
		pp = glp->p_pptr;

		/* If this is a Linux process too, return thread group PID */
		if (pp->p_emul == p->p_emul) {
			struct linux_emuldata *pled;

			pled = pp->p_emuldata;
			*retval = pled->s->group_pid;
		} else {
			*retval = pp->p_pid;
		}

	} else {
		*retval = p->p_pptr->p_pid;
	}
	mutex_exit(proc_lock);

	return 0;
}
610 #endif /* LINUX_NPTL */
611
612 int
613 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
614 {
615 /* {
616 syscallarg(pid_t) pid;
617 syscallarg(unsigned int) len;
618 syscallarg(unsigned long *) mask;
619 } */
620 int error;
621 int ret;
622 char *data;
623 int *retp;
624
625 if (SCARG(uap, mask) == NULL)
626 return EINVAL;
627
628 if (SCARG(uap, len) < sizeof(int))
629 return EINVAL;
630
631 if (pfind(SCARG(uap, pid)) == NULL)
632 return ESRCH;
633
634 /*
635 * return the actual number of CPU, tag all of them as available
636 * The result is a mask, the first CPU being in the least significant
637 * bit.
638 */
639 ret = (1 << ncpu) - 1;
640 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
641 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
642 *retp = ret;
643
644 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
645 return error;
646
647 free(data, M_TEMP);
648
649 return 0;
650
651 }
652
653 int
654 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
655 {
656 /* {
657 syscallarg(pid_t) pid;
658 syscallarg(unsigned int) len;
659 syscallarg(unsigned long *) mask;
660 } */
661
662 if (pfind(SCARG(uap, pid)) == NULL)
663 return ESRCH;
664
665 /* Let's ignore it */
666 #ifdef DEBUG_LINUX
667 printf("linux_sys_sched_setaffinity\n");
668 #endif
669 return 0;
670 };
671 #endif /* LINUX_NPTL */
672