linux_sched.c revision 1.32 1 /* $NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Linux compatibility module. Try to deal with scheduler related syscalls.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.32 2006/06/26 07:42:00 manu Exp $");
46
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/sa.h>
54 #include <sys/syscallargs.h>
55 #include <sys/wait.h>
56 #include <sys/kauth.h>
57
58 #include <machine/cpu.h>
59
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_signal.h>
62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
63 #include <compat/linux/common/linux_emuldata.h>
64
65 #include <compat/linux/linux_syscallargs.h>
66
67 #include <compat/linux/common/linux_sched.h>
68
69 int
70 linux_sys_clone(l, v, retval)
71 struct lwp *l;
72 void *v;
73 register_t *retval;
74 {
75 struct linux_sys_clone_args /* {
76 syscallarg(int) flags;
77 syscallarg(void *) stack;
78 #ifdef LINUX_NPTL
79 syscallarg(void *) parent_tidptr;
80 syscallarg(void *) child_tidptr;
81 #endif
82 } */ *uap = v;
83 int flags, sig;
84 int error;
85 #ifdef LINUX_NPTL
86 struct linux_emuldata *led;
87 #endif
88
89 /*
90 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
91 */
92 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
93 return (EINVAL);
94
95 /*
96 * Thread group implies shared signals. Shared signals
97 * imply shared VM. This matches what Linux kernel does.
98 */
99 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
100 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
101 return (EINVAL);
102 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
103 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
104 return (EINVAL);
105
106 flags = 0;
107
108 if (SCARG(uap, flags) & LINUX_CLONE_VM)
109 flags |= FORK_SHAREVM;
110 if (SCARG(uap, flags) & LINUX_CLONE_FS)
111 flags |= FORK_SHARECWD;
112 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
113 flags |= FORK_SHAREFILES;
114 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
115 flags |= FORK_SHARESIGS;
116 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
117 flags |= FORK_PPWAIT;
118
119 /* Thread should not issue a SIGCHLD on termination */
120 if (SCARG(uap, flags) & LINUX_CLONE_THREAD) {
121 sig = 0;
122 } else {
123 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
124 if (sig < 0 || sig >= LINUX__NSIG)
125 return (EINVAL);
126 sig = linux_to_native_signo[sig];
127 }
128
129 #ifdef LINUX_NPTL
130 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
131
132 if (SCARG(uap, flags) & LINUX_CLONE_PARENT_SETTID) {
133 if (SCARG(uap, parent_tidptr) == NULL) {
134 printf("linux_sys_clone: NULL parent_tidptr\n");
135 return EINVAL;
136 }
137
138 if ((error = copyout(&l->l_proc->p_pid,
139 SCARG(uap, parent_tidptr),
140 sizeof(l->l_proc->p_pid))) != 0)
141 return error;
142 }
143
144 /* CLONE_CHILD_CLEARTID: TID clear in the child on exit() */
145 if (SCARG(uap, flags) & LINUX_CLONE_CHILD_CLEARTID)
146 led->child_clear_tid = SCARG(uap, child_tidptr);
147 else
148 led->child_clear_tid = NULL;
149
150 /* CLONE_CHILD_SETTID: TID set in the child on clone() */
151 if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID)
152 led->child_set_tid = SCARG(uap, child_tidptr);
153 else
154 led->child_set_tid = NULL;
155
156 /* CLONE_SETTLS: new Thread Local Storage in the child */
157 if (SCARG(uap, flags) & LINUX_CLONE_SETTLS)
158 led->set_tls = linux_get_newtls(l);
159 else
160 led->set_tls = 0;
161 #endif /* LINUX_NPTL */
162 /*
163 * Note that Linux does not provide a portable way of specifying
164 * the stack area; the caller must know if the stack grows up
165 * or down. So, we pass a stack size of 0, so that the code
166 * that makes this adjustment is a noop.
167 */
168 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
169 NULL, NULL, retval, NULL)) != 0)
170 return error;
171
172 return 0;
173 }
174
175 int
176 linux_sys_sched_setparam(cl, v, retval)
177 struct lwp *cl;
178 void *v;
179 register_t *retval;
180 {
181 struct linux_sys_sched_setparam_args /* {
182 syscallarg(linux_pid_t) pid;
183 syscallarg(const struct linux_sched_param *) sp;
184 } */ *uap = v;
185 struct proc *cp = cl->l_proc;
186 int error;
187 struct linux_sched_param lp;
188 struct proc *p;
189
190 /*
191 * We only check for valid parameters and return afterwards.
192 */
193
194 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
195 return EINVAL;
196
197 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
198 if (error)
199 return error;
200
201 if (SCARG(uap, pid) != 0) {
202 kauth_cred_t pc = cp->p_cred;
203
204 if ((p = pfind(SCARG(uap, pid))) == NULL)
205 return ESRCH;
206 if (!(cp == p ||
207 kauth_cred_geteuid(pc) == 0 ||
208 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
209 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
210 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
211 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
212 return EPERM;
213 }
214
215 return 0;
216 }
217
218 int
219 linux_sys_sched_getparam(cl, v, retval)
220 struct lwp *cl;
221 void *v;
222 register_t *retval;
223 {
224 struct linux_sys_sched_getparam_args /* {
225 syscallarg(linux_pid_t) pid;
226 syscallarg(struct linux_sched_param *) sp;
227 } */ *uap = v;
228 struct proc *cp = cl->l_proc;
229 struct proc *p;
230 struct linux_sched_param lp;
231
232 /*
233 * We only check for valid parameters and return a dummy priority afterwards.
234 */
235 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
236 return EINVAL;
237
238 if (SCARG(uap, pid) != 0) {
239 kauth_cred_t pc = cp->p_cred;
240
241 if ((p = pfind(SCARG(uap, pid))) == NULL)
242 return ESRCH;
243 if (!(cp == p ||
244 kauth_cred_geteuid(pc) == 0 ||
245 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
246 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
247 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
248 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
249 return EPERM;
250 }
251
252 lp.sched_priority = 0;
253 return copyout(&lp, SCARG(uap, sp), sizeof(lp));
254 }
255
256 int
257 linux_sys_sched_setscheduler(cl, v, retval)
258 struct lwp *cl;
259 void *v;
260 register_t *retval;
261 {
262 struct linux_sys_sched_setscheduler_args /* {
263 syscallarg(linux_pid_t) pid;
264 syscallarg(int) policy;
265 syscallarg(cont struct linux_sched_scheduler *) sp;
266 } */ *uap = v;
267 struct proc *cp = cl->l_proc;
268 int error;
269 struct linux_sched_param lp;
270 struct proc *p;
271
272 /*
273 * We only check for valid parameters and return afterwards.
274 */
275
276 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
277 return EINVAL;
278
279 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
280 if (error)
281 return error;
282
283 if (SCARG(uap, pid) != 0) {
284 kauth_cred_t pc = cp->p_cred;
285
286 if ((p = pfind(SCARG(uap, pid))) == NULL)
287 return ESRCH;
288 if (!(cp == p ||
289 kauth_cred_geteuid(pc) == 0 ||
290 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
291 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
292 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
293 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
294 return EPERM;
295 }
296
297 /*
298 * We can't emulate anything put the default scheduling policy.
299 */
300 if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
301 return EINVAL;
302
303 return 0;
304 }
305
306 int
307 linux_sys_sched_getscheduler(cl, v, retval)
308 struct lwp *cl;
309 void *v;
310 register_t *retval;
311 {
312 struct linux_sys_sched_getscheduler_args /* {
313 syscallarg(linux_pid_t) pid;
314 } */ *uap = v;
315 struct proc *cp = cl->l_proc;
316 struct proc *p;
317
318 *retval = -1;
319 /*
320 * We only check for valid parameters and return afterwards.
321 */
322
323 if (SCARG(uap, pid) != 0) {
324 kauth_cred_t pc = cp->p_cred;
325
326 if ((p = pfind(SCARG(uap, pid))) == NULL)
327 return ESRCH;
328 if (!(cp == p ||
329 kauth_cred_geteuid(pc) == 0 ||
330 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
331 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
332 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
333 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
334 return EPERM;
335 }
336
337 /*
338 * We can't emulate anything put the default scheduling policy.
339 */
340 *retval = LINUX_SCHED_OTHER;
341 return 0;
342 }
343
344 int
345 linux_sys_sched_yield(cl, v, retval)
346 struct lwp *cl;
347 void *v;
348 register_t *retval;
349 {
350
351 yield();
352 return 0;
353 }
354
355 int
356 linux_sys_sched_get_priority_max(cl, v, retval)
357 struct lwp *cl;
358 void *v;
359 register_t *retval;
360 {
361 struct linux_sys_sched_get_priority_max_args /* {
362 syscallarg(int) policy;
363 } */ *uap = v;
364
365 /*
366 * We can't emulate anything put the default scheduling policy.
367 */
368 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
369 *retval = -1;
370 return EINVAL;
371 }
372
373 *retval = 0;
374 return 0;
375 }
376
377 int
378 linux_sys_sched_get_priority_min(cl, v, retval)
379 struct lwp *cl;
380 void *v;
381 register_t *retval;
382 {
383 struct linux_sys_sched_get_priority_min_args /* {
384 syscallarg(int) policy;
385 } */ *uap = v;
386
387 /*
388 * We can't emulate anything put the default scheduling policy.
389 */
390 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
391 *retval = -1;
392 return EINVAL;
393 }
394
395 *retval = 0;
396 return 0;
397 }
398
399 #ifndef __m68k__
400 /* Present on everything but m68k */
401 int
402 linux_sys_exit_group(l, v, retval)
403 struct lwp *l;
404 void *v;
405 register_t *retval;
406 {
407 struct linux_sys_exit_group_args /* {
408 syscallarg(int) error_code;
409 } */ *uap = v;
410 #ifdef LINUX_NPTL
411 struct proc *p = l->l_proc;
412 struct linux_emuldata *led = p->p_emuldata;
413 struct linux_emuldata *e;
414 struct lwp *sl;
415 struct proc *sp;
416 int s;
417
418 SCHED_LOCK(s);
419 /*
420 * The calling thread is supposed to kill all threads
421 * in the same thread group (i.e. all threads created
422 * via clone(2) with CLONE_THREAD flag set).
423 */
424 LIST_FOREACH(e, &led->s->threads, threads) {
425 sp = e->proc;
426
427 if (sp == p)
428 continue;
429 #ifdef DEBUG_LINUX
430 printf("linux_sys_exit_group: kill PID %d\n", sp->p_pid);
431 #endif
432 /* wakeup any waiter */
433 if (sp->p_sigctx.ps_sigwaited &&
434 sigismember(sp->p_sigctx.ps_sigwait, SIGKILL) &&
435 sp->p_stat != SSTOP) {
436 sched_wakeup(&sp->p_sigctx.ps_sigwait);
437 }
438
439 /* post SIGKILL */
440 sigaddset(&sp->p_sigctx.ps_siglist, SIGKILL);
441 sp->p_sigctx.ps_sigcheck = 1;
442
443 /* Unblock the process if sleeping or stopped */
444 switch(sp->p_stat) {
445 case SSTOP:
446 sl = proc_unstop(sp);
447 break;
448 case SACTIVE:
449 sl = proc_representative_lwp(sp);
450 break;
451 default:
452 sl = NULL;
453 break;
454 }
455
456 if (sl == NULL) {
457 printf("linux_sys_exit_group: no lwp for process %d\n",
458 sp->p_pid);
459 continue;
460 }
461
462 if (sl->l_priority > PUSER)
463 sl->l_priority = PUSER;
464
465 switch(sl->l_stat) {
466 case LSSUSPENDED:
467 lwp_continue(sl);
468 /* FALLTHROUGH */
469 case LSSTOP:
470 case LSSLEEP:
471 case LSIDL:
472 setrunnable(sl);
473 /* FALLTHROUGH */
474 default:
475 break;
476 }
477 }
478 SCHED_UNLOCK(s);
479 #endif /* LINUX_NPTL */
480
481 exit1(l, W_EXITCODE(SCARG(uap, error_code), 0));
482 /* NOTREACHED */
483 return 0;
484 }
485 #endif /* !__m68k__ */
486
487 #ifdef LINUX_NPTL
488 int
489 linux_sys_set_tid_address(l, v, retval)
490 struct lwp *l;
491 void *v;
492 register_t *retval;
493 {
494 struct linux_sys_set_tid_address_args /* {
495 syscallarg(int *) tidptr;
496 } */ *uap = v;
497 struct linux_emuldata *led;
498
499 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
500 led->clear_tid = SCARG(uap, tid);
501
502 *retval = l->l_proc->p_pid;
503
504 return 0;
505 }
506
507 /* ARGUSED1 */
508 int
509 linux_sys_gettid(l, v, retval)
510 struct lwp *l;
511 void *v;
512 register_t *retval;
513 {
514 /* The Linux kernel does it exactly that way */
515 *retval = l->l_proc->p_pid;
516 return 0;
517 }
518
519 #ifdef LINUX_NPTL
520 /* ARGUSED1 */
521 int
522 linux_sys_getpid(l, v, retval)
523 struct lwp *l;
524 void *v;
525 register_t *retval;
526 {
527 struct linux_emuldata *led;
528
529 led = l->l_proc->p_emuldata;
530
531 /* The Linux kernel does it exactly that way */
532 *retval = led->s->group_pid;
533
534 return 0;
535 }
536
537 /* ARGUSED1 */
538 int
539 linux_sys_getppid(l, v, retval)
540 struct lwp *l;
541 void *v;
542 register_t *retval;
543 {
544 struct proc *p = l->l_proc;
545 struct linux_emuldata *led = p->p_emuldata;
546 struct proc *glp;
547 struct proc *pp;
548
549 /* Find the thread group leader's parent */
550 if ((glp = pfind(led->s->group_pid)) == NULL) {
551 /* Maybe panic... */
552 printf("linux_sys_getppid: missing group leader PID %d\n",
553 led->s->group_pid);
554 return -1;
555 }
556 pp = glp->p_pptr;
557
558 /* If this is a Linux process too, return thread group PID */
559 if (pp->p_emul == p->p_emul) {
560 struct linux_emuldata *pled;
561
562 pled = pp->p_emuldata;
563 *retval = pled->s->group_pid;
564 } else {
565 *retval = pp->p_pid;
566 }
567
568 return 0;
569 }
570 #endif /* LINUX_NPTL */
571
572 int
573 linux_sys_sched_getaffinity(l, v, retval)
574 struct lwp *l;
575 void *v;
576 register_t *retval;
577 {
578 struct linux_sys_sched_getaffinity_args /* {
579 syscallarg(pid_t) pid;
580 syscallarg(unsigned int) len;
581 syscallarg(unsigned long *) mask;
582 } */ *uap = v;
583 int error;
584 int ret;
585 int ncpu;
586 int name[2];
587 size_t sz;
588 char *data;
589 int *retp;
590
591 if (SCARG(uap, mask) == NULL)
592 return EINVAL;
593
594 if (SCARG(uap, len) < sizeof(int))
595 return EINVAL;
596
597 if (pfind(SCARG(uap, pid)) == NULL)
598 return ESRCH;
599
600 /*
601 * return the actual number of CPU, tag all of them as available
602 * The result is a mask, the first CPU being in the least significant
603 * bit.
604 */
605 name[0] = CTL_HW;
606 name[1] = HW_NCPU;
607 sz = sizeof(ncpu);
608
609 if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0)
610 return error;
611
612 ret = (1 << ncpu) - 1;
613
614 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
615 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
616 *retp = ret;
617
618 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
619 return error;
620
621 free(data, M_TEMP);
622
623 return 0;
624
625 }
626
627 int
628 linux_sys_sched_setaffinity(l, v, retval)
629 struct lwp *l;
630 void *v;
631 register_t *retval;
632 {
633 struct linux_sys_sched_setaffinity_args /* {
634 syscallarg(pid_t) pid;
635 syscallarg(unsigned int) len;
636 syscallarg(unsigned long *) mask;
637 } */ *uap = v;
638
639 if (pfind(SCARG(uap, pid)) == NULL)
640 return ESRCH;
641
642 /* Let's ignore it */
643 #ifdef DEBUG_LINUX
644 printf("linux_sys_sched_setaffinity\n");
645 #endif
646 return 0;
647 };
648 #endif /* LINUX_NPTL */
649