/*	$NetBSD: linux_sched.c,v 1.44 2007/10/19 18:52:11 njoly Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center; by Matthias Scheler.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*
41 * Linux compatibility module. Try to deal with scheduler related syscalls.
42 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.44 2007/10/19 18:52:11 njoly Exp $");
46
47 #include <sys/param.h>
48 #include <sys/mount.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/sysctl.h>
52 #include <sys/malloc.h>
53 #include <sys/syscallargs.h>
54 #include <sys/wait.h>
55 #include <sys/kauth.h>
56 #include <sys/ptrace.h>
57
58 #include <sys/cpu.h>
59
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_signal.h>
62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
63 #include <compat/linux/common/linux_emuldata.h>
64 #include <compat/linux/common/linux_ipc.h>
65 #include <compat/linux/common/linux_sem.h>
66
67 #include <compat/linux/linux_syscallargs.h>
68
69 #include <compat/linux/common/linux_sched.h>
70
71 int
72 linux_sys_clone(l, v, retval)
73 struct lwp *l;
74 void *v;
75 register_t *retval;
76 {
77 struct linux_sys_clone_args /* {
78 syscallarg(int) flags;
79 syscallarg(void *) stack;
80 #ifdef LINUX_NPTL
81 syscallarg(void *) parent_tidptr;
82 syscallarg(void *) child_tidptr;
83 #endif
84 } */ *uap = v;
85 int flags, sig;
86 int error;
87 #ifdef LINUX_NPTL
88 struct linux_emuldata *led;
89 #endif
90
91 /*
92 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
93 */
94 if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
95 return (EINVAL);
96
97 /*
98 * Thread group implies shared signals. Shared signals
99 * imply shared VM. This matches what Linux kernel does.
100 */
101 if (SCARG(uap, flags) & LINUX_CLONE_THREAD
102 && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
103 return (EINVAL);
104 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
105 && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
106 return (EINVAL);
107
108 flags = 0;
109
110 if (SCARG(uap, flags) & LINUX_CLONE_VM)
111 flags |= FORK_SHAREVM;
112 if (SCARG(uap, flags) & LINUX_CLONE_FS)
113 flags |= FORK_SHARECWD;
114 if (SCARG(uap, flags) & LINUX_CLONE_FILES)
115 flags |= FORK_SHAREFILES;
116 if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
117 flags |= FORK_SHARESIGS;
118 if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
119 flags |= FORK_PPWAIT;
120
121 sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
122 if (sig < 0 || sig >= LINUX__NSIG)
123 return (EINVAL);
124 sig = linux_to_native_signo[sig];
125
126 #ifdef LINUX_NPTL
127 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
128
129 led->parent_tidptr = SCARG(uap, parent_tidptr);
130 led->child_tidptr = SCARG(uap, child_tidptr);
131 led->clone_flags = SCARG(uap, flags);
132 #endif /* LINUX_NPTL */
133
134 /*
135 * Note that Linux does not provide a portable way of specifying
136 * the stack area; the caller must know if the stack grows up
137 * or down. So, we pass a stack size of 0, so that the code
138 * that makes this adjustment is a noop.
139 */
140 if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
141 NULL, NULL, retval, NULL)) != 0)
142 return error;
143
144 return 0;
145 }
146
147 int
148 linux_sys_sched_setparam(struct lwp *cl, void *v, register_t *retval)
149 {
150 struct linux_sys_sched_setparam_args /* {
151 syscallarg(linux_pid_t) pid;
152 syscallarg(const struct linux_sched_param *) sp;
153 } */ *uap = v;
154 int error;
155 struct linux_sched_param lp;
156 struct proc *p;
157
158 /*
159 * We only check for valid parameters and return afterwards.
160 */
161
162 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
163 return EINVAL;
164
165 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
166 if (error)
167 return error;
168
169 if (SCARG(uap, pid) != 0) {
170 kauth_cred_t pc = cl->l_cred;
171
172 if ((p = pfind(SCARG(uap, pid))) == NULL)
173 return ESRCH;
174 if (!(cl->l_proc == p ||
175 kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
176 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
177 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
178 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
179 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
180 return EPERM;
181 }
182
183 return 0;
184 }
185
186 int
187 linux_sys_sched_getparam(struct lwp *cl, void *v, register_t *retval)
188 {
189 struct linux_sys_sched_getparam_args /* {
190 syscallarg(linux_pid_t) pid;
191 syscallarg(struct linux_sched_param *) sp;
192 } */ *uap = v;
193 struct proc *p;
194 struct linux_sched_param lp;
195
196 /*
197 * We only check for valid parameters and return a dummy priority afterwards.
198 */
199 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
200 return EINVAL;
201
202 if (SCARG(uap, pid) != 0) {
203 kauth_cred_t pc = cl->l_cred;
204
205 if ((p = pfind(SCARG(uap, pid))) == NULL)
206 return ESRCH;
207 if (!(cl->l_proc == p ||
208 kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
209 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
210 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
211 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
212 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
213 return EPERM;
214 }
215
216 lp.sched_priority = 0;
217 return copyout(&lp, SCARG(uap, sp), sizeof(lp));
218 }
219
220 int
221 linux_sys_sched_setscheduler(struct lwp *cl, void *v,
222 register_t *retval)
223 {
224 struct linux_sys_sched_setscheduler_args /* {
225 syscallarg(linux_pid_t) pid;
226 syscallarg(int) policy;
227 syscallarg(cont struct linux_sched_scheduler *) sp;
228 } */ *uap = v;
229 int error;
230 struct linux_sched_param lp;
231 struct proc *p;
232
233 /*
234 * We only check for valid parameters and return afterwards.
235 */
236
237 if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
238 return EINVAL;
239
240 error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
241 if (error)
242 return error;
243
244 if (SCARG(uap, pid) != 0) {
245 kauth_cred_t pc = cl->l_cred;
246
247 if ((p = pfind(SCARG(uap, pid))) == NULL)
248 return ESRCH;
249 if (!(cl->l_proc == p ||
250 kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
251 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
252 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
253 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
254 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
255 return EPERM;
256 }
257
258 return 0;
259 /*
260 * We can't emulate anything put the default scheduling policy.
261 */
262 if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
263 return EINVAL;
264
265 return 0;
266 }
267
268 int
269 linux_sys_sched_getscheduler(cl, v, retval)
270 struct lwp *cl;
271 void *v;
272 register_t *retval;
273 {
274 struct linux_sys_sched_getscheduler_args /* {
275 syscallarg(linux_pid_t) pid;
276 } */ *uap = v;
277 struct proc *p;
278
279 *retval = -1;
280 /*
281 * We only check for valid parameters and return afterwards.
282 */
283
284 if (SCARG(uap, pid) != 0) {
285 kauth_cred_t pc = cl->l_cred;
286
287 if ((p = pfind(SCARG(uap, pid))) == NULL)
288 return ESRCH;
289 if (!(cl->l_proc == p ||
290 kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
291 kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
292 kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
293 kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
294 kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
295 return EPERM;
296 }
297
298 /*
299 * We can't emulate anything put the default scheduling policy.
300 */
301 *retval = LINUX_SCHED_OTHER;
302 return 0;
303 }
304
305 int
306 linux_sys_sched_yield(struct lwp *cl, void *v,
307 register_t *retval)
308 {
309
310 yield();
311 return 0;
312 }
313
314 int
315 linux_sys_sched_get_priority_max(struct lwp *cl, void *v,
316 register_t *retval)
317 {
318 struct linux_sys_sched_get_priority_max_args /* {
319 syscallarg(int) policy;
320 } */ *uap = v;
321
322 /*
323 * We can't emulate anything put the default scheduling policy.
324 */
325 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
326 *retval = -1;
327 return EINVAL;
328 }
329
330 *retval = 0;
331 return 0;
332 }
333
334 int
335 linux_sys_sched_get_priority_min(struct lwp *cl, void *v,
336 register_t *retval)
337 {
338 struct linux_sys_sched_get_priority_min_args /* {
339 syscallarg(int) policy;
340 } */ *uap = v;
341
342 /*
343 * We can't emulate anything put the default scheduling policy.
344 */
345 if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
346 *retval = -1;
347 return EINVAL;
348 }
349
350 *retval = 0;
351 return 0;
352 }
353
#ifndef __m68k__
/* Present on everything but m68k */
/*
 * exit_group(2) emulation: terminate every thread in the caller's
 * thread group.  Without NPTL (or when the process never used it) this
 * degenerates to a plain exit(2).
 */
int
linux_sys_exit_group(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
#ifdef LINUX_NPTL
	struct linux_sys_exit_group_args /* {
		syscallarg(int) error_code;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

	if (led->s->flags & LINUX_LES_USE_NPTL) {

#ifdef DEBUG_LINUX
		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
		    led->s->refs);
#endif

		/*
		 * The calling thread is supposed to kill all threads
		 * in the same thread group (i.e. all threads created
		 * via clone(2) with CLONE_THREAD flag set).
		 *
		 * If there is only one thread, things are quite simple
		 */
		if (led->s->refs == 1)
			return sys_exit(l, v, retval);

#ifdef DEBUG_LINUX
		printf("%s:%d\n", __func__, __LINE__);
#endif

		/*
		 * Record the exit status in the shared emuldata first;
		 * the exit hook reports it for every killed thread.
		 */
		led->s->flags |= LINUX_LES_INEXITGROUP;
		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

		/*
		 * Kill all threads in the group. The emulation exit hook takes
		 * care of hiding the zombies and reporting the exit code
		 * properly.
		 */
		mutex_enter(&proclist_mutex);
		LIST_FOREACH(e, &led->s->threads, threads) {
			/* Skip ourselves; we die last, below. */
			if (e->proc == p)
				continue;

#ifdef DEBUG_LINUX
			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
			psignal(e->proc, SIGKILL);
		}

		/* Now, kill ourselves */
		psignal(p, SIGKILL);
		mutex_exit(&proclist_mutex);

		return 0;

	}
#endif /* LINUX_NPTL */

	return sys_exit(l, v, retval);
}
#endif /* !__m68k__ */
422
423 #ifdef LINUX_NPTL
424 int
425 linux_sys_set_tid_address(l, v, retval)
426 struct lwp *l;
427 void *v;
428 register_t *retval;
429 {
430 struct linux_sys_set_tid_address_args /* {
431 syscallarg(int *) tidptr;
432 } */ *uap = v;
433 struct linux_emuldata *led;
434
435 led = (struct linux_emuldata *)l->l_proc->p_emuldata;
436 led->clear_tid = SCARG(uap, tid);
437
438 led->s->flags |= LINUX_LES_USE_NPTL;
439
440 *retval = l->l_proc->p_pid;
441
442 return 0;
443 }
444
445 /* ARGUSED1 */
446 int
447 linux_sys_gettid(l, v, retval)
448 struct lwp *l;
449 void *v;
450 register_t *retval;
451 {
452 /* The Linux kernel does it exactly that way */
453 *retval = l->l_proc->p_pid;
454 return 0;
455 }
456
457 #ifdef LINUX_NPTL
458 /* ARGUSED1 */
459 int
460 linux_sys_getpid(l, v, retval)
461 struct lwp *l;
462 void *v;
463 register_t *retval;
464 {
465 struct linux_emuldata *led = l->l_proc->p_emuldata;
466
467 if (led->s->flags & LINUX_LES_USE_NPTL) {
468 /* The Linux kernel does it exactly that way */
469 *retval = led->s->group_pid;
470 } else {
471 *retval = l->l_proc->p_pid;
472 }
473
474 return 0;
475 }
476
477 /* ARGUSED1 */
478 int
479 linux_sys_getppid(l, v, retval)
480 struct lwp *l;
481 void *v;
482 register_t *retval;
483 {
484 struct proc *p = l->l_proc;
485 struct linux_emuldata *led = p->p_emuldata;
486 struct proc *glp;
487 struct proc *pp;
488
489 if (led->s->flags & LINUX_LES_USE_NPTL) {
490
491 /* Find the thread group leader's parent */
492 if ((glp = pfind(led->s->group_pid)) == NULL) {
493 /* Maybe panic... */
494 printf("linux_sys_getppid: missing group leader PID"
495 " %d\n", led->s->group_pid);
496 return -1;
497 }
498 pp = glp->p_pptr;
499
500 /* If this is a Linux process too, return thread group PID */
501 if (pp->p_emul == p->p_emul) {
502 struct linux_emuldata *pled;
503
504 pled = pp->p_emuldata;
505 *retval = pled->s->group_pid;
506 } else {
507 *retval = pp->p_pid;
508 }
509
510 } else {
511 *retval = p->p_pptr->p_pid;
512 }
513
514 return 0;
515 }
516 #endif /* LINUX_NPTL */
517
518 int
519 linux_sys_sched_getaffinity(l, v, retval)
520 struct lwp *l;
521 void *v;
522 register_t *retval;
523 {
524 struct linux_sys_sched_getaffinity_args /* {
525 syscallarg(pid_t) pid;
526 syscallarg(unsigned int) len;
527 syscallarg(unsigned long *) mask;
528 } */ *uap = v;
529 int error;
530 int ret;
531 char *data;
532 int *retp;
533
534 if (SCARG(uap, mask) == NULL)
535 return EINVAL;
536
537 if (SCARG(uap, len) < sizeof(int))
538 return EINVAL;
539
540 if (pfind(SCARG(uap, pid)) == NULL)
541 return ESRCH;
542
543 /*
544 * return the actual number of CPU, tag all of them as available
545 * The result is a mask, the first CPU being in the least significant
546 * bit.
547 */
548 ret = (1 << ncpu) - 1;
549 data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
550 retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
551 *retp = ret;
552
553 if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
554 return error;
555
556 free(data, M_TEMP);
557
558 return 0;
559
560 }
561
562 int
563 linux_sys_sched_setaffinity(l, v, retval)
564 struct lwp *l;
565 void *v;
566 register_t *retval;
567 {
568 struct linux_sys_sched_setaffinity_args /* {
569 syscallarg(pid_t) pid;
570 syscallarg(unsigned int) len;
571 syscallarg(unsigned long *) mask;
572 } */ *uap = v;
573
574 if (pfind(SCARG(uap, pid)) == NULL)
575 return ESRCH;
576
577 /* Let's ignore it */
578 #ifdef DEBUG_LINUX
579 printf("linux_sys_sched_setaffinity\n");
580 #endif
581 return 0;
582 };
583 #endif /* LINUX_NPTL */
584