/*	$NetBSD: linux_sched.c,v 1.29.8.4 2006/09/03 15:23:41 yamt Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center; by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Linux compatibility module. Try to deal with scheduler related syscalls.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.29.8.4 2006/09/03 15:23:41 yamt Exp $");

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/ptrace.h>

#include <machine/cpu.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>	/* For LINUX_NPTL */
#include <compat/linux/common/linux_emuldata.h>

#include <compat/linux/linux_syscallargs.h>

#include <compat/linux/common/linux_sched.h>

int
linux_sys_clone(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_clone_args /* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
#ifdef LINUX_NPTL
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) child_tidptr;
#endif
	} */ *uap = v;
	int flags, sig;
	int error;
#ifdef LINUX_NPTL
	struct linux_emuldata *led;
#endif

	/*
	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
	 */
	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
		return (EINVAL);
	/*
	 * A thread group implies shared signal handlers, and shared
	 * signal handlers imply shared VM.  This matches what the Linux
	 * kernel does.
	 */
	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
		return (EINVAL);
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
		return (EINVAL);

	flags = 0;

	if (SCARG(uap, flags) & LINUX_CLONE_VM)
		flags |= FORK_SHAREVM;
	if (SCARG(uap, flags) & LINUX_CLONE_FS)
		flags |= FORK_SHARECWD;
	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
		flags |= FORK_SHAREFILES;
	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
		flags |= FORK_SHARESIGS;
	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
		flags |= FORK_PPWAIT;

	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
	if (sig < 0 || sig >= LINUX__NSIG)
		return (EINVAL);
	sig = linux_to_native_signo[sig];

#ifdef LINUX_NPTL
	led = (struct linux_emuldata *)l->l_proc->p_emuldata;

	led->parent_tidptr = SCARG(uap, parent_tidptr);
	led->child_tidptr = SCARG(uap, child_tidptr);
	led->clone_flags = SCARG(uap, flags);
#endif /* LINUX_NPTL */
	/*
	 * Note that Linux does not provide a portable way of specifying
	 * the stack area; the caller must know if the stack grows up
	 * or down.  We therefore pass a stack size of 0, so that the
	 * code that makes this adjustment is a no-op.
	 */
	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
	    NULL, NULL, retval, NULL)) != 0)
		return error;

	return 0;
}
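
/*
 * Example, for reference only: an NPTL-style Linux thread library
 * typically creates threads with a call along the lines of
 *
 *	clone(fn, stack, CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
 *	    CLONE_THREAD | CLONE_SETTLS | CLONE_PARENT_SETTID |
 *	    CLONE_CHILD_CLEARTID | CLONE_SYSVSEM, arg, ...);
 *
 * which linux_sys_clone() above turns into fork1() with FORK_SHAREVM |
 * FORK_SHARECWD | FORK_SHAREFILES | FORK_SHARESIGS and no exit signal
 * (the CSIGNAL bits are zero).  The exact flag set varies with the C
 * library version; this is only an illustration.
 */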

int
linux_sys_sched_setparam(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setparam_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(const struct linux_sched_param *) sp;
	} */ *uap = v;
	int error;
	struct linux_sched_param lp;
	struct proc *p;

	/*
	 * We only check for valid parameters and return afterwards.
	 */

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		return error;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
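		/*
		 * Permission check: allow the call if the target is our
		 * own process, if we are the superuser, or if any of our
		 * real/effective user IDs matches any of the target's
		 * real/effective user IDs (roughly the rule Linux applies
		 * for these scheduler calls).  The same test is repeated
		 * in the other sched_*() entry points below.
		 */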
		if (!(cl->l_proc == p ||
		    kauth_cred_geteuid(pc) == 0 ||
		    kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	return 0;
}

int
linux_sys_sched_getparam(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getparam_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(struct linux_sched_param *) sp;
	} */ *uap = v;
	struct proc *p;
	struct linux_sched_param lp;

	/*
	 * We only check for valid parameters and return a dummy
	 * priority afterwards.
	 */
	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		    kauth_cred_geteuid(pc) == 0 ||
		    kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	lp.sched_priority = 0;
	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
}
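
/*
 * Note: with the dummy priority above, a Linux caller doing e.g.
 *
 *	struct sched_param sp;
 *	sched_getparam(0, &sp);
 *
 * always sees sp.sched_priority == 0, which is consistent with the
 * SCHED_OTHER-only world reported by the other stubs in this file.
 */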

int
linux_sys_sched_setscheduler(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setscheduler_args /* {
		syscallarg(linux_pid_t) pid;
		syscallarg(int) policy;
		syscallarg(const struct linux_sched_param *) sp;
	} */ *uap = v;
	int error;
	struct linux_sched_param lp;
	struct proc *p;

	/*
	 * We only check for valid parameters and return afterwards.
	 */

	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
	if (error)
		return error;

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		    kauth_cred_geteuid(pc) == 0 ||
		    kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	/*
	 * We can't emulate anything but the default scheduling policy,
	 * so requests for other policies are accepted and silently
	 * ignored; the stricter check below is left unreached.
	 */
	return 0;

	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
		return EINVAL;

	return 0;
}

int
linux_sys_sched_getscheduler(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getscheduler_args /* {
		syscallarg(linux_pid_t) pid;
	} */ *uap = v;
	struct proc *p;

	*retval = -1;
	/*
	 * We only check for valid parameters and return afterwards.
	 */

	if (SCARG(uap, pid) != 0) {
		kauth_cred_t pc = cl->l_cred;

		if ((p = pfind(SCARG(uap, pid))) == NULL)
			return ESRCH;
		if (!(cl->l_proc == p ||
		    kauth_cred_geteuid(pc) == 0 ||
		    kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
		    kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
		    kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
			return EPERM;
	}

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	*retval = LINUX_SCHED_OTHER;
	return 0;
}

int
linux_sys_sched_yield(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{

	yield();
	return 0;
}

int
linux_sys_sched_get_priority_max(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_get_priority_max_args /* {
		syscallarg(int) policy;
	} */ *uap = v;

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}

int
linux_sys_sched_get_priority_min(cl, v, retval)
	struct lwp *cl;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_get_priority_min_args /* {
		syscallarg(int) policy;
	} */ *uap = v;

	/*
	 * We can't emulate anything but the default scheduling policy.
	 */
	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
		*retval = -1;
		return EINVAL;
	}

	*retval = 0;
	return 0;
}
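
/*
 * Note: returning 0 from both sched_get_priority_max() and
 * sched_get_priority_min() above matches native Linux, where the
 * static priority range for SCHED_OTHER is 0..0.
 */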

#ifndef __m68k__
/* Present on everything but m68k */
int
linux_sys_exit_group(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
#ifdef LINUX_NPTL
	struct linux_sys_exit_group_args /* {
		syscallarg(int) error_code;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct linux_emuldata *e;

#ifdef DEBUG_LINUX
	printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__, led->s->refs);
#endif
	/*
	 * The calling thread is supposed to kill all threads
	 * in the same thread group (i.e. all threads created
	 * via clone(2) with the CLONE_THREAD flag set).
	 *
	 * If there is only one thread, things are quite simple.
	 */
	if (led->s->refs == 1)
		return sys_exit(l, v, retval);

#ifdef DEBUG_LINUX
	printf("%s:%d\n", __func__, __LINE__);
#endif

	led->s->flags |= LINUX_LES_INEXITGROUP;
	led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);

	/*
	 * Kill all threads in the group.  The emulation exit hook takes
	 * care of hiding the zombies and reporting the exit code properly.
	 */
	LIST_FOREACH(e, &led->s->threads, threads) {
		if (e->proc == p)
			continue;

#ifdef DEBUG_LINUX
		printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
#endif
		psignal(e->proc, SIGKILL);
	}

	/* Now, kill ourselves */
	psignal(p, SIGKILL);
	return 0;
#else /* LINUX_NPTL */
	return sys_exit(l, v, retval);
#endif /* LINUX_NPTL */
}
#endif /* !__m68k__ */
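
/*
 * Example of the status packing used above (illustrative): NetBSD's
 * W_EXITCODE(code, sig) expands to ((code) << 8 | (sig)), so an
 * exit_group() error_code of 2 stores 0x0200 in s->xstat, and
 * WEXITSTATUS() later recovers 2 when the thread group leader is
 * reaped.
 */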

#ifdef LINUX_NPTL
int
linux_sys_set_tid_address(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_set_tid_address_args /* {
		syscallarg(int *) tid;
	} */ *uap = v;
	struct linux_emuldata *led;

	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
	led->clear_tid = SCARG(uap, tid);

	*retval = l->l_proc->p_pid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_gettid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	/* The Linux kernel does it exactly this way. */
	*retval = l->l_proc->p_pid;
	return 0;
}

#ifdef LINUX_NPTL
/* ARGSUSED */
int
linux_sys_getpid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_emuldata *led;

	led = l->l_proc->p_emuldata;

	/* The Linux kernel does it exactly this way. */
	*retval = led->s->group_pid;

	return 0;
}

/* ARGSUSED */
int
linux_sys_getppid(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;
	struct proc *glp;
	struct proc *pp;

	/* Find the thread group leader's parent */
	if ((glp = pfind(led->s->group_pid)) == NULL) {
		/* Maybe panic... */
		printf("linux_sys_getppid: missing group leader PID %d\n",
		    led->s->group_pid);
		return ESRCH;
	}
	pp = glp->p_pptr;

	/* If the parent is a Linux process too, return its thread group PID */
	if (pp->p_emul == p->p_emul) {
		struct linux_emuldata *pled;

		pled = pp->p_emuldata;
		*retval = pled->s->group_pid;
	} else {
		*retval = pp->p_pid;
	}

	return 0;
}
#endif /* LINUX_NPTL */

int
linux_sys_sched_getaffinity(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_getaffinity_args /* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */ *uap = v;
	int error;
	int ret;
	int ncpu;
	int name[2];
	size_t sz;
	char *data;
	int *retp;

	if (SCARG(uap, mask) == NULL)
		return EINVAL;

	if (SCARG(uap, len) < sizeof(int))
		return EINVAL;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/*
	 * Return the actual number of CPUs and tag all of them as
	 * available.  The result is a mask, with the first CPU in the
	 * least significant bit.
	 */
	name[0] = CTL_HW;
	name[1] = HW_NCPU;
	sz = sizeof(ncpu);

	if ((error = old_sysctl(&name[0], 2, &ncpu, &sz, NULL, 0, NULL)) != 0)
		return error;

	ret = (1 << ncpu) - 1;

	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
	*retp = ret;

	/* Free the temporary buffer even if the copyout fails. */
	error = copyout(data, SCARG(uap, mask), SCARG(uap, len));
	free(data, M_TEMP);

	return error;
}
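
/*
 * Worked example for the mask above (illustrative): on a machine where
 * the HW_NCPU sysctl reports 2, ret = (1 << 2) - 1 = 0x3, so both CPUs
 * are advertised as available to the Linux caller regardless of the
 * process's actual affinity.
 */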

int
linux_sys_sched_setaffinity(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct linux_sys_sched_setaffinity_args /* {
		syscallarg(pid_t) pid;
		syscallarg(unsigned int) len;
		syscallarg(unsigned long *) mask;
	} */ *uap = v;

	if (pfind(SCARG(uap, pid)) == NULL)
		return ESRCH;

	/* Let's ignore it */
#ifdef DEBUG_LINUX
	printf("linux_sys_sched_setaffinity\n");
#endif
	return 0;
}
#endif /* LINUX_NPTL */