Home | History | Annotate | Line # | Download | only in common
linux_sched.c revision 1.44
      1 /*	$NetBSD: linux_sched.c,v 1.44 2007/10/19 18:52:11 njoly Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center; by Matthias Scheler.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Linux compatibility module. Try to deal with scheduler related syscalls.
     42  */
     43 
     44 #include <sys/cdefs.h>
     45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.44 2007/10/19 18:52:11 njoly Exp $");
     46 
     47 #include <sys/param.h>
     48 #include <sys/mount.h>
     49 #include <sys/proc.h>
     50 #include <sys/systm.h>
     51 #include <sys/sysctl.h>
     52 #include <sys/malloc.h>
     53 #include <sys/syscallargs.h>
     54 #include <sys/wait.h>
     55 #include <sys/kauth.h>
     56 #include <sys/ptrace.h>
     57 
     58 #include <sys/cpu.h>
     59 
     60 #include <compat/linux/common/linux_types.h>
     61 #include <compat/linux/common/linux_signal.h>
     62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
     63 #include <compat/linux/common/linux_emuldata.h>
     64 #include <compat/linux/common/linux_ipc.h>
     65 #include <compat/linux/common/linux_sem.h>
     66 
     67 #include <compat/linux/linux_syscallargs.h>
     68 
     69 #include <compat/linux/common/linux_sched.h>
     70 
     71 int
     72 linux_sys_clone(l, v, retval)
     73 	struct lwp *l;
     74 	void *v;
     75 	register_t *retval;
     76 {
     77 	struct linux_sys_clone_args /* {
     78 		syscallarg(int) flags;
     79 		syscallarg(void *) stack;
     80 #ifdef LINUX_NPTL
     81 		syscallarg(void *) parent_tidptr;
     82 		syscallarg(void *) child_tidptr;
     83 #endif
     84 	} */ *uap = v;
     85 	int flags, sig;
     86 	int error;
     87 #ifdef LINUX_NPTL
     88 	struct linux_emuldata *led;
     89 #endif
     90 
     91 	/*
     92 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
     93 	 */
     94 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
     95 		return (EINVAL);
     96 
     97 	/*
     98 	 * Thread group implies shared signals. Shared signals
     99 	 * imply shared VM. This matches what Linux kernel does.
    100 	 */
    101 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
    102 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
    103 		return (EINVAL);
    104 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
    105 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
    106 		return (EINVAL);
    107 
    108 	flags = 0;
    109 
    110 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
    111 		flags |= FORK_SHAREVM;
    112 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
    113 		flags |= FORK_SHARECWD;
    114 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
    115 		flags |= FORK_SHAREFILES;
    116 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
    117 		flags |= FORK_SHARESIGS;
    118 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
    119 		flags |= FORK_PPWAIT;
    120 
    121 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
    122 	if (sig < 0 || sig >= LINUX__NSIG)
    123 		return (EINVAL);
    124 	sig = linux_to_native_signo[sig];
    125 
    126 #ifdef LINUX_NPTL
    127 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    128 
    129 	led->parent_tidptr = SCARG(uap, parent_tidptr);
    130 	led->child_tidptr = SCARG(uap, child_tidptr);
    131 	led->clone_flags = SCARG(uap, flags);
    132 #endif /* LINUX_NPTL */
    133 
    134 	/*
    135 	 * Note that Linux does not provide a portable way of specifying
    136 	 * the stack area; the caller must know if the stack grows up
    137 	 * or down.  So, we pass a stack size of 0, so that the code
    138 	 * that makes this adjustment is a noop.
    139 	 */
    140 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
    141 	    NULL, NULL, retval, NULL)) != 0)
    142 		return error;
    143 
    144 	return 0;
    145 }
    146 
    147 int
    148 linux_sys_sched_setparam(struct lwp *cl, void *v, register_t *retval)
    149 {
    150 	struct linux_sys_sched_setparam_args /* {
    151 		syscallarg(linux_pid_t) pid;
    152 		syscallarg(const struct linux_sched_param *) sp;
    153 	} */ *uap = v;
    154 	int error;
    155 	struct linux_sched_param lp;
    156 	struct proc *p;
    157 
    158 /*
    159  * We only check for valid parameters and return afterwards.
    160  */
    161 
    162 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    163 		return EINVAL;
    164 
    165 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    166 	if (error)
    167 		return error;
    168 
    169 	if (SCARG(uap, pid) != 0) {
    170 		kauth_cred_t pc = cl->l_cred;
    171 
    172 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    173 			return ESRCH;
    174 		if (!(cl->l_proc == p ||
    175 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    176 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    177 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    178 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    179 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    180 			return EPERM;
    181 	}
    182 
    183 	return 0;
    184 }
    185 
    186 int
    187 linux_sys_sched_getparam(struct lwp *cl, void *v, register_t *retval)
    188 {
    189 	struct linux_sys_sched_getparam_args /* {
    190 		syscallarg(linux_pid_t) pid;
    191 		syscallarg(struct linux_sched_param *) sp;
    192 	} */ *uap = v;
    193 	struct proc *p;
    194 	struct linux_sched_param lp;
    195 
    196 /*
    197  * We only check for valid parameters and return a dummy priority afterwards.
    198  */
    199 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    200 		return EINVAL;
    201 
    202 	if (SCARG(uap, pid) != 0) {
    203 		kauth_cred_t pc = cl->l_cred;
    204 
    205 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    206 			return ESRCH;
    207 		if (!(cl->l_proc == p ||
    208 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    209 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    210 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    211 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    212 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    213 			return EPERM;
    214 	}
    215 
    216 	lp.sched_priority = 0;
    217 	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
    218 }
    219 
    220 int
    221 linux_sys_sched_setscheduler(struct lwp *cl, void *v,
    222     register_t *retval)
    223 {
    224 	struct linux_sys_sched_setscheduler_args /* {
    225 		syscallarg(linux_pid_t) pid;
    226 		syscallarg(int) policy;
    227 		syscallarg(cont struct linux_sched_scheduler *) sp;
    228 	} */ *uap = v;
    229 	int error;
    230 	struct linux_sched_param lp;
    231 	struct proc *p;
    232 
    233 /*
    234  * We only check for valid parameters and return afterwards.
    235  */
    236 
    237 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    238 		return EINVAL;
    239 
    240 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    241 	if (error)
    242 		return error;
    243 
    244 	if (SCARG(uap, pid) != 0) {
    245 		kauth_cred_t pc = cl->l_cred;
    246 
    247 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    248 			return ESRCH;
    249 		if (!(cl->l_proc == p ||
    250 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    251 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    252 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    253 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    254 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    255 			return EPERM;
    256 	}
    257 
    258 	return 0;
    259 /*
    260  * We can't emulate anything put the default scheduling policy.
    261  */
    262 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
    263 		return EINVAL;
    264 
    265 	return 0;
    266 }
    267 
    268 int
    269 linux_sys_sched_getscheduler(cl, v, retval)
    270 	struct lwp *cl;
    271 	void *v;
    272 	register_t *retval;
    273 {
    274 	struct linux_sys_sched_getscheduler_args /* {
    275 		syscallarg(linux_pid_t) pid;
    276 	} */ *uap = v;
    277 	struct proc *p;
    278 
    279 	*retval = -1;
    280 /*
    281  * We only check for valid parameters and return afterwards.
    282  */
    283 
    284 	if (SCARG(uap, pid) != 0) {
    285 		kauth_cred_t pc = cl->l_cred;
    286 
    287 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    288 			return ESRCH;
    289 		if (!(cl->l_proc == p ||
    290 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    291 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    292 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    293 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    294 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    295 			return EPERM;
    296 	}
    297 
    298 /*
    299  * We can't emulate anything put the default scheduling policy.
    300  */
    301 	*retval = LINUX_SCHED_OTHER;
    302 	return 0;
    303 }
    304 
    305 int
    306 linux_sys_sched_yield(struct lwp *cl, void *v,
    307     register_t *retval)
    308 {
    309 
    310 	yield();
    311 	return 0;
    312 }
    313 
    314 int
    315 linux_sys_sched_get_priority_max(struct lwp *cl, void *v,
    316     register_t *retval)
    317 {
    318 	struct linux_sys_sched_get_priority_max_args /* {
    319 		syscallarg(int) policy;
    320 	} */ *uap = v;
    321 
    322 /*
    323  * We can't emulate anything put the default scheduling policy.
    324  */
    325 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    326 		*retval = -1;
    327 		return EINVAL;
    328 	}
    329 
    330 	*retval = 0;
    331 	return 0;
    332 }
    333 
    334 int
    335 linux_sys_sched_get_priority_min(struct lwp *cl, void *v,
    336     register_t *retval)
    337 {
    338 	struct linux_sys_sched_get_priority_min_args /* {
    339 		syscallarg(int) policy;
    340 	} */ *uap = v;
    341 
    342 /*
    343  * We can't emulate anything put the default scheduling policy.
    344  */
    345 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    346 		*retval = -1;
    347 		return EINVAL;
    348 	}
    349 
    350 	*retval = 0;
    351 	return 0;
    352 }
    353 
    354 #ifndef __m68k__
    355 /* Present on everything but m68k */
    356 int
    357 linux_sys_exit_group(l, v, retval)
    358 	struct lwp *l;
    359 	void *v;
    360 	register_t *retval;
    361 {
    362 #ifdef LINUX_NPTL
    363 	struct linux_sys_exit_group_args /* {
    364 		syscallarg(int) error_code;
    365 	} */ *uap = v;
    366 	struct proc *p = l->l_proc;
    367 	struct linux_emuldata *led = p->p_emuldata;
    368 	struct linux_emuldata *e;
    369 
    370 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    371 
    372 #ifdef DEBUG_LINUX
    373 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
    374 		    led->s->refs);
    375 #endif
    376 
    377 		/*
    378 		 * The calling thread is supposed to kill all threads
    379 		 * in the same thread group (i.e. all threads created
    380 		 * via clone(2) with CLONE_THREAD flag set).
    381 		 *
    382 		 * If there is only one thread, things are quite simple
    383 		 */
    384 		if (led->s->refs == 1)
    385 			return sys_exit(l, v, retval);
    386 
    387 #ifdef DEBUG_LINUX
    388 		printf("%s:%d\n", __func__, __LINE__);
    389 #endif
    390 
    391 		led->s->flags |= LINUX_LES_INEXITGROUP;
    392 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
    393 
    394 		/*
    395 		 * Kill all threads in the group. The emulation exit hook takes
    396 		 * care of hiding the zombies and reporting the exit code
    397 		 * properly.
    398 		 */
    399 		mutex_enter(&proclist_mutex);
    400       		LIST_FOREACH(e, &led->s->threads, threads) {
    401 			if (e->proc == p)
    402 				continue;
    403 
    404 #ifdef DEBUG_LINUX
    405 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
    406 #endif
    407 			psignal(e->proc, SIGKILL);
    408 		}
    409 
    410 		/* Now, kill ourselves */
    411 		psignal(p, SIGKILL);
    412 		mutex_exit(&proclist_mutex);
    413 
    414 		return 0;
    415 
    416 	}
    417 #endif /* LINUX_NPTL */
    418 
    419 	return sys_exit(l, v, retval);
    420 }
    421 #endif /* !__m68k__ */
    422 
    423 #ifdef LINUX_NPTL
    424 int
    425 linux_sys_set_tid_address(l, v, retval)
    426 	struct lwp *l;
    427 	void *v;
    428 	register_t *retval;
    429 {
    430 	struct linux_sys_set_tid_address_args /* {
    431 		syscallarg(int *) tidptr;
    432 	} */ *uap = v;
    433 	struct linux_emuldata *led;
    434 
    435 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    436 	led->clear_tid = SCARG(uap, tid);
    437 
    438 	led->s->flags |= LINUX_LES_USE_NPTL;
    439 
    440 	*retval = l->l_proc->p_pid;
    441 
    442 	return 0;
    443 }
    444 
    445 /* ARGUSED1 */
    446 int
    447 linux_sys_gettid(l, v, retval)
    448 	struct lwp *l;
    449 	void *v;
    450 	register_t *retval;
    451 {
    452 	/* The Linux kernel does it exactly that way */
    453 	*retval = l->l_proc->p_pid;
    454 	return 0;
    455 }
    456 
    457 #ifdef LINUX_NPTL
    458 /* ARGUSED1 */
    459 int
    460 linux_sys_getpid(l, v, retval)
    461 	struct lwp *l;
    462 	void *v;
    463 	register_t *retval;
    464 {
    465 	struct linux_emuldata *led = l->l_proc->p_emuldata;
    466 
    467 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    468 		/* The Linux kernel does it exactly that way */
    469 		*retval = led->s->group_pid;
    470 	} else {
    471 		*retval = l->l_proc->p_pid;
    472 	}
    473 
    474 	return 0;
    475 }
    476 
    477 /* ARGUSED1 */
    478 int
    479 linux_sys_getppid(l, v, retval)
    480 	struct lwp *l;
    481 	void *v;
    482 	register_t *retval;
    483 {
    484 	struct proc *p = l->l_proc;
    485 	struct linux_emuldata *led = p->p_emuldata;
    486 	struct proc *glp;
    487 	struct proc *pp;
    488 
    489 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    490 
    491 		/* Find the thread group leader's parent */
    492 		if ((glp = pfind(led->s->group_pid)) == NULL) {
    493 			/* Maybe panic... */
    494 			printf("linux_sys_getppid: missing group leader PID"
    495 			    " %d\n", led->s->group_pid);
    496 			return -1;
    497 		}
    498 		pp = glp->p_pptr;
    499 
    500 		/* If this is a Linux process too, return thread group PID */
    501 		if (pp->p_emul == p->p_emul) {
    502 			struct linux_emuldata *pled;
    503 
    504 			pled = pp->p_emuldata;
    505 			*retval = pled->s->group_pid;
    506 		} else {
    507 			*retval = pp->p_pid;
    508 		}
    509 
    510 	} else {
    511 		*retval = p->p_pptr->p_pid;
    512 	}
    513 
    514 	return 0;
    515 }
    516 #endif /* LINUX_NPTL */
    517 
    518 int
    519 linux_sys_sched_getaffinity(l, v, retval)
    520 	struct lwp *l;
    521 	void *v;
    522 	register_t *retval;
    523 {
    524 	struct linux_sys_sched_getaffinity_args /* {
    525 		syscallarg(pid_t) pid;
    526 		syscallarg(unsigned int) len;
    527 		syscallarg(unsigned long *) mask;
    528 	} */ *uap = v;
    529 	int error;
    530 	int ret;
    531 	char *data;
    532 	int *retp;
    533 
    534 	if (SCARG(uap, mask) == NULL)
    535 		return EINVAL;
    536 
    537 	if (SCARG(uap, len) < sizeof(int))
    538 		return EINVAL;
    539 
    540 	if (pfind(SCARG(uap, pid)) == NULL)
    541 		return ESRCH;
    542 
    543 	/*
    544 	 * return the actual number of CPU, tag all of them as available
    545 	 * The result is a mask, the first CPU being in the least significant
    546 	 * bit.
    547 	 */
    548 	ret = (1 << ncpu) - 1;
    549 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
    550 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
    551 	*retp = ret;
    552 
    553 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
    554 		return error;
    555 
    556 	free(data, M_TEMP);
    557 
    558 	return 0;
    559 
    560 }
    561 
    562 int
    563 linux_sys_sched_setaffinity(l, v, retval)
    564 	struct lwp *l;
    565 	void *v;
    566 	register_t *retval;
    567 {
    568 	struct linux_sys_sched_setaffinity_args /* {
    569 		syscallarg(pid_t) pid;
    570 		syscallarg(unsigned int) len;
    571 		syscallarg(unsigned long *) mask;
    572 	} */ *uap = v;
    573 
    574 	if (pfind(SCARG(uap, pid)) == NULL)
    575 		return ESRCH;
    576 
    577 	/* Let's ignore it */
    578 #ifdef DEBUG_LINUX
    579 	printf("linux_sys_sched_setaffinity\n");
    580 #endif
    581 	return 0;
    582 };
    583 #endif /* LINUX_NPTL */
    584