Home | History | Annotate | Line # | Download | only in common
linux_sched.c revision 1.46
      1 /*	$NetBSD: linux_sched.c,v 1.46 2007/12/20 23:02:56 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center; by Matthias Scheler.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Linux compatibility module. Try to deal with scheduler related syscalls.
     42  */
     43 
     44 #include <sys/cdefs.h>
     45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.46 2007/12/20 23:02:56 dsl Exp $");
     46 
     47 #include <sys/param.h>
     48 #include <sys/mount.h>
     49 #include <sys/proc.h>
     50 #include <sys/systm.h>
     51 #include <sys/sysctl.h>
     52 #include <sys/malloc.h>
     53 #include <sys/syscallargs.h>
     54 #include <sys/wait.h>
     55 #include <sys/kauth.h>
     56 #include <sys/ptrace.h>
     57 
     58 #include <sys/cpu.h>
     59 
     60 #include <compat/linux/common/linux_types.h>
     61 #include <compat/linux/common/linux_signal.h>
     62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
     63 #include <compat/linux/common/linux_emuldata.h>
     64 #include <compat/linux/common/linux_ipc.h>
     65 #include <compat/linux/common/linux_sem.h>
     66 
     67 #include <compat/linux/linux_syscallargs.h>
     68 
     69 #include <compat/linux/common/linux_sched.h>
     70 
     71 int
     72 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
     73 {
     74 	/* {
     75 		syscallarg(int) flags;
     76 		syscallarg(void *) stack;
     77 #ifdef LINUX_NPTL
     78 		syscallarg(void *) parent_tidptr;
     79 		syscallarg(void *) child_tidptr;
     80 #endif
     81 	} */
     82 	int flags, sig;
     83 	int error;
     84 #ifdef LINUX_NPTL
     85 	struct linux_emuldata *led;
     86 #endif
     87 
     88 	/*
     89 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
     90 	 */
     91 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
     92 		return (EINVAL);
     93 
     94 	/*
     95 	 * Thread group implies shared signals. Shared signals
     96 	 * imply shared VM. This matches what Linux kernel does.
     97 	 */
     98 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
     99 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
    100 		return (EINVAL);
    101 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
    102 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
    103 		return (EINVAL);
    104 
    105 	flags = 0;
    106 
    107 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
    108 		flags |= FORK_SHAREVM;
    109 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
    110 		flags |= FORK_SHARECWD;
    111 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
    112 		flags |= FORK_SHAREFILES;
    113 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
    114 		flags |= FORK_SHARESIGS;
    115 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
    116 		flags |= FORK_PPWAIT;
    117 
    118 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
    119 	if (sig < 0 || sig >= LINUX__NSIG)
    120 		return (EINVAL);
    121 	sig = linux_to_native_signo[sig];
    122 
    123 #ifdef LINUX_NPTL
    124 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    125 
    126 	led->parent_tidptr = SCARG(uap, parent_tidptr);
    127 	led->child_tidptr = SCARG(uap, child_tidptr);
    128 	led->clone_flags = SCARG(uap, flags);
    129 #endif /* LINUX_NPTL */
    130 
    131 	/*
    132 	 * Note that Linux does not provide a portable way of specifying
    133 	 * the stack area; the caller must know if the stack grows up
    134 	 * or down.  So, we pass a stack size of 0, so that the code
    135 	 * that makes this adjustment is a noop.
    136 	 */
    137 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
    138 	    NULL, NULL, retval, NULL)) != 0)
    139 		return error;
    140 
    141 	return 0;
    142 }
    143 
    144 int
    145 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
    146 {
    147 	/* {
    148 		syscallarg(linux_pid_t) pid;
    149 		syscallarg(const struct linux_sched_param *) sp;
    150 	} */
    151 	int error;
    152 	struct linux_sched_param lp;
    153 	struct proc *p;
    154 
    155 /*
    156  * We only check for valid parameters and return afterwards.
    157  */
    158 
    159 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    160 		return EINVAL;
    161 
    162 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    163 	if (error)
    164 		return error;
    165 
    166 	if (SCARG(uap, pid) != 0) {
    167 		kauth_cred_t pc = l->l_cred;
    168 
    169 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    170 			return ESRCH;
    171 		if (!(l->l_proc == p ||
    172 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    173 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    174 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    175 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    176 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    177 			return EPERM;
    178 	}
    179 
    180 	return 0;
    181 }
    182 
    183 int
    184 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
    185 {
    186 	/* {
    187 		syscallarg(linux_pid_t) pid;
    188 		syscallarg(struct linux_sched_param *) sp;
    189 	} */
    190 	struct proc *p;
    191 	struct linux_sched_param lp;
    192 
    193 /*
    194  * We only check for valid parameters and return a dummy priority afterwards.
    195  */
    196 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    197 		return EINVAL;
    198 
    199 	if (SCARG(uap, pid) != 0) {
    200 		kauth_cred_t pc = l->l_cred;
    201 
    202 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    203 			return ESRCH;
    204 		if (!(l->l_proc == p ||
    205 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    206 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    207 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    208 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    209 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    210 			return EPERM;
    211 	}
    212 
    213 	lp.sched_priority = 0;
    214 	return copyout(&lp, SCARG(uap, sp), sizeof(lp));
    215 }
    216 
    217 int
    218 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
    219 {
    220 	/* {
    221 		syscallarg(linux_pid_t) pid;
    222 		syscallarg(int) policy;
    223 		syscallarg(cont struct linux_sched_scheduler *) sp;
    224 	} */
    225 	int error;
    226 	struct linux_sched_param lp;
    227 	struct proc *p;
    228 
    229 /*
    230  * We only check for valid parameters and return afterwards.
    231  */
    232 
    233 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL)
    234 		return EINVAL;
    235 
    236 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    237 	if (error)
    238 		return error;
    239 
    240 	if (SCARG(uap, pid) != 0) {
    241 		kauth_cred_t pc = l->l_cred;
    242 
    243 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    244 			return ESRCH;
    245 		if (!(l->l_proc == p ||
    246 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    247 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    248 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    249 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    250 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    251 			return EPERM;
    252 	}
    253 
    254 	return 0;
    255 /*
    256  * We can't emulate anything put the default scheduling policy.
    257  */
    258 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER || lp.sched_priority != 0)
    259 		return EINVAL;
    260 
    261 	return 0;
    262 }
    263 
    264 int
    265 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
    266 {
    267 	/* {
    268 		syscallarg(linux_pid_t) pid;
    269 	} */
    270 	struct proc *p;
    271 
    272 	*retval = -1;
    273 /*
    274  * We only check for valid parameters and return afterwards.
    275  */
    276 
    277 	if (SCARG(uap, pid) != 0) {
    278 		kauth_cred_t pc = l->l_cred;
    279 
    280 		if ((p = pfind(SCARG(uap, pid))) == NULL)
    281 			return ESRCH;
    282 		if (!(l->l_proc == p ||
    283 		      kauth_authorize_generic(pc, KAUTH_GENERIC_ISSUSER, NULL) == 0 ||
    284 		      kauth_cred_getuid(pc) == kauth_cred_getuid(p->p_cred) ||
    285 		      kauth_cred_geteuid(pc) == kauth_cred_getuid(p->p_cred) ||
    286 		      kauth_cred_getuid(pc) == kauth_cred_geteuid(p->p_cred) ||
    287 		      kauth_cred_geteuid(pc) == kauth_cred_geteuid(p->p_cred)))
    288 			return EPERM;
    289 	}
    290 
    291 /*
    292  * We can't emulate anything put the default scheduling policy.
    293  */
    294 	*retval = LINUX_SCHED_OTHER;
    295 	return 0;
    296 }
    297 
    298 int
    299 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    300 {
    301 
    302 	yield();
    303 	return 0;
    304 }
    305 
    306 int
    307 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
    308 {
    309 	/* {
    310 		syscallarg(int) policy;
    311 	} */
    312 
    313 /*
    314  * We can't emulate anything put the default scheduling policy.
    315  */
    316 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    317 		*retval = -1;
    318 		return EINVAL;
    319 	}
    320 
    321 	*retval = 0;
    322 	return 0;
    323 }
    324 
    325 int
    326 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
    327 {
    328 	/* {
    329 		syscallarg(int) policy;
    330 	} */
    331 
    332 /*
    333  * We can't emulate anything put the default scheduling policy.
    334  */
    335 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    336 		*retval = -1;
    337 		return EINVAL;
    338 	}
    339 
    340 	*retval = 0;
    341 	return 0;
    342 }
    343 
    344 #ifndef __m68k__
    345 /* Present on everything but m68k */
    346 int
    347 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
    348 {
    349 #ifdef LINUX_NPTL
    350 	/* {
    351 		syscallarg(int) error_code;
    352 	} */
    353 	struct proc *p = l->l_proc;
    354 	struct linux_emuldata *led = p->p_emuldata;
    355 	struct linux_emuldata *e;
    356 
    357 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    358 
    359 #ifdef DEBUG_LINUX
    360 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
    361 		    led->s->refs);
    362 #endif
    363 
    364 		/*
    365 		 * The calling thread is supposed to kill all threads
    366 		 * in the same thread group (i.e. all threads created
    367 		 * via clone(2) with CLONE_THREAD flag set).
    368 		 *
    369 		 * If there is only one thread, things are quite simple
    370 		 */
    371 		if (led->s->refs == 1)
    372 			return sys_exit(l, (const void *)uap, retval);
    373 
    374 #ifdef DEBUG_LINUX
    375 		printf("%s:%d\n", __func__, __LINE__);
    376 #endif
    377 
    378 		led->s->flags |= LINUX_LES_INEXITGROUP;
    379 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
    380 
    381 		/*
    382 		 * Kill all threads in the group. The emulation exit hook takes
    383 		 * care of hiding the zombies and reporting the exit code
    384 		 * properly.
    385 		 */
    386 		mutex_enter(&proclist_mutex);
    387       		LIST_FOREACH(e, &led->s->threads, threads) {
    388 			if (e->proc == p)
    389 				continue;
    390 
    391 #ifdef DEBUG_LINUX
    392 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
    393 #endif
    394 			psignal(e->proc, SIGKILL);
    395 		}
    396 
    397 		/* Now, kill ourselves */
    398 		psignal(p, SIGKILL);
    399 		mutex_exit(&proclist_mutex);
    400 
    401 		return 0;
    402 
    403 	}
    404 #endif /* LINUX_NPTL */
    405 
    406 	return sys_exit(l, (const void *)uap, retval);
    407 }
    408 #endif /* !__m68k__ */
    409 
    410 #ifdef LINUX_NPTL
    411 int
    412 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
    413 {
    414 	/* {
    415 		syscallarg(int *) tidptr;
    416 	} */
    417 	struct linux_emuldata *led;
    418 
    419 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    420 	led->clear_tid = SCARG(uap, tid);
    421 
    422 	led->s->flags |= LINUX_LES_USE_NPTL;
    423 
    424 	*retval = l->l_proc->p_pid;
    425 
    426 	return 0;
    427 }
    428 
    429 /* ARGUSED1 */
    430 int
    431 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
    432 {
    433 	/* The Linux kernel does it exactly that way */
    434 	*retval = l->l_proc->p_pid;
    435 	return 0;
    436 }
    437 
    438 #ifdef LINUX_NPTL
    439 /* ARGUSED1 */
    440 int
    441 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
    442 {
    443 	struct linux_emuldata *led = l->l_proc->p_emuldata;
    444 
    445 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    446 		/* The Linux kernel does it exactly that way */
    447 		*retval = led->s->group_pid;
    448 	} else {
    449 		*retval = l->l_proc->p_pid;
    450 	}
    451 
    452 	return 0;
    453 }
    454 
    455 /* ARGUSED1 */
    456 int
    457 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
    458 {
    459 	struct proc *p = l->l_proc;
    460 	struct linux_emuldata *led = p->p_emuldata;
    461 	struct proc *glp;
    462 	struct proc *pp;
    463 
    464 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    465 
    466 		/* Find the thread group leader's parent */
    467 		if ((glp = pfind(led->s->group_pid)) == NULL) {
    468 			/* Maybe panic... */
    469 			printf("linux_sys_getppid: missing group leader PID"
    470 			    " %d\n", led->s->group_pid);
    471 			return -1;
    472 		}
    473 		pp = glp->p_pptr;
    474 
    475 		/* If this is a Linux process too, return thread group PID */
    476 		if (pp->p_emul == p->p_emul) {
    477 			struct linux_emuldata *pled;
    478 
    479 			pled = pp->p_emuldata;
    480 			*retval = pled->s->group_pid;
    481 		} else {
    482 			*retval = pp->p_pid;
    483 		}
    484 
    485 	} else {
    486 		*retval = p->p_pptr->p_pid;
    487 	}
    488 
    489 	return 0;
    490 }
    491 #endif /* LINUX_NPTL */
    492 
    493 int
    494 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
    495 {
    496 	/* {
    497 		syscallarg(pid_t) pid;
    498 		syscallarg(unsigned int) len;
    499 		syscallarg(unsigned long *) mask;
    500 	} */
    501 	int error;
    502 	int ret;
    503 	char *data;
    504 	int *retp;
    505 
    506 	if (SCARG(uap, mask) == NULL)
    507 		return EINVAL;
    508 
    509 	if (SCARG(uap, len) < sizeof(int))
    510 		return EINVAL;
    511 
    512 	if (pfind(SCARG(uap, pid)) == NULL)
    513 		return ESRCH;
    514 
    515 	/*
    516 	 * return the actual number of CPU, tag all of them as available
    517 	 * The result is a mask, the first CPU being in the least significant
    518 	 * bit.
    519 	 */
    520 	ret = (1 << ncpu) - 1;
    521 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
    522 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
    523 	*retp = ret;
    524 
    525 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
    526 		return error;
    527 
    528 	free(data, M_TEMP);
    529 
    530 	return 0;
    531 
    532 }
    533 
    534 int
    535 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
    536 {
    537 	/* {
    538 		syscallarg(pid_t) pid;
    539 		syscallarg(unsigned int) len;
    540 		syscallarg(unsigned long *) mask;
    541 	} */
    542 
    543 	if (pfind(SCARG(uap, pid)) == NULL)
    544 		return ESRCH;
    545 
    546 	/* Let's ignore it */
    547 #ifdef DEBUG_LINUX
    548 	printf("linux_sys_sched_setaffinity\n");
    549 #endif
    550 	return 0;
    551 };
    552 #endif /* LINUX_NPTL */
    553