Home | History | Annotate | Line # | Download | only in common
linux_sched.c revision 1.19.2.8
      1 /*	$NetBSD: linux_sched.c,v 1.19.2.8 2008/03/17 09:14:36 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center; by Matthias Scheler.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Linux compatibility module. Try to deal with scheduler related syscalls.
     42  */
     43 
     44 #include <sys/cdefs.h>
     45 __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.19.2.8 2008/03/17 09:14:36 yamt Exp $");
     46 
     47 #include <sys/param.h>
     48 #include <sys/mount.h>
     49 #include <sys/proc.h>
     50 #include <sys/systm.h>
     51 #include <sys/sysctl.h>
     52 #include <sys/malloc.h>
     53 #include <sys/syscallargs.h>
     54 #include <sys/wait.h>
     55 #include <sys/kauth.h>
     56 #include <sys/ptrace.h>
     57 
     58 #include <sys/cpu.h>
     59 
     60 #include <compat/linux/common/linux_types.h>
     61 #include <compat/linux/common/linux_signal.h>
     62 #include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
     63 #include <compat/linux/common/linux_emuldata.h>
     64 #include <compat/linux/common/linux_ipc.h>
     65 #include <compat/linux/common/linux_sem.h>
     66 
     67 #include <compat/linux/linux_syscallargs.h>
     68 
     69 #include <compat/linux/common/linux_sched.h>
     70 
     71 int
     72 linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
     73 {
     74 	/* {
     75 		syscallarg(int) flags;
     76 		syscallarg(void *) stack;
     77 #ifdef LINUX_NPTL
     78 		syscallarg(void *) parent_tidptr;
     79 		syscallarg(void *) child_tidptr;
     80 #endif
     81 	} */
     82 	int flags, sig;
     83 	int error;
     84 #ifdef LINUX_NPTL
     85 	struct linux_emuldata *led;
     86 #endif
     87 
     88 	/*
     89 	 * We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
     90 	 */
     91 	if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
     92 		return (EINVAL);
     93 
     94 	/*
     95 	 * Thread group implies shared signals. Shared signals
     96 	 * imply shared VM. This matches what Linux kernel does.
     97 	 */
     98 	if (SCARG(uap, flags) & LINUX_CLONE_THREAD
     99 	    && (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
    100 		return (EINVAL);
    101 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
    102 	    && (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
    103 		return (EINVAL);
    104 
    105 	flags = 0;
    106 
    107 	if (SCARG(uap, flags) & LINUX_CLONE_VM)
    108 		flags |= FORK_SHAREVM;
    109 	if (SCARG(uap, flags) & LINUX_CLONE_FS)
    110 		flags |= FORK_SHARECWD;
    111 	if (SCARG(uap, flags) & LINUX_CLONE_FILES)
    112 		flags |= FORK_SHAREFILES;
    113 	if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
    114 		flags |= FORK_SHARESIGS;
    115 	if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
    116 		flags |= FORK_PPWAIT;
    117 
    118 	sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
    119 	if (sig < 0 || sig >= LINUX__NSIG)
    120 		return (EINVAL);
    121 	sig = linux_to_native_signo[sig];
    122 
    123 #ifdef LINUX_NPTL
    124 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    125 
    126 	led->parent_tidptr = SCARG(uap, parent_tidptr);
    127 	led->child_tidptr = SCARG(uap, child_tidptr);
    128 	led->clone_flags = SCARG(uap, flags);
    129 #endif /* LINUX_NPTL */
    130 
    131 	/*
    132 	 * Note that Linux does not provide a portable way of specifying
    133 	 * the stack area; the caller must know if the stack grows up
    134 	 * or down.  So, we pass a stack size of 0, so that the code
    135 	 * that makes this adjustment is a noop.
    136 	 */
    137 	if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
    138 	    NULL, NULL, retval, NULL)) != 0)
    139 		return error;
    140 
    141 	return 0;
    142 }
    143 
    144 /*
    145  * linux realtime priority
    146  *
    147  * - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
    148  *
    149  * - SCHED_OTHER tasks don't have realtime priorities.
    150  *   in particular, sched_param::sched_priority is always 0.
    151  */
    152 
    153 #define	LINUX_SCHED_RTPRIO_MIN	1
    154 #define	LINUX_SCHED_RTPRIO_MAX	99
    155 
    156 static int
    157 sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
    158     int *native_policy, struct sched_param *native_params)
    159 {
    160 
    161 	switch (linux_policy) {
    162 	case LINUX_SCHED_OTHER:
    163 		if (native_policy != NULL) {
    164 			*native_policy = SCHED_OTHER;
    165 		}
    166 		break;
    167 
    168 	case LINUX_SCHED_FIFO:
    169 		if (native_policy != NULL) {
    170 			*native_policy = SCHED_FIFO;
    171 		}
    172 		break;
    173 
    174 	case LINUX_SCHED_RR:
    175 		if (native_policy != NULL) {
    176 			*native_policy = SCHED_RR;
    177 		}
    178 		break;
    179 
    180 	default:
    181 		return EINVAL;
    182 	}
    183 
    184 	if (linux_params != NULL) {
    185 		int prio = linux_params->sched_priority;
    186 
    187 		KASSERT(native_params != NULL);
    188 
    189 		if (linux_policy == LINUX_SCHED_OTHER) {
    190 			if (prio != 0) {
    191 				return EINVAL;
    192 			}
    193 			native_params->sched_priority = PRI_NONE; /* XXX */
    194 		} else {
    195 			if (prio < LINUX_SCHED_RTPRIO_MIN ||
    196 			    prio > LINUX_SCHED_RTPRIO_MAX) {
    197 				return EINVAL;
    198 			}
    199 			native_params->sched_priority =
    200 			    (prio - LINUX_SCHED_RTPRIO_MIN)
    201 			    * (SCHED_PRI_MAX - SCHED_PRI_MIN)
    202 			    / (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
    203 			    + SCHED_PRI_MIN;
    204 		}
    205 	}
    206 
    207 	return 0;
    208 }
    209 
    210 static int
    211 sched_native2linux(int native_policy, struct sched_param *native_params,
    212     int *linux_policy, struct linux_sched_param *linux_params)
    213 {
    214 
    215 	switch (native_policy) {
    216 	case SCHED_OTHER:
    217 		if (linux_policy != NULL) {
    218 			*linux_policy = LINUX_SCHED_OTHER;
    219 		}
    220 		break;
    221 
    222 	case SCHED_FIFO:
    223 		if (linux_policy != NULL) {
    224 			*linux_policy = LINUX_SCHED_FIFO;
    225 		}
    226 		break;
    227 
    228 	case SCHED_RR:
    229 		if (linux_policy != NULL) {
    230 			*linux_policy = LINUX_SCHED_RR;
    231 		}
    232 		break;
    233 
    234 	default:
    235 		panic("%s: unknown policy %d\n", __func__, native_policy);
    236 	}
    237 
    238 	if (native_params != NULL) {
    239 		int prio = native_params->sched_priority;
    240 
    241 		KASSERT(prio >= SCHED_PRI_MIN);
    242 		KASSERT(prio <= SCHED_PRI_MAX);
    243 		KASSERT(linux_params != NULL);
    244 
    245 		if (native_policy == SCHED_OTHER) {
    246 			linux_params->sched_priority = 0;
    247 		} else {
    248 			linux_params->sched_priority =
    249 			    (prio - SCHED_PRI_MIN)
    250 			    * (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
    251 			    / (SCHED_PRI_MAX - SCHED_PRI_MIN)
    252 			    + LINUX_SCHED_RTPRIO_MIN;
    253 		}
    254 	}
    255 
    256 	return 0;
    257 }
    258 
    259 int
    260 linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
    261 {
    262 	/* {
    263 		syscallarg(linux_pid_t) pid;
    264 		syscallarg(const struct linux_sched_param *) sp;
    265 	} */
    266 	int error, policy;
    267 	struct linux_sched_param lp;
    268 	struct sched_param sp;
    269 
    270 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
    271 		error = EINVAL;
    272 		goto out;
    273 	}
    274 
    275 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    276 	if (error)
    277 		goto out;
    278 
    279 	/* We need the current policy in Linux terms. */
    280 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
    281 	if (error)
    282 		goto out;
    283 	error = sched_native2linux(policy, NULL, &policy, NULL);
    284 	if (error)
    285 		goto out;
    286 
    287 	error = sched_linux2native(policy, &lp, &policy, &sp);
    288 	if (error)
    289 		goto out;
    290 
    291 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
    292 	if (error)
    293 		goto out;
    294 
    295  out:
    296 	return error;
    297 }
    298 
    299 int
    300 linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
    301 {
    302 	/* {
    303 		syscallarg(linux_pid_t) pid;
    304 		syscallarg(struct linux_sched_param *) sp;
    305 	} */
    306 	struct linux_sched_param lp;
    307 	struct sched_param sp;
    308 	int error, policy;
    309 
    310 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
    311 		error = EINVAL;
    312 		goto out;
    313 	}
    314 
    315 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
    316 	if (error)
    317 		goto out;
    318 
    319 	error = sched_native2linux(policy, &sp, NULL, &lp);
    320 	if (error)
    321 		goto out;
    322 
    323 	error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
    324 	if (error)
    325 		goto out;
    326 
    327  out:
    328 	return error;
    329 }
    330 
    331 int
    332 linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
    333 {
    334 	/* {
    335 		syscallarg(linux_pid_t) pid;
    336 		syscallarg(int) policy;
    337 		syscallarg(cont struct linux_sched_scheduler *) sp;
    338 	} */
    339 	int error, policy;
    340 	struct linux_sched_param lp;
    341 	struct sched_param sp;
    342 
    343 	if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
    344 		error = EINVAL;
    345 		goto out;
    346 	}
    347 
    348 	error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
    349 	if (error)
    350 		goto out;
    351 
    352 	error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
    353 	if (error)
    354 		goto out;
    355 
    356 	error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
    357 	if (error)
    358 		goto out;
    359 
    360  out:
    361 	return error;
    362 }
    363 
    364 int
    365 linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
    366 {
    367 	/* {
    368 		syscallarg(linux_pid_t) pid;
    369 	} */
    370 	int error, policy;
    371 
    372 	*retval = -1;
    373 
    374 	error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
    375 	if (error)
    376 		goto out;
    377 
    378 	error = sched_native2linux(policy, NULL, &policy, NULL);
    379 	if (error)
    380 		goto out;
    381 
    382 	*retval = policy;
    383 
    384  out:
    385 	return error;
    386 }
    387 
    388 int
    389 linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    390 {
    391 
    392 	yield();
    393 	return 0;
    394 }
    395 
    396 int
    397 linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
    398 {
    399 	/* {
    400 		syscallarg(int) policy;
    401 	} */
    402 
    403 /*
    404  * We can't emulate anything put the default scheduling policy.
    405  */
    406 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    407 		*retval = -1;
    408 		return EINVAL;
    409 	}
    410 
    411 	*retval = 0;
    412 	return 0;
    413 }
    414 
    415 int
    416 linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
    417 {
    418 	/* {
    419 		syscallarg(int) policy;
    420 	} */
    421 
    422 /*
    423  * We can't emulate anything put the default scheduling policy.
    424  */
    425 	if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
    426 		*retval = -1;
    427 		return EINVAL;
    428 	}
    429 
    430 	*retval = 0;
    431 	return 0;
    432 }
    433 
    434 #ifndef __m68k__
    435 /* Present on everything but m68k */
    436 int
    437 linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
    438 {
    439 #ifdef LINUX_NPTL
    440 	/* {
    441 		syscallarg(int) error_code;
    442 	} */
    443 	struct proc *p = l->l_proc;
    444 	struct linux_emuldata *led = p->p_emuldata;
    445 	struct linux_emuldata *e;
    446 
    447 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    448 
    449 #ifdef DEBUG_LINUX
    450 		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
    451 		    led->s->refs);
    452 #endif
    453 
    454 		/*
    455 		 * The calling thread is supposed to kill all threads
    456 		 * in the same thread group (i.e. all threads created
    457 		 * via clone(2) with CLONE_THREAD flag set).
    458 		 *
    459 		 * If there is only one thread, things are quite simple
    460 		 */
    461 		if (led->s->refs == 1)
    462 			return sys_exit(l, (const void *)uap, retval);
    463 
    464 #ifdef DEBUG_LINUX
    465 		printf("%s:%d\n", __func__, __LINE__);
    466 #endif
    467 
    468 		led->s->flags |= LINUX_LES_INEXITGROUP;
    469 		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
    470 
    471 		/*
    472 		 * Kill all threads in the group. The emulation exit hook takes
    473 		 * care of hiding the zombies and reporting the exit code
    474 		 * properly.
    475 		 */
    476 		mutex_enter(&proclist_mutex);
    477       		LIST_FOREACH(e, &led->s->threads, threads) {
    478 			if (e->proc == p)
    479 				continue;
    480 
    481 #ifdef DEBUG_LINUX
    482 			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
    483 #endif
    484 			psignal(e->proc, SIGKILL);
    485 		}
    486 
    487 		/* Now, kill ourselves */
    488 		psignal(p, SIGKILL);
    489 		mutex_exit(&proclist_mutex);
    490 
    491 		return 0;
    492 
    493 	}
    494 #endif /* LINUX_NPTL */
    495 
    496 	return sys_exit(l, (const void *)uap, retval);
    497 }
    498 #endif /* !__m68k__ */
    499 
    500 #ifdef LINUX_NPTL
    501 int
    502 linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
    503 {
    504 	/* {
    505 		syscallarg(int *) tidptr;
    506 	} */
    507 	struct linux_emuldata *led;
    508 
    509 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
    510 	led->clear_tid = SCARG(uap, tid);
    511 
    512 	led->s->flags |= LINUX_LES_USE_NPTL;
    513 
    514 	*retval = l->l_proc->p_pid;
    515 
    516 	return 0;
    517 }
    518 
    519 /* ARGUSED1 */
    520 int
    521 linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
    522 {
    523 	/* The Linux kernel does it exactly that way */
    524 	*retval = l->l_proc->p_pid;
    525 	return 0;
    526 }
    527 
    528 #ifdef LINUX_NPTL
    529 /* ARGUSED1 */
    530 int
    531 linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
    532 {
    533 	struct linux_emuldata *led = l->l_proc->p_emuldata;
    534 
    535 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    536 		/* The Linux kernel does it exactly that way */
    537 		*retval = led->s->group_pid;
    538 	} else {
    539 		*retval = l->l_proc->p_pid;
    540 	}
    541 
    542 	return 0;
    543 }
    544 
    545 /* ARGUSED1 */
    546 int
    547 linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
    548 {
    549 	struct proc *p = l->l_proc;
    550 	struct linux_emuldata *led = p->p_emuldata;
    551 	struct proc *glp;
    552 	struct proc *pp;
    553 
    554 	if (led->s->flags & LINUX_LES_USE_NPTL) {
    555 
    556 		/* Find the thread group leader's parent */
    557 		if ((glp = pfind(led->s->group_pid)) == NULL) {
    558 			/* Maybe panic... */
    559 			printf("linux_sys_getppid: missing group leader PID"
    560 			    " %d\n", led->s->group_pid);
    561 			return -1;
    562 		}
    563 		pp = glp->p_pptr;
    564 
    565 		/* If this is a Linux process too, return thread group PID */
    566 		if (pp->p_emul == p->p_emul) {
    567 			struct linux_emuldata *pled;
    568 
    569 			pled = pp->p_emuldata;
    570 			*retval = pled->s->group_pid;
    571 		} else {
    572 			*retval = pp->p_pid;
    573 		}
    574 
    575 	} else {
    576 		*retval = p->p_pptr->p_pid;
    577 	}
    578 
    579 	return 0;
    580 }
    581 #endif /* LINUX_NPTL */
    582 
    583 int
    584 linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
    585 {
    586 	/* {
    587 		syscallarg(pid_t) pid;
    588 		syscallarg(unsigned int) len;
    589 		syscallarg(unsigned long *) mask;
    590 	} */
    591 	int error;
    592 	int ret;
    593 	char *data;
    594 	int *retp;
    595 
    596 	if (SCARG(uap, mask) == NULL)
    597 		return EINVAL;
    598 
    599 	if (SCARG(uap, len) < sizeof(int))
    600 		return EINVAL;
    601 
    602 	if (pfind(SCARG(uap, pid)) == NULL)
    603 		return ESRCH;
    604 
    605 	/*
    606 	 * return the actual number of CPU, tag all of them as available
    607 	 * The result is a mask, the first CPU being in the least significant
    608 	 * bit.
    609 	 */
    610 	ret = (1 << ncpu) - 1;
    611 	data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
    612 	retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
    613 	*retp = ret;
    614 
    615 	if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
    616 		return error;
    617 
    618 	free(data, M_TEMP);
    619 
    620 	return 0;
    621 
    622 }
    623 
    624 int
    625 linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
    626 {
    627 	/* {
    628 		syscallarg(pid_t) pid;
    629 		syscallarg(unsigned int) len;
    630 		syscallarg(unsigned long *) mask;
    631 	} */
    632 
    633 	if (pfind(SCARG(uap, pid)) == NULL)
    634 		return ESRCH;
    635 
    636 	/* Let's ignore it */
    637 #ifdef DEBUG_LINUX
    638 	printf("linux_sys_sched_setaffinity\n");
    639 #endif
    640 	return 0;
    641 };
    642 #endif /* LINUX_NPTL */
    643