Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.28
      1 /*	$NetBSD: sys_sched.c,v 1.28 2008/10/15 06:51:20 wrstuden Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * System calls relating to the scheduler.
     31  *
     32  * TODO:
     33  *  - Handle pthread_setschedprio() as defined by POSIX;
     34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.28 2008/10/15 06:51:20 wrstuden Exp $");
     39 
     40 #include <sys/param.h>
     41 
     42 #include <sys/cpu.h>
     43 #include <sys/kauth.h>
     44 #include <sys/kmem.h>
     45 #include <sys/lwp.h>
     46 #include <sys/mutex.h>
     47 #include <sys/proc.h>
     48 #include <sys/pset.h>
     49 #include <sys/sa.h>
     50 #include <sys/savar.h>
     51 #include <sys/sched.h>
     52 #include <sys/syscallargs.h>
     53 #include <sys/sysctl.h>
     54 #include <sys/systm.h>
     55 #include <sys/types.h>
     56 #include <sys/unistd.h>
     57 
     58 #include "opt_sa.h"
     59 
     60 /*
     61  * Convert user priority or the in-kernel priority or convert the current
     62  * priority to the appropriate range according to the policy change.
     63  */
     64 static pri_t
     65 convert_pri(lwp_t *l, int policy, pri_t pri)
     66 {
     67 	int delta = 0;
     68 
     69 	switch (policy) {
     70 	case SCHED_OTHER:
     71 		delta = PRI_USER;
     72 		break;
     73 	case SCHED_FIFO:
     74 	case SCHED_RR:
     75 		delta = PRI_USER_RT;
     76 		break;
     77 	default:
     78 		panic("upri_to_kpri");
     79 	}
     80 
     81 	if (pri != PRI_NONE) {
     82 		/* Convert user priority to the in-kernel */
     83 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     84 		return pri + delta;
     85 	}
     86 	if (l->l_class == policy)
     87 		return l->l_priority;
     88 
     89 	/* Change the current priority to the appropriate range */
     90 	if (l->l_class == SCHED_OTHER) {
     91 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     92 		return delta;
     93 	}
     94 	if (policy == SCHED_OTHER) {
     95 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     96 		return l->l_priority - delta;
     97 	}
     98 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
     99 	return l->l_class;
    100 }
    101 
    102 int
    103 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    104     const struct sched_param *params)
    105 {
    106 	struct proc *p;
    107 	struct lwp *t;
    108 	pri_t pri;
    109 	u_int lcnt;
    110 	int error;
    111 
    112 	error = 0;
    113 
    114 	pri = params->sched_priority;
    115 
    116 	/* If no parameters specified, just return (this should not happen) */
    117 	if (pri == PRI_NONE && policy == SCHED_NONE)
    118 		return 0;
    119 
    120 	/* Validate scheduling class */
    121 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    122 		return EINVAL;
    123 
    124 	/* Validate priority */
    125 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    126 		return EINVAL;
    127 
    128 	if (pid != 0) {
    129 		/* Find the process */
    130 		mutex_enter(proc_lock);
    131 		p = p_find(pid, PFIND_LOCKED);
    132 		if (p == NULL) {
    133 			mutex_exit(proc_lock);
    134 			return ESRCH;
    135 		}
    136 		mutex_enter(p->p_lock);
    137 		mutex_exit(proc_lock);
    138 		/* Disallow modification of system processes */
    139 		if ((p->p_flag & PK_SYSTEM) != 0) {
    140 			mutex_exit(p->p_lock);
    141 			return EPERM;
    142 		}
    143 	} else {
    144 		/* Use the calling process */
    145 		p = curlwp->l_proc;
    146 		mutex_enter(p->p_lock);
    147 	}
    148 
    149 	/* Find the LWP(s) */
    150 	lcnt = 0;
    151 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    152 		pri_t kpri;
    153 		int lpolicy;
    154 
    155 		if (lid && lid != t->l_lid)
    156 			continue;
    157 		lcnt++;
    158 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
    159 		lwp_lock(t);
    160 
    161 		if (policy == SCHED_NONE)
    162 			lpolicy = t->l_class;
    163 		else
    164 			lpolicy = policy;
    165 
    166 		/*
    167 		 * Note that, priority may need to be changed to get into
    168 		 * the correct priority range of the new scheduling class.
    169 		 */
    170 		kpri = convert_pri(t, lpolicy, pri);
    171 
    172 		/* Check the permission */
    173 		error = kauth_authorize_process(kauth_cred_get(),
    174 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    175 		    KAUTH_ARG(kpri));
    176 		if (error) {
    177 			lwp_unlock(t);
    178 			break;
    179 		}
    180 
    181 		/* Set the scheduling class */
    182 		if (policy != SCHED_NONE)
    183 			t->l_class = policy;
    184 
    185 		/* Change the priority */
    186 		if (t->l_priority != kpri)
    187 			lwp_changepri(t, kpri);
    188 
    189 		lwp_unlock(t);
    190 	}
    191 	mutex_exit(p->p_lock);
    192 	return (lcnt == 0) ? ESRCH : error;
    193 }
    194 
    195 /*
    196  * Set scheduling parameters.
    197  */
    198 int
    199 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    200     register_t *retval)
    201 {
    202 	/* {
    203 		syscallarg(pid_t) pid;
    204 		syscallarg(lwpid_t) lid;
    205 		syscallarg(int) policy;
    206 		syscallarg(const struct sched_param *) params;
    207 	} */
    208 	struct sched_param params;
    209 	int error;
    210 
    211 	/* Get the parameters from the user-space */
    212 	error = copyin(SCARG(uap, params), &params, sizeof(params));
    213 	if (error)
    214 		goto out;
    215 
    216 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
    217 	    SCARG(uap, policy), &params);
    218 
    219  out:
    220 	return (error);
    221 }
    222 
    223 int
    224 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    225     struct sched_param *params)
    226 {
    227 	struct sched_param lparams;
    228 	struct lwp *t;
    229 	int error, lpolicy;
    230 
    231 	/* Locks the LWP */
    232 	t = lwp_find2(pid, lid);
    233 	if (t == NULL)
    234 		return ESRCH;
    235 
    236 	/* Check the permission */
    237 	error = kauth_authorize_process(kauth_cred_get(),
    238 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    239 	if (error != 0) {
    240 		mutex_exit(t->l_proc->p_lock);
    241 		return error;
    242 	}
    243 
    244 	lwp_lock(t);
    245 	lparams.sched_priority = t->l_priority;
    246 	lpolicy = t->l_class;
    247 
    248 	switch (lpolicy) {
    249 	case SCHED_OTHER:
    250 		lparams.sched_priority -= PRI_USER;
    251 		break;
    252 	case SCHED_RR:
    253 	case SCHED_FIFO:
    254 		lparams.sched_priority -= PRI_USER_RT;
    255 		break;
    256 	}
    257 
    258 	if (policy != NULL)
    259 		*policy = lpolicy;
    260 
    261 	if (params != NULL)
    262 		*params = lparams;
    263 
    264 	lwp_unlock(t);
    265 	mutex_exit(t->l_proc->p_lock);
    266 	return error;
    267 }
    268 
    269 /*
    270  * Get scheduling parameters.
    271  */
    272 int
    273 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    274     register_t *retval)
    275 {
    276 	/* {
    277 		syscallarg(pid_t) pid;
    278 		syscallarg(lwpid_t) lid;
    279 		syscallarg(int *) policy;
    280 		syscallarg(struct sched_param *) params;
    281 	} */
    282 	struct sched_param params;
    283 	int error, policy;
    284 
    285 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
    286 	    &params);
    287 	if (error)
    288 		goto out;
    289 
    290 	error = copyout(&params, SCARG(uap, params), sizeof(params));
    291 	if (error == 0 && SCARG(uap, policy) != NULL)
    292 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    293 
    294  out:
    295 	return (error);
    296 }
    297 
    298 /* Allocate the CPU set, and get it from userspace */
    299 static int
    300 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
    301 {
    302 	int error;
    303 
    304 	*dset = kcpuset_create();
    305 	error = kcpuset_copyin(sset, *dset, size);
    306 	if (error != 0)
    307 		kcpuset_unuse(*dset, NULL);
    308 	return error;
    309 }
    310 
/*
 * Set affinity.
 *
 * System call handler: copies a CPU set in from user space and applies
 * it to one LWP (lid != 0) or to every LWP (lid == 0) of the process
 * given by pid (0 selects the calling process).  A set containing none
 * of the CPUs visited by the scan below is treated as a request to
 * clear the affinity.
 *
 * Returns 0 on success, the error from copying the set in, E2BIG when
 * kcpuset_isset() reports -1, ESRCH if the process or a matching
 * non-zombie LWP was not found, EPERM for a system process, the error
 * from the authorization check, or (with KERN_SA) EINVAL for a process
 * rejected by the SA check below.
 */
int
sys__sched_setaffinity(struct lwp *l,
    const struct sys__sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(const cpuset_t *) cpuset;
	} */
	/*
	 * cpulst is handed to kcpuset_unuse() and finally to
	 * kcpuset_destroy(); presumably it collects sets whose last
	 * reference was dropped so they are freed after the locks are
	 * released -- confirm against the kcpuset implementation.
	 */
	kcpuset_t *cpuset, *cpulst = NULL;
	struct cpu_info *ci = NULL;
	struct proc *p;
	struct lwp *t;
	CPU_INFO_ITERATOR cii;
	lwpid_t lid;
	u_int lcnt;
	int error;

	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
		return error;

	/*
	 * Look for a CPU in the set.  The scan stops at the first CPU for
	 * which kcpuset_isset() returns non-zero, leaving ci pointing at
	 * it; a return of -1 is reported to the caller as E2BIG.
	 */
	for (CPU_INFO_FOREACH(cii, ci)) {
		error = kcpuset_isset(cpu_index(ci), cpuset);
		if (error) {
			if (error == -1) {
				error = E2BIG;
				goto out;
			}
			break;
		}
	}

	if (ci == NULL) {
		/* Empty set: drop it; a NULL cpuset means "clear affinity" */
		kcpuset_unuse(cpuset, NULL);
		cpuset = NULL;
	}

	if (SCARG(uap, pid) != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
		if (p == NULL) {
			mutex_exit(proc_lock);
			error = ESRCH;
			goto out;
		}
		/* Take p_lock before letting go of proc_lock */
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes. */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			error = EPERM;
			goto out;
		}
	} else {
		/* Use the calling process */
		p = l->l_proc;
		mutex_enter(p->p_lock);
	}

	/*
	 * Check the permission.  This also overwrites whatever value the
	 * CPU scan above left in `error'.
	 */
	error = kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(p->p_lock);
		goto out;
	}

#ifdef KERN_SA
	/*
	 * Don't permit changing the affinity of an SA process. The only
	 * thing that would make sense would be to set the affinity of
	 * a VP and all threads running on it. But we don't support that
	 * now, so just don't permit it.
	 *
	 * Test is here so that caller gets auth errors before SA
	 * errors.
	 */
	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
		mutex_exit(p->p_lock);
		error = EINVAL;
		goto out;
	}
#endif

	/* Find the LWP(s); lcnt counts those actually modified */
	lcnt = 0;
	lid = SCARG(uap, lid);
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		if (lid && lid != t->l_lid)
			continue;
		lwp_lock(t);
		/* It is not allowed to set the affinity for zombie LWPs */
		if (t->l_stat == LSZOMB) {
			lwp_unlock(t);
			continue;
		}
		if (cpuset) {
			/* Set the affinity flag and new CPU set */
			t->l_flag |= LW_AFFINITY;
			/* One reference per LWP that points at the new set */
			kcpuset_use(cpuset);
			if (t->l_affinity != NULL)
				kcpuset_unuse(t->l_affinity, &cpulst);
			t->l_affinity = cpuset;
			/* Migrate to another CPU, unlocks LWP */
			lwp_migrate(t, ci);
		} else {
			/* Unset the affinity flag */
			t->l_flag &= ~LW_AFFINITY;
			if (t->l_affinity != NULL)
				kcpuset_unuse(t->l_affinity, &cpulst);
			t->l_affinity = NULL;
			lwp_unlock(t);
		}
		lcnt++;
	}
	mutex_exit(p->p_lock);
	/* No (non-zombie) LWP matched the request */
	if (lcnt == 0)
		error = ESRCH;
out:
	/* Drop the reference this function holds on the copied-in set */
	if (cpuset != NULL)
		kcpuset_unuse(cpuset, &cpulst);
	kcpuset_destroy(cpulst);
	return error;
}
    444 
    445 /*
    446  * Get affinity.
    447  */
    448 int
    449 sys__sched_getaffinity(struct lwp *l,
    450     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    451 {
    452 	/* {
    453 		syscallarg(pid_t) pid;
    454 		syscallarg(lwpid_t) lid;
    455 		syscallarg(size_t) size;
    456 		syscallarg(cpuset_t *) cpuset;
    457 	} */
    458 	struct lwp *t;
    459 	kcpuset_t *cpuset;
    460 	int error;
    461 
    462 	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
    463 		return error;
    464 
    465 	/* Locks the LWP */
    466 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    467 	if (t == NULL) {
    468 		error = ESRCH;
    469 		goto out;
    470 	}
    471 	/* Check the permission */
    472 	if (kauth_authorize_process(l->l_cred,
    473 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    474 		mutex_exit(t->l_proc->p_lock);
    475 		error = EPERM;
    476 		goto out;
    477 	}
    478 	lwp_lock(t);
    479 	if (t->l_flag & LW_AFFINITY) {
    480 		KASSERT(t->l_affinity != NULL);
    481 		kcpuset_copy(cpuset, t->l_affinity);
    482 	} else
    483 		kcpuset_zero(cpuset);
    484 	lwp_unlock(t);
    485 	mutex_exit(t->l_proc->p_lock);
    486 
    487 	error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
    488 out:
    489 	kcpuset_unuse(cpuset, NULL);
    490 	return error;
    491 }
    492 
    493 /*
    494  * Yield.
    495  */
    496 int
    497 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    498 {
    499 
    500 	yield();
    501 #ifdef KERN_SA
    502 	if (l->l_flag & LW_SA) {
    503 		sa_preempt(l);
    504 	}
    505 #endif
    506 	return 0;
    507 }
    508 
    509 /*
    510  * Sysctl nodes and initialization.
    511  */
    512 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    513 {
    514 	const struct sysctlnode *node = NULL;
    515 
    516 	sysctl_createv(clog, 0, NULL, NULL,
    517 		CTLFLAG_PERMANENT,
    518 		CTLTYPE_NODE, "kern", NULL,
    519 		NULL, 0, NULL, 0,
    520 		CTL_KERN, CTL_EOL);
    521 	sysctl_createv(clog, 0, NULL, NULL,
    522 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    523 		CTLTYPE_INT, "posix_sched",
    524 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    525 			     "Process Scheduling option to which the "
    526 			     "system attempts to conform"),
    527 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    528 		CTL_KERN, CTL_CREATE, CTL_EOL);
    529 	sysctl_createv(clog, 0, NULL, &node,
    530 		CTLFLAG_PERMANENT,
    531 		CTLTYPE_NODE, "sched",
    532 		SYSCTL_DESCR("Scheduler options"),
    533 		NULL, 0, NULL, 0,
    534 		CTL_KERN, CTL_CREATE, CTL_EOL);
    535 
    536 	if (node == NULL)
    537 		return;
    538 
    539 	sysctl_createv(clog, 0, &node, NULL,
    540 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    541 		CTLTYPE_INT, "pri_min",
    542 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    543 		NULL, SCHED_PRI_MIN, NULL, 0,
    544 		CTL_CREATE, CTL_EOL);
    545 	sysctl_createv(clog, 0, &node, NULL,
    546 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    547 		CTLTYPE_INT, "pri_max",
    548 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
    549 		NULL, SCHED_PRI_MAX, NULL, 0,
    550 		CTL_CREATE, CTL_EOL);
    551 }
    552