Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.30
      1 /*	$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * System calls relating to the scheduler.
     31  *
     32  * TODO:
     33  *  - Handle pthread_setschedprio() as defined by POSIX;
     34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $");
     39 
     40 #include <sys/param.h>
     41 
     42 #include <sys/cpu.h>
     43 #include <sys/kauth.h>
     44 #include <sys/kmem.h>
     45 #include <sys/lwp.h>
     46 #include <sys/mutex.h>
     47 #include <sys/proc.h>
     48 #include <sys/pset.h>
     49 #include <sys/sa.h>
     50 #include <sys/savar.h>
     51 #include <sys/sched.h>
     52 #include <sys/syscallargs.h>
     53 #include <sys/sysctl.h>
     54 #include <sys/systm.h>
     55 #include <sys/types.h>
     56 #include <sys/unistd.h>
     57 
     58 #include "opt_sa.h"
     59 
     60 /*
     61  * Convert user priority or the in-kernel priority or convert the current
     62  * priority to the appropriate range according to the policy change.
     63  */
     64 static pri_t
     65 convert_pri(lwp_t *l, int policy, pri_t pri)
     66 {
     67 
     68 	/* Convert user priority to the in-kernel */
     69 	if (pri != PRI_NONE) {
     70 		/* Only for real-time threads */
     71 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     72 		KASSERT(policy != SCHED_OTHER);
     73 		return PRI_USER_RT + pri;
     74 	}
     75 
     76 	/* Neither policy, nor priority change */
     77 	if (l->l_class == policy)
     78 		return l->l_priority;
     79 
     80 	/* Time-sharing -> real-time */
     81 	if (l->l_class == SCHED_OTHER) {
     82 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     83 		return PRI_USER_RT;
     84 	}
     85 
     86 	/* Real-time -> time-sharing */
     87 	if (policy == SCHED_OTHER) {
     88 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     89 		return l->l_priority - PRI_USER_RT;
     90 	}
     91 
     92 	/* Real-time -> real-time */
     93 	return l->l_priority;
     94 }
     95 
     96 int
     97 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
     98     const struct sched_param *params)
     99 {
    100 	struct proc *p;
    101 	struct lwp *t;
    102 	pri_t pri;
    103 	u_int lcnt;
    104 	int error;
    105 
    106 	error = 0;
    107 
    108 	pri = params->sched_priority;
    109 
    110 	/* If no parameters specified, just return (this should not happen) */
    111 	if (pri == PRI_NONE && policy == SCHED_NONE)
    112 		return 0;
    113 
    114 	/* Validate scheduling class */
    115 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    116 		return EINVAL;
    117 
    118 	/* Validate priority */
    119 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    120 		return EINVAL;
    121 
    122 	if (pid != 0) {
    123 		/* Find the process */
    124 		mutex_enter(proc_lock);
    125 		p = p_find(pid, PFIND_LOCKED);
    126 		if (p == NULL) {
    127 			mutex_exit(proc_lock);
    128 			return ESRCH;
    129 		}
    130 		mutex_enter(p->p_lock);
    131 		mutex_exit(proc_lock);
    132 		/* Disallow modification of system processes */
    133 		if ((p->p_flag & PK_SYSTEM) != 0) {
    134 			mutex_exit(p->p_lock);
    135 			return EPERM;
    136 		}
    137 	} else {
    138 		/* Use the calling process */
    139 		p = curlwp->l_proc;
    140 		mutex_enter(p->p_lock);
    141 	}
    142 
    143 	/* Find the LWP(s) */
    144 	lcnt = 0;
    145 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    146 		pri_t kpri;
    147 		int lpolicy;
    148 
    149 		if (lid && lid != t->l_lid)
    150 			continue;
    151 
    152 		lcnt++;
    153 		lwp_lock(t);
    154 		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
    155 
    156 		/* Disallow setting of priority for SCHED_OTHER threads */
    157 		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
    158 			lwp_unlock(t);
    159 			error = EINVAL;
    160 			break;
    161 		}
    162 
    163 		/* Convert priority, if needed */
    164 		kpri = convert_pri(t, lpolicy, pri);
    165 
    166 		/* Check the permission */
    167 		error = kauth_authorize_process(kauth_cred_get(),
    168 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    169 		    KAUTH_ARG(kpri));
    170 		if (error) {
    171 			lwp_unlock(t);
    172 			break;
    173 		}
    174 
    175 		/* Set the scheduling class, change the priority */
    176 		t->l_class = lpolicy;
    177 		lwp_changepri(t, kpri);
    178 		lwp_unlock(t);
    179 	}
    180 	mutex_exit(p->p_lock);
    181 	return (lcnt == 0) ? ESRCH : error;
    182 }
    183 
    184 /*
    185  * Set scheduling parameters.
    186  */
    187 int
    188 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    189     register_t *retval)
    190 {
    191 	/* {
    192 		syscallarg(pid_t) pid;
    193 		syscallarg(lwpid_t) lid;
    194 		syscallarg(int) policy;
    195 		syscallarg(const struct sched_param *) params;
    196 	} */
    197 	struct sched_param params;
    198 	int error;
    199 
    200 	/* Get the parameters from the user-space */
    201 	error = copyin(SCARG(uap, params), &params, sizeof(params));
    202 	if (error)
    203 		goto out;
    204 
    205 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
    206 	    SCARG(uap, policy), &params);
    207 
    208  out:
    209 	return (error);
    210 }
    211 
    212 int
    213 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    214     struct sched_param *params)
    215 {
    216 	struct sched_param lparams;
    217 	struct lwp *t;
    218 	int error, lpolicy;
    219 
    220 	/* Locks the LWP */
    221 	t = lwp_find2(pid, lid);
    222 	if (t == NULL)
    223 		return ESRCH;
    224 
    225 	/* Check the permission */
    226 	error = kauth_authorize_process(kauth_cred_get(),
    227 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    228 	if (error != 0) {
    229 		mutex_exit(t->l_proc->p_lock);
    230 		return error;
    231 	}
    232 
    233 	lwp_lock(t);
    234 	lparams.sched_priority = t->l_priority;
    235 	lpolicy = t->l_class;
    236 
    237 	switch (lpolicy) {
    238 	case SCHED_OTHER:
    239 		lparams.sched_priority -= PRI_USER;
    240 		break;
    241 	case SCHED_RR:
    242 	case SCHED_FIFO:
    243 		lparams.sched_priority -= PRI_USER_RT;
    244 		break;
    245 	}
    246 
    247 	if (policy != NULL)
    248 		*policy = lpolicy;
    249 
    250 	if (params != NULL)
    251 		*params = lparams;
    252 
    253 	lwp_unlock(t);
    254 	mutex_exit(t->l_proc->p_lock);
    255 	return error;
    256 }
    257 
    258 /*
    259  * Get scheduling parameters.
    260  */
    261 int
    262 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    263     register_t *retval)
    264 {
    265 	/* {
    266 		syscallarg(pid_t) pid;
    267 		syscallarg(lwpid_t) lid;
    268 		syscallarg(int *) policy;
    269 		syscallarg(struct sched_param *) params;
    270 	} */
    271 	struct sched_param params;
    272 	int error, policy;
    273 
    274 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
    275 	    &params);
    276 	if (error)
    277 		goto out;
    278 
    279 	error = copyout(&params, SCARG(uap, params), sizeof(params));
    280 	if (error == 0 && SCARG(uap, policy) != NULL)
    281 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    282 
    283  out:
    284 	return (error);
    285 }
    286 
    287 /* Allocate the CPU set, and get it from userspace */
    288 static int
    289 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
    290 {
    291 	int error;
    292 
    293 	*dset = kcpuset_create();
    294 	error = kcpuset_copyin(sset, *dset, size);
    295 	if (error != 0)
    296 		kcpuset_unuse(*dset, NULL);
    297 	return error;
    298 }
    299 
    300 /*
    301  * Set affinity.
    302  */
    303 int
    304 sys__sched_setaffinity(struct lwp *l,
    305     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    306 {
    307 	/* {
    308 		syscallarg(pid_t) pid;
    309 		syscallarg(lwpid_t) lid;
    310 		syscallarg(size_t) size;
    311 		syscallarg(const cpuset_t *) cpuset;
    312 	} */
    313 	kcpuset_t *cpuset, *cpulst = NULL;
    314 	struct cpu_info *ci = NULL;
    315 	struct proc *p;
    316 	struct lwp *t;
    317 	CPU_INFO_ITERATOR cii;
    318 	lwpid_t lid;
    319 	u_int lcnt;
    320 	int error;
    321 
    322 	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
    323 		return error;
    324 
    325 	/* Look for a CPU in the set */
    326 	for (CPU_INFO_FOREACH(cii, ci)) {
    327 		error = kcpuset_isset(cpu_index(ci), cpuset);
    328 		if (error) {
    329 			if (error == -1) {
    330 				error = E2BIG;
    331 				goto out;
    332 			}
    333 			break;
    334 		}
    335 	}
    336 	if (ci == NULL) {
    337 		/* Empty set */
    338 		kcpuset_unuse(cpuset, NULL);
    339 		cpuset = NULL;
    340 	}
    341 
    342 	if (SCARG(uap, pid) != 0) {
    343 		/* Find the process */
    344 		mutex_enter(proc_lock);
    345 		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
    346 		if (p == NULL) {
    347 			mutex_exit(proc_lock);
    348 			error = ESRCH;
    349 			goto out;
    350 		}
    351 		mutex_enter(p->p_lock);
    352 		mutex_exit(proc_lock);
    353 		/* Disallow modification of system processes. */
    354 		if ((p->p_flag & PK_SYSTEM) != 0) {
    355 			mutex_exit(p->p_lock);
    356 			error = EPERM;
    357 			goto out;
    358 		}
    359 	} else {
    360 		/* Use the calling process */
    361 		p = l->l_proc;
    362 		mutex_enter(p->p_lock);
    363 	}
    364 
    365 	/*
    366 	 * Check the permission.
    367 	 */
    368 	error = kauth_authorize_process(l->l_cred,
    369 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    370 	if (error != 0) {
    371 		mutex_exit(p->p_lock);
    372 		goto out;
    373 	}
    374 
    375 #ifdef KERN_SA
    376 	/*
    377 	 * Don't permit changing the affinity of an SA process. The only
    378 	 * thing that would make sense wold be to set the affinity of
    379 	 * a VP and all threads running on it. But we don't support that
    380 	 * now, so just don't permit it.
    381 	 *
    382 	 * Test is here so that caller gets auth errors before SA
    383 	 * errors.
    384 	 */
    385 	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
    386 		mutex_exit(p->p_lock);
    387 		error = EINVAL;
    388 		goto out;
    389 	}
    390 #endif
    391 
    392 	/* Find the LWP(s) */
    393 	lcnt = 0;
    394 	lid = SCARG(uap, lid);
    395 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    396 		if (lid && lid != t->l_lid)
    397 			continue;
    398 		lwp_lock(t);
    399 		/* It is not allowed to set the affinity for zombie LWPs */
    400 		if (t->l_stat == LSZOMB) {
    401 			lwp_unlock(t);
    402 			continue;
    403 		}
    404 		if (cpuset) {
    405 			/* Set the affinity flag and new CPU set */
    406 			t->l_flag |= LW_AFFINITY;
    407 			kcpuset_use(cpuset);
    408 			if (t->l_affinity != NULL)
    409 				kcpuset_unuse(t->l_affinity, &cpulst);
    410 			t->l_affinity = cpuset;
    411 			/* Migrate to another CPU, unlocks LWP */
    412 			lwp_migrate(t, ci);
    413 		} else {
    414 			/* Unset the affinity flag */
    415 			t->l_flag &= ~LW_AFFINITY;
    416 			if (t->l_affinity != NULL)
    417 				kcpuset_unuse(t->l_affinity, &cpulst);
    418 			t->l_affinity = NULL;
    419 			lwp_unlock(t);
    420 		}
    421 		lcnt++;
    422 	}
    423 	mutex_exit(p->p_lock);
    424 	if (lcnt == 0)
    425 		error = ESRCH;
    426 out:
    427 	if (cpuset != NULL)
    428 		kcpuset_unuse(cpuset, &cpulst);
    429 	kcpuset_destroy(cpulst);
    430 	return error;
    431 }
    432 
    433 /*
    434  * Get affinity.
    435  */
    436 int
    437 sys__sched_getaffinity(struct lwp *l,
    438     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    439 {
    440 	/* {
    441 		syscallarg(pid_t) pid;
    442 		syscallarg(lwpid_t) lid;
    443 		syscallarg(size_t) size;
    444 		syscallarg(cpuset_t *) cpuset;
    445 	} */
    446 	struct lwp *t;
    447 	kcpuset_t *cpuset;
    448 	int error;
    449 
    450 	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
    451 		return error;
    452 
    453 	/* Locks the LWP */
    454 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    455 	if (t == NULL) {
    456 		error = ESRCH;
    457 		goto out;
    458 	}
    459 	/* Check the permission */
    460 	if (kauth_authorize_process(l->l_cred,
    461 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    462 		mutex_exit(t->l_proc->p_lock);
    463 		error = EPERM;
    464 		goto out;
    465 	}
    466 	lwp_lock(t);
    467 	if (t->l_flag & LW_AFFINITY) {
    468 		KASSERT(t->l_affinity != NULL);
    469 		kcpuset_copy(cpuset, t->l_affinity);
    470 	} else
    471 		kcpuset_zero(cpuset);
    472 	lwp_unlock(t);
    473 	mutex_exit(t->l_proc->p_lock);
    474 
    475 	error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
    476 out:
    477 	kcpuset_unuse(cpuset, NULL);
    478 	return error;
    479 }
    480 
    481 /*
    482  * Yield.
    483  */
    484 int
    485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    486 {
    487 
    488 	yield();
    489 #ifdef KERN_SA
    490 	if (l->l_flag & LW_SA) {
    491 		sa_preempt(l);
    492 	}
    493 #endif
    494 	return 0;
    495 }
    496 
    497 /*
    498  * Sysctl nodes and initialization.
    499  */
    500 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    501 {
    502 	const struct sysctlnode *node = NULL;
    503 
    504 	sysctl_createv(clog, 0, NULL, NULL,
    505 		CTLFLAG_PERMANENT,
    506 		CTLTYPE_NODE, "kern", NULL,
    507 		NULL, 0, NULL, 0,
    508 		CTL_KERN, CTL_EOL);
    509 	sysctl_createv(clog, 0, NULL, NULL,
    510 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    511 		CTLTYPE_INT, "posix_sched",
    512 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    513 			     "Process Scheduling option to which the "
    514 			     "system attempts to conform"),
    515 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    516 		CTL_KERN, CTL_CREATE, CTL_EOL);
    517 	sysctl_createv(clog, 0, NULL, &node,
    518 		CTLFLAG_PERMANENT,
    519 		CTLTYPE_NODE, "sched",
    520 		SYSCTL_DESCR("Scheduler options"),
    521 		NULL, 0, NULL, 0,
    522 		CTL_KERN, CTL_CREATE, CTL_EOL);
    523 
    524 	if (node == NULL)
    525 		return;
    526 
    527 	sysctl_createv(clog, 0, &node, NULL,
    528 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    529 		CTLTYPE_INT, "pri_min",
    530 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    531 		NULL, SCHED_PRI_MIN, NULL, 0,
    532 		CTL_CREATE, CTL_EOL);
    533 	sysctl_createv(clog, 0, &node, NULL,
    534 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    535 		CTLTYPE_INT, "pri_max",
    536 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
    537 		NULL, SCHED_PRI_MAX, NULL, 0,
    538 		CTL_CREATE, CTL_EOL);
    539 }
    540