Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.24
      1 /*	$NetBSD: sys_sched.c,v 1.24 2008/06/15 23:29:09 rmind Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * System calls relating to the scheduler.
     31  *
     32  * TODO:
     33  *  - Handle pthread_setschedprio() as defined by POSIX;
     34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.24 2008/06/15 23:29:09 rmind Exp $");
     39 
     40 #include <sys/param.h>
     41 
     42 #include <sys/cpu.h>
     43 #include <sys/kauth.h>
     44 #include <sys/kmem.h>
     45 #include <sys/lwp.h>
     46 #include <sys/mutex.h>
     47 #include <sys/proc.h>
     48 #include <sys/pset.h>
     49 #include <sys/sched.h>
     50 #include <sys/syscallargs.h>
     51 #include <sys/sysctl.h>
     52 #include <sys/systm.h>
     53 #include <sys/types.h>
     54 #include <sys/unistd.h>
     55 
     56 /*
     57  * Convert user priority or the in-kernel priority or convert the current
     58  * priority to the appropriate range according to the policy change.
     59  */
     60 static pri_t
     61 convert_pri(lwp_t *l, int policy, pri_t pri)
     62 {
     63 	int delta = 0;
     64 
     65 	switch (policy) {
     66 	case SCHED_OTHER:
     67 		delta = PRI_USER;
     68 		break;
     69 	case SCHED_FIFO:
     70 	case SCHED_RR:
     71 		delta = PRI_USER_RT;
     72 		break;
     73 	default:
     74 		panic("upri_to_kpri");
     75 	}
     76 
     77 	if (pri != PRI_NONE) {
     78 		/* Convert user priority to the in-kernel */
     79 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     80 		return pri + delta;
     81 	}
     82 	if (l->l_class == policy)
     83 		return l->l_priority;
     84 
     85 	/* Change the current priority to the appropriate range */
     86 	if (l->l_class == SCHED_OTHER) {
     87 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     88 		return delta;
     89 	}
     90 	if (policy == SCHED_OTHER) {
     91 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     92 		return l->l_priority - delta;
     93 	}
     94 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
     95 	return l->l_class;
     96 }
     97 
     98 int
     99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    100     const struct sched_param *params)
    101 {
    102 	struct proc *p;
    103 	struct lwp *t;
    104 	pri_t pri;
    105 	u_int lcnt;
    106 	int error;
    107 
    108 	error = 0;
    109 
    110 	pri = params->sched_priority;
    111 
    112 	/* If no parameters specified, just return (this should not happen) */
    113 	if (pri == PRI_NONE && policy == SCHED_NONE)
    114 		return 0;
    115 
    116 	/* Validate scheduling class */
    117 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    118 		return EINVAL;
    119 
    120 	/* Validate priority */
    121 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    122 		return EINVAL;
    123 
    124 	if (pid != 0) {
    125 		/* Find the process */
    126 		mutex_enter(proc_lock);
    127 		p = p_find(pid, PFIND_LOCKED);
    128 		if (p == NULL) {
    129 			mutex_exit(proc_lock);
    130 			return ESRCH;
    131 		}
    132 		mutex_enter(p->p_lock);
    133 		mutex_exit(proc_lock);
    134 		/* Disallow modification of system processes */
    135 		if ((p->p_flag & PK_SYSTEM) != 0) {
    136 			mutex_exit(p->p_lock);
    137 			return EPERM;
    138 		}
    139 	} else {
    140 		/* Use the calling process */
    141 		p = curlwp->l_proc;
    142 		mutex_enter(p->p_lock);
    143 	}
    144 
    145 	/* Find the LWP(s) */
    146 	lcnt = 0;
    147 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    148 		pri_t kpri;
    149 		int lpolicy;
    150 
    151 		if (lid && lid != t->l_lid)
    152 			continue;
    153 		lcnt++;
    154 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
    155 		lwp_lock(t);
    156 
    157 		if (policy == SCHED_NONE)
    158 			lpolicy = t->l_class;
    159 		else
    160 			lpolicy = policy;
    161 
    162 		/*
    163 		 * Note that, priority may need to be changed to get into
    164 		 * the correct priority range of the new scheduling class.
    165 		 */
    166 		kpri = convert_pri(t, lpolicy, pri);
    167 
    168 		/* Check the permission */
    169 		error = kauth_authorize_process(kauth_cred_get(),
    170 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    171 		    KAUTH_ARG(kpri));
    172 		if (error) {
    173 			lwp_unlock(t);
    174 			break;
    175 		}
    176 
    177 		/* Set the scheduling class */
    178 		if (policy != SCHED_NONE)
    179 			t->l_class = policy;
    180 
    181 		/* Change the priority */
    182 		if (t->l_priority != kpri)
    183 			lwp_changepri(t, kpri);
    184 
    185 		lwp_unlock(t);
    186 	}
    187 	mutex_exit(p->p_lock);
    188 	return (lcnt == 0) ? ESRCH : error;
    189 }
    190 
    191 /*
    192  * Set scheduling parameters.
    193  */
    194 int
    195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    196     register_t *retval)
    197 {
    198 	/* {
    199 		syscallarg(pid_t) pid;
    200 		syscallarg(lwpid_t) lid;
    201 		syscallarg(int) policy;
    202 		syscallarg(const struct sched_param *) params;
    203 	} */
    204 	struct sched_param params;
    205 	int error;
    206 
    207 	/* Get the parameters from the user-space */
    208 	error = copyin(SCARG(uap, params), &params, sizeof(params));
    209 	if (error)
    210 		goto out;
    211 
    212 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
    213 	    SCARG(uap, policy), &params);
    214 
    215  out:
    216 	return (error);
    217 }
    218 
    219 int
    220 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    221     struct sched_param *params)
    222 {
    223 	struct sched_param lparams;
    224 	struct lwp *t;
    225 	int error, lpolicy;
    226 
    227 	/* Locks the LWP */
    228 	t = lwp_find2(pid, lid);
    229 	if (t == NULL)
    230 		return ESRCH;
    231 
    232 	/* Check the permission */
    233 	error = kauth_authorize_process(kauth_cred_get(),
    234 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    235 	if (error != 0) {
    236 		mutex_exit(t->l_proc->p_lock);
    237 		return error;
    238 	}
    239 
    240 	lwp_lock(t);
    241 	lparams.sched_priority = t->l_priority;
    242 	lpolicy = t->l_class;
    243 
    244 	switch (lpolicy) {
    245 	case SCHED_OTHER:
    246 		lparams.sched_priority -= PRI_USER;
    247 		break;
    248 	case SCHED_RR:
    249 	case SCHED_FIFO:
    250 		lparams.sched_priority -= PRI_USER_RT;
    251 		break;
    252 	}
    253 
    254 	if (policy != NULL)
    255 		*policy = lpolicy;
    256 
    257 	if (params != NULL)
    258 		*params = lparams;
    259 
    260 	lwp_unlock(t);
    261 	mutex_exit(t->l_proc->p_lock);
    262 	return error;
    263 }
    264 
    265 /*
    266  * Get scheduling parameters.
    267  */
    268 int
    269 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    270     register_t *retval)
    271 {
    272 	/* {
    273 		syscallarg(pid_t) pid;
    274 		syscallarg(lwpid_t) lid;
    275 		syscallarg(int *) policy;
    276 		syscallarg(struct sched_param *) params;
    277 	} */
    278 	struct sched_param params;
    279 	int error, policy;
    280 
    281 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
    282 	    &params);
    283 	if (error)
    284 		goto out;
    285 
    286 	error = copyout(&params, SCARG(uap, params), sizeof(params));
    287 	if (error == 0 && SCARG(uap, policy) != NULL)
    288 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    289 
    290  out:
    291 	return (error);
    292 }
    293 
    294 /* Allocate the CPU set, and get it from userspace */
    295 static int
    296 gencpuset(cpuset_t **dset, const cpuset_t *sset, size_t size)
    297 {
    298 	int error;
    299 
    300 	*dset = _cpuset_create();
    301 	if (size != _cpuset_size(*dset)) {
    302 		error = EINVAL;
    303 		goto out;
    304 	}
    305 
    306 	error = copyin(sset, *dset, size);
    307 	if (error)
    308 		goto out;
    309 
    310 	if (_cpuset_nused(*dset) != 1) {
    311 		error = EINVAL;
    312 		goto out;
    313 	}
    314 
    315 	return 0;
    316 out:
    317 	_cpuset_unuse(*dset, NULL);
    318 	return error;
    319 }
    320 
    321 /*
    322  * Set affinity.
    323  */
    324 int
    325 sys__sched_setaffinity(struct lwp *l,
    326     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    327 {
    328 	/* {
    329 		syscallarg(pid_t) pid;
    330 		syscallarg(lwpid_t) lid;
    331 		syscallarg(size_t) size;
    332 		syscallarg(const cpuset_t *) cpuset;
    333 	} */
    334 	cpuset_t *cpuset, *cpulst = NULL;
    335 	struct cpu_info *ci = NULL;
    336 	struct proc *p;
    337 	struct lwp *t;
    338 	CPU_INFO_ITERATOR cii;
    339 	lwpid_t lid;
    340 	u_int lcnt;
    341 	int error;
    342 
    343 	if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
    344 		return error;
    345 
    346 	/* Look for a CPU in the set */
    347 	for (CPU_INFO_FOREACH(cii, ci)) {
    348 		error = CPU_ISSET(cpu_index(ci), cpuset);
    349 		if (error) {
    350 			if (error == -1) {
    351 				error = E2BIG;
    352 				goto out;
    353 			}
    354 			break;
    355 		}
    356 	}
    357 
    358 	if (ci == NULL) {
    359 		/* Empty set */
    360 		_cpuset_unuse(cpuset, NULL);
    361 		cpuset = NULL;
    362 	}
    363 
    364 	if (SCARG(uap, pid) != 0) {
    365 		/* Find the process */
    366 		mutex_enter(proc_lock);
    367 		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
    368 		if (p == NULL) {
    369 			mutex_exit(proc_lock);
    370 			error = ESRCH;
    371 			goto out;
    372 		}
    373 		mutex_enter(p->p_lock);
    374 		mutex_exit(proc_lock);
    375 		/* Disallow modification of system processes. */
    376 		if ((p->p_flag & PK_SYSTEM) != 0) {
    377 			mutex_exit(p->p_lock);
    378 			error = EPERM;
    379 			goto out;
    380 		}
    381 	} else {
    382 		/* Use the calling process */
    383 		p = l->l_proc;
    384 		mutex_enter(p->p_lock);
    385 	}
    386 
    387 	/*
    388 	 * Check the permission.
    389 	 */
    390 	error = kauth_authorize_process(l->l_cred,
    391 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    392 	if (error != 0) {
    393 		mutex_exit(p->p_lock);
    394 		goto out;
    395 	}
    396 
    397 	/* Find the LWP(s) */
    398 	lcnt = 0;
    399 	lid = SCARG(uap, lid);
    400 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    401 		if (lid && lid != t->l_lid)
    402 			continue;
    403 		lwp_lock(t);
    404 		if (cpuset) {
    405 			/* Set the affinity flag and new CPU set */
    406 			t->l_flag |= LW_AFFINITY;
    407 			_cpuset_use(cpuset);
    408 			if (t->l_affinity != NULL)
    409 				_cpuset_unuse(t->l_affinity, &cpulst);
    410 			t->l_affinity = cpuset;
    411 			/* Migrate to another CPU, unlocks LWP */
    412 			lwp_migrate(t, ci);
    413 		} else {
    414 			/* Unset the affinity flag */
    415 			t->l_flag &= ~LW_AFFINITY;
    416 			if (t->l_affinity != NULL)
    417 				_cpuset_unuse(t->l_affinity, &cpulst);
    418 			t->l_affinity = NULL;
    419 			lwp_unlock(t);
    420 		}
    421 		lcnt++;
    422 	}
    423 	mutex_exit(p->p_lock);
    424 	if (lcnt == 0)
    425 		error = ESRCH;
    426 out:
    427 	if (cpuset != NULL)
    428 		_cpuset_unuse(cpuset, &cpulst);
    429 	_cpuset_destroy(cpulst);
    430 	return error;
    431 }
    432 
    433 /*
    434  * Get affinity.
    435  */
    436 int
    437 sys__sched_getaffinity(struct lwp *l,
    438     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    439 {
    440 	/* {
    441 		syscallarg(pid_t) pid;
    442 		syscallarg(lwpid_t) lid;
    443 		syscallarg(size_t) size;
    444 		syscallarg(cpuset_t *) cpuset;
    445 	} */
    446 	struct lwp *t;
    447 	cpuset_t *cpuset;
    448 	int error;
    449 
    450 	if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
    451 		return error;
    452 
    453 	/* Locks the LWP */
    454 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    455 	if (t == NULL) {
    456 		error = ESRCH;
    457 		goto out;
    458 	}
    459 	/* Check the permission */
    460 	if (kauth_authorize_process(l->l_cred,
    461 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    462 		mutex_exit(t->l_proc->p_lock);
    463 		error = EPERM;
    464 		goto out;
    465 	}
    466 	lwp_lock(t);
    467 	if (t->l_flag & LW_AFFINITY) {
    468 		KASSERT(t->l_affinity != NULL);
    469 		_cpuset_copy(cpuset, t->l_affinity);
    470 	} else
    471 		_cpuset_zero(cpuset);
    472 	lwp_unlock(t);
    473 	mutex_exit(t->l_proc->p_lock);
    474 
    475 	error = copyout(cpuset, SCARG(uap, cpuset), _cpuset_size(cpuset));
    476 out:
    477 	_cpuset_unuse(cpuset, NULL);
    478 	return error;
    479 }
    480 
    481 /*
    482  * Yield.
    483  */
    484 int
    485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    486 {
    487 
    488 	yield();
    489 	return 0;
    490 }
    491 
    492 /*
    493  * Sysctl nodes and initialization.
    494  */
    495 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    496 {
    497 	const struct sysctlnode *node = NULL;
    498 
    499 	sysctl_createv(clog, 0, NULL, NULL,
    500 		CTLFLAG_PERMANENT,
    501 		CTLTYPE_NODE, "kern", NULL,
    502 		NULL, 0, NULL, 0,
    503 		CTL_KERN, CTL_EOL);
    504 	sysctl_createv(clog, 0, NULL, NULL,
    505 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    506 		CTLTYPE_INT, "posix_sched",
    507 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    508 			     "Process Scheduling option to which the "
    509 			     "system attempts to conform"),
    510 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    511 		CTL_KERN, CTL_CREATE, CTL_EOL);
    512 	sysctl_createv(clog, 0, NULL, &node,
    513 		CTLFLAG_PERMANENT,
    514 		CTLTYPE_NODE, "sched",
    515 		SYSCTL_DESCR("Scheduler options"),
    516 		NULL, 0, NULL, 0,
    517 		CTL_KERN, CTL_CREATE, CTL_EOL);
    518 
    519 	if (node == NULL)
    520 		return;
    521 
    522 	sysctl_createv(clog, 0, &node, NULL,
    523 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    524 		CTLTYPE_INT, "pri_min",
    525 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    526 		NULL, SCHED_PRI_MIN, NULL, 0,
    527 		CTL_CREATE, CTL_EOL);
    528 	sysctl_createv(clog, 0, &node, NULL,
    529 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    530 		CTLTYPE_INT, "pri_max",
    531 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
    532 		NULL, SCHED_PRI_MAX, NULL, 0,
    533 		CTL_CREATE, CTL_EOL);
    534 }
    535