Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.17
      1 /*	$NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * System calls relating to the scheduler.
     31  *
     32  * TODO:
     33  *  - Handle pthread_setschedprio() as defined by POSIX;
     34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $");
     39 
     40 #include <sys/param.h>
     41 
     42 #include <sys/cpu.h>
     43 #include <sys/kauth.h>
     44 #include <sys/kmem.h>
     45 #include <sys/lwp.h>
     46 #include <sys/mutex.h>
     47 #include <sys/proc.h>
     48 #include <sys/pset.h>
     49 #include <sys/sched.h>
     50 #include <sys/syscallargs.h>
     51 #include <sys/sysctl.h>
     52 #include <sys/systm.h>
     53 #include <sys/types.h>
     54 #include <sys/unistd.h>
     55 
     56 /*
     57  * Convert user priority or the in-kernel priority or convert the current
     58  * priority to the appropriate range according to the policy change.
     59  */
     60 static pri_t
     61 convert_pri(lwp_t *l, int policy, pri_t pri)
     62 {
     63 	int delta = 0;
     64 
     65 	switch (policy) {
     66 	case SCHED_OTHER:
     67 		delta = PRI_USER;
     68 		break;
     69 	case SCHED_FIFO:
     70 	case SCHED_RR:
     71 		delta = PRI_USER_RT;
     72 		break;
     73 	default:
     74 		panic("upri_to_kpri");
     75 	}
     76 
     77 	if (pri != PRI_NONE) {
     78 		/* Convert user priority to the in-kernel */
     79 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     80 		return pri + delta;
     81 	}
     82 	if (l->l_class == policy)
     83 		return l->l_priority;
     84 
     85 	/* Change the current priority to the appropriate range */
     86 	if (l->l_class == SCHED_OTHER) {
     87 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     88 		return l->l_priority + delta;
     89 	}
     90 	if (policy == SCHED_OTHER) {
     91 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     92 		return l->l_priority - delta;
     93 	}
     94 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
     95 	return l->l_class;
     96 }
     97 
     98 /*
     99  * Set scheduling parameters.
    100  */
    101 int
    102 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    103     register_t *retval)
    104 {
    105 	/* {
    106 		syscallarg(pid_t) pid;
    107 		syscallarg(lwpid_t) lid;
    108 		syscallarg(int) policy;
    109 		syscallarg(const struct sched_param *) params;
    110 	} */
    111 	struct sched_param param;
    112 	struct proc *p;
    113 	struct lwp *t;
    114 	lwpid_t lid;
    115 	u_int lcnt;
    116 	int policy;
    117 	pri_t pri;
    118 	int error;
    119 
    120 	/* Get the parameters from the user-space */
    121 	error = copyin(SCARG(uap, params), &param, sizeof(param));
    122 	if (error) {
    123 		return error;
    124 	}
    125 	pri = param.sched_priority;
    126 	policy = SCARG(uap, policy);
    127 
    128 	/* If no parameters specified, just return (this should not happen) */
    129 	if (pri == PRI_NONE && policy == SCHED_NONE)
    130 		return 0;
    131 
    132 	/* Validate scheduling class */
    133 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    134 		return EINVAL;
    135 
    136 	/* Validate priority */
    137 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    138 		return EINVAL;
    139 
    140 	if (SCARG(uap, pid) != 0) {
    141 		/* Find the process */
    142 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    143 		if (p == NULL)
    144 			return ESRCH;
    145 		mutex_enter(&p->p_smutex);
    146 		mutex_exit(&proclist_lock);
    147 		/* Disallow modification of system processes */
    148 		if ((p->p_flag & PK_SYSTEM) != 0) {
    149 			mutex_exit(&p->p_smutex);
    150 			return EPERM;
    151 		}
    152 	} else {
    153 		/* Use the calling process */
    154 		p = l->l_proc;
    155 		mutex_enter(&p->p_smutex);
    156 	}
    157 
    158 	/* Find the LWP(s) */
    159 	lcnt = 0;
    160 	lid = SCARG(uap, lid);
    161 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    162 		pri_t kpri;
    163 		int lpolicy;
    164 
    165 		if (lid && lid != t->l_lid)
    166 			continue;
    167 		lcnt++;
    168 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
    169 		lwp_lock(t);
    170 
    171 		if (policy == SCHED_NONE)
    172 			lpolicy = t->l_class;
    173 		else
    174 			lpolicy = policy;
    175 
    176 		/*
    177 		 * Note that, priority may need to be changed to get into
    178 		 * the correct priority range of the new scheduling class.
    179 		 */
    180 		kpri = convert_pri(t, lpolicy, pri);
    181 
    182 		/* Check the permission */
    183 		error = kauth_authorize_process(l->l_cred,
    184 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    185 		    KAUTH_ARG(kpri));
    186 		if (error) {
    187 			lwp_unlock(t);
    188 			break;
    189 		}
    190 
    191 		/* Set the scheduling class */
    192 		if (policy != SCHED_NONE)
    193 			t->l_class = policy;
    194 
    195 		/* Change the priority */
    196 		if (t->l_priority != kpri)
    197 			lwp_changepri(t, kpri);
    198 
    199 		lwp_unlock(t);
    200 	}
    201 	mutex_exit(&p->p_smutex);
    202 	return (lcnt == 0) ? ESRCH : error;
    203 }
    204 
    205 /*
    206  * Get scheduling parameters.
    207  */
    208 int
    209 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    210     register_t *retval)
    211 {
    212 	/* {
    213 		syscallarg(pid_t) pid;
    214 		syscallarg(lwpid_t) lid;
    215 		syscallarg(int *) policy;
    216 		syscallarg(struct sched_param *) params;
    217 	} */
    218 	struct sched_param param;
    219 	struct lwp *t;
    220 	int error, policy;
    221 
    222 	/* Locks the LWP */
    223 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    224 	if (t == NULL)
    225 		return ESRCH;
    226 
    227 	/* Check the permission */
    228 	error = kauth_authorize_process(l->l_cred,
    229 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    230 	if (error != 0) {
    231 		lwp_unlock(t);
    232 		return error;
    233 	}
    234 
    235 	param.sched_priority = t->l_priority;
    236 	policy = t->l_class;
    237 	lwp_unlock(t);
    238 
    239 	switch (policy) {
    240 	case SCHED_OTHER:
    241 		param.sched_priority -= PRI_USER;
    242 		break;
    243 	case SCHED_RR:
    244 	case SCHED_FIFO:
    245 		param.sched_priority -= PRI_USER_RT;
    246 		break;
    247 	}
    248 	error = copyout(&param, SCARG(uap, params), sizeof(param));
    249 	if (error == 0 && SCARG(uap, policy) != NULL)
    250 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    251 	return error;
    252 }
    253 
    254 /*
    255  * Set affinity.
    256  */
    257 int
    258 sys__sched_setaffinity(struct lwp *l,
    259     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    260 {
    261 	/* {
    262 		syscallarg(pid_t) pid;
    263 		syscallarg(lwpid_t) lid;
    264 		syscallarg(size_t) size;
    265 		syscallarg(void *) cpuset;
    266 	} */
    267 	cpuset_t *cpuset;
    268 	struct cpu_info *ci = NULL;
    269 	struct proc *p;
    270 	struct lwp *t;
    271 	CPU_INFO_ITERATOR cii;
    272 	lwpid_t lid;
    273 	u_int lcnt;
    274 	int error;
    275 
    276 	/* Allocate the CPU set, and get it from userspace */
    277 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    278 	error = copyin(SCARG(uap, cpuset), cpuset,
    279 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    280 	if (error)
    281 		goto error;
    282 
    283 	/* Look for a CPU in the set */
    284 	for (CPU_INFO_FOREACH(cii, ci))
    285 		if (CPU_ISSET(cpu_index(ci), cpuset))
    286 			break;
    287 	if (ci == NULL) {
    288 		/* Empty set */
    289 		kmem_free(cpuset, sizeof(cpuset_t));
    290 		cpuset = NULL;
    291 	}
    292 
    293 	if (SCARG(uap, pid) != 0) {
    294 		/* Find the process */
    295 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    296 		if (p == NULL) {
    297 			error = ESRCH;
    298 			goto error;
    299 		}
    300 		mutex_enter(&p->p_smutex);
    301 		mutex_exit(&proclist_lock);
    302 		/* Disallow modification of system processes. */
    303 		if ((p->p_flag & PK_SYSTEM) != 0) {
    304 			mutex_exit(&p->p_smutex);
    305 			error = EPERM;
    306 			goto error;
    307 		}
    308 	} else {
    309 		/* Use the calling process */
    310 		p = l->l_proc;
    311 		mutex_enter(&p->p_smutex);
    312 	}
    313 
    314 	/*
    315 	 * Check the permission.
    316 	 */
    317 	error = kauth_authorize_process(l->l_cred,
    318 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    319 	if (error != 0) {
    320 		mutex_exit(&p->p_smutex);
    321 		goto error;
    322 	}
    323 
    324 	/* Find the LWP(s) */
    325 	lcnt = 0;
    326 	lid = SCARG(uap, lid);
    327 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    328 		if (lid && lid != t->l_lid)
    329 			continue;
    330 		lwp_lock(t);
    331 		if (cpuset) {
    332 			/* Set the affinity flag and new CPU set */
    333 			t->l_flag |= LW_AFFINITY;
    334 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
    335 			/* Migrate to another CPU, unlocks LWP */
    336 			lwp_migrate(t, ci);
    337 		} else {
    338 			/* Unset the affinity flag */
    339 			t->l_flag &= ~LW_AFFINITY;
    340 			lwp_unlock(t);
    341 		}
    342 		lcnt++;
    343 	}
    344 	mutex_exit(&p->p_smutex);
    345 	if (lcnt == 0)
    346 		error = ESRCH;
    347 error:
    348 	if (cpuset != NULL)
    349 		kmem_free(cpuset, sizeof(cpuset_t));
    350 	return error;
    351 }
    352 
    353 /*
    354  * Get affinity.
    355  */
    356 int
    357 sys__sched_getaffinity(struct lwp *l,
    358     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    359 {
    360 	/* {
    361 		syscallarg(pid_t) pid;
    362 		syscallarg(lwpid_t) lid;
    363 		syscallarg(size_t) size;
    364 		syscallarg(void *) cpuset;
    365 	} */
    366 	struct lwp *t;
    367 	void *cpuset;
    368 	int error;
    369 
    370 	if (SCARG(uap, size) <= 0)
    371 		return EINVAL;
    372 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    373 
    374 	/* Locks the LWP */
    375 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    376 	if (t == NULL) {
    377 		kmem_free(cpuset, sizeof(cpuset_t));
    378 		return ESRCH;
    379 	}
    380 	/* Check the permission */
    381 	if (kauth_authorize_process(l->l_cred,
    382 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    383 		lwp_unlock(t);
    384 		kmem_free(cpuset, sizeof(cpuset_t));
    385 		return EPERM;
    386 	}
    387 	if (t->l_flag & LW_AFFINITY)
    388 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
    389 	lwp_unlock(t);
    390 
    391 	error = copyout(cpuset, SCARG(uap, cpuset),
    392 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    393 
    394 	kmem_free(cpuset, sizeof(cpuset_t));
    395 	return error;
    396 }
    397 
    398 /*
    399  * Yield.
    400  */
    401 int
    402 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    403 {
    404 
    405 	yield();
    406 	return 0;
    407 }
    408 
    409 /*
    410  * Sysctl nodes and initialization.
    411  */
    412 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    413 {
    414 	const struct sysctlnode *node = NULL;
    415 
    416 	sysctl_createv(clog, 0, NULL, NULL,
    417 		CTLFLAG_PERMANENT,
    418 		CTLTYPE_NODE, "kern", NULL,
    419 		NULL, 0, NULL, 0,
    420 		CTL_KERN, CTL_EOL);
    421 	sysctl_createv(clog, 0, NULL, NULL,
    422 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    423 		CTLTYPE_INT, "posix_sched",
    424 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    425 			     "Process Scheduling option to which the "
    426 			     "system attempts to conform"),
    427 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    428 		CTL_KERN, CTL_CREATE, CTL_EOL);
    429 	sysctl_createv(clog, 0, NULL, &node,
    430 		CTLFLAG_PERMANENT,
    431 		CTLTYPE_NODE, "sched",
    432 		SYSCTL_DESCR("Scheduler options"),
    433 		NULL, 0, NULL, 0,
    434 		CTL_KERN, CTL_CREATE, CTL_EOL);
    435 
    436 	if (node == NULL)
    437 		return;
    438 
    439 	sysctl_createv(clog, 0, &node, NULL,
    440 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    441 		CTLTYPE_INT, "pri_min",
    442 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    443 		NULL, SCHED_PRI_MIN, NULL, 0,
    444 		CTL_CREATE, CTL_EOL);
    445 	sysctl_createv(clog, 0, &node, NULL,
    446 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    447 		CTLTYPE_INT, "pri_max",
    448 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    449 		NULL, SCHED_PRI_MAX, NULL, 0,
    450 		CTL_CREATE, CTL_EOL);
    451 }
    452