Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.14
      1 /*	$NetBSD: sys_sched.c,v 1.14 2008/02/19 09:44:26 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * TODO:
     31  *  - Handle pthread_setschedprio() as defined by POSIX;
     32  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.14 2008/02/19 09:44:26 yamt Exp $");
     37 
     38 #include <sys/param.h>
     39 
     40 #include <sys/cpu.h>
     41 #include <sys/kauth.h>
     42 #include <sys/kmem.h>
     43 #include <sys/lwp.h>
     44 #include <sys/mutex.h>
     45 #include <sys/proc.h>
     46 #include <sys/pset.h>
     47 #include <sys/sched.h>
     48 #include <sys/syscallargs.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/systm.h>
     51 #include <sys/types.h>
     52 #include <sys/unistd.h>
     53 
     54 /*
     55  * Convert user priority or the in-kernel priority or convert the current
     56  * priority to the appropriate range according to the policy change.
     57  */
     58 static pri_t
     59 convert_pri(lwp_t *l, int policy, pri_t pri)
     60 {
     61 	int delta = 0;
     62 
     63 	switch (policy) {
     64 	case SCHED_OTHER:
     65 		delta = PRI_USER;
     66 		break;
     67 	case SCHED_FIFO:
     68 	case SCHED_RR:
     69 		delta = PRI_USER_RT;
     70 		break;
     71 	default:
     72 		panic("upri_to_kpri");
     73 	}
     74 
     75 	if (pri != PRI_NONE) {
     76 		/* Convert user priority to the in-kernel */
     77 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     78 		return pri + delta;
     79 	}
     80 	if (l->l_class == policy)
     81 		return l->l_priority;
     82 
     83 	/* Change the current priority to the appropriate range */
     84 	if (l->l_class == SCHED_OTHER) {
     85 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     86 		return l->l_priority + delta;
     87 	}
     88 	if (policy == SCHED_OTHER) {
     89 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     90 		return l->l_priority - delta;
     91 	}
     92 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
     93 	return l->l_class;
     94 }
     95 
     96 /*
     97  * Set scheduling parameters.
     98  */
     99 int
    100 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    101     register_t *retval)
    102 {
    103 	/* {
    104 		syscallarg(pid_t) pid;
    105 		syscallarg(lwpid_t) lid;
    106 		syscallarg(int) policy;
    107 		syscallarg(const struct sched_param *) params;
    108 	} */
    109 	struct sched_param param;
    110 	struct proc *p;
    111 	struct lwp *t;
    112 	lwpid_t lid;
    113 	u_int lcnt;
    114 	int policy;
    115 	pri_t pri;
    116 	int error;
    117 
    118 	/* Get the parameters from the user-space */
    119 	error = copyin(SCARG(uap, params), &param, sizeof(param));
    120 	if (error) {
    121 		return error;
    122 	}
    123 	pri = param.sched_priority;
    124 	policy = SCARG(uap, policy);
    125 
    126 	/* If no parameters specified, just return (this should not happen) */
    127 	if (pri == PRI_NONE && policy == SCHED_NONE)
    128 		return 0;
    129 
    130 	/* Validate scheduling class */
    131 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    132 		return EINVAL;
    133 
    134 	/* Validate priority */
    135 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    136 		return EINVAL;
    137 
    138 	if (SCARG(uap, pid) != 0) {
    139 		/* Find the process */
    140 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    141 		if (p == NULL)
    142 			return ESRCH;
    143 		mutex_enter(&p->p_smutex);
    144 		mutex_exit(&proclist_lock);
    145 		/* Disallow modification of system processes */
    146 		if (p->p_flag & PK_SYSTEM) {
    147 			mutex_exit(&p->p_smutex);
    148 			return EPERM;
    149 		}
    150 	} else {
    151 		/* Use the calling process */
    152 		p = l->l_proc;
    153 		mutex_enter(&p->p_smutex);
    154 	}
    155 
    156 	/* Find the LWP(s) */
    157 	lcnt = 0;
    158 	lid = SCARG(uap, lid);
    159 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    160 		pri_t kpri;
    161 		int lpolicy;
    162 
    163 		if (lid && lid != t->l_lid)
    164 			continue;
    165 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
    166 		lwp_lock(t);
    167 
    168 		if (policy == SCHED_NONE)
    169 			lpolicy = t->l_class;
    170 		else
    171 			lpolicy = policy;
    172 
    173 		/*
    174 		 * Note that, priority may need to be changed to get into
    175 		 * the correct priority range of the new scheduling class.
    176 		 */
    177 		kpri = convert_pri(t, lpolicy, pri);
    178 
    179 		/* Check the permission */
    180 		error = kauth_authorize_process(l->l_cred,
    181 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    182 		    KAUTH_ARG(kpri));
    183 		if (error) {
    184 			lwp_unlock(t);
    185 			break;
    186 		}
    187 
    188 		/* Set the scheduling class */
    189 		if (policy != SCHED_NONE)
    190 			t->l_class = policy;
    191 
    192 		/* Change the priority */
    193 		if (t->l_priority != kpri)
    194 			lwp_changepri(t, kpri);
    195 
    196 		lwp_unlock(t);
    197 		lcnt++;
    198 	}
    199 	mutex_exit(&p->p_smutex);
    200 	return (lcnt == 0) ? ESRCH : error;
    201 }
    202 
    203 /*
    204  * Get scheduling parameters.
    205  */
    206 int
    207 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    208     register_t *retval)
    209 {
    210 	/* {
    211 		syscallarg(pid_t) pid;
    212 		syscallarg(lwpid_t) lid;
    213 		syscallarg(int *) policy;
    214 		syscallarg(struct sched_param *) params;
    215 	} */
    216 	struct sched_param param;
    217 	struct lwp *t;
    218 	lwpid_t lid;
    219 	int error, policy;
    220 
    221 	/* If not specified, use the first LWP */
    222 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
    223 
    224 	if (SCARG(uap, pid) != 0) {
    225 		/* Locks the LWP */
    226 		t = lwp_find2(SCARG(uap, pid), lid);
    227 	} else {
    228 		struct proc *p = l->l_proc;
    229 		/* Use the calling process */
    230 		mutex_enter(&p->p_smutex);
    231 		t = lwp_find(p, lid);
    232 		if (t != NULL)
    233 			lwp_lock(t);
    234 		mutex_exit(&p->p_smutex);
    235 	}
    236 	if (t == NULL) {
    237 		error = ESRCH;
    238 		goto error;
    239 	}
    240 
    241 	/* Check the permission */
    242 	error = kauth_authorize_process(l->l_cred,
    243 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    244 	if (error != 0) {
    245 		lwp_unlock(t);
    246 		goto error;
    247 	}
    248 
    249 	param.sched_priority = t->l_priority;
    250 	policy = t->l_class;
    251 	lwp_unlock(t);
    252 
    253 	switch (policy) {
    254 	case SCHED_OTHER:
    255 		param.sched_priority -= PRI_USER;
    256 		break;
    257 	case SCHED_RR:
    258 	case SCHED_FIFO:
    259 		param.sched_priority -= PRI_USER_RT;
    260 		break;
    261 	}
    262 	error = copyout(&param, SCARG(uap, params), sizeof(param));
    263 	if (error == 0 && SCARG(uap, policy) != NULL)
    264 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    265 error:
    266 	return error;
    267 }
    268 
    269 /*
    270  * Set affinity.
    271  */
    272 int
    273 sys__sched_setaffinity(struct lwp *l,
    274     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    275 {
    276 	/* {
    277 		syscallarg(pid_t) pid;
    278 		syscallarg(lwpid_t) lid;
    279 		syscallarg(size_t) size;
    280 		syscallarg(void *) cpuset;
    281 	} */
    282 	cpuset_t *cpuset;
    283 	struct cpu_info *ci = NULL;
    284 	struct proc *p;
    285 	struct lwp *t;
    286 	CPU_INFO_ITERATOR cii;
    287 	lwpid_t lid;
    288 	u_int lcnt;
    289 	int error;
    290 
    291 	/* Allocate the CPU set, and get it from userspace */
    292 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    293 	error = copyin(SCARG(uap, cpuset), cpuset,
    294 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    295 	if (error)
    296 		goto error;
    297 
    298 	/* Look for a CPU in the set */
    299 	for (CPU_INFO_FOREACH(cii, ci))
    300 		if (CPU_ISSET(cpu_index(ci), cpuset))
    301 			break;
    302 	if (ci == NULL) {
    303 		/* Empty set */
    304 		kmem_free(cpuset, sizeof(cpuset_t));
    305 		cpuset = NULL;
    306 	}
    307 
    308 	if (SCARG(uap, pid) != 0) {
    309 		/* Find the process */
    310 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    311 		if (p == NULL) {
    312 			error = ESRCH;
    313 			goto error;
    314 		}
    315 		mutex_enter(&p->p_smutex);
    316 		mutex_exit(&proclist_lock);
    317 	} else {
    318 		/* Use the calling process */
    319 		p = l->l_proc;
    320 		mutex_enter(&p->p_smutex);
    321 	}
    322 
    323 	/*
    324 	 * Check the permission.
    325 	 * Disallow modification of system processes.
    326 	 */
    327 	error = kauth_authorize_process(l->l_cred,
    328 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    329 	if (error != 0) {
    330 		mutex_exit(&p->p_smutex);
    331 		goto error;
    332 	}
    333 	if ((p->p_flag & PK_SYSTEM) != 0) {
    334 		mutex_exit(&p->p_smutex);
    335 		error = EPERM;
    336 		goto error;
    337 	}
    338 
    339 	/* Find the LWP(s) */
    340 	lcnt = 0;
    341 	lid = SCARG(uap, lid);
    342 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    343 		if (lid && lid != t->l_lid)
    344 			continue;
    345 		lwp_lock(t);
    346 		if (cpuset) {
    347 			/* Set the affinity flag and new CPU set */
    348 			t->l_flag |= LW_AFFINITY;
    349 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
    350 			/* Migrate to another CPU, unlocks LWP */
    351 			lwp_migrate(t, ci);
    352 		} else {
    353 			/* Unset the affinity flag */
    354 			t->l_flag &= ~LW_AFFINITY;
    355 			lwp_unlock(t);
    356 		}
    357 		lcnt++;
    358 	}
    359 	mutex_exit(&p->p_smutex);
    360 	if (lcnt == 0)
    361 		error = ESRCH;
    362 error:
    363 	if (cpuset != NULL)
    364 		kmem_free(cpuset, sizeof(cpuset_t));
    365 	return error;
    366 }
    367 
    368 /*
    369  * Get affinity.
    370  */
    371 int
    372 sys__sched_getaffinity(struct lwp *l,
    373     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    374 {
    375 	/* {
    376 		syscallarg(pid_t) pid;
    377 		syscallarg(lwpid_t) lid;
    378 		syscallarg(size_t) size;
    379 		syscallarg(void *) cpuset;
    380 	} */
    381 	struct lwp *t;
    382 	void *cpuset;
    383 	lwpid_t lid;
    384 	int error;
    385 
    386 	if (SCARG(uap, size) <= 0)
    387 		return EINVAL;
    388 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    389 
    390 	/* If not specified, use the first LWP */
    391 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
    392 
    393 	if (SCARG(uap, pid) != 0) {
    394 		/* Locks the LWP */
    395 		t = lwp_find2(SCARG(uap, pid), lid);
    396 	} else {
    397 		struct proc *p = l->l_proc;
    398 		/* Use the calling process */
    399 		mutex_enter(&p->p_smutex);
    400 		t = lwp_find(p, lid);
    401 		if (t != NULL)
    402 			lwp_lock(t);
    403 		mutex_exit(&p->p_smutex);
    404 	}
    405 	if (t == NULL) {
    406 		kmem_free(cpuset, sizeof(cpuset_t));
    407 		return ESRCH;
    408 	}
    409 	/* Check the permission */
    410 	if (kauth_authorize_process(l->l_cred,
    411 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    412 		lwp_unlock(t);
    413 		kmem_free(cpuset, sizeof(cpuset_t));
    414 		return EPERM;
    415 	}
    416 	if (t->l_flag & LW_AFFINITY)
    417 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
    418 	lwp_unlock(t);
    419 
    420 	error = copyout(cpuset, SCARG(uap, cpuset),
    421 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    422 
    423 	kmem_free(cpuset, sizeof(cpuset_t));
    424 	return error;
    425 }
    426 
    427 /*
    428  * Yield.
    429  */
    430 int
    431 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    432 {
    433 
    434 	yield();
    435 	return 0;
    436 }
    437 
    438 /*
    439  * Sysctl nodes and initialization.
    440  */
    441 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    442 {
    443 	const struct sysctlnode *node = NULL;
    444 
    445 	sysctl_createv(clog, 0, NULL, NULL,
    446 		CTLFLAG_PERMANENT,
    447 		CTLTYPE_NODE, "kern", NULL,
    448 		NULL, 0, NULL, 0,
    449 		CTL_KERN, CTL_EOL);
    450 	sysctl_createv(clog, 0, NULL, NULL,
    451 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    452 		CTLTYPE_INT, "posix_sched",
    453 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    454 			     "Process Scheduling option to which the "
    455 			     "system attempts to conform"),
    456 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    457 		CTL_KERN, CTL_CREATE, CTL_EOL);
    458 	sysctl_createv(clog, 0, NULL, &node,
    459 		CTLFLAG_PERMANENT,
    460 		CTLTYPE_NODE, "sched",
    461 		SYSCTL_DESCR("Scheduler options"),
    462 		NULL, 0, NULL, 0,
    463 		CTL_KERN, CTL_CREATE, CTL_EOL);
    464 
    465 	if (node == NULL)
    466 		return;
    467 
    468 	sysctl_createv(clog, 0, &node, NULL,
    469 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    470 		CTLTYPE_INT, "pri_min",
    471 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    472 		NULL, SCHED_PRI_MIN, NULL, 0,
    473 		CTL_CREATE, CTL_EOL);
    474 	sysctl_createv(clog, 0, &node, NULL,
    475 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    476 		CTLTYPE_INT, "pri_max",
    477 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    478 		NULL, SCHED_PRI_MAX, NULL, 0,
    479 		CTL_CREATE, CTL_EOL);
    480 }
    481