Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.10
      1 /*	$NetBSD: sys_sched.c,v 1.10 2008/02/09 16:58:01 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * TODO:
     31  *  - Handle pthread_setschedprio() as defined by POSIX;
     32  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.10 2008/02/09 16:58:01 yamt Exp $");
     37 
     38 #include <sys/param.h>
     39 
     40 #include <sys/cpu.h>
     41 #include <sys/kauth.h>
     42 #include <sys/kmem.h>
     43 #include <sys/lwp.h>
     44 #include <sys/mutex.h>
     45 #include <sys/proc.h>
     46 #include <sys/pset.h>
     47 #include <sys/sched.h>
     48 #include <sys/syscallargs.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/systm.h>
     51 #include <sys/types.h>
     52 #include <sys/unistd.h>
     53 
     54 /*
     55  * Convert user priority or the in-kernel priority or convert the current
     56  * priority to the appropriate range according to the policy change.
     57  */
     58 static pri_t
     59 convert_pri(lwp_t *l, int policy, pri_t pri)
     60 {
     61 	int delta = 0;
     62 
     63 	if (policy == SCHED_NONE)
     64 		policy = l->l_class;
     65 
     66 	switch (policy) {
     67 	case SCHED_OTHER:
     68 		delta = PRI_USER;
     69 		break;
     70 	case SCHED_FIFO:
     71 	case SCHED_RR:
     72 		delta = PRI_USER_RT;
     73 		break;
     74 	default:
     75 		panic("upri_to_kpri");
     76 	}
     77 
     78 	if (pri != PRI_NONE) {
     79 		/* Convert user priority to the in-kernel */
     80 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     81 		return pri + delta;
     82 	}
     83 	if (l->l_class == policy)
     84 		return l->l_priority;
     85 
     86 	/* Change the current priority to the appropriate range */
     87 	if (l->l_class == SCHED_OTHER) {
     88 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     89 		return l->l_priority + delta;
     90 	}
     91 	if (policy == SCHED_OTHER) {
     92 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     93 		return l->l_priority - delta;
     94 	}
     95 	KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
     96 	return l->l_class;
     97 }
     98 
     99 /*
    100  * Set scheduling parameters.
    101  */
    102 int
    103 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    104     register_t *retval)
    105 {
    106 	/* {
    107 		syscallarg(pid_t) pid;
    108 		syscallarg(lwpid_t) lid;
    109 		syscallarg(int) policy;
    110 		syscallarg(const struct sched_param *) params;
    111 	} */
    112 	struct sched_param param;
    113 	struct proc *p;
    114 	struct lwp *t;
    115 	lwpid_t lid;
    116 	u_int lcnt;
    117 	int policy;
    118 	pri_t pri;
    119 	int error;
    120 
    121 	/* Get the parameters from the user-space */
    122 	error = copyin(SCARG(uap, params), &param, sizeof(param));
    123 	if (error) {
    124 		return error;
    125 	}
    126 	pri = param.sched_priority;
    127 	policy = SCARG(uap, policy);
    128 
    129 	/* If no parameters specified, just return (this should not happen) */
    130 	if (pri == PRI_NONE && policy == SCHED_NONE)
    131 		return 0;
    132 
    133 	/* Validate scheduling class */
    134 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    135 		return EINVAL;
    136 
    137 	/* Validate priority */
    138 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    139 		return EINVAL;
    140 
    141 	if (SCARG(uap, pid) != 0) {
    142 		/* Find the process */
    143 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    144 		if (p == NULL)
    145 			return ESRCH;
    146 		mutex_enter(&p->p_smutex);
    147 		mutex_exit(&proclist_lock);
    148 		/* Disallow modification of system processes */
    149 		if (p->p_flag & PK_SYSTEM) {
    150 			mutex_exit(&p->p_smutex);
    151 			return EPERM;
    152 		}
    153 	} else {
    154 		/* Use the calling process */
    155 		p = l->l_proc;
    156 		mutex_enter(&p->p_smutex);
    157 	}
    158 
    159 	/* Check the permission */
    160 	if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER, p,
    161 	    KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_SETPARAM), NULL, NULL)) {
    162 		mutex_exit(&p->p_smutex);
    163 		return EPERM;
    164 	}
    165 
    166 	/* Find the LWP(s) */
    167 	lcnt = 0;
    168 	lid = SCARG(uap, lid);
    169 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    170 		pri_t kpri;
    171 
    172 		if (lid && lid != t->l_lid)
    173 			continue;
    174 		KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
    175 		lwp_lock(t);
    176 
    177 		/*
    178 		 * Note that, priority may need to be changed to get into
    179 		 * the correct priority range of the new scheduling class.
    180 		 */
    181 		kpri = convert_pri(t, policy, pri);
    182 
    183 		/* Set the scheduling class */
    184 		if (policy != SCHED_NONE)
    185 			t->l_class = policy;
    186 
    187 		/* Change the priority */
    188 		if (t->l_priority != kpri)
    189 			lwp_changepri(t, kpri);
    190 
    191 		lwp_unlock(t);
    192 		lcnt++;
    193 	}
    194 	mutex_exit(&p->p_smutex);
    195 	return (lcnt == 0) ? ESRCH : error;
    196 }
    197 
    198 /*
    199  * Get scheduling parameters.
    200  */
    201 int
    202 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    203     register_t *retval)
    204 {
    205 	/* {
    206 		syscallarg(pid_t) pid;
    207 		syscallarg(lwpid_t) lid;
    208 		syscallarg(int *) policy;
    209 		syscallarg(struct sched_param *) params;
    210 	} */
    211 	struct sched_param param;
    212 	struct lwp *t;
    213 	lwpid_t lid;
    214 	int error, policy;
    215 
    216 	/* If not specified, use the first LWP */
    217 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
    218 
    219 	if (SCARG(uap, pid) != 0) {
    220 		/* Locks the LWP */
    221 		t = lwp_find2(SCARG(uap, pid), lid);
    222 	} else {
    223 		struct proc *p = l->l_proc;
    224 		/* Use the calling process */
    225 		mutex_enter(&p->p_smutex);
    226 		t = lwp_find(p, lid);
    227 		if (t != NULL)
    228 			lwp_lock(t);
    229 		mutex_exit(&p->p_smutex);
    230 	}
    231 	if (t == NULL) {
    232 		error = ESRCH;
    233 		goto error;
    234 	}
    235 
    236 	/* Check the permission */
    237 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER,
    238 	    t->l_proc, KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_GETPARAM),
    239 	    NULL, NULL);
    240 	if (error != 0) {
    241 		lwp_unlock(t);
    242 		goto error;
    243 	}
    244 
    245 	param.sched_priority = t->l_priority;
    246 	policy = t->l_class;
    247 	lwp_unlock(t);
    248 
    249 	switch (policy) {
    250 	case SCHED_OTHER:
    251 		param.sched_priority -= PRI_USER;
    252 		break;
    253 	case SCHED_RR:
    254 	case SCHED_FIFO:
    255 		param.sched_priority -= PRI_USER_RT;
    256 		break;
    257 	}
    258 	error = copyout(&param, SCARG(uap, params), sizeof(param));
    259 	if (error == 0 && SCARG(uap, policy) != NULL)
    260 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    261 error:
    262 	return error;
    263 }
    264 
    265 /*
    266  * Set affinity.
    267  */
    268 int
    269 sys__sched_setaffinity(struct lwp *l,
    270     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    271 {
    272 	/* {
    273 		syscallarg(pid_t) pid;
    274 		syscallarg(lwpid_t) lid;
    275 		syscallarg(size_t) size;
    276 		syscallarg(void *) cpuset;
    277 	} */
    278 	cpuset_t *cpuset;
    279 	struct cpu_info *ci = NULL;
    280 	struct proc *p;
    281 	struct lwp *t;
    282 	CPU_INFO_ITERATOR cii;
    283 	lwpid_t lid;
    284 	u_int lcnt;
    285 	int error;
    286 
    287 	/* Allocate the CPU set, and get it from userspace */
    288 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    289 	error = copyin(SCARG(uap, cpuset), cpuset,
    290 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    291 	if (error)
    292 		goto error;
    293 
    294 	/* Look for a CPU in the set */
    295 	for (CPU_INFO_FOREACH(cii, ci))
    296 		if (CPU_ISSET(cpu_index(ci), cpuset))
    297 			break;
    298 	if (ci == NULL) {
    299 		/* Empty set */
    300 		kmem_free(cpuset, sizeof(cpuset_t));
    301 		cpuset = NULL;
    302 	}
    303 
    304 	if (SCARG(uap, pid) != 0) {
    305 		/* Find the process */
    306 		p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    307 		if (p == NULL) {
    308 			error = ESRCH;
    309 			goto error;
    310 		}
    311 		mutex_enter(&p->p_smutex);
    312 		mutex_exit(&proclist_lock);
    313 	} else {
    314 		/* Use the calling process */
    315 		p = l->l_proc;
    316 		mutex_enter(&p->p_smutex);
    317 	}
    318 
    319 	/*
    320 	 * Check the permission.
    321 	 * Disallow modification of system processes.
    322 	 */
    323 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER, p,
    324 	    KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_SETAFFINITY), NULL, NULL);
    325 	if (error != 0) {
    326 		mutex_exit(&p->p_smutex);
    327 		goto error;
    328 	}
    329 	if ((p->p_flag & PK_SYSTEM) != 0) {
    330 		mutex_exit(&p->p_smutex);
    331 		error = EPERM;
    332 		goto error;
    333 	}
    334 
    335 	/* Find the LWP(s) */
    336 	lcnt = 0;
    337 	lid = SCARG(uap, lid);
    338 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    339 		if (lid && lid != t->l_lid)
    340 			continue;
    341 		lwp_lock(t);
    342 		if (cpuset) {
    343 			/* Set the affinity flag and new CPU set */
    344 			t->l_flag |= LW_AFFINITY;
    345 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
    346 			/* Migrate to another CPU, unlocks LWP */
    347 			lwp_migrate(t, ci);
    348 		} else {
    349 			/* Unset the affinity flag */
    350 			t->l_flag &= ~LW_AFFINITY;
    351 			lwp_unlock(t);
    352 		}
    353 		lcnt++;
    354 	}
    355 	mutex_exit(&p->p_smutex);
    356 	if (lcnt == 0)
    357 		error = ESRCH;
    358 error:
    359 	if (cpuset != NULL)
    360 		kmem_free(cpuset, sizeof(cpuset_t));
    361 	return error;
    362 }
    363 
    364 /*
    365  * Get affinity.
    366  */
    367 int
    368 sys__sched_getaffinity(struct lwp *l,
    369     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    370 {
    371 	/* {
    372 		syscallarg(pid_t) pid;
    373 		syscallarg(lwpid_t) lid;
    374 		syscallarg(size_t) size;
    375 		syscallarg(void *) cpuset;
    376 	} */
    377 	struct lwp *t;
    378 	void *cpuset;
    379 	lwpid_t lid;
    380 	int error;
    381 
    382 	if (SCARG(uap, size) <= 0)
    383 		return EINVAL;
    384 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    385 
    386 	/* If not specified, use the first LWP */
    387 	lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
    388 
    389 	if (SCARG(uap, pid) != 0) {
    390 		/* Locks the LWP */
    391 		t = lwp_find2(SCARG(uap, pid), lid);
    392 	} else {
    393 		struct proc *p = l->l_proc;
    394 		/* Use the calling process */
    395 		mutex_enter(&p->p_smutex);
    396 		t = lwp_find(p, lid);
    397 		if (t != NULL)
    398 			lwp_lock(t);
    399 		mutex_exit(&p->p_smutex);
    400 	}
    401 	if (t == NULL) {
    402 		kmem_free(cpuset, sizeof(cpuset_t));
    403 		return ESRCH;
    404 	}
    405 	/* Check the permission */
    406 	if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER,
    407 	    t->l_proc, KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_GETAFFINITY),
    408 	    NULL, NULL)) {
    409 		lwp_unlock(t);
    410 		kmem_free(cpuset, sizeof(cpuset_t));
    411 		return EPERM;
    412 	}
    413 	if (t->l_flag & LW_AFFINITY)
    414 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
    415 	lwp_unlock(t);
    416 
    417 	error = copyout(cpuset, SCARG(uap, cpuset),
    418 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    419 
    420 	kmem_free(cpuset, sizeof(cpuset_t));
    421 	return error;
    422 }
    423 
    424 /*
    425  * Yield.
    426  */
    427 int
    428 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    429 {
    430 
    431 	yield();
    432 	return 0;
    433 }
    434 
    435 /*
    436  * Sysctl nodes and initialization.
    437  */
    438 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    439 {
    440 	const struct sysctlnode *node = NULL;
    441 
    442 	sysctl_createv(clog, 0, NULL, NULL,
    443 		CTLFLAG_PERMANENT,
    444 		CTLTYPE_NODE, "kern", NULL,
    445 		NULL, 0, NULL, 0,
    446 		CTL_KERN, CTL_EOL);
    447 	sysctl_createv(clog, 0, NULL, NULL,
    448 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    449 		CTLTYPE_INT, "posix_sched",
    450 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    451 			     "Process Scheduling option to which the "
    452 			     "system attempts to conform"),
    453 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    454 		CTL_KERN, CTL_CREATE, CTL_EOL);
    455 	sysctl_createv(clog, 0, NULL, &node,
    456 		CTLFLAG_PERMANENT,
    457 		CTLTYPE_NODE, "sched",
    458 		SYSCTL_DESCR("Scheduler options"),
    459 		NULL, 0, NULL, 0,
    460 		CTL_KERN, CTL_CREATE, CTL_EOL);
    461 
    462 	if (node == NULL)
    463 		return;
    464 
    465 	sysctl_createv(clog, 0, &node, NULL,
    466 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    467 		CTLTYPE_INT, "pri_min",
    468 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    469 		NULL, SCHED_PRI_MIN, NULL, 0,
    470 		CTL_CREATE, CTL_EOL);
    471 	sysctl_createv(clog, 0, &node, NULL,
    472 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    473 		CTLTYPE_INT, "pri_max",
    474 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    475 		NULL, SCHED_PRI_MAX, NULL, 0,
    476 		CTL_CREATE, CTL_EOL);
    477 }
    478