Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.5
      1 /*	$NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * TODO:
     31  *  - Handle pthread_setschedprio() as defined by POSIX;
     32  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $");
     37 
     38 #include <sys/param.h>
     39 
     40 #include <sys/cpu.h>
     41 #include <sys/kauth.h>
     42 #include <sys/kmem.h>
     43 #include <sys/lwp.h>
     44 #include <sys/mutex.h>
     45 #include <sys/proc.h>
     46 #include <sys/pset.h>
     47 #include <sys/sched.h>
     48 #include <sys/syscallargs.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/systm.h>
     51 #include <sys/types.h>
     52 #include <sys/unistd.h>
     53 
     54 /*
     55  * Set scheduling parameters.
     56  */
     57 int
     58 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
     59     register_t *retval)
     60 {
     61 	/* {
     62 		syscallarg(pid_t) pid;
     63 		syscallarg(lwpid_t) lid;
     64 		syscallarg(const struct sched_param *) params;
     65 	} */
     66 	struct sched_param *sp;
     67 	struct proc *p;
     68 	struct lwp *t;
     69 	pid_t pid;
     70 	lwpid_t lid;
     71 	u_int lcnt;
     72 	pri_t pri;
     73 	int error;
     74 
     75 	/* Available only for super-user */
     76 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
     77 		return EACCES;
     78 
     79 	/* Get the parameters from the user-space */
     80 	sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
     81 	error = copyin(SCARG(uap, params), sp, sizeof(struct sched_param));
     82 	if (error)
     83 		goto error;
     84 
     85 	/*
     86 	 * Validate scheduling class and priority.
     87 	 * Convert the user priority to the in-kernel value.
     88 	 */
     89 	pri = sp->sched_priority;
     90 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) {
     91 		error = EINVAL;
     92 		goto error;
     93 	}
     94 	switch (sp->sched_class) {
     95 	case SCHED_OTHER:
     96 		if (pri == PRI_NONE)
     97 			pri = PRI_USER;
     98 		else
     99 			pri += PRI_USER;
    100 		break;
    101 	case SCHED_RR:
    102 	case SCHED_FIFO:
    103 		if (pri == PRI_NONE)
    104 			pri = PRI_USER_RT;
    105 		else
    106 			pri += PRI_USER_RT;
    107 		break;
    108 	case SCHED_NONE:
    109 		break;
    110 	default:
    111 		error = EINVAL;
    112 		goto error;
    113 	}
    114 
    115 	/* Find the process */
    116 	pid = SCARG(uap, pid);
    117 	p = p_find(pid, PFIND_UNLOCK_FAIL);
    118 	if (p == NULL) {
    119 		error = ESRCH;
    120 		goto error;
    121 	}
    122 	mutex_enter(&p->p_smutex);
    123 	mutex_exit(&proclist_lock);
    124 
    125 	/* Disallow modification of system processes */
    126 	if (p->p_flag & PK_SYSTEM) {
    127 		mutex_exit(&p->p_smutex);
    128 		error = EACCES;
    129 		goto error;
    130 	}
    131 
    132 	/* Find the LWP(s) */
    133 	lcnt = 0;
    134 	lid = SCARG(uap, lid);
    135 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    136 		bool chpri;
    137 
    138 		if (lid && lid != t->l_lid)
    139 			continue;
    140 
    141 		/* Set the scheduling class */
    142 		lwp_lock(t);
    143 		if (sp->sched_class != SCHED_NONE) {
    144 			/*
    145 			 * Priority must be changed to get into the correct
    146 			 * priority range of the new scheduling class.
    147 			 */
    148 			chpri = (t->l_class != sp->sched_class);
    149 			t->l_class = sp->sched_class;
    150 		} else
    151 			chpri = false;
    152 
    153 		/* Change the priority */
    154 		if (sp->sched_priority != PRI_NONE || chpri)
    155 			lwp_changepri(t, pri);
    156 
    157 		lwp_unlock(t);
    158 		lcnt++;
    159 	}
    160 	mutex_exit(&p->p_smutex);
    161 	if (lcnt != 0)
    162 		*retval = lcnt;
    163 	else
    164 		error = ESRCH;
    165 error:
    166 	kmem_free(sp, sizeof(struct sched_param));
    167 	return error;
    168 }
    169 
    170 /*
    171  * Get scheduling parameters.
    172  */
    173 int
    174 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    175     register_t *retval)
    176 {
    177 	/* {
    178 		syscallarg(pid_t) pid;
    179 		syscallarg(lwpid_t) lid;
    180 		syscallarg(struct sched_param *) params;
    181 	} */
    182 	struct sched_param *sp;
    183 	struct lwp *t;
    184 	int error;
    185 
    186 	sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
    187 
    188 	/* Locks the LWP */
    189 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    190 	if (t == NULL) {
    191 		kmem_free(sp, sizeof(struct sched_param));
    192 		return ESRCH;
    193 	}
    194 	sp->sched_priority = t->l_priority;
    195 	sp->sched_class = t->l_class;
    196 	lwp_unlock(t);
    197 
    198 	switch (sp->sched_class) {
    199 	case SCHED_OTHER:
    200 		sp->sched_priority -= PRI_USER;
    201 		break;
    202 	case SCHED_RR:
    203 	case SCHED_FIFO:
    204 		sp->sched_priority -= PRI_USER_RT;
    205 		break;
    206 	}
    207 	error = copyout(sp, SCARG(uap, params), sizeof(struct sched_param));
    208 	kmem_free(sp, sizeof(struct sched_param));
    209 	return error;
    210 }
    211 
    212 /*
    213  * Set affinity.
    214  */
    215 int
    216 sys__sched_setaffinity(struct lwp *l,
    217     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    218 {
    219 	/* {
    220 		syscallarg(pid_t) pid;
    221 		syscallarg(lwpid_t) lid;
    222 		syscallarg(size_t) size;
    223 		syscallarg(void *) cpuset;
    224 	} */
    225 	cpuset_t *cpuset;
    226 	struct cpu_info *ci = NULL;
    227 	struct proc *p;
    228 	struct lwp *t;
    229 	CPU_INFO_ITERATOR cii;
    230 	lwpid_t lid;
    231 	u_int lcnt;
    232 	int error;
    233 
    234 	/* Available only for super-user */
    235 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
    236 		return EACCES;
    237 
    238 	if (SCARG(uap, size) <= 0)
    239 		return EINVAL;
    240 
    241 	/* Allocate the CPU set, and get it from userspace */
    242 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    243 	error = copyin(SCARG(uap, cpuset), cpuset,
    244 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    245 	if (error)
    246 		goto error;
    247 
    248 	/* Look for a CPU in the set */
    249 	for (CPU_INFO_FOREACH(cii, ci))
    250 		if (CPU_ISSET(cpu_index(ci), cpuset))
    251 			break;
    252 	if (ci == NULL) {
    253 		/* Empty set */
    254 		kmem_free(cpuset, sizeof(cpuset_t));
    255 		cpuset = NULL;
    256 	}
    257 
    258 	/* Find the process */
    259 	p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
    260 	if (p == NULL) {
    261 		error = ESRCH;
    262 		goto error;
    263 	}
    264 	mutex_enter(&p->p_smutex);
    265 	mutex_exit(&proclist_lock);
    266 
    267 	/* Disallow modification of system processes */
    268 	if (p->p_flag & PK_SYSTEM) {
    269 		mutex_exit(&p->p_smutex);
    270 		error = EACCES;
    271 		goto error;
    272 	}
    273 
    274 	/* Find the LWP(s) */
    275 	lcnt = 0;
    276 	lid = SCARG(uap, lid);
    277 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    278 		if (lid && lid != t->l_lid)
    279 			continue;
    280 		lwp_lock(t);
    281 		if (cpuset) {
    282 			/* Set the affinity flag and new CPU set */
    283 			t->l_flag |= LW_AFFINITY;
    284 			memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
    285 			/* Migrate to another CPU, unlocks LWP */
    286 			lwp_migrate(t, ci);
    287 		} else {
    288 			/* Unset the affinity flag */
    289 			t->l_flag &= ~LW_AFFINITY;
    290 			lwp_unlock(t);
    291 		}
    292 		lcnt++;
    293 	}
    294 	mutex_exit(&p->p_smutex);
    295 	if (lcnt == 0)
    296 		error = ESRCH;
    297 	else
    298 		*retval = lcnt;
    299 error:
    300 	if (cpuset != NULL)
    301 		kmem_free(cpuset, sizeof(cpuset_t));
    302 	return error;
    303 }
    304 
    305 /*
    306  * Get affinity.
    307  */
    308 int
    309 sys__sched_getaffinity(struct lwp *l,
    310     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    311 {
    312 	/* {
    313 		syscallarg(pid_t) pid;
    314 		syscallarg(lwpid_t) lid;
    315 		syscallarg(size_t) size;
    316 		syscallarg(void *) cpuset;
    317 	} */
    318 	struct lwp *t;
    319 	void *cpuset;
    320 	int error;
    321 
    322 	if (SCARG(uap, size) <= 0)
    323 		return EINVAL;
    324 
    325 	cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
    326 
    327 	/* Locks the LWP */
    328 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    329 	if (t == NULL) {
    330 		kmem_free(cpuset, sizeof(cpuset_t));
    331 		return ESRCH;
    332 	}
    333 	if (t->l_flag & LW_AFFINITY)
    334 		memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
    335 	lwp_unlock(t);
    336 
    337 	error = copyout(cpuset, SCARG(uap, cpuset),
    338 	    min(SCARG(uap, size), sizeof(cpuset_t)));
    339 
    340 	kmem_free(cpuset, sizeof(cpuset_t));
    341 	return error;
    342 }
    343 
    344 /*
    345  * Yield.
    346  */
    347 int
    348 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    349 {
    350 
    351 	yield();
    352 	return 0;
    353 }
    354 
    355 /*
    356  * Sysctl nodes and initialization.
    357  */
    358 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    359 {
    360 	const struct sysctlnode *node = NULL;
    361 
    362 	sysctl_createv(clog, 0, NULL, NULL,
    363 		CTLFLAG_PERMANENT,
    364 		CTLTYPE_NODE, "kern", NULL,
    365 		NULL, 0, NULL, 0,
    366 		CTL_KERN, CTL_EOL);
    367 	sysctl_createv(clog, 0, NULL, NULL,
    368 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    369 		CTLTYPE_INT, "posix_sched",
    370 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    371 			     "Process Scheduling option to which the "
    372 			     "system attempts to conform"),
    373 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    374 		CTL_KERN, CTL_CREATE, CTL_EOL);
    375 	sysctl_createv(clog, 0, NULL, &node,
    376 		CTLFLAG_PERMANENT,
    377 		CTLTYPE_NODE, "sched",
    378 		SYSCTL_DESCR("Scheduler options"),
    379 		NULL, 0, NULL, 0,
    380 		CTL_KERN, CTL_CREATE, CTL_EOL);
    381 
    382 	if (node == NULL)
    383 		return;
    384 
    385 	sysctl_createv(clog, 0, &node, NULL,
    386 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    387 		CTLTYPE_INT, "pri_min",
    388 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    389 		NULL, SCHED_PRI_MIN, NULL, 0,
    390 		CTL_CREATE, CTL_EOL);
    391 	sysctl_createv(clog, 0, &node, NULL,
    392 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    393 		CTLTYPE_INT, "pri_max",
    394 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    395 		NULL, SCHED_PRI_MAX, NULL, 0,
    396 		CTL_CREATE, CTL_EOL);
    397 }
    398