Home | History | Annotate | Line # | Download | only in kern
sys_sched.c revision 1.33
      1  1.33     rmind /*	$NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $	*/
      2   1.1        ad 
      3   1.5     rmind /*
      4   1.5     rmind  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5   1.1        ad  * All rights reserved.
      6   1.5     rmind  *
      7   1.1        ad  * Redistribution and use in source and binary forms, with or without
      8   1.1        ad  * modification, are permitted provided that the following conditions
      9   1.1        ad  * are met:
     10   1.1        ad  * 1. Redistributions of source code must retain the above copyright
     11   1.1        ad  *    notice, this list of conditions and the following disclaimer.
     12   1.1        ad  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1        ad  *    notice, this list of conditions and the following disclaimer in the
     14   1.1        ad  *    documentation and/or other materials provided with the distribution.
     15   1.1        ad  *
     16  1.16     rmind  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  1.16     rmind  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  1.16     rmind  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  1.16     rmind  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  1.16     rmind  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  1.16     rmind  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  1.16     rmind  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  1.16     rmind  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  1.16     rmind  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  1.16     rmind  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  1.16     rmind  * SUCH DAMAGE.
     27   1.1        ad  */
     28   1.1        ad 
     29   1.5     rmind /*
     30  1.17        ad  * System calls relating to the scheduler.
     31  1.17        ad  *
     32  1.31     rmind  * Lock order:
     33  1.31     rmind  *
     34  1.31     rmind  *	cpu_lock ->
     35  1.31     rmind  *	    proc_lock ->
     36  1.31     rmind  *		proc_t::p_lock ->
     37  1.31     rmind  *		    lwp_t::lwp_lock
     38  1.31     rmind  *
     39   1.5     rmind  * TODO:
     40   1.5     rmind  *  - Handle pthread_setschedprio() as defined by POSIX;
     41   1.5     rmind  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
     42   1.5     rmind  */
     43   1.5     rmind 
     44   1.1        ad #include <sys/cdefs.h>
     45  1.33     rmind __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.33 2009/03/03 21:55:06 rmind Exp $");
     46   1.1        ad 
     47   1.1        ad #include <sys/param.h>
     48   1.5     rmind 
     49   1.5     rmind #include <sys/cpu.h>
     50   1.5     rmind #include <sys/kauth.h>
     51   1.5     rmind #include <sys/kmem.h>
     52   1.5     rmind #include <sys/lwp.h>
     53   1.5     rmind #include <sys/mutex.h>
     54   1.1        ad #include <sys/proc.h>
     55   1.5     rmind #include <sys/pset.h>
     56  1.28  wrstuden #include <sys/sa.h>
     57  1.28  wrstuden #include <sys/savar.h>
     58   1.5     rmind #include <sys/sched.h>
     59   1.1        ad #include <sys/syscallargs.h>
     60   1.5     rmind #include <sys/sysctl.h>
     61   1.5     rmind #include <sys/systm.h>
     62   1.5     rmind #include <sys/types.h>
     63   1.5     rmind #include <sys/unistd.h>
     64   1.5     rmind 
     65  1.28  wrstuden #include "opt_sa.h"
     66  1.28  wrstuden 
     67   1.5     rmind /*
     68   1.7     rmind  * Convert user priority or the in-kernel priority or convert the current
     69   1.7     rmind  * priority to the appropriate range according to the policy change.
     70   1.7     rmind  */
     71   1.7     rmind static pri_t
     72   1.7     rmind convert_pri(lwp_t *l, int policy, pri_t pri)
     73   1.7     rmind {
     74   1.7     rmind 
     75  1.29     rmind 	/* Convert user priority to the in-kernel */
     76   1.7     rmind 	if (pri != PRI_NONE) {
     77  1.29     rmind 		/* Only for real-time threads */
     78   1.7     rmind 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
     79  1.29     rmind 		KASSERT(policy != SCHED_OTHER);
     80  1.29     rmind 		return PRI_USER_RT + pri;
     81   1.7     rmind 	}
     82  1.29     rmind 
     83  1.29     rmind 	/* Neither policy, nor priority change */
     84   1.7     rmind 	if (l->l_class == policy)
     85   1.7     rmind 		return l->l_priority;
     86   1.7     rmind 
     87  1.29     rmind 	/* Time-sharing -> real-time */
     88   1.7     rmind 	if (l->l_class == SCHED_OTHER) {
     89   1.7     rmind 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     90  1.29     rmind 		return PRI_USER_RT;
     91   1.7     rmind 	}
     92  1.29     rmind 
     93  1.29     rmind 	/* Real-time -> time-sharing */
     94   1.7     rmind 	if (policy == SCHED_OTHER) {
     95   1.7     rmind 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     96  1.29     rmind 		return l->l_priority - PRI_USER_RT;
     97   1.7     rmind 	}
     98  1.29     rmind 
     99  1.29     rmind 	/* Real-time -> real-time */
    100  1.29     rmind 	return l->l_priority;
    101   1.7     rmind }
    102   1.7     rmind 
    103   1.5     rmind int
    104  1.18      elad do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    105  1.18      elad     const struct sched_param *params)
    106   1.5     rmind {
    107   1.5     rmind 	struct proc *p;
    108   1.5     rmind 	struct lwp *t;
    109  1.18      elad 	pri_t pri;
    110   1.5     rmind 	u_int lcnt;
    111   1.5     rmind 	int error;
    112   1.5     rmind 
    113  1.18      elad 	error = 0;
    114  1.18      elad 
    115  1.18      elad 	pri = params->sched_priority;
    116   1.7     rmind 
    117   1.7     rmind 	/* If no parameters specified, just return (this should not happen) */
    118   1.7     rmind 	if (pri == PRI_NONE && policy == SCHED_NONE)
    119   1.7     rmind 		return 0;
    120   1.5     rmind 
    121   1.7     rmind 	/* Validate scheduling class */
    122   1.7     rmind 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    123   1.7     rmind 		return EINVAL;
    124   1.5     rmind 
    125   1.7     rmind 	/* Validate priority */
    126   1.7     rmind 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    127   1.7     rmind 		return EINVAL;
    128   1.5     rmind 
    129  1.18      elad 	if (pid != 0) {
    130   1.7     rmind 		/* Find the process */
    131  1.20        ad 		mutex_enter(proc_lock);
    132  1.20        ad 		p = p_find(pid, PFIND_LOCKED);
    133  1.20        ad 		if (p == NULL) {
    134  1.20        ad 			mutex_exit(proc_lock);
    135   1.7     rmind 			return ESRCH;
    136  1.20        ad 		}
    137  1.21        ad 		mutex_enter(p->p_lock);
    138  1.20        ad 		mutex_exit(proc_lock);
    139   1.7     rmind 		/* Disallow modification of system processes */
    140  1.17        ad 		if ((p->p_flag & PK_SYSTEM) != 0) {
    141  1.21        ad 			mutex_exit(p->p_lock);
    142   1.7     rmind 			return EPERM;
    143   1.7     rmind 		}
    144   1.7     rmind 	} else {
    145   1.7     rmind 		/* Use the calling process */
    146  1.18      elad 		p = curlwp->l_proc;
    147  1.21        ad 		mutex_enter(p->p_lock);
    148   1.5     rmind 	}
    149   1.1        ad 
    150   1.5     rmind 	/* Find the LWP(s) */
    151   1.5     rmind 	lcnt = 0;
    152   1.5     rmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    153   1.7     rmind 		pri_t kpri;
    154  1.12      elad 		int lpolicy;
    155   1.5     rmind 
    156   1.5     rmind 		if (lid && lid != t->l_lid)
    157   1.5     rmind 			continue;
    158  1.29     rmind 
    159  1.15  drochner 		lcnt++;
    160   1.7     rmind 		lwp_lock(t);
    161  1.29     rmind 		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
    162  1.29     rmind 
    163  1.29     rmind 		/* Disallow setting of priority for SCHED_OTHER threads */
    164  1.30     rmind 		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
    165  1.29     rmind 			lwp_unlock(t);
    166  1.29     rmind 			error = EINVAL;
    167  1.29     rmind 			break;
    168  1.29     rmind 		}
    169   1.7     rmind 
    170  1.29     rmind 		/* Convert priority, if needed */
    171  1.12      elad 		kpri = convert_pri(t, lpolicy, pri);
    172  1.12      elad 
    173  1.12      elad 		/* Check the permission */
    174  1.18      elad 		error = kauth_authorize_process(kauth_cred_get(),
    175  1.12      elad 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    176  1.12      elad 		    KAUTH_ARG(kpri));
    177  1.14      yamt 		if (error) {
    178  1.14      yamt 			lwp_unlock(t);
    179  1.12      elad 			break;
    180  1.14      yamt 		}
    181   1.5     rmind 
    182  1.29     rmind 		/* Set the scheduling class, change the priority */
    183  1.29     rmind 		t->l_class = lpolicy;
    184  1.29     rmind 		lwp_changepri(t, kpri);
    185   1.5     rmind 		lwp_unlock(t);
    186   1.5     rmind 	}
    187  1.21        ad 	mutex_exit(p->p_lock);
    188   1.7     rmind 	return (lcnt == 0) ? ESRCH : error;
    189   1.5     rmind }
    190   1.5     rmind 
    191   1.5     rmind /*
    192  1.18      elad  * Set scheduling parameters.
    193   1.5     rmind  */
    194   1.5     rmind int
    195  1.18      elad sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    196   1.5     rmind     register_t *retval)
    197   1.5     rmind {
    198   1.5     rmind 	/* {
    199   1.5     rmind 		syscallarg(pid_t) pid;
    200   1.5     rmind 		syscallarg(lwpid_t) lid;
    201  1.18      elad 		syscallarg(int) policy;
    202  1.18      elad 		syscallarg(const struct sched_param *) params;
    203   1.5     rmind 	} */
    204  1.18      elad 	struct sched_param params;
    205  1.18      elad 	int error;
    206  1.18      elad 
    207  1.18      elad 	/* Get the parameters from the user-space */
    208  1.18      elad 	error = copyin(SCARG(uap, params), &params, sizeof(params));
    209  1.18      elad 	if (error)
    210  1.18      elad 		goto out;
    211  1.18      elad 
    212  1.18      elad 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
    213  1.18      elad 	    SCARG(uap, policy), &params);
    214  1.31     rmind out:
    215  1.31     rmind 	return error;
    216  1.18      elad }
    217  1.18      elad 
    218  1.18      elad int
    219  1.18      elad do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    220  1.18      elad     struct sched_param *params)
    221  1.18      elad {
    222  1.18      elad 	struct sched_param lparams;
    223   1.5     rmind 	struct lwp *t;
    224  1.18      elad 	int error, lpolicy;
    225   1.5     rmind 
    226  1.16     rmind 	/* Locks the LWP */
    227  1.18      elad 	t = lwp_find2(pid, lid);
    228  1.21        ad 	if (t == NULL)
    229  1.21        ad 		return ESRCH;
    230  1.10      yamt 
    231  1.10      yamt 	/* Check the permission */
    232  1.18      elad 	error = kauth_authorize_process(kauth_cred_get(),
    233  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    234  1.10      yamt 	if (error != 0) {
    235  1.21        ad 		mutex_exit(t->l_proc->p_lock);
    236  1.21        ad 		return error;
    237   1.5     rmind 	}
    238  1.10      yamt 
    239  1.21        ad 	lwp_lock(t);
    240  1.18      elad 	lparams.sched_priority = t->l_priority;
    241  1.18      elad 	lpolicy = t->l_class;
    242   1.5     rmind 
    243  1.18      elad 	switch (lpolicy) {
    244   1.5     rmind 	case SCHED_OTHER:
    245  1.18      elad 		lparams.sched_priority -= PRI_USER;
    246   1.5     rmind 		break;
    247   1.5     rmind 	case SCHED_RR:
    248   1.5     rmind 	case SCHED_FIFO:
    249  1.18      elad 		lparams.sched_priority -= PRI_USER_RT;
    250   1.5     rmind 		break;
    251   1.5     rmind 	}
    252  1.18      elad 
    253  1.18      elad 	if (policy != NULL)
    254  1.18      elad 		*policy = lpolicy;
    255  1.18      elad 
    256  1.18      elad 	if (params != NULL)
    257  1.18      elad 		*params = lparams;
    258  1.18      elad 
    259  1.21        ad 	lwp_unlock(t);
    260  1.21        ad 	mutex_exit(t->l_proc->p_lock);
    261  1.18      elad 	return error;
    262  1.18      elad }
    263  1.18      elad 
    264  1.18      elad /*
    265  1.18      elad  * Get scheduling parameters.
    266  1.18      elad  */
    267  1.18      elad int
    268  1.18      elad sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    269  1.18      elad     register_t *retval)
    270  1.18      elad {
    271  1.18      elad 	/* {
    272  1.18      elad 		syscallarg(pid_t) pid;
    273  1.18      elad 		syscallarg(lwpid_t) lid;
    274  1.18      elad 		syscallarg(int *) policy;
    275  1.18      elad 		syscallarg(struct sched_param *) params;
    276  1.18      elad 	} */
    277  1.18      elad 	struct sched_param params;
    278  1.18      elad 	int error, policy;
    279  1.18      elad 
    280  1.18      elad 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
    281  1.18      elad 	    &params);
    282  1.18      elad 	if (error)
    283  1.18      elad 		goto out;
    284  1.18      elad 
    285  1.18      elad 	error = copyout(&params, SCARG(uap, params), sizeof(params));
    286  1.10      yamt 	if (error == 0 && SCARG(uap, policy) != NULL)
    287  1.10      yamt 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    288  1.31     rmind out:
    289  1.31     rmind 	return error;
    290   1.5     rmind }
    291   1.5     rmind 
    292  1.31     rmind /*
    293  1.31     rmind  * Allocate the CPU set, and get it from userspace.
    294  1.31     rmind  */
    295  1.23  christos static int
    296  1.26  christos genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
    297  1.23  christos {
    298  1.23  christos 	int error;
    299  1.23  christos 
    300  1.26  christos 	*dset = kcpuset_create();
    301  1.26  christos 	error = kcpuset_copyin(sset, *dset, size);
    302  1.26  christos 	if (error != 0)
    303  1.26  christos 		kcpuset_unuse(*dset, NULL);
    304  1.23  christos 	return error;
    305  1.23  christos }
    306  1.23  christos 
    307   1.5     rmind /*
    308   1.5     rmind  * Set affinity.
    309   1.5     rmind  */
    310   1.5     rmind int
    311   1.5     rmind sys__sched_setaffinity(struct lwp *l,
    312   1.5     rmind     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    313   1.5     rmind {
    314   1.5     rmind 	/* {
    315   1.5     rmind 		syscallarg(pid_t) pid;
    316   1.5     rmind 		syscallarg(lwpid_t) lid;
    317   1.5     rmind 		syscallarg(size_t) size;
    318  1.23  christos 		syscallarg(const cpuset_t *) cpuset;
    319   1.5     rmind 	} */
    320  1.26  christos 	kcpuset_t *cpuset, *cpulst = NULL;
    321  1.32     rmind 	struct cpu_info *ici, *ci;
    322   1.5     rmind 	struct proc *p;
    323   1.5     rmind 	struct lwp *t;
    324   1.5     rmind 	CPU_INFO_ITERATOR cii;
    325  1.32     rmind 	bool alloff;
    326   1.5     rmind 	lwpid_t lid;
    327   1.5     rmind 	u_int lcnt;
    328   1.5     rmind 	int error;
    329   1.5     rmind 
    330  1.31     rmind 	error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
    331  1.31     rmind 	if (error)
    332  1.23  christos 		return error;
    333   1.5     rmind 
    334  1.31     rmind 	/*
    335  1.32     rmind 	 * Traverse _each_ CPU to:
    336  1.32     rmind 	 *  - Check that CPUs in the mask have no assigned processor set.
    337  1.32     rmind 	 *  - Check that at least one CPU from the mask is online.
    338  1.32     rmind 	 *  - Find the first target CPU to migrate.
    339  1.31     rmind 	 *
    340  1.32     rmind 	 * To avoid the race with CPU online/offline calls and processor sets,
    341  1.32     rmind 	 * cpu_lock will be locked for the entire operation.
    342  1.31     rmind 	 */
    343  1.32     rmind 	ci = NULL;
    344  1.32     rmind 	alloff = false;
    345  1.31     rmind 	mutex_enter(&cpu_lock);
    346  1.32     rmind 	for (CPU_INFO_FOREACH(cii, ici)) {
    347  1.32     rmind 		struct schedstate_percpu *ispc;
    348  1.31     rmind 
    349  1.32     rmind 		if (kcpuset_isset(cpu_index(ici), cpuset) == 0)
    350  1.31     rmind 			continue;
    351  1.32     rmind 
    352  1.32     rmind 		ispc = &ici->ci_schedstate;
    353  1.32     rmind 		/* Check that CPU is not in the processor-set */
    354  1.32     rmind 		if (ispc->spc_psid != PS_NONE) {
    355  1.32     rmind 			error = EPERM;
    356  1.32     rmind 			goto out;
    357  1.32     rmind 		}
    358  1.32     rmind 		/* Skip offline CPUs */
    359  1.32     rmind 		if (ispc->spc_flags & SPCF_OFFLINE) {
    360  1.32     rmind 			alloff = true;
    361  1.31     rmind 			continue;
    362  1.24     rmind 		}
    363  1.32     rmind 		/* Target CPU to migrate */
    364  1.32     rmind 		if (ci == NULL) {
    365  1.32     rmind 			ci = ici;
    366  1.32     rmind 		}
    367  1.23  christos 	}
    368   1.5     rmind 	if (ci == NULL) {
    369  1.32     rmind 		if (alloff) {
    370  1.31     rmind 			/* All CPUs in the set are offline */
    371  1.31     rmind 			error = EPERM;
    372  1.31     rmind 			goto out;
    373  1.31     rmind 		}
    374   1.5     rmind 		/* Empty set */
    375  1.33     rmind 		kcpuset_unuse(cpuset, &cpulst);
    376   1.5     rmind 		cpuset = NULL;
    377   1.5     rmind 	}
    378   1.5     rmind 
    379   1.7     rmind 	if (SCARG(uap, pid) != 0) {
    380   1.7     rmind 		/* Find the process */
    381  1.20        ad 		mutex_enter(proc_lock);
    382  1.20        ad 		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
    383   1.7     rmind 		if (p == NULL) {
    384  1.20        ad 			mutex_exit(proc_lock);
    385   1.7     rmind 			error = ESRCH;
    386  1.23  christos 			goto out;
    387   1.7     rmind 		}
    388  1.21        ad 		mutex_enter(p->p_lock);
    389  1.20        ad 		mutex_exit(proc_lock);
    390  1.17        ad 		/* Disallow modification of system processes. */
    391  1.17        ad 		if ((p->p_flag & PK_SYSTEM) != 0) {
    392  1.21        ad 			mutex_exit(p->p_lock);
    393  1.17        ad 			error = EPERM;
    394  1.23  christos 			goto out;
    395  1.17        ad 		}
    396   1.7     rmind 	} else {
    397   1.7     rmind 		/* Use the calling process */
    398   1.7     rmind 		p = l->l_proc;
    399  1.21        ad 		mutex_enter(p->p_lock);
    400   1.5     rmind 	}
    401   1.5     rmind 
    402  1.10      yamt 	/*
    403  1.10      yamt 	 * Check the permission.
    404  1.10      yamt 	 */
    405  1.11      elad 	error = kauth_authorize_process(l->l_cred,
    406  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    407  1.10      yamt 	if (error != 0) {
    408  1.21        ad 		mutex_exit(p->p_lock);
    409  1.23  christos 		goto out;
    410  1.10      yamt 	}
    411   1.5     rmind 
    412  1.28  wrstuden #ifdef KERN_SA
    413  1.31     rmind 	/* Changing the affinity of a SA process is not supported */
    414  1.28  wrstuden 	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
    415  1.28  wrstuden 		mutex_exit(p->p_lock);
    416  1.28  wrstuden 		error = EINVAL;
    417  1.28  wrstuden 		goto out;
    418  1.28  wrstuden 	}
    419  1.28  wrstuden #endif
    420  1.28  wrstuden 
    421   1.5     rmind 	/* Find the LWP(s) */
    422   1.5     rmind 	lcnt = 0;
    423   1.5     rmind 	lid = SCARG(uap, lid);
    424   1.5     rmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    425   1.5     rmind 		if (lid && lid != t->l_lid)
    426   1.5     rmind 			continue;
    427   1.5     rmind 		lwp_lock(t);
    428  1.27     rmind 		/* It is not allowed to set the affinity for zombie LWPs */
    429  1.27     rmind 		if (t->l_stat == LSZOMB) {
    430  1.27     rmind 			lwp_unlock(t);
    431  1.27     rmind 			continue;
    432  1.27     rmind 		}
    433   1.5     rmind 		if (cpuset) {
    434   1.5     rmind 			/* Set the affinity flag and new CPU set */
    435   1.5     rmind 			t->l_flag |= LW_AFFINITY;
    436  1.25     rmind 			kcpuset_use(cpuset);
    437  1.23  christos 			if (t->l_affinity != NULL)
    438  1.25     rmind 				kcpuset_unuse(t->l_affinity, &cpulst);
    439  1.23  christos 			t->l_affinity = cpuset;
    440   1.5     rmind 			/* Migrate to another CPU, unlocks LWP */
    441   1.5     rmind 			lwp_migrate(t, ci);
    442   1.5     rmind 		} else {
    443   1.5     rmind 			/* Unset the affinity flag */
    444   1.5     rmind 			t->l_flag &= ~LW_AFFINITY;
    445  1.23  christos 			if (t->l_affinity != NULL)
    446  1.25     rmind 				kcpuset_unuse(t->l_affinity, &cpulst);
    447  1.23  christos 			t->l_affinity = NULL;
    448   1.5     rmind 			lwp_unlock(t);
    449   1.5     rmind 		}
    450   1.5     rmind 		lcnt++;
    451   1.5     rmind 	}
    452  1.21        ad 	mutex_exit(p->p_lock);
    453   1.5     rmind 	if (lcnt == 0)
    454   1.5     rmind 		error = ESRCH;
    455  1.23  christos out:
    456  1.31     rmind 	mutex_exit(&cpu_lock);
    457   1.5     rmind 	if (cpuset != NULL)
    458  1.25     rmind 		kcpuset_unuse(cpuset, &cpulst);
    459  1.26  christos 	kcpuset_destroy(cpulst);
    460   1.5     rmind 	return error;
    461   1.5     rmind }
    462   1.5     rmind 
    463   1.5     rmind /*
    464   1.5     rmind  * Get affinity.
    465   1.5     rmind  */
    466   1.5     rmind int
    467   1.5     rmind sys__sched_getaffinity(struct lwp *l,
    468   1.5     rmind     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    469   1.5     rmind {
    470   1.5     rmind 	/* {
    471   1.5     rmind 		syscallarg(pid_t) pid;
    472   1.5     rmind 		syscallarg(lwpid_t) lid;
    473   1.5     rmind 		syscallarg(size_t) size;
    474  1.23  christos 		syscallarg(cpuset_t *) cpuset;
    475   1.5     rmind 	} */
    476   1.5     rmind 	struct lwp *t;
    477  1.26  christos 	kcpuset_t *cpuset;
    478   1.5     rmind 	int error;
    479   1.5     rmind 
    480  1.31     rmind 	error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size));
    481  1.31     rmind 	if (error)
    482  1.23  christos 		return error;
    483   1.5     rmind 
    484  1.16     rmind 	/* Locks the LWP */
    485  1.16     rmind 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    486   1.5     rmind 	if (t == NULL) {
    487  1.23  christos 		error = ESRCH;
    488  1.23  christos 		goto out;
    489   1.5     rmind 	}
    490  1.10      yamt 	/* Check the permission */
    491  1.11      elad 	if (kauth_authorize_process(l->l_cred,
    492  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    493  1.21        ad 		mutex_exit(t->l_proc->p_lock);
    494  1.23  christos 		error = EPERM;
    495  1.23  christos 		goto out;
    496  1.10      yamt 	}
    497  1.21        ad 	lwp_lock(t);
    498  1.23  christos 	if (t->l_flag & LW_AFFINITY) {
    499  1.23  christos 		KASSERT(t->l_affinity != NULL);
    500  1.25     rmind 		kcpuset_copy(cpuset, t->l_affinity);
    501  1.23  christos 	} else
    502  1.26  christos 		kcpuset_zero(cpuset);
    503   1.5     rmind 	lwp_unlock(t);
    504  1.21        ad 	mutex_exit(t->l_proc->p_lock);
    505   1.5     rmind 
    506  1.26  christos 	error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
    507  1.23  christos out:
    508  1.25     rmind 	kcpuset_unuse(cpuset, NULL);
    509   1.5     rmind 	return error;
    510   1.5     rmind }
    511   1.5     rmind 
    512   1.5     rmind /*
    513   1.5     rmind  * Yield.
    514   1.5     rmind  */
    515   1.1        ad int
    516   1.4       dsl sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    517   1.1        ad {
    518   1.1        ad 
    519   1.1        ad 	yield();
    520  1.28  wrstuden #ifdef KERN_SA
    521  1.28  wrstuden 	if (l->l_flag & LW_SA) {
    522  1.28  wrstuden 		sa_preempt(l);
    523  1.28  wrstuden 	}
    524  1.28  wrstuden #endif
    525   1.1        ad 	return 0;
    526   1.1        ad }
    527   1.5     rmind 
    528   1.5     rmind /*
    529   1.5     rmind  * Sysctl nodes and initialization.
    530   1.5     rmind  */
    531   1.5     rmind SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
    532   1.5     rmind {
    533   1.5     rmind 	const struct sysctlnode *node = NULL;
    534   1.5     rmind 
    535   1.5     rmind 	sysctl_createv(clog, 0, NULL, NULL,
    536   1.5     rmind 		CTLFLAG_PERMANENT,
    537   1.5     rmind 		CTLTYPE_NODE, "kern", NULL,
    538   1.5     rmind 		NULL, 0, NULL, 0,
    539   1.5     rmind 		CTL_KERN, CTL_EOL);
    540   1.5     rmind 	sysctl_createv(clog, 0, NULL, NULL,
    541   1.5     rmind 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    542   1.5     rmind 		CTLTYPE_INT, "posix_sched",
    543   1.5     rmind 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    544   1.5     rmind 			     "Process Scheduling option to which the "
    545   1.5     rmind 			     "system attempts to conform"),
    546   1.5     rmind 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    547   1.5     rmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
    548   1.5     rmind 	sysctl_createv(clog, 0, NULL, &node,
    549   1.5     rmind 		CTLFLAG_PERMANENT,
    550   1.5     rmind 		CTLTYPE_NODE, "sched",
    551   1.5     rmind 		SYSCTL_DESCR("Scheduler options"),
    552   1.5     rmind 		NULL, 0, NULL, 0,
    553   1.5     rmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
    554   1.5     rmind 
    555   1.5     rmind 	if (node == NULL)
    556   1.5     rmind 		return;
    557   1.5     rmind 
    558   1.5     rmind 	sysctl_createv(clog, 0, &node, NULL,
    559   1.5     rmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    560   1.5     rmind 		CTLTYPE_INT, "pri_min",
    561   1.5     rmind 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    562   1.5     rmind 		NULL, SCHED_PRI_MIN, NULL, 0,
    563   1.5     rmind 		CTL_CREATE, CTL_EOL);
    564   1.5     rmind 	sysctl_createv(clog, 0, &node, NULL,
    565   1.5     rmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    566   1.5     rmind 		CTLTYPE_INT, "pri_max",
    567  1.19     njoly 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
    568   1.5     rmind 		NULL, SCHED_PRI_MAX, NULL, 0,
    569   1.5     rmind 		CTL_CREATE, CTL_EOL);
    570   1.5     rmind }
    571