Home | History | Annotate | Line # | Download | only in kern
      1  1.50  riastrad /*	$NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $	*/
      2   1.1        ad 
      3   1.5     rmind /*
      4  1.36     rmind  * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
      5   1.1        ad  * All rights reserved.
      6   1.5     rmind  *
      7   1.1        ad  * Redistribution and use in source and binary forms, with or without
      8   1.1        ad  * modification, are permitted provided that the following conditions
      9   1.1        ad  * are met:
     10   1.1        ad  * 1. Redistributions of source code must retain the above copyright
     11   1.1        ad  *    notice, this list of conditions and the following disclaimer.
     12   1.1        ad  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1        ad  *    notice, this list of conditions and the following disclaimer in the
     14   1.1        ad  *    documentation and/or other materials provided with the distribution.
     15   1.1        ad  *
     16  1.16     rmind  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  1.16     rmind  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  1.16     rmind  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  1.16     rmind  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  1.16     rmind  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  1.16     rmind  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  1.16     rmind  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  1.16     rmind  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  1.16     rmind  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  1.16     rmind  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  1.16     rmind  * SUCH DAMAGE.
     27   1.1        ad  */
     28   1.1        ad 
     29   1.5     rmind /*
     30  1.17        ad  * System calls relating to the scheduler.
     31  1.17        ad  *
     32  1.31     rmind  * Lock order:
     33  1.31     rmind  *
     34  1.31     rmind  *	cpu_lock ->
     35  1.31     rmind  *	    proc_lock ->
     36  1.31     rmind  *		proc_t::p_lock ->
     37  1.31     rmind  *		    lwp_t::lwp_lock
     38  1.31     rmind  *
     39   1.5     rmind  * TODO:
     40   1.5     rmind  *  - Handle pthread_setschedprio() as defined by POSIX;
     41   1.5     rmind  */
     42   1.5     rmind 
     43   1.1        ad #include <sys/cdefs.h>
     44  1.50  riastrad __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.50 2023/04/09 09:18:09 riastradh Exp $");
     45   1.1        ad 
     46   1.1        ad #include <sys/param.h>
     47   1.5     rmind 
     48   1.5     rmind #include <sys/cpu.h>
     49   1.5     rmind #include <sys/kauth.h>
     50   1.5     rmind #include <sys/kmem.h>
     51   1.5     rmind #include <sys/lwp.h>
     52   1.5     rmind #include <sys/mutex.h>
     53   1.1        ad #include <sys/proc.h>
     54   1.5     rmind #include <sys/pset.h>
     55   1.5     rmind #include <sys/sched.h>
     56   1.1        ad #include <sys/syscallargs.h>
     57   1.5     rmind #include <sys/sysctl.h>
     58   1.5     rmind #include <sys/systm.h>
     59   1.5     rmind #include <sys/types.h>
     60   1.5     rmind #include <sys/unistd.h>
     61   1.5     rmind 
     62  1.34      elad static struct sysctllog *sched_sysctl_log;
     63  1.34      elad static kauth_listener_t sched_listener;
     64  1.34      elad 
     65   1.5     rmind /*
     66   1.7     rmind  * Convert user priority or the in-kernel priority or convert the current
     67   1.7     rmind  * priority to the appropriate range according to the policy change.
     68   1.7     rmind  */
     69   1.7     rmind static pri_t
     70   1.7     rmind convert_pri(lwp_t *l, int policy, pri_t pri)
     71   1.7     rmind {
     72   1.7     rmind 
     73  1.29     rmind 	/* Convert user priority to the in-kernel */
     74   1.7     rmind 	if (pri != PRI_NONE) {
     75  1.29     rmind 		/* Only for real-time threads */
     76  1.50  riastrad 		KASSERT(pri >= SCHED_PRI_MIN);
     77  1.50  riastrad 		KASSERT(pri <= SCHED_PRI_MAX);
     78  1.29     rmind 		KASSERT(policy != SCHED_OTHER);
     79  1.29     rmind 		return PRI_USER_RT + pri;
     80   1.7     rmind 	}
     81  1.29     rmind 
     82  1.29     rmind 	/* Neither policy, nor priority change */
     83   1.7     rmind 	if (l->l_class == policy)
     84   1.7     rmind 		return l->l_priority;
     85   1.7     rmind 
     86  1.29     rmind 	/* Time-sharing -> real-time */
     87   1.7     rmind 	if (l->l_class == SCHED_OTHER) {
     88   1.7     rmind 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
     89  1.29     rmind 		return PRI_USER_RT;
     90   1.7     rmind 	}
     91  1.29     rmind 
     92  1.29     rmind 	/* Real-time -> time-sharing */
     93   1.7     rmind 	if (policy == SCHED_OTHER) {
     94   1.7     rmind 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
     95  1.41      yamt 		/*
     96  1.41      yamt 		 * this is a bit arbitrary because the priority is dynamic
     97  1.41      yamt 		 * for SCHED_OTHER threads and will likely be changed by
     98  1.41      yamt 		 * the scheduler soon anyway.
     99  1.41      yamt 		 */
    100  1.29     rmind 		return l->l_priority - PRI_USER_RT;
    101   1.7     rmind 	}
    102  1.29     rmind 
    103  1.29     rmind 	/* Real-time -> real-time */
    104  1.29     rmind 	return l->l_priority;
    105   1.7     rmind }
    106   1.7     rmind 
    107   1.5     rmind int
    108  1.18      elad do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    109  1.18      elad     const struct sched_param *params)
    110   1.5     rmind {
    111   1.5     rmind 	struct proc *p;
    112   1.5     rmind 	struct lwp *t;
    113  1.18      elad 	pri_t pri;
    114   1.5     rmind 	u_int lcnt;
    115   1.5     rmind 	int error;
    116   1.5     rmind 
    117  1.18      elad 	error = 0;
    118  1.18      elad 
    119  1.18      elad 	pri = params->sched_priority;
    120   1.7     rmind 
    121   1.7     rmind 	/* If no parameters specified, just return (this should not happen) */
    122   1.7     rmind 	if (pri == PRI_NONE && policy == SCHED_NONE)
    123   1.7     rmind 		return 0;
    124   1.5     rmind 
    125   1.7     rmind 	/* Validate scheduling class */
    126   1.7     rmind 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
    127   1.7     rmind 		return EINVAL;
    128   1.5     rmind 
    129   1.7     rmind 	/* Validate priority */
    130   1.7     rmind 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
    131   1.7     rmind 		return EINVAL;
    132   1.5     rmind 
    133  1.18      elad 	if (pid != 0) {
    134   1.7     rmind 		/* Find the process */
    135  1.49        ad 		mutex_enter(&proc_lock);
    136  1.35     rmind 		p = proc_find(pid);
    137  1.20        ad 		if (p == NULL) {
    138  1.49        ad 			mutex_exit(&proc_lock);
    139   1.7     rmind 			return ESRCH;
    140  1.20        ad 		}
    141  1.21        ad 		mutex_enter(p->p_lock);
    142  1.49        ad 		mutex_exit(&proc_lock);
    143   1.7     rmind 		/* Disallow modification of system processes */
    144  1.17        ad 		if ((p->p_flag & PK_SYSTEM) != 0) {
    145  1.21        ad 			mutex_exit(p->p_lock);
    146   1.7     rmind 			return EPERM;
    147   1.7     rmind 		}
    148   1.7     rmind 	} else {
    149   1.7     rmind 		/* Use the calling process */
    150  1.18      elad 		p = curlwp->l_proc;
    151  1.21        ad 		mutex_enter(p->p_lock);
    152   1.5     rmind 	}
    153   1.1        ad 
    154   1.5     rmind 	/* Find the LWP(s) */
    155   1.5     rmind 	lcnt = 0;
    156   1.5     rmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    157   1.7     rmind 		pri_t kpri;
    158  1.12      elad 		int lpolicy;
    159   1.5     rmind 
    160   1.5     rmind 		if (lid && lid != t->l_lid)
    161   1.5     rmind 			continue;
    162  1.29     rmind 
    163  1.15  drochner 		lcnt++;
    164   1.7     rmind 		lwp_lock(t);
    165  1.29     rmind 		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
    166  1.29     rmind 
    167  1.29     rmind 		/* Disallow setting of priority for SCHED_OTHER threads */
    168  1.30     rmind 		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
    169  1.29     rmind 			lwp_unlock(t);
    170  1.29     rmind 			error = EINVAL;
    171  1.29     rmind 			break;
    172  1.29     rmind 		}
    173   1.7     rmind 
    174  1.29     rmind 		/* Convert priority, if needed */
    175  1.12      elad 		kpri = convert_pri(t, lpolicy, pri);
    176  1.12      elad 
    177  1.12      elad 		/* Check the permission */
    178  1.18      elad 		error = kauth_authorize_process(kauth_cred_get(),
    179  1.12      elad 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
    180  1.12      elad 		    KAUTH_ARG(kpri));
    181  1.14      yamt 		if (error) {
    182  1.14      yamt 			lwp_unlock(t);
    183  1.12      elad 			break;
    184  1.14      yamt 		}
    185   1.5     rmind 
    186  1.29     rmind 		/* Set the scheduling class, change the priority */
    187  1.29     rmind 		t->l_class = lpolicy;
    188  1.29     rmind 		lwp_changepri(t, kpri);
    189   1.5     rmind 		lwp_unlock(t);
    190   1.5     rmind 	}
    191  1.21        ad 	mutex_exit(p->p_lock);
    192   1.7     rmind 	return (lcnt == 0) ? ESRCH : error;
    193   1.5     rmind }
    194   1.5     rmind 
    195   1.5     rmind /*
    196  1.18      elad  * Set scheduling parameters.
    197   1.5     rmind  */
    198   1.5     rmind int
    199  1.18      elad sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    200   1.5     rmind     register_t *retval)
    201   1.5     rmind {
    202   1.5     rmind 	/* {
    203   1.5     rmind 		syscallarg(pid_t) pid;
    204   1.5     rmind 		syscallarg(lwpid_t) lid;
    205  1.18      elad 		syscallarg(int) policy;
    206  1.18      elad 		syscallarg(const struct sched_param *) params;
    207   1.5     rmind 	} */
    208  1.18      elad 	struct sched_param params;
    209  1.18      elad 	int error;
    210  1.18      elad 
    211  1.18      elad 	/* Get the parameters from the user-space */
    212  1.18      elad 	error = copyin(SCARG(uap, params), &params, sizeof(params));
    213  1.18      elad 	if (error)
    214  1.18      elad 		goto out;
    215  1.18      elad 
    216  1.18      elad 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
    217  1.18      elad 	    SCARG(uap, policy), &params);
    218  1.31     rmind out:
    219  1.31     rmind 	return error;
    220  1.18      elad }
    221  1.18      elad 
    222  1.41      yamt /*
    223  1.41      yamt  * do_sched_getparam:
    224  1.41      yamt  *
    225  1.41      yamt  * if lid=0, returns the parameter of the first LWP in the process.
    226  1.41      yamt  */
    227  1.18      elad int
    228  1.18      elad do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    229  1.18      elad     struct sched_param *params)
    230  1.18      elad {
    231  1.18      elad 	struct sched_param lparams;
    232   1.5     rmind 	struct lwp *t;
    233  1.18      elad 	int error, lpolicy;
    234   1.5     rmind 
    235  1.48   thorpej 	if (pid < 0 || lid < 0)
    236  1.48   thorpej 		return EINVAL;
    237  1.48   thorpej 
    238  1.41      yamt 	t = lwp_find2(pid, lid); /* acquire p_lock */
    239  1.21        ad 	if (t == NULL)
    240  1.21        ad 		return ESRCH;
    241  1.10      yamt 
    242  1.10      yamt 	/* Check the permission */
    243  1.18      elad 	error = kauth_authorize_process(kauth_cred_get(),
    244  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
    245  1.10      yamt 	if (error != 0) {
    246  1.21        ad 		mutex_exit(t->l_proc->p_lock);
    247  1.21        ad 		return error;
    248   1.5     rmind 	}
    249  1.10      yamt 
    250  1.21        ad 	lwp_lock(t);
    251  1.18      elad 	lparams.sched_priority = t->l_priority;
    252  1.18      elad 	lpolicy = t->l_class;
    253  1.41      yamt 	lwp_unlock(t);
    254  1.41      yamt 	mutex_exit(t->l_proc->p_lock);
    255   1.5     rmind 
    256  1.41      yamt 	/*
    257  1.41      yamt 	 * convert to the user-visible priority value.
    258  1.41      yamt 	 * it's an inversion of convert_pri().
    259  1.41      yamt 	 *
    260  1.41      yamt 	 * the SCHED_OTHER case is a bit arbitrary given that
    261  1.41      yamt 	 *	- we don't allow setting the priority.
    262  1.41      yamt 	 *	- the priority is dynamic.
    263  1.41      yamt 	 */
    264  1.18      elad 	switch (lpolicy) {
    265   1.5     rmind 	case SCHED_OTHER:
    266  1.18      elad 		lparams.sched_priority -= PRI_USER;
    267   1.5     rmind 		break;
    268   1.5     rmind 	case SCHED_RR:
    269   1.5     rmind 	case SCHED_FIFO:
    270  1.18      elad 		lparams.sched_priority -= PRI_USER_RT;
    271   1.5     rmind 		break;
    272   1.5     rmind 	}
    273  1.18      elad 
    274  1.18      elad 	if (policy != NULL)
    275  1.18      elad 		*policy = lpolicy;
    276  1.18      elad 
    277  1.18      elad 	if (params != NULL)
    278  1.18      elad 		*params = lparams;
    279  1.18      elad 
    280  1.18      elad 	return error;
    281  1.18      elad }
    282  1.18      elad 
    283  1.18      elad /*
    284  1.18      elad  * Get scheduling parameters.
    285  1.18      elad  */
    286  1.18      elad int
    287  1.18      elad sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    288  1.18      elad     register_t *retval)
    289  1.18      elad {
    290  1.18      elad 	/* {
    291  1.18      elad 		syscallarg(pid_t) pid;
    292  1.18      elad 		syscallarg(lwpid_t) lid;
    293  1.18      elad 		syscallarg(int *) policy;
    294  1.18      elad 		syscallarg(struct sched_param *) params;
    295  1.18      elad 	} */
    296  1.18      elad 	struct sched_param params;
    297  1.18      elad 	int error, policy;
    298  1.18      elad 
    299  1.18      elad 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
    300  1.18      elad 	    &params);
    301  1.18      elad 	if (error)
    302  1.18      elad 		goto out;
    303  1.18      elad 
    304  1.18      elad 	error = copyout(&params, SCARG(uap, params), sizeof(params));
    305  1.10      yamt 	if (error == 0 && SCARG(uap, policy) != NULL)
    306  1.10      yamt 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
    307  1.31     rmind out:
    308  1.31     rmind 	return error;
    309   1.5     rmind }
    310   1.5     rmind 
    311  1.31     rmind /*
    312  1.31     rmind  * Allocate the CPU set, and get it from userspace.
    313  1.31     rmind  */
    314  1.23  christos static int
    315  1.26  christos genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
    316  1.23  christos {
    317  1.36     rmind 	kcpuset_t *kset;
    318  1.23  christos 	int error;
    319  1.23  christos 
    320  1.42     rmind 	kcpuset_create(&kset, true);
    321  1.36     rmind 	error = kcpuset_copyin(sset, kset, size);
    322  1.36     rmind 	if (error) {
    323  1.36     rmind 		kcpuset_unuse(kset, NULL);
    324  1.36     rmind 	} else {
    325  1.36     rmind 		*dset = kset;
    326  1.36     rmind 	}
    327  1.23  christos 	return error;
    328  1.23  christos }
    329  1.23  christos 
    330   1.5     rmind /*
    331   1.5     rmind  * Set affinity.
    332   1.5     rmind  */
    333   1.5     rmind int
    334   1.5     rmind sys__sched_setaffinity(struct lwp *l,
    335   1.5     rmind     const struct sys__sched_setaffinity_args *uap, register_t *retval)
    336   1.5     rmind {
    337   1.5     rmind 	/* {
    338   1.5     rmind 		syscallarg(pid_t) pid;
    339   1.5     rmind 		syscallarg(lwpid_t) lid;
    340   1.5     rmind 		syscallarg(size_t) size;
    341  1.23  christos 		syscallarg(const cpuset_t *) cpuset;
    342   1.5     rmind 	} */
    343  1.36     rmind 	kcpuset_t *kcset, *kcpulst = NULL;
    344  1.32     rmind 	struct cpu_info *ici, *ci;
    345   1.5     rmind 	struct proc *p;
    346   1.5     rmind 	struct lwp *t;
    347   1.5     rmind 	CPU_INFO_ITERATOR cii;
    348  1.32     rmind 	bool alloff;
    349   1.5     rmind 	lwpid_t lid;
    350   1.5     rmind 	u_int lcnt;
    351   1.5     rmind 	int error;
    352   1.5     rmind 
    353  1.36     rmind 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
    354  1.31     rmind 	if (error)
    355  1.23  christos 		return error;
    356   1.5     rmind 
    357  1.31     rmind 	/*
    358  1.32     rmind 	 * Traverse _each_ CPU to:
    359  1.32     rmind 	 *  - Check that CPUs in the mask have no assigned processor set.
    360  1.32     rmind 	 *  - Check that at least one CPU from the mask is online.
    361  1.32     rmind 	 *  - Find the first target CPU to migrate.
    362  1.31     rmind 	 *
    363  1.32     rmind 	 * To avoid the race with CPU online/offline calls and processor sets,
    364  1.32     rmind 	 * cpu_lock will be locked for the entire operation.
    365  1.31     rmind 	 */
    366  1.32     rmind 	ci = NULL;
    367  1.32     rmind 	alloff = false;
    368  1.31     rmind 	mutex_enter(&cpu_lock);
    369  1.32     rmind 	for (CPU_INFO_FOREACH(cii, ici)) {
    370  1.32     rmind 		struct schedstate_percpu *ispc;
    371  1.31     rmind 
    372  1.39     rmind 		if (!kcpuset_isset(kcset, cpu_index(ici))) {
    373  1.31     rmind 			continue;
    374  1.39     rmind 		}
    375  1.32     rmind 
    376  1.32     rmind 		ispc = &ici->ci_schedstate;
    377  1.32     rmind 		/* Check that CPU is not in the processor-set */
    378  1.32     rmind 		if (ispc->spc_psid != PS_NONE) {
    379  1.32     rmind 			error = EPERM;
    380  1.32     rmind 			goto out;
    381  1.32     rmind 		}
    382  1.32     rmind 		/* Skip offline CPUs */
    383  1.32     rmind 		if (ispc->spc_flags & SPCF_OFFLINE) {
    384  1.32     rmind 			alloff = true;
    385  1.31     rmind 			continue;
    386  1.24     rmind 		}
    387  1.32     rmind 		/* Target CPU to migrate */
    388  1.32     rmind 		if (ci == NULL) {
    389  1.32     rmind 			ci = ici;
    390  1.32     rmind 		}
    391  1.23  christos 	}
    392   1.5     rmind 	if (ci == NULL) {
    393  1.32     rmind 		if (alloff) {
    394  1.31     rmind 			/* All CPUs in the set are offline */
    395  1.31     rmind 			error = EPERM;
    396  1.31     rmind 			goto out;
    397  1.31     rmind 		}
    398   1.5     rmind 		/* Empty set */
    399  1.36     rmind 		kcpuset_unuse(kcset, &kcpulst);
    400  1.45   msaitoh 		kcset = NULL;
    401   1.5     rmind 	}
    402   1.5     rmind 
    403   1.7     rmind 	if (SCARG(uap, pid) != 0) {
    404   1.7     rmind 		/* Find the process */
    405  1.49        ad 		mutex_enter(&proc_lock);
    406  1.35     rmind 		p = proc_find(SCARG(uap, pid));
    407   1.7     rmind 		if (p == NULL) {
    408  1.49        ad 			mutex_exit(&proc_lock);
    409   1.7     rmind 			error = ESRCH;
    410  1.23  christos 			goto out;
    411   1.7     rmind 		}
    412  1.21        ad 		mutex_enter(p->p_lock);
    413  1.49        ad 		mutex_exit(&proc_lock);
    414  1.17        ad 		/* Disallow modification of system processes. */
    415  1.17        ad 		if ((p->p_flag & PK_SYSTEM) != 0) {
    416  1.21        ad 			mutex_exit(p->p_lock);
    417  1.17        ad 			error = EPERM;
    418  1.23  christos 			goto out;
    419  1.17        ad 		}
    420   1.7     rmind 	} else {
    421   1.7     rmind 		/* Use the calling process */
    422   1.7     rmind 		p = l->l_proc;
    423  1.21        ad 		mutex_enter(p->p_lock);
    424   1.5     rmind 	}
    425   1.5     rmind 
    426  1.10      yamt 	/*
    427  1.10      yamt 	 * Check the permission.
    428  1.10      yamt 	 */
    429  1.11      elad 	error = kauth_authorize_process(l->l_cred,
    430  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
    431  1.10      yamt 	if (error != 0) {
    432  1.21        ad 		mutex_exit(p->p_lock);
    433  1.23  christos 		goto out;
    434  1.10      yamt 	}
    435   1.5     rmind 
    436  1.37     rmind 	/* Iterate through LWP(s). */
    437   1.5     rmind 	lcnt = 0;
    438   1.5     rmind 	lid = SCARG(uap, lid);
    439   1.5     rmind 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    440  1.37     rmind 		if (lid && lid != t->l_lid) {
    441   1.5     rmind 			continue;
    442  1.37     rmind 		}
    443   1.5     rmind 		lwp_lock(t);
    444  1.37     rmind 		/* No affinity for zombie LWPs. */
    445  1.27     rmind 		if (t->l_stat == LSZOMB) {
    446  1.27     rmind 			lwp_unlock(t);
    447  1.27     rmind 			continue;
    448  1.27     rmind 		}
    449  1.37     rmind 		/* First, release existing affinity, if any. */
    450  1.37     rmind 		if (t->l_affinity) {
    451  1.37     rmind 			kcpuset_unuse(t->l_affinity, &kcpulst);
    452  1.37     rmind 		}
    453  1.36     rmind 		if (kcset) {
    454  1.37     rmind 			/*
    455  1.37     rmind 			 * Hold a reference on affinity mask, assign mask to
    456  1.37     rmind 			 * LWP and migrate it to another CPU (unlocks LWP).
    457  1.37     rmind 			 */
    458  1.36     rmind 			kcpuset_use(kcset);
    459  1.36     rmind 			t->l_affinity = kcset;
    460   1.5     rmind 			lwp_migrate(t, ci);
    461   1.5     rmind 		} else {
    462  1.37     rmind 			/* Old affinity mask is released, just clear. */
    463  1.23  christos 			t->l_affinity = NULL;
    464   1.5     rmind 			lwp_unlock(t);
    465   1.5     rmind 		}
    466   1.5     rmind 		lcnt++;
    467   1.5     rmind 	}
    468  1.21        ad 	mutex_exit(p->p_lock);
    469  1.36     rmind 	if (lcnt == 0) {
    470   1.5     rmind 		error = ESRCH;
    471  1.36     rmind 	}
    472  1.23  christos out:
    473  1.31     rmind 	mutex_exit(&cpu_lock);
    474  1.36     rmind 
    475  1.36     rmind 	/*
    476  1.36     rmind 	 * Drop the initial reference (LWPs, if any, have the ownership now),
    477  1.36     rmind 	 * and destroy whatever is in the G/C list, if filled.
    478  1.36     rmind 	 */
    479  1.36     rmind 	if (kcset) {
    480  1.36     rmind 		kcpuset_unuse(kcset, &kcpulst);
    481  1.36     rmind 	}
    482  1.36     rmind 	if (kcpulst) {
    483  1.36     rmind 		kcpuset_destroy(kcpulst);
    484  1.36     rmind 	}
    485   1.5     rmind 	return error;
    486   1.5     rmind }
    487   1.5     rmind 
    488   1.5     rmind /*
    489   1.5     rmind  * Get affinity.
    490   1.5     rmind  */
    491   1.5     rmind int
    492   1.5     rmind sys__sched_getaffinity(struct lwp *l,
    493   1.5     rmind     const struct sys__sched_getaffinity_args *uap, register_t *retval)
    494   1.5     rmind {
    495   1.5     rmind 	/* {
    496   1.5     rmind 		syscallarg(pid_t) pid;
    497   1.5     rmind 		syscallarg(lwpid_t) lid;
    498   1.5     rmind 		syscallarg(size_t) size;
    499  1.23  christos 		syscallarg(cpuset_t *) cpuset;
    500   1.5     rmind 	} */
    501   1.5     rmind 	struct lwp *t;
    502  1.36     rmind 	kcpuset_t *kcset;
    503   1.5     rmind 	int error;
    504   1.5     rmind 
    505  1.48   thorpej 	if (SCARG(uap, pid) < 0 || SCARG(uap, lid) < 0)
    506  1.48   thorpej 		return EINVAL;
    507  1.48   thorpej 
    508  1.36     rmind 	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
    509  1.31     rmind 	if (error)
    510  1.23  christos 		return error;
    511   1.5     rmind 
    512  1.16     rmind 	/* Locks the LWP */
    513  1.16     rmind 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
    514   1.5     rmind 	if (t == NULL) {
    515  1.23  christos 		error = ESRCH;
    516  1.23  christos 		goto out;
    517   1.5     rmind 	}
    518  1.10      yamt 	/* Check the permission */
    519  1.11      elad 	if (kauth_authorize_process(l->l_cred,
    520  1.11      elad 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
    521  1.21        ad 		mutex_exit(t->l_proc->p_lock);
    522  1.23  christos 		error = EPERM;
    523  1.23  christos 		goto out;
    524  1.10      yamt 	}
    525  1.21        ad 	lwp_lock(t);
    526  1.37     rmind 	if (t->l_affinity) {
    527  1.36     rmind 		kcpuset_copy(kcset, t->l_affinity);
    528  1.36     rmind 	} else {
    529  1.36     rmind 		kcpuset_zero(kcset);
    530  1.36     rmind 	}
    531   1.5     rmind 	lwp_unlock(t);
    532  1.21        ad 	mutex_exit(t->l_proc->p_lock);
    533   1.5     rmind 
    534  1.36     rmind 	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
    535  1.23  christos out:
    536  1.36     rmind 	kcpuset_unuse(kcset, NULL);
    537   1.5     rmind 	return error;
    538   1.5     rmind }
    539   1.5     rmind 
    540   1.5     rmind /*
    541  1.44  christos  * Priority protection for PTHREAD_PRIO_PROTECT. This is a weak
    542  1.44  christos  * analogue of priority inheritance: temp raise the priority
    543  1.44  christos  * of the caller when accessing a protected resource.
    544  1.44  christos  */
    545  1.44  christos int
    546  1.44  christos sys__sched_protect(struct lwp *l,
    547  1.44  christos     const struct sys__sched_protect_args *uap, register_t *retval)
    548  1.44  christos {
    549  1.44  christos         /* {
    550  1.44  christos                 syscallarg(int) priority;
    551  1.44  christos 		syscallarg(int *) opriority;
    552  1.44  christos         } */
    553  1.44  christos 	int error;
    554  1.44  christos 	pri_t pri;
    555  1.44  christos 
    556  1.44  christos 	KASSERT(l->l_inheritedprio == -1);
    557  1.44  christos 	KASSERT(l->l_auxprio == -1 || l->l_auxprio == l->l_protectprio);
    558  1.44  christos 
    559  1.44  christos 	pri = SCARG(uap, priority);
    560  1.44  christos 	error = 0;
    561  1.44  christos 	lwp_lock(l);
    562  1.44  christos 	if (pri == -1) {
    563  1.44  christos 		/* back out priority changes */
    564  1.44  christos 		switch(l->l_protectdepth) {
    565  1.44  christos 		case 0:
    566  1.44  christos 			error = EINVAL;
    567  1.44  christos 			break;
    568  1.44  christos 		case 1:
    569  1.44  christos 			l->l_protectdepth = 0;
    570  1.44  christos 			l->l_protectprio = -1;
    571  1.44  christos 			l->l_auxprio = -1;
    572  1.44  christos 			break;
    573  1.44  christos 		default:
    574  1.44  christos 			l->l_protectdepth--;
    575  1.45   msaitoh 			break;
    576  1.44  christos 		}
    577  1.44  christos 	} else if (pri < 0) {
    578  1.44  christos 		/* Just retrieve the current value, for debugging */
    579  1.46  christos 		if (l->l_protectprio == -1)
    580  1.44  christos 			error = ENOENT;
    581  1.44  christos 		else
    582  1.44  christos 			*retval = l->l_protectprio - PRI_USER_RT;
    583  1.44  christos 	} else if (__predict_false(pri < SCHED_PRI_MIN ||
    584  1.44  christos 	    pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
    585  1.44  christos 		/* must fail if existing priority is higher */
    586  1.44  christos 		error = EPERM;
    587  1.44  christos 	} else {
    588  1.44  christos 		/* play along but make no changes if not a realtime LWP. */
    589  1.44  christos 		l->l_protectdepth++;
    590  1.44  christos 		pri += PRI_USER_RT;
    591  1.44  christos 		if (__predict_true(l->l_class != SCHED_OTHER &&
    592  1.44  christos 		    pri > l->l_protectprio)) {
    593  1.44  christos 			l->l_protectprio = pri;
    594  1.44  christos 			l->l_auxprio = pri;
    595  1.44  christos 		}
    596  1.44  christos 	}
    597  1.44  christos 	lwp_unlock(l);
    598  1.44  christos 
    599  1.44  christos 	return error;
    600  1.44  christos }
    601  1.44  christos 
    602  1.44  christos /*
    603   1.5     rmind  * Yield.
    604   1.5     rmind  */
    605   1.1        ad int
    606   1.4       dsl sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
    607   1.1        ad {
    608   1.1        ad 
    609   1.1        ad 	yield();
    610   1.1        ad 	return 0;
    611   1.1        ad }
    612   1.5     rmind 
    613   1.5     rmind /*
    614   1.5     rmind  * Sysctl nodes and initialization.
    615   1.5     rmind  */
    616  1.34      elad static void
    617  1.34      elad sysctl_sched_setup(struct sysctllog **clog)
    618   1.5     rmind {
    619   1.5     rmind 	const struct sysctlnode *node = NULL;
    620   1.5     rmind 
    621   1.5     rmind 	sysctl_createv(clog, 0, NULL, NULL,
    622   1.5     rmind 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
    623   1.5     rmind 		CTLTYPE_INT, "posix_sched",
    624   1.5     rmind 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
    625   1.5     rmind 			     "Process Scheduling option to which the "
    626   1.5     rmind 			     "system attempts to conform"),
    627   1.5     rmind 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
    628   1.5     rmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
    629   1.5     rmind 	sysctl_createv(clog, 0, NULL, &node,
    630   1.5     rmind 		CTLFLAG_PERMANENT,
    631   1.5     rmind 		CTLTYPE_NODE, "sched",
    632   1.5     rmind 		SYSCTL_DESCR("Scheduler options"),
    633   1.5     rmind 		NULL, 0, NULL, 0,
    634   1.5     rmind 		CTL_KERN, CTL_CREATE, CTL_EOL);
    635   1.5     rmind 
    636   1.5     rmind 	if (node == NULL)
    637   1.5     rmind 		return;
    638   1.5     rmind 
    639   1.5     rmind 	sysctl_createv(clog, 0, &node, NULL,
    640   1.5     rmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    641   1.5     rmind 		CTLTYPE_INT, "pri_min",
    642   1.5     rmind 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
    643   1.5     rmind 		NULL, SCHED_PRI_MIN, NULL, 0,
    644   1.5     rmind 		CTL_CREATE, CTL_EOL);
    645   1.5     rmind 	sysctl_createv(clog, 0, &node, NULL,
    646   1.5     rmind 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
    647   1.5     rmind 		CTLTYPE_INT, "pri_max",
    648  1.19     njoly 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
    649   1.5     rmind 		NULL, SCHED_PRI_MAX, NULL, 0,
    650   1.5     rmind 		CTL_CREATE, CTL_EOL);
    651   1.5     rmind }
    652  1.34      elad 
    653  1.34      elad static int
    654  1.34      elad sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    655  1.34      elad     void *arg0, void *arg1, void *arg2, void *arg3)
    656  1.34      elad {
    657  1.34      elad 	struct proc *p;
    658  1.34      elad 	int result;
    659  1.34      elad 
    660  1.34      elad 	result = KAUTH_RESULT_DEFER;
    661  1.34      elad 	p = arg0;
    662  1.34      elad 
    663  1.34      elad 	switch (action) {
    664  1.34      elad 	case KAUTH_PROCESS_SCHEDULER_GETPARAM:
    665  1.34      elad 		if (kauth_cred_uidmatch(cred, p->p_cred))
    666  1.34      elad 			result = KAUTH_RESULT_ALLOW;
    667  1.34      elad 		break;
    668  1.34      elad 
    669  1.34      elad 	case KAUTH_PROCESS_SCHEDULER_SETPARAM:
    670  1.34      elad 		if (kauth_cred_uidmatch(cred, p->p_cred)) {
    671  1.34      elad 			struct lwp *l;
    672  1.34      elad 			int policy;
    673  1.34      elad 			pri_t priority;
    674  1.34      elad 
    675  1.34      elad 			l = arg1;
    676  1.34      elad 			policy = (int)(unsigned long)arg2;
    677  1.34      elad 			priority = (pri_t)(unsigned long)arg3;
    678  1.34      elad 
    679  1.34      elad 			if ((policy == l->l_class ||
    680  1.34      elad 			    (policy != SCHED_FIFO && policy != SCHED_RR)) &&
    681  1.34      elad 			    priority <= l->l_priority)
    682  1.34      elad 				result = KAUTH_RESULT_ALLOW;
    683  1.34      elad 		}
    684  1.34      elad 
    685  1.34      elad 		break;
    686  1.34      elad 
    687  1.34      elad 	case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
    688  1.34      elad 		result = KAUTH_RESULT_ALLOW;
    689  1.34      elad 		break;
    690  1.34      elad 
    691  1.34      elad 	case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
    692  1.34      elad 		/* Privileged; we let the secmodel handle this. */
    693  1.34      elad 		break;
    694  1.34      elad 
    695  1.34      elad 	default:
    696  1.34      elad 		break;
    697  1.34      elad 	}
    698  1.34      elad 
    699  1.34      elad 	return result;
    700  1.34      elad }
    701  1.34      elad 
    702  1.34      elad void
    703  1.34      elad sched_init(void)
    704  1.34      elad {
    705  1.34      elad 
    706  1.34      elad 	sysctl_sched_setup(&sched_sysctl_log);
    707  1.34      elad 
    708  1.34      elad 	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
    709  1.34      elad 	    sched_listener_cb, NULL);
    710  1.34      elad }
    711