sys_sched.c revision 1.17 1 1.17 ad /* $NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $ */
2 1.1 ad
3 1.5 rmind /*
4 1.5 rmind * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 1.1 ad * All rights reserved.
6 1.5 rmind *
7 1.1 ad * Redistribution and use in source and binary forms, with or without
8 1.1 ad * modification, are permitted provided that the following conditions
9 1.1 ad * are met:
10 1.1 ad * 1. Redistributions of source code must retain the above copyright
11 1.1 ad * notice, this list of conditions and the following disclaimer.
12 1.1 ad * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 ad * notice, this list of conditions and the following disclaimer in the
14 1.1 ad * documentation and/or other materials provided with the distribution.
15 1.1 ad *
16 1.16 rmind * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.16 rmind * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.16 rmind * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.16 rmind * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.16 rmind * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.16 rmind * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.16 rmind * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.16 rmind * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.16 rmind * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.16 rmind * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.16 rmind * SUCH DAMAGE.
27 1.1 ad */
28 1.1 ad
29 1.5 rmind /*
30 1.17 ad * System calls relating to the scheduler.
31 1.17 ad *
32 1.5 rmind * TODO:
33 1.5 rmind * - Handle pthread_setschedprio() as defined by POSIX;
34 1.5 rmind * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35 1.5 rmind */
36 1.5 rmind
37 1.1 ad #include <sys/cdefs.h>
38 1.17 ad __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $");
39 1.1 ad
40 1.1 ad #include <sys/param.h>
41 1.5 rmind
42 1.5 rmind #include <sys/cpu.h>
43 1.5 rmind #include <sys/kauth.h>
44 1.5 rmind #include <sys/kmem.h>
45 1.5 rmind #include <sys/lwp.h>
46 1.5 rmind #include <sys/mutex.h>
47 1.1 ad #include <sys/proc.h>
48 1.5 rmind #include <sys/pset.h>
49 1.5 rmind #include <sys/sched.h>
50 1.1 ad #include <sys/syscallargs.h>
51 1.5 rmind #include <sys/sysctl.h>
52 1.5 rmind #include <sys/systm.h>
53 1.5 rmind #include <sys/types.h>
54 1.5 rmind #include <sys/unistd.h>
55 1.5 rmind
56 1.5 rmind /*
57 1.7 rmind * Convert user priority or the in-kernel priority or convert the current
58 1.7 rmind * priority to the appropriate range according to the policy change.
59 1.7 rmind */
60 1.7 rmind static pri_t
61 1.7 rmind convert_pri(lwp_t *l, int policy, pri_t pri)
62 1.7 rmind {
63 1.7 rmind int delta = 0;
64 1.7 rmind
65 1.7 rmind switch (policy) {
66 1.7 rmind case SCHED_OTHER:
67 1.7 rmind delta = PRI_USER;
68 1.7 rmind break;
69 1.7 rmind case SCHED_FIFO:
70 1.7 rmind case SCHED_RR:
71 1.7 rmind delta = PRI_USER_RT;
72 1.7 rmind break;
73 1.7 rmind default:
74 1.7 rmind panic("upri_to_kpri");
75 1.7 rmind }
76 1.7 rmind
77 1.7 rmind if (pri != PRI_NONE) {
78 1.7 rmind /* Convert user priority to the in-kernel */
79 1.7 rmind KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 1.7 rmind return pri + delta;
81 1.7 rmind }
82 1.7 rmind if (l->l_class == policy)
83 1.7 rmind return l->l_priority;
84 1.7 rmind
85 1.7 rmind /* Change the current priority to the appropriate range */
86 1.7 rmind if (l->l_class == SCHED_OTHER) {
87 1.7 rmind KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 1.7 rmind return l->l_priority + delta;
89 1.7 rmind }
90 1.7 rmind if (policy == SCHED_OTHER) {
91 1.7 rmind KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 1.7 rmind return l->l_priority - delta;
93 1.7 rmind }
94 1.7 rmind KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 1.7 rmind return l->l_class;
96 1.7 rmind }
97 1.7 rmind
98 1.7 rmind /*
99 1.5 rmind * Set scheduling parameters.
100 1.5 rmind */
101 1.5 rmind int
102 1.5 rmind sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
103 1.5 rmind register_t *retval)
104 1.5 rmind {
105 1.5 rmind /* {
106 1.5 rmind syscallarg(pid_t) pid;
107 1.5 rmind syscallarg(lwpid_t) lid;
108 1.10 yamt syscallarg(int) policy;
109 1.5 rmind syscallarg(const struct sched_param *) params;
110 1.5 rmind } */
111 1.10 yamt struct sched_param param;
112 1.5 rmind struct proc *p;
113 1.5 rmind struct lwp *t;
114 1.5 rmind lwpid_t lid;
115 1.5 rmind u_int lcnt;
116 1.7 rmind int policy;
117 1.5 rmind pri_t pri;
118 1.5 rmind int error;
119 1.5 rmind
120 1.5 rmind /* Get the parameters from the user-space */
121 1.10 yamt error = copyin(SCARG(uap, params), ¶m, sizeof(param));
122 1.7 rmind if (error) {
123 1.7 rmind return error;
124 1.7 rmind }
125 1.10 yamt pri = param.sched_priority;
126 1.10 yamt policy = SCARG(uap, policy);
127 1.7 rmind
128 1.7 rmind /* If no parameters specified, just return (this should not happen) */
129 1.7 rmind if (pri == PRI_NONE && policy == SCHED_NONE)
130 1.7 rmind return 0;
131 1.5 rmind
132 1.7 rmind /* Validate scheduling class */
133 1.7 rmind if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
134 1.7 rmind return EINVAL;
135 1.5 rmind
136 1.7 rmind /* Validate priority */
137 1.7 rmind if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
138 1.7 rmind return EINVAL;
139 1.5 rmind
140 1.7 rmind if (SCARG(uap, pid) != 0) {
141 1.7 rmind /* Find the process */
142 1.7 rmind p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
143 1.7 rmind if (p == NULL)
144 1.7 rmind return ESRCH;
145 1.7 rmind mutex_enter(&p->p_smutex);
146 1.7 rmind mutex_exit(&proclist_lock);
147 1.7 rmind /* Disallow modification of system processes */
148 1.17 ad if ((p->p_flag & PK_SYSTEM) != 0) {
149 1.7 rmind mutex_exit(&p->p_smutex);
150 1.7 rmind return EPERM;
151 1.7 rmind }
152 1.7 rmind } else {
153 1.7 rmind /* Use the calling process */
154 1.7 rmind p = l->l_proc;
155 1.7 rmind mutex_enter(&p->p_smutex);
156 1.5 rmind }
157 1.1 ad
158 1.5 rmind /* Find the LWP(s) */
159 1.5 rmind lcnt = 0;
160 1.5 rmind lid = SCARG(uap, lid);
161 1.5 rmind LIST_FOREACH(t, &p->p_lwps, l_sibling) {
162 1.7 rmind pri_t kpri;
163 1.12 elad int lpolicy;
164 1.5 rmind
165 1.5 rmind if (lid && lid != t->l_lid)
166 1.5 rmind continue;
167 1.15 drochner lcnt++;
168 1.7 rmind KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
169 1.7 rmind lwp_lock(t);
170 1.7 rmind
171 1.12 elad if (policy == SCHED_NONE)
172 1.13 yamt lpolicy = t->l_class;
173 1.12 elad else
174 1.12 elad lpolicy = policy;
175 1.12 elad
176 1.7 rmind /*
177 1.7 rmind * Note that, priority may need to be changed to get into
178 1.7 rmind * the correct priority range of the new scheduling class.
179 1.7 rmind */
180 1.12 elad kpri = convert_pri(t, lpolicy, pri);
181 1.12 elad
182 1.12 elad /* Check the permission */
183 1.12 elad error = kauth_authorize_process(l->l_cred,
184 1.12 elad KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
185 1.12 elad KAUTH_ARG(kpri));
186 1.14 yamt if (error) {
187 1.14 yamt lwp_unlock(t);
188 1.12 elad break;
189 1.14 yamt }
190 1.5 rmind
191 1.5 rmind /* Set the scheduling class */
192 1.7 rmind if (policy != SCHED_NONE)
193 1.7 rmind t->l_class = policy;
194 1.5 rmind
195 1.5 rmind /* Change the priority */
196 1.7 rmind if (t->l_priority != kpri)
197 1.7 rmind lwp_changepri(t, kpri);
198 1.5 rmind
199 1.5 rmind lwp_unlock(t);
200 1.5 rmind }
201 1.5 rmind mutex_exit(&p->p_smutex);
202 1.7 rmind return (lcnt == 0) ? ESRCH : error;
203 1.5 rmind }
204 1.5 rmind
205 1.5 rmind /*
206 1.5 rmind * Get scheduling parameters.
207 1.5 rmind */
208 1.5 rmind int
209 1.5 rmind sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
210 1.5 rmind register_t *retval)
211 1.5 rmind {
212 1.5 rmind /* {
213 1.5 rmind syscallarg(pid_t) pid;
214 1.5 rmind syscallarg(lwpid_t) lid;
215 1.10 yamt syscallarg(int *) policy;
216 1.5 rmind syscallarg(struct sched_param *) params;
217 1.5 rmind } */
218 1.10 yamt struct sched_param param;
219 1.5 rmind struct lwp *t;
220 1.10 yamt int error, policy;
221 1.5 rmind
222 1.16 rmind /* Locks the LWP */
223 1.16 rmind t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
224 1.16 rmind if (t == NULL)
225 1.16 rmind return ESRCH;
226 1.10 yamt
227 1.10 yamt /* Check the permission */
228 1.11 elad error = kauth_authorize_process(l->l_cred,
229 1.11 elad KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
230 1.10 yamt if (error != 0) {
231 1.10 yamt lwp_unlock(t);
232 1.16 rmind return error;
233 1.5 rmind }
234 1.10 yamt
235 1.10 yamt param.sched_priority = t->l_priority;
236 1.10 yamt policy = t->l_class;
237 1.5 rmind lwp_unlock(t);
238 1.5 rmind
239 1.10 yamt switch (policy) {
240 1.5 rmind case SCHED_OTHER:
241 1.10 yamt param.sched_priority -= PRI_USER;
242 1.5 rmind break;
243 1.5 rmind case SCHED_RR:
244 1.5 rmind case SCHED_FIFO:
245 1.10 yamt param.sched_priority -= PRI_USER_RT;
246 1.5 rmind break;
247 1.5 rmind }
248 1.10 yamt error = copyout(¶m, SCARG(uap, params), sizeof(param));
249 1.10 yamt if (error == 0 && SCARG(uap, policy) != NULL)
250 1.10 yamt error = copyout(&policy, SCARG(uap, policy), sizeof(int));
251 1.5 rmind return error;
252 1.5 rmind }
253 1.5 rmind
254 1.5 rmind /*
255 1.5 rmind * Set affinity.
256 1.5 rmind */
257 1.5 rmind int
258 1.5 rmind sys__sched_setaffinity(struct lwp *l,
259 1.5 rmind const struct sys__sched_setaffinity_args *uap, register_t *retval)
260 1.5 rmind {
261 1.5 rmind /* {
262 1.5 rmind syscallarg(pid_t) pid;
263 1.5 rmind syscallarg(lwpid_t) lid;
264 1.5 rmind syscallarg(size_t) size;
265 1.5 rmind syscallarg(void *) cpuset;
266 1.5 rmind } */
267 1.5 rmind cpuset_t *cpuset;
268 1.5 rmind struct cpu_info *ci = NULL;
269 1.5 rmind struct proc *p;
270 1.5 rmind struct lwp *t;
271 1.5 rmind CPU_INFO_ITERATOR cii;
272 1.5 rmind lwpid_t lid;
273 1.5 rmind u_int lcnt;
274 1.5 rmind int error;
275 1.5 rmind
276 1.5 rmind /* Allocate the CPU set, and get it from userspace */
277 1.5 rmind cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
278 1.5 rmind error = copyin(SCARG(uap, cpuset), cpuset,
279 1.5 rmind min(SCARG(uap, size), sizeof(cpuset_t)));
280 1.5 rmind if (error)
281 1.5 rmind goto error;
282 1.5 rmind
283 1.5 rmind /* Look for a CPU in the set */
284 1.5 rmind for (CPU_INFO_FOREACH(cii, ci))
285 1.5 rmind if (CPU_ISSET(cpu_index(ci), cpuset))
286 1.5 rmind break;
287 1.5 rmind if (ci == NULL) {
288 1.5 rmind /* Empty set */
289 1.5 rmind kmem_free(cpuset, sizeof(cpuset_t));
290 1.5 rmind cpuset = NULL;
291 1.5 rmind }
292 1.5 rmind
293 1.7 rmind if (SCARG(uap, pid) != 0) {
294 1.7 rmind /* Find the process */
295 1.7 rmind p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
296 1.7 rmind if (p == NULL) {
297 1.7 rmind error = ESRCH;
298 1.7 rmind goto error;
299 1.7 rmind }
300 1.7 rmind mutex_enter(&p->p_smutex);
301 1.7 rmind mutex_exit(&proclist_lock);
302 1.17 ad /* Disallow modification of system processes. */
303 1.17 ad if ((p->p_flag & PK_SYSTEM) != 0) {
304 1.17 ad mutex_exit(&p->p_smutex);
305 1.17 ad error = EPERM;
306 1.17 ad goto error;
307 1.17 ad }
308 1.7 rmind } else {
309 1.7 rmind /* Use the calling process */
310 1.7 rmind p = l->l_proc;
311 1.7 rmind mutex_enter(&p->p_smutex);
312 1.5 rmind }
313 1.5 rmind
314 1.10 yamt /*
315 1.10 yamt * Check the permission.
316 1.10 yamt */
317 1.11 elad error = kauth_authorize_process(l->l_cred,
318 1.11 elad KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
319 1.10 yamt if (error != 0) {
320 1.10 yamt mutex_exit(&p->p_smutex);
321 1.10 yamt goto error;
322 1.10 yamt }
323 1.5 rmind
324 1.5 rmind /* Find the LWP(s) */
325 1.5 rmind lcnt = 0;
326 1.5 rmind lid = SCARG(uap, lid);
327 1.5 rmind LIST_FOREACH(t, &p->p_lwps, l_sibling) {
328 1.5 rmind if (lid && lid != t->l_lid)
329 1.5 rmind continue;
330 1.5 rmind lwp_lock(t);
331 1.5 rmind if (cpuset) {
332 1.5 rmind /* Set the affinity flag and new CPU set */
333 1.5 rmind t->l_flag |= LW_AFFINITY;
334 1.5 rmind memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
335 1.5 rmind /* Migrate to another CPU, unlocks LWP */
336 1.5 rmind lwp_migrate(t, ci);
337 1.5 rmind } else {
338 1.5 rmind /* Unset the affinity flag */
339 1.5 rmind t->l_flag &= ~LW_AFFINITY;
340 1.5 rmind lwp_unlock(t);
341 1.5 rmind }
342 1.5 rmind lcnt++;
343 1.5 rmind }
344 1.5 rmind mutex_exit(&p->p_smutex);
345 1.5 rmind if (lcnt == 0)
346 1.5 rmind error = ESRCH;
347 1.5 rmind error:
348 1.5 rmind if (cpuset != NULL)
349 1.5 rmind kmem_free(cpuset, sizeof(cpuset_t));
350 1.5 rmind return error;
351 1.5 rmind }
352 1.5 rmind
353 1.5 rmind /*
354 1.5 rmind * Get affinity.
355 1.5 rmind */
356 1.5 rmind int
357 1.5 rmind sys__sched_getaffinity(struct lwp *l,
358 1.5 rmind const struct sys__sched_getaffinity_args *uap, register_t *retval)
359 1.5 rmind {
360 1.5 rmind /* {
361 1.5 rmind syscallarg(pid_t) pid;
362 1.5 rmind syscallarg(lwpid_t) lid;
363 1.5 rmind syscallarg(size_t) size;
364 1.5 rmind syscallarg(void *) cpuset;
365 1.5 rmind } */
366 1.5 rmind struct lwp *t;
367 1.5 rmind void *cpuset;
368 1.5 rmind int error;
369 1.5 rmind
370 1.5 rmind if (SCARG(uap, size) <= 0)
371 1.5 rmind return EINVAL;
372 1.5 rmind cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
373 1.5 rmind
374 1.16 rmind /* Locks the LWP */
375 1.16 rmind t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
376 1.5 rmind if (t == NULL) {
377 1.5 rmind kmem_free(cpuset, sizeof(cpuset_t));
378 1.5 rmind return ESRCH;
379 1.5 rmind }
380 1.10 yamt /* Check the permission */
381 1.11 elad if (kauth_authorize_process(l->l_cred,
382 1.11 elad KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
383 1.10 yamt lwp_unlock(t);
384 1.10 yamt kmem_free(cpuset, sizeof(cpuset_t));
385 1.10 yamt return EPERM;
386 1.10 yamt }
387 1.5 rmind if (t->l_flag & LW_AFFINITY)
388 1.5 rmind memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
389 1.5 rmind lwp_unlock(t);
390 1.5 rmind
391 1.5 rmind error = copyout(cpuset, SCARG(uap, cpuset),
392 1.5 rmind min(SCARG(uap, size), sizeof(cpuset_t)));
393 1.5 rmind
394 1.5 rmind kmem_free(cpuset, sizeof(cpuset_t));
395 1.5 rmind return error;
396 1.5 rmind }
397 1.5 rmind
398 1.5 rmind /*
399 1.5 rmind * Yield.
400 1.5 rmind */
401 1.1 ad int
402 1.4 dsl sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
403 1.1 ad {
404 1.1 ad
405 1.1 ad yield();
406 1.1 ad return 0;
407 1.1 ad }
408 1.5 rmind
409 1.5 rmind /*
410 1.5 rmind * Sysctl nodes and initialization.
411 1.5 rmind */
412 1.5 rmind SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
413 1.5 rmind {
414 1.5 rmind const struct sysctlnode *node = NULL;
415 1.5 rmind
416 1.5 rmind sysctl_createv(clog, 0, NULL, NULL,
417 1.5 rmind CTLFLAG_PERMANENT,
418 1.5 rmind CTLTYPE_NODE, "kern", NULL,
419 1.5 rmind NULL, 0, NULL, 0,
420 1.5 rmind CTL_KERN, CTL_EOL);
421 1.5 rmind sysctl_createv(clog, 0, NULL, NULL,
422 1.5 rmind CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
423 1.5 rmind CTLTYPE_INT, "posix_sched",
424 1.5 rmind SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
425 1.5 rmind "Process Scheduling option to which the "
426 1.5 rmind "system attempts to conform"),
427 1.5 rmind NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
428 1.5 rmind CTL_KERN, CTL_CREATE, CTL_EOL);
429 1.5 rmind sysctl_createv(clog, 0, NULL, &node,
430 1.5 rmind CTLFLAG_PERMANENT,
431 1.5 rmind CTLTYPE_NODE, "sched",
432 1.5 rmind SYSCTL_DESCR("Scheduler options"),
433 1.5 rmind NULL, 0, NULL, 0,
434 1.5 rmind CTL_KERN, CTL_CREATE, CTL_EOL);
435 1.5 rmind
436 1.5 rmind if (node == NULL)
437 1.5 rmind return;
438 1.5 rmind
439 1.5 rmind sysctl_createv(clog, 0, &node, NULL,
440 1.5 rmind CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
441 1.5 rmind CTLTYPE_INT, "pri_min",
442 1.5 rmind SYSCTL_DESCR("Minimal POSIX real-time priority"),
443 1.5 rmind NULL, SCHED_PRI_MIN, NULL, 0,
444 1.5 rmind CTL_CREATE, CTL_EOL);
445 1.5 rmind sysctl_createv(clog, 0, &node, NULL,
446 1.5 rmind CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
447 1.5 rmind CTLTYPE_INT, "pri_max",
448 1.5 rmind SYSCTL_DESCR("Minimal POSIX real-time priority"),
449 1.5 rmind NULL, SCHED_PRI_MAX, NULL, 0,
450 1.5 rmind CTL_CREATE, CTL_EOL);
451 1.5 rmind }
452