/*	$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $	*/

/*
 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
/*
 * System calls relating to the scheduler.
 *
 * TODO:
 * - Handle pthread_setschedprio() as defined by POSIX;
 * - Handle the sched_yield() case for SCHED_FIFO as defined by POSIX.
 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sa.h>
50 #include <sys/savar.h>
51 #include <sys/sched.h>
52 #include <sys/syscallargs.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/unistd.h>
57
58 #include "opt_sa.h"
59
60 /*
61 * Convert user priority or the in-kernel priority or convert the current
62 * priority to the appropriate range according to the policy change.
63 */
64 static pri_t
65 convert_pri(lwp_t *l, int policy, pri_t pri)
66 {
67
68 /* Convert user priority to the in-kernel */
69 if (pri != PRI_NONE) {
70 /* Only for real-time threads */
71 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
72 KASSERT(policy != SCHED_OTHER);
73 return PRI_USER_RT + pri;
74 }
75
76 /* Neither policy, nor priority change */
77 if (l->l_class == policy)
78 return l->l_priority;
79
80 /* Time-sharing -> real-time */
81 if (l->l_class == SCHED_OTHER) {
82 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
83 return PRI_USER_RT;
84 }
85
86 /* Real-time -> time-sharing */
87 if (policy == SCHED_OTHER) {
88 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
89 return l->l_priority - PRI_USER_RT;
90 }
91
92 /* Real-time -> real-time */
93 return l->l_priority;
94 }
95
96 int
97 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
98 const struct sched_param *params)
99 {
100 struct proc *p;
101 struct lwp *t;
102 pri_t pri;
103 u_int lcnt;
104 int error;
105
106 error = 0;
107
108 pri = params->sched_priority;
109
110 /* If no parameters specified, just return (this should not happen) */
111 if (pri == PRI_NONE && policy == SCHED_NONE)
112 return 0;
113
114 /* Validate scheduling class */
115 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
116 return EINVAL;
117
118 /* Validate priority */
119 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
120 return EINVAL;
121
122 if (pid != 0) {
123 /* Find the process */
124 mutex_enter(proc_lock);
125 p = p_find(pid, PFIND_LOCKED);
126 if (p == NULL) {
127 mutex_exit(proc_lock);
128 return ESRCH;
129 }
130 mutex_enter(p->p_lock);
131 mutex_exit(proc_lock);
132 /* Disallow modification of system processes */
133 if ((p->p_flag & PK_SYSTEM) != 0) {
134 mutex_exit(p->p_lock);
135 return EPERM;
136 }
137 } else {
138 /* Use the calling process */
139 p = curlwp->l_proc;
140 mutex_enter(p->p_lock);
141 }
142
143 /* Find the LWP(s) */
144 lcnt = 0;
145 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
146 pri_t kpri;
147 int lpolicy;
148
149 if (lid && lid != t->l_lid)
150 continue;
151
152 lcnt++;
153 lwp_lock(t);
154 lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
155
156 /* Disallow setting of priority for SCHED_OTHER threads */
157 if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
158 lwp_unlock(t);
159 error = EINVAL;
160 break;
161 }
162
163 /* Convert priority, if needed */
164 kpri = convert_pri(t, lpolicy, pri);
165
166 /* Check the permission */
167 error = kauth_authorize_process(kauth_cred_get(),
168 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
169 KAUTH_ARG(kpri));
170 if (error) {
171 lwp_unlock(t);
172 break;
173 }
174
175 /* Set the scheduling class, change the priority */
176 t->l_class = lpolicy;
177 lwp_changepri(t, kpri);
178 lwp_unlock(t);
179 }
180 mutex_exit(p->p_lock);
181 return (lcnt == 0) ? ESRCH : error;
182 }
183
184 /*
185 * Set scheduling parameters.
186 */
187 int
188 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
189 register_t *retval)
190 {
191 /* {
192 syscallarg(pid_t) pid;
193 syscallarg(lwpid_t) lid;
194 syscallarg(int) policy;
195 syscallarg(const struct sched_param *) params;
196 } */
197 struct sched_param params;
198 int error;
199
200 /* Get the parameters from the user-space */
201 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
202 if (error)
203 goto out;
204
205 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
206 SCARG(uap, policy), ¶ms);
207
208 out:
209 return (error);
210 }
211
212 int
213 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
214 struct sched_param *params)
215 {
216 struct sched_param lparams;
217 struct lwp *t;
218 int error, lpolicy;
219
220 /* Locks the LWP */
221 t = lwp_find2(pid, lid);
222 if (t == NULL)
223 return ESRCH;
224
225 /* Check the permission */
226 error = kauth_authorize_process(kauth_cred_get(),
227 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
228 if (error != 0) {
229 mutex_exit(t->l_proc->p_lock);
230 return error;
231 }
232
233 lwp_lock(t);
234 lparams.sched_priority = t->l_priority;
235 lpolicy = t->l_class;
236
237 switch (lpolicy) {
238 case SCHED_OTHER:
239 lparams.sched_priority -= PRI_USER;
240 break;
241 case SCHED_RR:
242 case SCHED_FIFO:
243 lparams.sched_priority -= PRI_USER_RT;
244 break;
245 }
246
247 if (policy != NULL)
248 *policy = lpolicy;
249
250 if (params != NULL)
251 *params = lparams;
252
253 lwp_unlock(t);
254 mutex_exit(t->l_proc->p_lock);
255 return error;
256 }
257
258 /*
259 * Get scheduling parameters.
260 */
261 int
262 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
263 register_t *retval)
264 {
265 /* {
266 syscallarg(pid_t) pid;
267 syscallarg(lwpid_t) lid;
268 syscallarg(int *) policy;
269 syscallarg(struct sched_param *) params;
270 } */
271 struct sched_param params;
272 int error, policy;
273
274 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
275 ¶ms);
276 if (error)
277 goto out;
278
279 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
280 if (error == 0 && SCARG(uap, policy) != NULL)
281 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
282
283 out:
284 return (error);
285 }
286
287 /* Allocate the CPU set, and get it from userspace */
288 static int
289 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
290 {
291 int error;
292
293 *dset = kcpuset_create();
294 error = kcpuset_copyin(sset, *dset, size);
295 if (error != 0)
296 kcpuset_unuse(*dset, NULL);
297 return error;
298 }
299
300 /*
301 * Set affinity.
302 */
303 int
304 sys__sched_setaffinity(struct lwp *l,
305 const struct sys__sched_setaffinity_args *uap, register_t *retval)
306 {
307 /* {
308 syscallarg(pid_t) pid;
309 syscallarg(lwpid_t) lid;
310 syscallarg(size_t) size;
311 syscallarg(const cpuset_t *) cpuset;
312 } */
313 kcpuset_t *cpuset, *cpulst = NULL;
314 struct cpu_info *ci = NULL;
315 struct proc *p;
316 struct lwp *t;
317 CPU_INFO_ITERATOR cii;
318 lwpid_t lid;
319 u_int lcnt;
320 int error;
321
322 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
323 return error;
324
325 /* Look for a CPU in the set */
326 for (CPU_INFO_FOREACH(cii, ci)) {
327 error = kcpuset_isset(cpu_index(ci), cpuset);
328 if (error) {
329 if (error == -1) {
330 error = E2BIG;
331 goto out;
332 }
333 break;
334 }
335 }
336 if (ci == NULL) {
337 /* Empty set */
338 kcpuset_unuse(cpuset, NULL);
339 cpuset = NULL;
340 }
341
342 if (SCARG(uap, pid) != 0) {
343 /* Find the process */
344 mutex_enter(proc_lock);
345 p = p_find(SCARG(uap, pid), PFIND_LOCKED);
346 if (p == NULL) {
347 mutex_exit(proc_lock);
348 error = ESRCH;
349 goto out;
350 }
351 mutex_enter(p->p_lock);
352 mutex_exit(proc_lock);
353 /* Disallow modification of system processes. */
354 if ((p->p_flag & PK_SYSTEM) != 0) {
355 mutex_exit(p->p_lock);
356 error = EPERM;
357 goto out;
358 }
359 } else {
360 /* Use the calling process */
361 p = l->l_proc;
362 mutex_enter(p->p_lock);
363 }
364
365 /*
366 * Check the permission.
367 */
368 error = kauth_authorize_process(l->l_cred,
369 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
370 if (error != 0) {
371 mutex_exit(p->p_lock);
372 goto out;
373 }
374
375 #ifdef KERN_SA
376 /*
377 * Don't permit changing the affinity of an SA process. The only
378 * thing that would make sense wold be to set the affinity of
379 * a VP and all threads running on it. But we don't support that
380 * now, so just don't permit it.
381 *
382 * Test is here so that caller gets auth errors before SA
383 * errors.
384 */
385 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
386 mutex_exit(p->p_lock);
387 error = EINVAL;
388 goto out;
389 }
390 #endif
391
392 /* Find the LWP(s) */
393 lcnt = 0;
394 lid = SCARG(uap, lid);
395 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
396 if (lid && lid != t->l_lid)
397 continue;
398 lwp_lock(t);
399 /* It is not allowed to set the affinity for zombie LWPs */
400 if (t->l_stat == LSZOMB) {
401 lwp_unlock(t);
402 continue;
403 }
404 if (cpuset) {
405 /* Set the affinity flag and new CPU set */
406 t->l_flag |= LW_AFFINITY;
407 kcpuset_use(cpuset);
408 if (t->l_affinity != NULL)
409 kcpuset_unuse(t->l_affinity, &cpulst);
410 t->l_affinity = cpuset;
411 /* Migrate to another CPU, unlocks LWP */
412 lwp_migrate(t, ci);
413 } else {
414 /* Unset the affinity flag */
415 t->l_flag &= ~LW_AFFINITY;
416 if (t->l_affinity != NULL)
417 kcpuset_unuse(t->l_affinity, &cpulst);
418 t->l_affinity = NULL;
419 lwp_unlock(t);
420 }
421 lcnt++;
422 }
423 mutex_exit(p->p_lock);
424 if (lcnt == 0)
425 error = ESRCH;
426 out:
427 if (cpuset != NULL)
428 kcpuset_unuse(cpuset, &cpulst);
429 kcpuset_destroy(cpulst);
430 return error;
431 }
432
433 /*
434 * Get affinity.
435 */
436 int
437 sys__sched_getaffinity(struct lwp *l,
438 const struct sys__sched_getaffinity_args *uap, register_t *retval)
439 {
440 /* {
441 syscallarg(pid_t) pid;
442 syscallarg(lwpid_t) lid;
443 syscallarg(size_t) size;
444 syscallarg(cpuset_t *) cpuset;
445 } */
446 struct lwp *t;
447 kcpuset_t *cpuset;
448 int error;
449
450 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
451 return error;
452
453 /* Locks the LWP */
454 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
455 if (t == NULL) {
456 error = ESRCH;
457 goto out;
458 }
459 /* Check the permission */
460 if (kauth_authorize_process(l->l_cred,
461 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
462 mutex_exit(t->l_proc->p_lock);
463 error = EPERM;
464 goto out;
465 }
466 lwp_lock(t);
467 if (t->l_flag & LW_AFFINITY) {
468 KASSERT(t->l_affinity != NULL);
469 kcpuset_copy(cpuset, t->l_affinity);
470 } else
471 kcpuset_zero(cpuset);
472 lwp_unlock(t);
473 mutex_exit(t->l_proc->p_lock);
474
475 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
476 out:
477 kcpuset_unuse(cpuset, NULL);
478 return error;
479 }
480
481 /*
482 * Yield.
483 */
484 int
485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
486 {
487
488 yield();
489 #ifdef KERN_SA
490 if (l->l_flag & LW_SA) {
491 sa_preempt(l);
492 }
493 #endif
494 return 0;
495 }
496
497 /*
498 * Sysctl nodes and initialization.
499 */
500 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
501 {
502 const struct sysctlnode *node = NULL;
503
504 sysctl_createv(clog, 0, NULL, NULL,
505 CTLFLAG_PERMANENT,
506 CTLTYPE_NODE, "kern", NULL,
507 NULL, 0, NULL, 0,
508 CTL_KERN, CTL_EOL);
509 sysctl_createv(clog, 0, NULL, NULL,
510 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
511 CTLTYPE_INT, "posix_sched",
512 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
513 "Process Scheduling option to which the "
514 "system attempts to conform"),
515 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
516 CTL_KERN, CTL_CREATE, CTL_EOL);
517 sysctl_createv(clog, 0, NULL, &node,
518 CTLFLAG_PERMANENT,
519 CTLTYPE_NODE, "sched",
520 SYSCTL_DESCR("Scheduler options"),
521 NULL, 0, NULL, 0,
522 CTL_KERN, CTL_CREATE, CTL_EOL);
523
524 if (node == NULL)
525 return;
526
527 sysctl_createv(clog, 0, &node, NULL,
528 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
529 CTLTYPE_INT, "pri_min",
530 SYSCTL_DESCR("Minimal POSIX real-time priority"),
531 NULL, SCHED_PRI_MIN, NULL, 0,
532 CTL_CREATE, CTL_EOL);
533 sysctl_createv(clog, 0, &node, NULL,
534 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
535 CTLTYPE_INT, "pri_max",
536 SYSCTL_DESCR("Maximal POSIX real-time priority"),
537 NULL, SCHED_PRI_MAX, NULL, 0,
538 CTL_CREATE, CTL_EOL);
539 }
540