sys_sched.c revision 1.28 1 /* $NetBSD: sys_sched.c,v 1.28 2008/10/15 06:51:20 wrstuden Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * TODO:
33 * - Handle pthread_setschedprio() as defined by POSIX;
34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.28 2008/10/15 06:51:20 wrstuden Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sa.h>
50 #include <sys/savar.h>
51 #include <sys/sched.h>
52 #include <sys/syscallargs.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/unistd.h>
57
58 #include "opt_sa.h"
59
60 /*
61 * Convert user priority or the in-kernel priority or convert the current
62 * priority to the appropriate range according to the policy change.
63 */
64 static pri_t
65 convert_pri(lwp_t *l, int policy, pri_t pri)
66 {
67 int delta = 0;
68
69 switch (policy) {
70 case SCHED_OTHER:
71 delta = PRI_USER;
72 break;
73 case SCHED_FIFO:
74 case SCHED_RR:
75 delta = PRI_USER_RT;
76 break;
77 default:
78 panic("upri_to_kpri");
79 }
80
81 if (pri != PRI_NONE) {
82 /* Convert user priority to the in-kernel */
83 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
84 return pri + delta;
85 }
86 if (l->l_class == policy)
87 return l->l_priority;
88
89 /* Change the current priority to the appropriate range */
90 if (l->l_class == SCHED_OTHER) {
91 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
92 return delta;
93 }
94 if (policy == SCHED_OTHER) {
95 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
96 return l->l_priority - delta;
97 }
98 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
99 return l->l_class;
100 }
101
102 int
103 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
104 const struct sched_param *params)
105 {
106 struct proc *p;
107 struct lwp *t;
108 pri_t pri;
109 u_int lcnt;
110 int error;
111
112 error = 0;
113
114 pri = params->sched_priority;
115
116 /* If no parameters specified, just return (this should not happen) */
117 if (pri == PRI_NONE && policy == SCHED_NONE)
118 return 0;
119
120 /* Validate scheduling class */
121 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
122 return EINVAL;
123
124 /* Validate priority */
125 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
126 return EINVAL;
127
128 if (pid != 0) {
129 /* Find the process */
130 mutex_enter(proc_lock);
131 p = p_find(pid, PFIND_LOCKED);
132 if (p == NULL) {
133 mutex_exit(proc_lock);
134 return ESRCH;
135 }
136 mutex_enter(p->p_lock);
137 mutex_exit(proc_lock);
138 /* Disallow modification of system processes */
139 if ((p->p_flag & PK_SYSTEM) != 0) {
140 mutex_exit(p->p_lock);
141 return EPERM;
142 }
143 } else {
144 /* Use the calling process */
145 p = curlwp->l_proc;
146 mutex_enter(p->p_lock);
147 }
148
149 /* Find the LWP(s) */
150 lcnt = 0;
151 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
152 pri_t kpri;
153 int lpolicy;
154
155 if (lid && lid != t->l_lid)
156 continue;
157 lcnt++;
158 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
159 lwp_lock(t);
160
161 if (policy == SCHED_NONE)
162 lpolicy = t->l_class;
163 else
164 lpolicy = policy;
165
166 /*
167 * Note that, priority may need to be changed to get into
168 * the correct priority range of the new scheduling class.
169 */
170 kpri = convert_pri(t, lpolicy, pri);
171
172 /* Check the permission */
173 error = kauth_authorize_process(kauth_cred_get(),
174 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
175 KAUTH_ARG(kpri));
176 if (error) {
177 lwp_unlock(t);
178 break;
179 }
180
181 /* Set the scheduling class */
182 if (policy != SCHED_NONE)
183 t->l_class = policy;
184
185 /* Change the priority */
186 if (t->l_priority != kpri)
187 lwp_changepri(t, kpri);
188
189 lwp_unlock(t);
190 }
191 mutex_exit(p->p_lock);
192 return (lcnt == 0) ? ESRCH : error;
193 }
194
195 /*
196 * Set scheduling parameters.
197 */
198 int
199 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
200 register_t *retval)
201 {
202 /* {
203 syscallarg(pid_t) pid;
204 syscallarg(lwpid_t) lid;
205 syscallarg(int) policy;
206 syscallarg(const struct sched_param *) params;
207 } */
208 struct sched_param params;
209 int error;
210
211 /* Get the parameters from the user-space */
212 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
213 if (error)
214 goto out;
215
216 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
217 SCARG(uap, policy), ¶ms);
218
219 out:
220 return (error);
221 }
222
223 int
224 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
225 struct sched_param *params)
226 {
227 struct sched_param lparams;
228 struct lwp *t;
229 int error, lpolicy;
230
231 /* Locks the LWP */
232 t = lwp_find2(pid, lid);
233 if (t == NULL)
234 return ESRCH;
235
236 /* Check the permission */
237 error = kauth_authorize_process(kauth_cred_get(),
238 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
239 if (error != 0) {
240 mutex_exit(t->l_proc->p_lock);
241 return error;
242 }
243
244 lwp_lock(t);
245 lparams.sched_priority = t->l_priority;
246 lpolicy = t->l_class;
247
248 switch (lpolicy) {
249 case SCHED_OTHER:
250 lparams.sched_priority -= PRI_USER;
251 break;
252 case SCHED_RR:
253 case SCHED_FIFO:
254 lparams.sched_priority -= PRI_USER_RT;
255 break;
256 }
257
258 if (policy != NULL)
259 *policy = lpolicy;
260
261 if (params != NULL)
262 *params = lparams;
263
264 lwp_unlock(t);
265 mutex_exit(t->l_proc->p_lock);
266 return error;
267 }
268
269 /*
270 * Get scheduling parameters.
271 */
272 int
273 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
274 register_t *retval)
275 {
276 /* {
277 syscallarg(pid_t) pid;
278 syscallarg(lwpid_t) lid;
279 syscallarg(int *) policy;
280 syscallarg(struct sched_param *) params;
281 } */
282 struct sched_param params;
283 int error, policy;
284
285 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
286 ¶ms);
287 if (error)
288 goto out;
289
290 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
291 if (error == 0 && SCARG(uap, policy) != NULL)
292 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
293
294 out:
295 return (error);
296 }
297
298 /* Allocate the CPU set, and get it from userspace */
299 static int
300 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
301 {
302 int error;
303
304 *dset = kcpuset_create();
305 error = kcpuset_copyin(sset, *dset, size);
306 if (error != 0)
307 kcpuset_unuse(*dset, NULL);
308 return error;
309 }
310
311 /*
312 * Set affinity.
313 */
314 int
315 sys__sched_setaffinity(struct lwp *l,
316 const struct sys__sched_setaffinity_args *uap, register_t *retval)
317 {
318 /* {
319 syscallarg(pid_t) pid;
320 syscallarg(lwpid_t) lid;
321 syscallarg(size_t) size;
322 syscallarg(const cpuset_t *) cpuset;
323 } */
324 kcpuset_t *cpuset, *cpulst = NULL;
325 struct cpu_info *ci = NULL;
326 struct proc *p;
327 struct lwp *t;
328 CPU_INFO_ITERATOR cii;
329 lwpid_t lid;
330 u_int lcnt;
331 int error;
332
333 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
334 return error;
335
336 /* Look for a CPU in the set */
337 for (CPU_INFO_FOREACH(cii, ci)) {
338 error = kcpuset_isset(cpu_index(ci), cpuset);
339 if (error) {
340 if (error == -1) {
341 error = E2BIG;
342 goto out;
343 }
344 break;
345 }
346 }
347
348 if (ci == NULL) {
349 /* Empty set */
350 kcpuset_unuse(cpuset, NULL);
351 cpuset = NULL;
352 }
353
354 if (SCARG(uap, pid) != 0) {
355 /* Find the process */
356 mutex_enter(proc_lock);
357 p = p_find(SCARG(uap, pid), PFIND_LOCKED);
358 if (p == NULL) {
359 mutex_exit(proc_lock);
360 error = ESRCH;
361 goto out;
362 }
363 mutex_enter(p->p_lock);
364 mutex_exit(proc_lock);
365 /* Disallow modification of system processes. */
366 if ((p->p_flag & PK_SYSTEM) != 0) {
367 mutex_exit(p->p_lock);
368 error = EPERM;
369 goto out;
370 }
371 } else {
372 /* Use the calling process */
373 p = l->l_proc;
374 mutex_enter(p->p_lock);
375 }
376
377 /*
378 * Check the permission.
379 */
380 error = kauth_authorize_process(l->l_cred,
381 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
382 if (error != 0) {
383 mutex_exit(p->p_lock);
384 goto out;
385 }
386
387 #ifdef KERN_SA
388 /*
389 * Don't permit changing the affinity of an SA process. The only
390 * thing that would make sense wold be to set the affinity of
391 * a VP and all threads running on it. But we don't support that
392 * now, so just don't permit it.
393 *
394 * Test is here so that caller gets auth errors before SA
395 * errors.
396 */
397 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
398 mutex_exit(p->p_lock);
399 error = EINVAL;
400 goto out;
401 }
402 #endif
403
404 /* Find the LWP(s) */
405 lcnt = 0;
406 lid = SCARG(uap, lid);
407 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
408 if (lid && lid != t->l_lid)
409 continue;
410 lwp_lock(t);
411 /* It is not allowed to set the affinity for zombie LWPs */
412 if (t->l_stat == LSZOMB) {
413 lwp_unlock(t);
414 continue;
415 }
416 if (cpuset) {
417 /* Set the affinity flag and new CPU set */
418 t->l_flag |= LW_AFFINITY;
419 kcpuset_use(cpuset);
420 if (t->l_affinity != NULL)
421 kcpuset_unuse(t->l_affinity, &cpulst);
422 t->l_affinity = cpuset;
423 /* Migrate to another CPU, unlocks LWP */
424 lwp_migrate(t, ci);
425 } else {
426 /* Unset the affinity flag */
427 t->l_flag &= ~LW_AFFINITY;
428 if (t->l_affinity != NULL)
429 kcpuset_unuse(t->l_affinity, &cpulst);
430 t->l_affinity = NULL;
431 lwp_unlock(t);
432 }
433 lcnt++;
434 }
435 mutex_exit(p->p_lock);
436 if (lcnt == 0)
437 error = ESRCH;
438 out:
439 if (cpuset != NULL)
440 kcpuset_unuse(cpuset, &cpulst);
441 kcpuset_destroy(cpulst);
442 return error;
443 }
444
445 /*
446 * Get affinity.
447 */
448 int
449 sys__sched_getaffinity(struct lwp *l,
450 const struct sys__sched_getaffinity_args *uap, register_t *retval)
451 {
452 /* {
453 syscallarg(pid_t) pid;
454 syscallarg(lwpid_t) lid;
455 syscallarg(size_t) size;
456 syscallarg(cpuset_t *) cpuset;
457 } */
458 struct lwp *t;
459 kcpuset_t *cpuset;
460 int error;
461
462 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
463 return error;
464
465 /* Locks the LWP */
466 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
467 if (t == NULL) {
468 error = ESRCH;
469 goto out;
470 }
471 /* Check the permission */
472 if (kauth_authorize_process(l->l_cred,
473 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
474 mutex_exit(t->l_proc->p_lock);
475 error = EPERM;
476 goto out;
477 }
478 lwp_lock(t);
479 if (t->l_flag & LW_AFFINITY) {
480 KASSERT(t->l_affinity != NULL);
481 kcpuset_copy(cpuset, t->l_affinity);
482 } else
483 kcpuset_zero(cpuset);
484 lwp_unlock(t);
485 mutex_exit(t->l_proc->p_lock);
486
487 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
488 out:
489 kcpuset_unuse(cpuset, NULL);
490 return error;
491 }
492
493 /*
494 * Yield.
495 */
496 int
497 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
498 {
499
500 yield();
501 #ifdef KERN_SA
502 if (l->l_flag & LW_SA) {
503 sa_preempt(l);
504 }
505 #endif
506 return 0;
507 }
508
509 /*
510 * Sysctl nodes and initialization.
511 */
512 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
513 {
514 const struct sysctlnode *node = NULL;
515
516 sysctl_createv(clog, 0, NULL, NULL,
517 CTLFLAG_PERMANENT,
518 CTLTYPE_NODE, "kern", NULL,
519 NULL, 0, NULL, 0,
520 CTL_KERN, CTL_EOL);
521 sysctl_createv(clog, 0, NULL, NULL,
522 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
523 CTLTYPE_INT, "posix_sched",
524 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
525 "Process Scheduling option to which the "
526 "system attempts to conform"),
527 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
528 CTL_KERN, CTL_CREATE, CTL_EOL);
529 sysctl_createv(clog, 0, NULL, &node,
530 CTLFLAG_PERMANENT,
531 CTLTYPE_NODE, "sched",
532 SYSCTL_DESCR("Scheduler options"),
533 NULL, 0, NULL, 0,
534 CTL_KERN, CTL_CREATE, CTL_EOL);
535
536 if (node == NULL)
537 return;
538
539 sysctl_createv(clog, 0, &node, NULL,
540 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
541 CTLTYPE_INT, "pri_min",
542 SYSCTL_DESCR("Minimal POSIX real-time priority"),
543 NULL, SCHED_PRI_MIN, NULL, 0,
544 CTL_CREATE, CTL_EOL);
545 sysctl_createv(clog, 0, &node, NULL,
546 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
547 CTLTYPE_INT, "pri_max",
548 SYSCTL_DESCR("Maximal POSIX real-time priority"),
549 NULL, SCHED_PRI_MAX, NULL, 0,
550 CTL_CREATE, CTL_EOL);
551 }
552