/* $NetBSD: sys_sched.c,v 1.37 2011/08/07 21:13:05 rmind Exp $ */

/*
 * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * System calls relating to the scheduler.
 *
 * Lock order:
 *
 *	cpu_lock ->
 *	    proc_lock ->
 *		proc_t::p_lock ->
 *		    lwp_t::lwp_lock
 *
 * TODO:
 *  - Handle pthread_setschedprio() as defined by POSIX;
 *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
 */
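
/*
 * Editor's illustration (not in the original source): the lock order above
 * translates into the hand-over-hand pattern used throughout this file
 * when resolving a PID to a locked process:
 *
 *	mutex_enter(proc_lock);
 *	p = proc_find(pid);		(stable while proc_lock is held)
 *	mutex_enter(p->p_lock);		(take the narrower lock first,
 *	mutex_exit(proc_lock);		 then drop the wider one)
 */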

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.37 2011/08/07 21:13:05 rmind Exp $");

#include <sys/param.h>

#include <sys/cpu.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pset.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/unistd.h>

#include "opt_sa.h"

static struct sysctllog *sched_sysctl_log;
static kauth_listener_t sched_listener;

/*
 * Convert the user priority to the in-kernel priority, or convert the
 * current priority to the range appropriate for the new scheduling policy.
 */
static pri_t
convert_pri(lwp_t *l, int policy, pri_t pri)
{

	/* Convert user priority to the in-kernel */
	if (pri != PRI_NONE) {
		/* Only for real-time threads */
		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
		KASSERT(policy != SCHED_OTHER);
		return PRI_USER_RT + pri;
	}

	/* Neither policy, nor priority change */
	if (l->l_class == policy)
		return l->l_priority;

	/* Time-sharing -> real-time */
	if (l->l_class == SCHED_OTHER) {
		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
		return PRI_USER_RT;
	}

	/* Real-time -> time-sharing */
	if (policy == SCHED_OTHER) {
		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
		return l->l_priority - PRI_USER_RT;
	}

	/* Real-time -> real-time */
	return l->l_priority;
}
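
/*
 * Worked example (editorial; the actual numeric values come from the
 * kernel's priority definitions, not from this file): if PRI_USER_RT were
 * 128, then requesting SCHED_FIFO with user priority 5 would yield the
 * in-kernel priority 133, and moving that thread back to SCHED_OTHER
 * would place it at in-kernel priority 133 - 128 = 5 in the time-sharing
 * range.
 */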

int
do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
    const struct sched_param *params)
{
	struct proc *p;
	struct lwp *t;
	pri_t pri;
	u_int lcnt;
	int error;

	error = 0;

	pri = params->sched_priority;

	/* If no parameters specified, just return (this should not happen) */
	if (pri == PRI_NONE && policy == SCHED_NONE)
		return 0;

	/* Validate scheduling class */
	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
		return EINVAL;

	/* Validate priority */
	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
		return EINVAL;

	if (pid != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			return EPERM;
		}
	} else {
		/* Use the calling process */
		p = curlwp->l_proc;
		mutex_enter(p->p_lock);
	}

	/* Find the LWP(s) */
	lcnt = 0;
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		pri_t kpri;
		int lpolicy;

		if (lid && lid != t->l_lid)
			continue;

		lcnt++;
		lwp_lock(t);
		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;

		/* Disallow setting of priority for SCHED_OTHER threads */
		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
			lwp_unlock(t);
			error = EINVAL;
			break;
		}

		/* Convert priority, if needed */
		kpri = convert_pri(t, lpolicy, pri);

		/* Check the permission */
		error = kauth_authorize_process(kauth_cred_get(),
		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
		    KAUTH_ARG(kpri));
		if (error) {
			lwp_unlock(t);
			break;
		}

		/* Set the scheduling class, change the priority */
		t->l_class = lpolicy;
		lwp_changepri(t, kpri);
		lwp_unlock(t);
	}
	mutex_exit(p->p_lock);
	return (lcnt == 0) ? ESRCH : error;
}

/*
 * Set scheduling parameters.
 */
int
sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(int) policy;
		syscallarg(const struct sched_param *) params;
	} */
	struct sched_param params;
	int error;

	/* Get the parameters from the user-space */
	error = copyin(SCARG(uap, params), &params, sizeof(params));
	if (error)
		goto out;

	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
	    SCARG(uap, policy), &params);
 out:
	return error;
}
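
/*
 * Editor's note (illustration, not part of the original file): userland
 * reaches this system call through the POSIX scheduling API.  A sketch of
 * a typical consumer, assuming the standard <sched.h> interface:
 *
 *	struct sched_param sp;
 *	sp.sched_priority = sched_get_priority_min(SCHED_RR);
 *	if (sched_setscheduler(getpid(), SCHED_RR, &sp) == -1)
 *		err(EXIT_FAILURE, "sched_setscheduler");
 *
 * libc is then expected to translate such calls into _sched_setparam()
 * with the appropriate pid/lid pair.
 */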

int
do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
    struct sched_param *params)
{
	struct sched_param lparams;
	struct lwp *t;
	int error, lpolicy;

	/* Locks the LWP */
	t = lwp_find2(pid, lid);
	if (t == NULL)
		return ESRCH;

	/* Check the permission */
	error = kauth_authorize_process(kauth_cred_get(),
	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(t->l_proc->p_lock);
		return error;
	}

	lwp_lock(t);
	lparams.sched_priority = t->l_priority;
	lpolicy = t->l_class;

	switch (lpolicy) {
	case SCHED_OTHER:
		lparams.sched_priority -= PRI_USER;
		break;
	case SCHED_RR:
	case SCHED_FIFO:
		lparams.sched_priority -= PRI_USER_RT;
		break;
	}

	if (policy != NULL)
		*policy = lpolicy;

	if (params != NULL)
		*params = lparams;

	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);
	return error;
}

/*
 * Get scheduling parameters.
 */
int
sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(int *) policy;
		syscallarg(struct sched_param *) params;
	} */
	struct sched_param params;
	int error, policy;

	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
	    &params);
	if (error)
		goto out;

	error = copyout(&params, SCARG(uap, params), sizeof(params));
	if (error == 0 && SCARG(uap, policy) != NULL)
		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
 out:
	return error;
}

/*
 * Allocate the CPU set, and get it from userspace.
 */
static int
genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
{
	kcpuset_t *kset;
	int error;

	kcpuset_create(&kset);
	error = kcpuset_copyin(sset, kset, size);
	if (error) {
		kcpuset_unuse(kset, NULL);
	} else {
		*dset = kset;
	}
	return error;
}
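
/*
 * Editor's note (descriptive, inferred from the code above):
 * kcpuset_create() returns the set holding one reference; on success that
 * reference is handed to the caller, which must eventually drop it with
 * kcpuset_unuse().  On the error path the reference is dropped here, so
 * the caller never sees a partially constructed set.
 */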

/*
 * Set affinity.
 */
int
sys__sched_setaffinity(struct lwp *l,
    const struct sys__sched_setaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(const cpuset_t *) cpuset;
	} */
	kcpuset_t *kcset, *kcpulst = NULL;
	struct cpu_info *ici, *ci;
	struct proc *p;
	struct lwp *t;
	CPU_INFO_ITERATOR cii;
	bool alloff;
	lwpid_t lid;
	u_int lcnt;
	int error;

	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/*
	 * Traverse _each_ CPU to:
	 *  - Check that CPUs in the mask have no assigned processor set.
	 *  - Check that at least one CPU from the mask is online.
	 *  - Find the first target CPU to migrate.
	 *
	 * To avoid the race with CPU online/offline calls and processor
	 * sets, cpu_lock will be locked for the entire operation.
	 */
	ci = NULL;
	alloff = false;
	mutex_enter(&cpu_lock);
	for (CPU_INFO_FOREACH(cii, ici)) {
		struct schedstate_percpu *ispc;

		if (kcpuset_isset(kcset, cpu_index(ici)) == 0)
			continue;

		ispc = &ici->ci_schedstate;
		/* Check that CPU is not in the processor-set */
		if (ispc->spc_psid != PS_NONE) {
			error = EPERM;
			goto out;
		}
		/* Skip offline CPUs */
		if (ispc->spc_flags & SPCF_OFFLINE) {
			alloff = true;
			continue;
		}
		/* Target CPU to migrate */
		if (ci == NULL) {
			ci = ici;
		}
	}
	if (ci == NULL) {
		if (alloff) {
			/* All CPUs in the set are offline */
			error = EPERM;
			goto out;
		}
		/* Empty set */
		kcpuset_unuse(kcset, &kcpulst);
		kcset = NULL;
	}

	if (SCARG(uap, pid) != 0) {
		/* Find the process */
		mutex_enter(proc_lock);
		p = proc_find(SCARG(uap, pid));
		if (p == NULL) {
			mutex_exit(proc_lock);
			error = ESRCH;
			goto out;
		}
		mutex_enter(p->p_lock);
		mutex_exit(proc_lock);
		/* Disallow modification of system processes. */
		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(p->p_lock);
			error = EPERM;
			goto out;
		}
	} else {
		/* Use the calling process */
		p = l->l_proc;
		mutex_enter(p->p_lock);
	}

	/*
	 * Check the permission.
	 */
	error = kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
	if (error != 0) {
		mutex_exit(p->p_lock);
		goto out;
	}

#ifdef KERN_SA
	/* Changing the affinity of a SA process is not supported */
	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
		mutex_exit(p->p_lock);
		error = EINVAL;
		goto out;
	}
#endif

	/* Iterate through LWP(s). */
	lcnt = 0;
	lid = SCARG(uap, lid);
	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
		if (lid && lid != t->l_lid) {
			continue;
		}
		lwp_lock(t);
		/* No affinity for zombie LWPs. */
		if (t->l_stat == LSZOMB) {
			lwp_unlock(t);
			continue;
		}
		/* First, release existing affinity, if any. */
		if (t->l_affinity) {
			kcpuset_unuse(t->l_affinity, &kcpulst);
		}
		if (kcset) {
			/*
			 * Hold a reference on affinity mask, assign mask to
			 * LWP and migrate it to another CPU (unlocks LWP).
			 */
			kcpuset_use(kcset);
			t->l_affinity = kcset;
			lwp_migrate(t, ci);
		} else {
			/* Old affinity mask is released, just clear. */
			t->l_affinity = NULL;
			lwp_unlock(t);
		}
		lcnt++;
	}
	mutex_exit(p->p_lock);
	if (lcnt == 0) {
		error = ESRCH;
	}
 out:
	mutex_exit(&cpu_lock);

	/*
	 * Drop the initial reference (LWPs, if any, have the ownership now),
	 * and destroy whatever is in the G/C list, if filled.
	 */
	if (kcset) {
		kcpuset_unuse(kcset, &kcpulst);
	}
	if (kcpulst) {
		kcpuset_destroy(kcpulst);
	}
	return error;
}
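
/*
 * Editor's illustration (not part of the original source): a userland
 * thread would typically reach this code via pthread_setaffinity_np(3)
 * together with the cpuset(3) API, e.g.:
 *
 *	cpuset_t *cset = cpuset_create();
 *	cpuset_set(0, cset);
 *	pthread_setaffinity_np(pthread_self(), cpuset_size(cset), cset);
 *	cpuset_destroy(cset);
 *
 * This pins the calling thread to CPU 0; error checking is omitted for
 * brevity.
 */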

/*
 * Get affinity.
 */
int
sys__sched_getaffinity(struct lwp *l,
    const struct sys__sched_getaffinity_args *uap, register_t *retval)
{
	/* {
		syscallarg(pid_t) pid;
		syscallarg(lwpid_t) lid;
		syscallarg(size_t) size;
		syscallarg(cpuset_t *) cpuset;
	} */
	struct lwp *t;
	kcpuset_t *kcset;
	int error;

	error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
	if (error)
		return error;

	/* Locks the LWP */
	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
	if (t == NULL) {
		error = ESRCH;
		goto out;
	}
	/* Check the permission */
	if (kauth_authorize_process(l->l_cred,
	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
		mutex_exit(t->l_proc->p_lock);
		error = EPERM;
		goto out;
	}
	lwp_lock(t);
	if (t->l_affinity) {
		kcpuset_copy(kcset, t->l_affinity);
	} else {
		kcpuset_zero(kcset);
	}
	lwp_unlock(t);
	mutex_exit(t->l_proc->p_lock);

	error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
 out:
	kcpuset_unuse(kcset, NULL);
	return error;
}

/*
 * Yield.
 */
int
sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
{

	yield();
#ifdef KERN_SA
	if (l->l_flag & LW_SA) {
		sa_preempt(l);
	}
#endif
	return 0;
}

/*
 * Sysctl nodes and initialization.
 */
static void
sysctl_sched_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_sched",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Process Scheduling option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_min",
		SYSCTL_DESCR("Minimal POSIX real-time priority"),
		NULL, SCHED_PRI_MIN, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "pri_max",
		SYSCTL_DESCR("Maximal POSIX real-time priority"),
		NULL, SCHED_PRI_MAX, NULL, 0,
		CTL_CREATE, CTL_EOL);
}
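
/*
 * Editor's note (illustration): the nodes created above surface as
 * read-only integers, e.g.:
 *
 *	$ sysctl kern.posix_sched kern.sched.pri_min kern.sched.pri_max
 *
 * which userland code can consult to discover the POSIX conformance level
 * and the supported real-time priority range.
 */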

static int
sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_SCHEDULER_GETPARAM:
		if (kauth_cred_uidmatch(cred, p->p_cred))
			result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_PROCESS_SCHEDULER_SETPARAM:
		if (kauth_cred_uidmatch(cred, p->p_cred)) {
			struct lwp *l;
			int policy;
			pri_t priority;

			l = arg1;
			policy = (int)(unsigned long)arg2;
			priority = (pri_t)(unsigned long)arg3;

			if ((policy == l->l_class ||
			    (policy != SCHED_FIFO && policy != SCHED_RR)) &&
			    priority <= l->l_priority)
				result = KAUTH_RESULT_ALLOW;
		}

		break;

	case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
		result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
		/* Privileged; we let the secmodel handle this. */
		break;

	default:
		break;
	}

	return result;
}
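
/*
 * Editor's note (descriptive, added for clarity): the SETPARAM case above
 * lets a process with a matching uid adjust its own threads as long as it
 * neither moves a thread into a real-time class (SCHED_FIFO/SCHED_RR) it
 * is not already in, nor raises the priority.  Everything else is left as
 * KAUTH_RESULT_DEFER, i.e. decided by the active security model.
 */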

void
sched_init(void)
{

	sysctl_sched_setup(&sched_sysctl_log);

	sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    sched_listener_cb, NULL);
}