sys_sched.c revision 1.24 1 /* $NetBSD: sys_sched.c,v 1.24 2008/06/15 23:29:09 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * TODO:
33 * - Handle pthread_setschedprio() as defined by POSIX;
34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.24 2008/06/15 23:29:09 rmind Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sched.h>
50 #include <sys/syscallargs.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/unistd.h>
55
56 /*
57 * Convert user priority or the in-kernel priority or convert the current
58 * priority to the appropriate range according to the policy change.
59 */
60 static pri_t
61 convert_pri(lwp_t *l, int policy, pri_t pri)
62 {
63 int delta = 0;
64
65 switch (policy) {
66 case SCHED_OTHER:
67 delta = PRI_USER;
68 break;
69 case SCHED_FIFO:
70 case SCHED_RR:
71 delta = PRI_USER_RT;
72 break;
73 default:
74 panic("upri_to_kpri");
75 }
76
77 if (pri != PRI_NONE) {
78 /* Convert user priority to the in-kernel */
79 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 return pri + delta;
81 }
82 if (l->l_class == policy)
83 return l->l_priority;
84
85 /* Change the current priority to the appropriate range */
86 if (l->l_class == SCHED_OTHER) {
87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 return delta;
89 }
90 if (policy == SCHED_OTHER) {
91 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 return l->l_priority - delta;
93 }
94 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 return l->l_class;
96 }
97
98 int
99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
100 const struct sched_param *params)
101 {
102 struct proc *p;
103 struct lwp *t;
104 pri_t pri;
105 u_int lcnt;
106 int error;
107
108 error = 0;
109
110 pri = params->sched_priority;
111
112 /* If no parameters specified, just return (this should not happen) */
113 if (pri == PRI_NONE && policy == SCHED_NONE)
114 return 0;
115
116 /* Validate scheduling class */
117 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
118 return EINVAL;
119
120 /* Validate priority */
121 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
122 return EINVAL;
123
124 if (pid != 0) {
125 /* Find the process */
126 mutex_enter(proc_lock);
127 p = p_find(pid, PFIND_LOCKED);
128 if (p == NULL) {
129 mutex_exit(proc_lock);
130 return ESRCH;
131 }
132 mutex_enter(p->p_lock);
133 mutex_exit(proc_lock);
134 /* Disallow modification of system processes */
135 if ((p->p_flag & PK_SYSTEM) != 0) {
136 mutex_exit(p->p_lock);
137 return EPERM;
138 }
139 } else {
140 /* Use the calling process */
141 p = curlwp->l_proc;
142 mutex_enter(p->p_lock);
143 }
144
145 /* Find the LWP(s) */
146 lcnt = 0;
147 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
148 pri_t kpri;
149 int lpolicy;
150
151 if (lid && lid != t->l_lid)
152 continue;
153 lcnt++;
154 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
155 lwp_lock(t);
156
157 if (policy == SCHED_NONE)
158 lpolicy = t->l_class;
159 else
160 lpolicy = policy;
161
162 /*
163 * Note that, priority may need to be changed to get into
164 * the correct priority range of the new scheduling class.
165 */
166 kpri = convert_pri(t, lpolicy, pri);
167
168 /* Check the permission */
169 error = kauth_authorize_process(kauth_cred_get(),
170 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
171 KAUTH_ARG(kpri));
172 if (error) {
173 lwp_unlock(t);
174 break;
175 }
176
177 /* Set the scheduling class */
178 if (policy != SCHED_NONE)
179 t->l_class = policy;
180
181 /* Change the priority */
182 if (t->l_priority != kpri)
183 lwp_changepri(t, kpri);
184
185 lwp_unlock(t);
186 }
187 mutex_exit(p->p_lock);
188 return (lcnt == 0) ? ESRCH : error;
189 }
190
191 /*
192 * Set scheduling parameters.
193 */
194 int
195 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
196 register_t *retval)
197 {
198 /* {
199 syscallarg(pid_t) pid;
200 syscallarg(lwpid_t) lid;
201 syscallarg(int) policy;
202 syscallarg(const struct sched_param *) params;
203 } */
204 struct sched_param params;
205 int error;
206
207 /* Get the parameters from the user-space */
208 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
209 if (error)
210 goto out;
211
212 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
213 SCARG(uap, policy), ¶ms);
214
215 out:
216 return (error);
217 }
218
219 int
220 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
221 struct sched_param *params)
222 {
223 struct sched_param lparams;
224 struct lwp *t;
225 int error, lpolicy;
226
227 /* Locks the LWP */
228 t = lwp_find2(pid, lid);
229 if (t == NULL)
230 return ESRCH;
231
232 /* Check the permission */
233 error = kauth_authorize_process(kauth_cred_get(),
234 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
235 if (error != 0) {
236 mutex_exit(t->l_proc->p_lock);
237 return error;
238 }
239
240 lwp_lock(t);
241 lparams.sched_priority = t->l_priority;
242 lpolicy = t->l_class;
243
244 switch (lpolicy) {
245 case SCHED_OTHER:
246 lparams.sched_priority -= PRI_USER;
247 break;
248 case SCHED_RR:
249 case SCHED_FIFO:
250 lparams.sched_priority -= PRI_USER_RT;
251 break;
252 }
253
254 if (policy != NULL)
255 *policy = lpolicy;
256
257 if (params != NULL)
258 *params = lparams;
259
260 lwp_unlock(t);
261 mutex_exit(t->l_proc->p_lock);
262 return error;
263 }
264
265 /*
266 * Get scheduling parameters.
267 */
268 int
269 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
270 register_t *retval)
271 {
272 /* {
273 syscallarg(pid_t) pid;
274 syscallarg(lwpid_t) lid;
275 syscallarg(int *) policy;
276 syscallarg(struct sched_param *) params;
277 } */
278 struct sched_param params;
279 int error, policy;
280
281 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
282 ¶ms);
283 if (error)
284 goto out;
285
286 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
287 if (error == 0 && SCARG(uap, policy) != NULL)
288 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
289
290 out:
291 return (error);
292 }
293
294 /* Allocate the CPU set, and get it from userspace */
295 static int
296 gencpuset(cpuset_t **dset, const cpuset_t *sset, size_t size)
297 {
298 int error;
299
300 *dset = _cpuset_create();
301 if (size != _cpuset_size(*dset)) {
302 error = EINVAL;
303 goto out;
304 }
305
306 error = copyin(sset, *dset, size);
307 if (error)
308 goto out;
309
310 if (_cpuset_nused(*dset) != 1) {
311 error = EINVAL;
312 goto out;
313 }
314
315 return 0;
316 out:
317 _cpuset_unuse(*dset, NULL);
318 return error;
319 }
320
321 /*
322 * Set affinity.
323 */
324 int
325 sys__sched_setaffinity(struct lwp *l,
326 const struct sys__sched_setaffinity_args *uap, register_t *retval)
327 {
328 /* {
329 syscallarg(pid_t) pid;
330 syscallarg(lwpid_t) lid;
331 syscallarg(size_t) size;
332 syscallarg(const cpuset_t *) cpuset;
333 } */
334 cpuset_t *cpuset, *cpulst = NULL;
335 struct cpu_info *ci = NULL;
336 struct proc *p;
337 struct lwp *t;
338 CPU_INFO_ITERATOR cii;
339 lwpid_t lid;
340 u_int lcnt;
341 int error;
342
343 if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
344 return error;
345
346 /* Look for a CPU in the set */
347 for (CPU_INFO_FOREACH(cii, ci)) {
348 error = CPU_ISSET(cpu_index(ci), cpuset);
349 if (error) {
350 if (error == -1) {
351 error = E2BIG;
352 goto out;
353 }
354 break;
355 }
356 }
357
358 if (ci == NULL) {
359 /* Empty set */
360 _cpuset_unuse(cpuset, NULL);
361 cpuset = NULL;
362 }
363
364 if (SCARG(uap, pid) != 0) {
365 /* Find the process */
366 mutex_enter(proc_lock);
367 p = p_find(SCARG(uap, pid), PFIND_LOCKED);
368 if (p == NULL) {
369 mutex_exit(proc_lock);
370 error = ESRCH;
371 goto out;
372 }
373 mutex_enter(p->p_lock);
374 mutex_exit(proc_lock);
375 /* Disallow modification of system processes. */
376 if ((p->p_flag & PK_SYSTEM) != 0) {
377 mutex_exit(p->p_lock);
378 error = EPERM;
379 goto out;
380 }
381 } else {
382 /* Use the calling process */
383 p = l->l_proc;
384 mutex_enter(p->p_lock);
385 }
386
387 /*
388 * Check the permission.
389 */
390 error = kauth_authorize_process(l->l_cred,
391 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
392 if (error != 0) {
393 mutex_exit(p->p_lock);
394 goto out;
395 }
396
397 /* Find the LWP(s) */
398 lcnt = 0;
399 lid = SCARG(uap, lid);
400 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
401 if (lid && lid != t->l_lid)
402 continue;
403 lwp_lock(t);
404 if (cpuset) {
405 /* Set the affinity flag and new CPU set */
406 t->l_flag |= LW_AFFINITY;
407 _cpuset_use(cpuset);
408 if (t->l_affinity != NULL)
409 _cpuset_unuse(t->l_affinity, &cpulst);
410 t->l_affinity = cpuset;
411 /* Migrate to another CPU, unlocks LWP */
412 lwp_migrate(t, ci);
413 } else {
414 /* Unset the affinity flag */
415 t->l_flag &= ~LW_AFFINITY;
416 if (t->l_affinity != NULL)
417 _cpuset_unuse(t->l_affinity, &cpulst);
418 t->l_affinity = NULL;
419 lwp_unlock(t);
420 }
421 lcnt++;
422 }
423 mutex_exit(p->p_lock);
424 if (lcnt == 0)
425 error = ESRCH;
426 out:
427 if (cpuset != NULL)
428 _cpuset_unuse(cpuset, &cpulst);
429 _cpuset_destroy(cpulst);
430 return error;
431 }
432
433 /*
434 * Get affinity.
435 */
436 int
437 sys__sched_getaffinity(struct lwp *l,
438 const struct sys__sched_getaffinity_args *uap, register_t *retval)
439 {
440 /* {
441 syscallarg(pid_t) pid;
442 syscallarg(lwpid_t) lid;
443 syscallarg(size_t) size;
444 syscallarg(cpuset_t *) cpuset;
445 } */
446 struct lwp *t;
447 cpuset_t *cpuset;
448 int error;
449
450 if ((error = gencpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
451 return error;
452
453 /* Locks the LWP */
454 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
455 if (t == NULL) {
456 error = ESRCH;
457 goto out;
458 }
459 /* Check the permission */
460 if (kauth_authorize_process(l->l_cred,
461 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
462 mutex_exit(t->l_proc->p_lock);
463 error = EPERM;
464 goto out;
465 }
466 lwp_lock(t);
467 if (t->l_flag & LW_AFFINITY) {
468 KASSERT(t->l_affinity != NULL);
469 _cpuset_copy(cpuset, t->l_affinity);
470 } else
471 _cpuset_zero(cpuset);
472 lwp_unlock(t);
473 mutex_exit(t->l_proc->p_lock);
474
475 error = copyout(cpuset, SCARG(uap, cpuset), _cpuset_size(cpuset));
476 out:
477 _cpuset_unuse(cpuset, NULL);
478 return error;
479 }
480
481 /*
482 * Yield.
483 */
484 int
485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
486 {
487
488 yield();
489 return 0;
490 }
491
492 /*
493 * Sysctl nodes and initialization.
494 */
495 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
496 {
497 const struct sysctlnode *node = NULL;
498
499 sysctl_createv(clog, 0, NULL, NULL,
500 CTLFLAG_PERMANENT,
501 CTLTYPE_NODE, "kern", NULL,
502 NULL, 0, NULL, 0,
503 CTL_KERN, CTL_EOL);
504 sysctl_createv(clog, 0, NULL, NULL,
505 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
506 CTLTYPE_INT, "posix_sched",
507 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
508 "Process Scheduling option to which the "
509 "system attempts to conform"),
510 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
511 CTL_KERN, CTL_CREATE, CTL_EOL);
512 sysctl_createv(clog, 0, NULL, &node,
513 CTLFLAG_PERMANENT,
514 CTLTYPE_NODE, "sched",
515 SYSCTL_DESCR("Scheduler options"),
516 NULL, 0, NULL, 0,
517 CTL_KERN, CTL_CREATE, CTL_EOL);
518
519 if (node == NULL)
520 return;
521
522 sysctl_createv(clog, 0, &node, NULL,
523 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
524 CTLTYPE_INT, "pri_min",
525 SYSCTL_DESCR("Minimal POSIX real-time priority"),
526 NULL, SCHED_PRI_MIN, NULL, 0,
527 CTL_CREATE, CTL_EOL);
528 sysctl_createv(clog, 0, &node, NULL,
529 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
530 CTLTYPE_INT, "pri_max",
531 SYSCTL_DESCR("Maximal POSIX real-time priority"),
532 NULL, SCHED_PRI_MAX, NULL, 0,
533 CTL_CREATE, CTL_EOL);
534 }
535