sys_sched.c revision 1.10 1 /* $NetBSD: sys_sched.c,v 1.10 2008/02/09 16:58:01 yamt Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * TODO:
31 * - Handle pthread_setschedprio() as defined by POSIX;
32 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.10 2008/02/09 16:58:01 yamt Exp $");
37
38 #include <sys/param.h>
39
40 #include <sys/cpu.h>
41 #include <sys/kauth.h>
42 #include <sys/kmem.h>
43 #include <sys/lwp.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/pset.h>
47 #include <sys/sched.h>
48 #include <sys/syscallargs.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/unistd.h>
53
54 /*
55 * Convert user priority or the in-kernel priority or convert the current
56 * priority to the appropriate range according to the policy change.
57 */
58 static pri_t
59 convert_pri(lwp_t *l, int policy, pri_t pri)
60 {
61 int delta = 0;
62
63 if (policy == SCHED_NONE)
64 policy = l->l_class;
65
66 switch (policy) {
67 case SCHED_OTHER:
68 delta = PRI_USER;
69 break;
70 case SCHED_FIFO:
71 case SCHED_RR:
72 delta = PRI_USER_RT;
73 break;
74 default:
75 panic("upri_to_kpri");
76 }
77
78 if (pri != PRI_NONE) {
79 /* Convert user priority to the in-kernel */
80 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
81 return pri + delta;
82 }
83 if (l->l_class == policy)
84 return l->l_priority;
85
86 /* Change the current priority to the appropriate range */
87 if (l->l_class == SCHED_OTHER) {
88 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89 return l->l_priority + delta;
90 }
91 if (policy == SCHED_OTHER) {
92 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
93 return l->l_priority - delta;
94 }
95 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
96 return l->l_class;
97 }
98
99 /*
100 * Set scheduling parameters.
101 */
102 int
103 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
104 register_t *retval)
105 {
106 /* {
107 syscallarg(pid_t) pid;
108 syscallarg(lwpid_t) lid;
109 syscallarg(int) policy;
110 syscallarg(const struct sched_param *) params;
111 } */
112 struct sched_param param;
113 struct proc *p;
114 struct lwp *t;
115 lwpid_t lid;
116 u_int lcnt;
117 int policy;
118 pri_t pri;
119 int error;
120
121 /* Get the parameters from the user-space */
122 error = copyin(SCARG(uap, params), ¶m, sizeof(param));
123 if (error) {
124 return error;
125 }
126 pri = param.sched_priority;
127 policy = SCARG(uap, policy);
128
129 /* If no parameters specified, just return (this should not happen) */
130 if (pri == PRI_NONE && policy == SCHED_NONE)
131 return 0;
132
133 /* Validate scheduling class */
134 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
135 return EINVAL;
136
137 /* Validate priority */
138 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
139 return EINVAL;
140
141 if (SCARG(uap, pid) != 0) {
142 /* Find the process */
143 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
144 if (p == NULL)
145 return ESRCH;
146 mutex_enter(&p->p_smutex);
147 mutex_exit(&proclist_lock);
148 /* Disallow modification of system processes */
149 if (p->p_flag & PK_SYSTEM) {
150 mutex_exit(&p->p_smutex);
151 return EPERM;
152 }
153 } else {
154 /* Use the calling process */
155 p = l->l_proc;
156 mutex_enter(&p->p_smutex);
157 }
158
159 /* Check the permission */
160 if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER, p,
161 KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_SETPARAM), NULL, NULL)) {
162 mutex_exit(&p->p_smutex);
163 return EPERM;
164 }
165
166 /* Find the LWP(s) */
167 lcnt = 0;
168 lid = SCARG(uap, lid);
169 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
170 pri_t kpri;
171
172 if (lid && lid != t->l_lid)
173 continue;
174 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
175 lwp_lock(t);
176
177 /*
178 * Note that, priority may need to be changed to get into
179 * the correct priority range of the new scheduling class.
180 */
181 kpri = convert_pri(t, policy, pri);
182
183 /* Set the scheduling class */
184 if (policy != SCHED_NONE)
185 t->l_class = policy;
186
187 /* Change the priority */
188 if (t->l_priority != kpri)
189 lwp_changepri(t, kpri);
190
191 lwp_unlock(t);
192 lcnt++;
193 }
194 mutex_exit(&p->p_smutex);
195 return (lcnt == 0) ? ESRCH : error;
196 }
197
198 /*
199 * Get scheduling parameters.
200 */
201 int
202 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
203 register_t *retval)
204 {
205 /* {
206 syscallarg(pid_t) pid;
207 syscallarg(lwpid_t) lid;
208 syscallarg(int *) policy;
209 syscallarg(struct sched_param *) params;
210 } */
211 struct sched_param param;
212 struct lwp *t;
213 lwpid_t lid;
214 int error, policy;
215
216 /* If not specified, use the first LWP */
217 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
218
219 if (SCARG(uap, pid) != 0) {
220 /* Locks the LWP */
221 t = lwp_find2(SCARG(uap, pid), lid);
222 } else {
223 struct proc *p = l->l_proc;
224 /* Use the calling process */
225 mutex_enter(&p->p_smutex);
226 t = lwp_find(p, lid);
227 if (t != NULL)
228 lwp_lock(t);
229 mutex_exit(&p->p_smutex);
230 }
231 if (t == NULL) {
232 error = ESRCH;
233 goto error;
234 }
235
236 /* Check the permission */
237 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER,
238 t->l_proc, KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_GETPARAM),
239 NULL, NULL);
240 if (error != 0) {
241 lwp_unlock(t);
242 goto error;
243 }
244
245 param.sched_priority = t->l_priority;
246 policy = t->l_class;
247 lwp_unlock(t);
248
249 switch (policy) {
250 case SCHED_OTHER:
251 param.sched_priority -= PRI_USER;
252 break;
253 case SCHED_RR:
254 case SCHED_FIFO:
255 param.sched_priority -= PRI_USER_RT;
256 break;
257 }
258 error = copyout(¶m, SCARG(uap, params), sizeof(param));
259 if (error == 0 && SCARG(uap, policy) != NULL)
260 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
261 error:
262 return error;
263 }
264
265 /*
266 * Set affinity.
267 */
268 int
269 sys__sched_setaffinity(struct lwp *l,
270 const struct sys__sched_setaffinity_args *uap, register_t *retval)
271 {
272 /* {
273 syscallarg(pid_t) pid;
274 syscallarg(lwpid_t) lid;
275 syscallarg(size_t) size;
276 syscallarg(void *) cpuset;
277 } */
278 cpuset_t *cpuset;
279 struct cpu_info *ci = NULL;
280 struct proc *p;
281 struct lwp *t;
282 CPU_INFO_ITERATOR cii;
283 lwpid_t lid;
284 u_int lcnt;
285 int error;
286
287 /* Allocate the CPU set, and get it from userspace */
288 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
289 error = copyin(SCARG(uap, cpuset), cpuset,
290 min(SCARG(uap, size), sizeof(cpuset_t)));
291 if (error)
292 goto error;
293
294 /* Look for a CPU in the set */
295 for (CPU_INFO_FOREACH(cii, ci))
296 if (CPU_ISSET(cpu_index(ci), cpuset))
297 break;
298 if (ci == NULL) {
299 /* Empty set */
300 kmem_free(cpuset, sizeof(cpuset_t));
301 cpuset = NULL;
302 }
303
304 if (SCARG(uap, pid) != 0) {
305 /* Find the process */
306 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
307 if (p == NULL) {
308 error = ESRCH;
309 goto error;
310 }
311 mutex_enter(&p->p_smutex);
312 mutex_exit(&proclist_lock);
313 } else {
314 /* Use the calling process */
315 p = l->l_proc;
316 mutex_enter(&p->p_smutex);
317 }
318
319 /*
320 * Check the permission.
321 * Disallow modification of system processes.
322 */
323 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER, p,
324 KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_SETAFFINITY), NULL, NULL);
325 if (error != 0) {
326 mutex_exit(&p->p_smutex);
327 goto error;
328 }
329 if ((p->p_flag & PK_SYSTEM) != 0) {
330 mutex_exit(&p->p_smutex);
331 error = EPERM;
332 goto error;
333 }
334
335 /* Find the LWP(s) */
336 lcnt = 0;
337 lid = SCARG(uap, lid);
338 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
339 if (lid && lid != t->l_lid)
340 continue;
341 lwp_lock(t);
342 if (cpuset) {
343 /* Set the affinity flag and new CPU set */
344 t->l_flag |= LW_AFFINITY;
345 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
346 /* Migrate to another CPU, unlocks LWP */
347 lwp_migrate(t, ci);
348 } else {
349 /* Unset the affinity flag */
350 t->l_flag &= ~LW_AFFINITY;
351 lwp_unlock(t);
352 }
353 lcnt++;
354 }
355 mutex_exit(&p->p_smutex);
356 if (lcnt == 0)
357 error = ESRCH;
358 error:
359 if (cpuset != NULL)
360 kmem_free(cpuset, sizeof(cpuset_t));
361 return error;
362 }
363
364 /*
365 * Get affinity.
366 */
367 int
368 sys__sched_getaffinity(struct lwp *l,
369 const struct sys__sched_getaffinity_args *uap, register_t *retval)
370 {
371 /* {
372 syscallarg(pid_t) pid;
373 syscallarg(lwpid_t) lid;
374 syscallarg(size_t) size;
375 syscallarg(void *) cpuset;
376 } */
377 struct lwp *t;
378 void *cpuset;
379 lwpid_t lid;
380 int error;
381
382 if (SCARG(uap, size) <= 0)
383 return EINVAL;
384 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
385
386 /* If not specified, use the first LWP */
387 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
388
389 if (SCARG(uap, pid) != 0) {
390 /* Locks the LWP */
391 t = lwp_find2(SCARG(uap, pid), lid);
392 } else {
393 struct proc *p = l->l_proc;
394 /* Use the calling process */
395 mutex_enter(&p->p_smutex);
396 t = lwp_find(p, lid);
397 if (t != NULL)
398 lwp_lock(t);
399 mutex_exit(&p->p_smutex);
400 }
401 if (t == NULL) {
402 kmem_free(cpuset, sizeof(cpuset_t));
403 return ESRCH;
404 }
405 /* Check the permission */
406 if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SCHEDULER,
407 t->l_proc, KAUTH_ARG(KAUTH_REQ_PROCESS_SCHEDULER_GETAFFINITY),
408 NULL, NULL)) {
409 lwp_unlock(t);
410 kmem_free(cpuset, sizeof(cpuset_t));
411 return EPERM;
412 }
413 if (t->l_flag & LW_AFFINITY)
414 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
415 lwp_unlock(t);
416
417 error = copyout(cpuset, SCARG(uap, cpuset),
418 min(SCARG(uap, size), sizeof(cpuset_t)));
419
420 kmem_free(cpuset, sizeof(cpuset_t));
421 return error;
422 }
423
424 /*
425 * Yield.
426 */
427 int
428 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
429 {
430
431 yield();
432 return 0;
433 }
434
435 /*
436 * Sysctl nodes and initialization.
437 */
438 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
439 {
440 const struct sysctlnode *node = NULL;
441
442 sysctl_createv(clog, 0, NULL, NULL,
443 CTLFLAG_PERMANENT,
444 CTLTYPE_NODE, "kern", NULL,
445 NULL, 0, NULL, 0,
446 CTL_KERN, CTL_EOL);
447 sysctl_createv(clog, 0, NULL, NULL,
448 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
449 CTLTYPE_INT, "posix_sched",
450 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
451 "Process Scheduling option to which the "
452 "system attempts to conform"),
453 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
454 CTL_KERN, CTL_CREATE, CTL_EOL);
455 sysctl_createv(clog, 0, NULL, &node,
456 CTLFLAG_PERMANENT,
457 CTLTYPE_NODE, "sched",
458 SYSCTL_DESCR("Scheduler options"),
459 NULL, 0, NULL, 0,
460 CTL_KERN, CTL_CREATE, CTL_EOL);
461
462 if (node == NULL)
463 return;
464
465 sysctl_createv(clog, 0, &node, NULL,
466 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
467 CTLTYPE_INT, "pri_min",
468 SYSCTL_DESCR("Minimal POSIX real-time priority"),
469 NULL, SCHED_PRI_MIN, NULL, 0,
470 CTL_CREATE, CTL_EOL);
471 sysctl_createv(clog, 0, &node, NULL,
472 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
473 CTLTYPE_INT, "pri_max",
474 SYSCTL_DESCR("Minimal POSIX real-time priority"),
475 NULL, SCHED_PRI_MAX, NULL, 0,
476 CTL_CREATE, CTL_EOL);
477 }
478