sys_sched.c revision 1.18 1 /* $NetBSD: sys_sched.c,v 1.18 2008/02/28 16:09:19 elad Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * TODO:
33 * - Handle pthread_setschedprio() as defined by POSIX;
34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.18 2008/02/28 16:09:19 elad Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sched.h>
50 #include <sys/syscallargs.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/unistd.h>
55
56 /*
57 * Convert user priority or the in-kernel priority or convert the current
58 * priority to the appropriate range according to the policy change.
59 */
60 static pri_t
61 convert_pri(lwp_t *l, int policy, pri_t pri)
62 {
63 int delta = 0;
64
65 switch (policy) {
66 case SCHED_OTHER:
67 delta = PRI_USER;
68 break;
69 case SCHED_FIFO:
70 case SCHED_RR:
71 delta = PRI_USER_RT;
72 break;
73 default:
74 panic("upri_to_kpri");
75 }
76
77 if (pri != PRI_NONE) {
78 /* Convert user priority to the in-kernel */
79 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 return pri + delta;
81 }
82 if (l->l_class == policy)
83 return l->l_priority;
84
85 /* Change the current priority to the appropriate range */
86 if (l->l_class == SCHED_OTHER) {
87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 return l->l_priority + delta;
89 }
90 if (policy == SCHED_OTHER) {
91 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 return l->l_priority - delta;
93 }
94 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 return l->l_class;
96 }
97
98 int
99 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
100 const struct sched_param *params)
101 {
102 struct proc *p;
103 struct lwp *t;
104 pri_t pri;
105 u_int lcnt;
106 int error;
107
108 error = 0;
109
110 pri = params->sched_priority;
111
112 /* If no parameters specified, just return (this should not happen) */
113 if (pri == PRI_NONE && policy == SCHED_NONE)
114 return 0;
115
116 /* Validate scheduling class */
117 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
118 return EINVAL;
119
120 /* Validate priority */
121 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
122 return EINVAL;
123
124 if (pid != 0) {
125 /* Find the process */
126 p = p_find(pid, PFIND_UNLOCK_FAIL);
127 if (p == NULL)
128 return ESRCH;
129 mutex_enter(&p->p_smutex);
130 mutex_exit(&proclist_lock);
131 /* Disallow modification of system processes */
132 if ((p->p_flag & PK_SYSTEM) != 0) {
133 mutex_exit(&p->p_smutex);
134 return EPERM;
135 }
136 } else {
137 /* Use the calling process */
138 p = curlwp->l_proc;
139 mutex_enter(&p->p_smutex);
140 }
141
142 /* Find the LWP(s) */
143 lcnt = 0;
144 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
145 pri_t kpri;
146 int lpolicy;
147
148 if (lid && lid != t->l_lid)
149 continue;
150 lcnt++;
151 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
152 lwp_lock(t);
153
154 if (policy == SCHED_NONE)
155 lpolicy = t->l_class;
156 else
157 lpolicy = policy;
158
159 /*
160 * Note that, priority may need to be changed to get into
161 * the correct priority range of the new scheduling class.
162 */
163 kpri = convert_pri(t, lpolicy, pri);
164
165 /* Check the permission */
166 error = kauth_authorize_process(kauth_cred_get(),
167 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
168 KAUTH_ARG(kpri));
169 if (error) {
170 lwp_unlock(t);
171 break;
172 }
173
174 /* Set the scheduling class */
175 if (policy != SCHED_NONE)
176 t->l_class = policy;
177
178 /* Change the priority */
179 if (t->l_priority != kpri)
180 lwp_changepri(t, kpri);
181
182 lwp_unlock(t);
183 }
184 mutex_exit(&p->p_smutex);
185 return (lcnt == 0) ? ESRCH : error;
186 }
187
188 /*
189 * Set scheduling parameters.
190 */
191 int
192 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
193 register_t *retval)
194 {
195 /* {
196 syscallarg(pid_t) pid;
197 syscallarg(lwpid_t) lid;
198 syscallarg(int) policy;
199 syscallarg(const struct sched_param *) params;
200 } */
201 struct sched_param params;
202 int error;
203
204 /* Get the parameters from the user-space */
205 error = copyin(SCARG(uap, params), ¶ms, sizeof(params));
206 if (error)
207 goto out;
208
209 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
210 SCARG(uap, policy), ¶ms);
211
212 out:
213 return (error);
214 }
215
216 int
217 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
218 struct sched_param *params)
219 {
220 struct sched_param lparams;
221 struct lwp *t;
222 int error, lpolicy;
223
224 /* Locks the LWP */
225 t = lwp_find2(pid, lid);
226 if (t == NULL) {
227 error = ESRCH;
228 goto out;
229 }
230
231 /* Check the permission */
232 error = kauth_authorize_process(kauth_cred_get(),
233 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
234 if (error != 0) {
235 lwp_unlock(t);
236 goto out;
237 }
238
239 lparams.sched_priority = t->l_priority;
240 lpolicy = t->l_class;
241 lwp_unlock(t);
242
243 switch (lpolicy) {
244 case SCHED_OTHER:
245 lparams.sched_priority -= PRI_USER;
246 break;
247 case SCHED_RR:
248 case SCHED_FIFO:
249 lparams.sched_priority -= PRI_USER_RT;
250 break;
251 }
252
253 if (policy != NULL)
254 *policy = lpolicy;
255
256 if (params != NULL)
257 *params = lparams;
258
259 out:
260 return error;
261 }
262
263 /*
264 * Get scheduling parameters.
265 */
266 int
267 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
268 register_t *retval)
269 {
270 /* {
271 syscallarg(pid_t) pid;
272 syscallarg(lwpid_t) lid;
273 syscallarg(int *) policy;
274 syscallarg(struct sched_param *) params;
275 } */
276 struct sched_param params;
277 int error, policy;
278
279 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
280 ¶ms);
281 if (error)
282 goto out;
283
284 error = copyout(¶ms, SCARG(uap, params), sizeof(params));
285 if (error == 0 && SCARG(uap, policy) != NULL)
286 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
287
288 out:
289 return (error);
290 }
291
292 /*
293 * Set affinity.
294 */
295 int
296 sys__sched_setaffinity(struct lwp *l,
297 const struct sys__sched_setaffinity_args *uap, register_t *retval)
298 {
299 /* {
300 syscallarg(pid_t) pid;
301 syscallarg(lwpid_t) lid;
302 syscallarg(size_t) size;
303 syscallarg(void *) cpuset;
304 } */
305 cpuset_t *cpuset;
306 struct cpu_info *ci = NULL;
307 struct proc *p;
308 struct lwp *t;
309 CPU_INFO_ITERATOR cii;
310 lwpid_t lid;
311 u_int lcnt;
312 int error;
313
314 /* Allocate the CPU set, and get it from userspace */
315 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
316 error = copyin(SCARG(uap, cpuset), cpuset,
317 min(SCARG(uap, size), sizeof(cpuset_t)));
318 if (error)
319 goto error;
320
321 /* Look for a CPU in the set */
322 for (CPU_INFO_FOREACH(cii, ci))
323 if (CPU_ISSET(cpu_index(ci), cpuset))
324 break;
325 if (ci == NULL) {
326 /* Empty set */
327 kmem_free(cpuset, sizeof(cpuset_t));
328 cpuset = NULL;
329 }
330
331 if (SCARG(uap, pid) != 0) {
332 /* Find the process */
333 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
334 if (p == NULL) {
335 error = ESRCH;
336 goto error;
337 }
338 mutex_enter(&p->p_smutex);
339 mutex_exit(&proclist_lock);
340 /* Disallow modification of system processes. */
341 if ((p->p_flag & PK_SYSTEM) != 0) {
342 mutex_exit(&p->p_smutex);
343 error = EPERM;
344 goto error;
345 }
346 } else {
347 /* Use the calling process */
348 p = l->l_proc;
349 mutex_enter(&p->p_smutex);
350 }
351
352 /*
353 * Check the permission.
354 */
355 error = kauth_authorize_process(l->l_cred,
356 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
357 if (error != 0) {
358 mutex_exit(&p->p_smutex);
359 goto error;
360 }
361
362 /* Find the LWP(s) */
363 lcnt = 0;
364 lid = SCARG(uap, lid);
365 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
366 if (lid && lid != t->l_lid)
367 continue;
368 lwp_lock(t);
369 if (cpuset) {
370 /* Set the affinity flag and new CPU set */
371 t->l_flag |= LW_AFFINITY;
372 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
373 /* Migrate to another CPU, unlocks LWP */
374 lwp_migrate(t, ci);
375 } else {
376 /* Unset the affinity flag */
377 t->l_flag &= ~LW_AFFINITY;
378 lwp_unlock(t);
379 }
380 lcnt++;
381 }
382 mutex_exit(&p->p_smutex);
383 if (lcnt == 0)
384 error = ESRCH;
385 error:
386 if (cpuset != NULL)
387 kmem_free(cpuset, sizeof(cpuset_t));
388 return error;
389 }
390
391 /*
392 * Get affinity.
393 */
394 int
395 sys__sched_getaffinity(struct lwp *l,
396 const struct sys__sched_getaffinity_args *uap, register_t *retval)
397 {
398 /* {
399 syscallarg(pid_t) pid;
400 syscallarg(lwpid_t) lid;
401 syscallarg(size_t) size;
402 syscallarg(void *) cpuset;
403 } */
404 struct lwp *t;
405 void *cpuset;
406 int error;
407
408 if (SCARG(uap, size) <= 0)
409 return EINVAL;
410 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
411
412 /* Locks the LWP */
413 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
414 if (t == NULL) {
415 kmem_free(cpuset, sizeof(cpuset_t));
416 return ESRCH;
417 }
418 /* Check the permission */
419 if (kauth_authorize_process(l->l_cred,
420 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
421 lwp_unlock(t);
422 kmem_free(cpuset, sizeof(cpuset_t));
423 return EPERM;
424 }
425 if (t->l_flag & LW_AFFINITY)
426 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
427 lwp_unlock(t);
428
429 error = copyout(cpuset, SCARG(uap, cpuset),
430 min(SCARG(uap, size), sizeof(cpuset_t)));
431
432 kmem_free(cpuset, sizeof(cpuset_t));
433 return error;
434 }
435
436 /*
437 * Yield.
438 */
439 int
440 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
441 {
442
443 yield();
444 return 0;
445 }
446
447 /*
448 * Sysctl nodes and initialization.
449 */
450 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
451 {
452 const struct sysctlnode *node = NULL;
453
454 sysctl_createv(clog, 0, NULL, NULL,
455 CTLFLAG_PERMANENT,
456 CTLTYPE_NODE, "kern", NULL,
457 NULL, 0, NULL, 0,
458 CTL_KERN, CTL_EOL);
459 sysctl_createv(clog, 0, NULL, NULL,
460 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
461 CTLTYPE_INT, "posix_sched",
462 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
463 "Process Scheduling option to which the "
464 "system attempts to conform"),
465 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
466 CTL_KERN, CTL_CREATE, CTL_EOL);
467 sysctl_createv(clog, 0, NULL, &node,
468 CTLFLAG_PERMANENT,
469 CTLTYPE_NODE, "sched",
470 SYSCTL_DESCR("Scheduler options"),
471 NULL, 0, NULL, 0,
472 CTL_KERN, CTL_CREATE, CTL_EOL);
473
474 if (node == NULL)
475 return;
476
477 sysctl_createv(clog, 0, &node, NULL,
478 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
479 CTLTYPE_INT, "pri_min",
480 SYSCTL_DESCR("Minimal POSIX real-time priority"),
481 NULL, SCHED_PRI_MIN, NULL, 0,
482 CTL_CREATE, CTL_EOL);
483 sysctl_createv(clog, 0, &node, NULL,
484 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
485 CTLTYPE_INT, "pri_max",
486 SYSCTL_DESCR("Minimal POSIX real-time priority"),
487 NULL, SCHED_PRI_MAX, NULL, 0,
488 CTL_CREATE, CTL_EOL);
489 }
490