/*	$NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $	*/
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * System calls relating to the scheduler.
31 *
32 * TODO:
33 * - Handle pthread_setschedprio() as defined by POSIX;
34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.17 2008/02/22 23:10:12 ad Exp $");
39
40 #include <sys/param.h>
41
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sched.h>
50 #include <sys/syscallargs.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/unistd.h>
55
56 /*
57 * Convert user priority or the in-kernel priority or convert the current
58 * priority to the appropriate range according to the policy change.
59 */
60 static pri_t
61 convert_pri(lwp_t *l, int policy, pri_t pri)
62 {
63 int delta = 0;
64
65 switch (policy) {
66 case SCHED_OTHER:
67 delta = PRI_USER;
68 break;
69 case SCHED_FIFO:
70 case SCHED_RR:
71 delta = PRI_USER_RT;
72 break;
73 default:
74 panic("upri_to_kpri");
75 }
76
77 if (pri != PRI_NONE) {
78 /* Convert user priority to the in-kernel */
79 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
80 return pri + delta;
81 }
82 if (l->l_class == policy)
83 return l->l_priority;
84
85 /* Change the current priority to the appropriate range */
86 if (l->l_class == SCHED_OTHER) {
87 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
88 return l->l_priority + delta;
89 }
90 if (policy == SCHED_OTHER) {
91 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
92 return l->l_priority - delta;
93 }
94 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
95 return l->l_class;
96 }
97
98 /*
99 * Set scheduling parameters.
100 */
101 int
102 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
103 register_t *retval)
104 {
105 /* {
106 syscallarg(pid_t) pid;
107 syscallarg(lwpid_t) lid;
108 syscallarg(int) policy;
109 syscallarg(const struct sched_param *) params;
110 } */
111 struct sched_param param;
112 struct proc *p;
113 struct lwp *t;
114 lwpid_t lid;
115 u_int lcnt;
116 int policy;
117 pri_t pri;
118 int error;
119
120 /* Get the parameters from the user-space */
121 error = copyin(SCARG(uap, params), ¶m, sizeof(param));
122 if (error) {
123 return error;
124 }
125 pri = param.sched_priority;
126 policy = SCARG(uap, policy);
127
128 /* If no parameters specified, just return (this should not happen) */
129 if (pri == PRI_NONE && policy == SCHED_NONE)
130 return 0;
131
132 /* Validate scheduling class */
133 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
134 return EINVAL;
135
136 /* Validate priority */
137 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
138 return EINVAL;
139
140 if (SCARG(uap, pid) != 0) {
141 /* Find the process */
142 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
143 if (p == NULL)
144 return ESRCH;
145 mutex_enter(&p->p_smutex);
146 mutex_exit(&proclist_lock);
147 /* Disallow modification of system processes */
148 if ((p->p_flag & PK_SYSTEM) != 0) {
149 mutex_exit(&p->p_smutex);
150 return EPERM;
151 }
152 } else {
153 /* Use the calling process */
154 p = l->l_proc;
155 mutex_enter(&p->p_smutex);
156 }
157
158 /* Find the LWP(s) */
159 lcnt = 0;
160 lid = SCARG(uap, lid);
161 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
162 pri_t kpri;
163 int lpolicy;
164
165 if (lid && lid != t->l_lid)
166 continue;
167 lcnt++;
168 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
169 lwp_lock(t);
170
171 if (policy == SCHED_NONE)
172 lpolicy = t->l_class;
173 else
174 lpolicy = policy;
175
176 /*
177 * Note that, priority may need to be changed to get into
178 * the correct priority range of the new scheduling class.
179 */
180 kpri = convert_pri(t, lpolicy, pri);
181
182 /* Check the permission */
183 error = kauth_authorize_process(l->l_cred,
184 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
185 KAUTH_ARG(kpri));
186 if (error) {
187 lwp_unlock(t);
188 break;
189 }
190
191 /* Set the scheduling class */
192 if (policy != SCHED_NONE)
193 t->l_class = policy;
194
195 /* Change the priority */
196 if (t->l_priority != kpri)
197 lwp_changepri(t, kpri);
198
199 lwp_unlock(t);
200 }
201 mutex_exit(&p->p_smutex);
202 return (lcnt == 0) ? ESRCH : error;
203 }
204
205 /*
206 * Get scheduling parameters.
207 */
208 int
209 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
210 register_t *retval)
211 {
212 /* {
213 syscallarg(pid_t) pid;
214 syscallarg(lwpid_t) lid;
215 syscallarg(int *) policy;
216 syscallarg(struct sched_param *) params;
217 } */
218 struct sched_param param;
219 struct lwp *t;
220 int error, policy;
221
222 /* Locks the LWP */
223 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
224 if (t == NULL)
225 return ESRCH;
226
227 /* Check the permission */
228 error = kauth_authorize_process(l->l_cred,
229 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
230 if (error != 0) {
231 lwp_unlock(t);
232 return error;
233 }
234
235 param.sched_priority = t->l_priority;
236 policy = t->l_class;
237 lwp_unlock(t);
238
239 switch (policy) {
240 case SCHED_OTHER:
241 param.sched_priority -= PRI_USER;
242 break;
243 case SCHED_RR:
244 case SCHED_FIFO:
245 param.sched_priority -= PRI_USER_RT;
246 break;
247 }
248 error = copyout(¶m, SCARG(uap, params), sizeof(param));
249 if (error == 0 && SCARG(uap, policy) != NULL)
250 error = copyout(&policy, SCARG(uap, policy), sizeof(int));
251 return error;
252 }
253
254 /*
255 * Set affinity.
256 */
257 int
258 sys__sched_setaffinity(struct lwp *l,
259 const struct sys__sched_setaffinity_args *uap, register_t *retval)
260 {
261 /* {
262 syscallarg(pid_t) pid;
263 syscallarg(lwpid_t) lid;
264 syscallarg(size_t) size;
265 syscallarg(void *) cpuset;
266 } */
267 cpuset_t *cpuset;
268 struct cpu_info *ci = NULL;
269 struct proc *p;
270 struct lwp *t;
271 CPU_INFO_ITERATOR cii;
272 lwpid_t lid;
273 u_int lcnt;
274 int error;
275
276 /* Allocate the CPU set, and get it from userspace */
277 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
278 error = copyin(SCARG(uap, cpuset), cpuset,
279 min(SCARG(uap, size), sizeof(cpuset_t)));
280 if (error)
281 goto error;
282
283 /* Look for a CPU in the set */
284 for (CPU_INFO_FOREACH(cii, ci))
285 if (CPU_ISSET(cpu_index(ci), cpuset))
286 break;
287 if (ci == NULL) {
288 /* Empty set */
289 kmem_free(cpuset, sizeof(cpuset_t));
290 cpuset = NULL;
291 }
292
293 if (SCARG(uap, pid) != 0) {
294 /* Find the process */
295 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
296 if (p == NULL) {
297 error = ESRCH;
298 goto error;
299 }
300 mutex_enter(&p->p_smutex);
301 mutex_exit(&proclist_lock);
302 /* Disallow modification of system processes. */
303 if ((p->p_flag & PK_SYSTEM) != 0) {
304 mutex_exit(&p->p_smutex);
305 error = EPERM;
306 goto error;
307 }
308 } else {
309 /* Use the calling process */
310 p = l->l_proc;
311 mutex_enter(&p->p_smutex);
312 }
313
314 /*
315 * Check the permission.
316 */
317 error = kauth_authorize_process(l->l_cred,
318 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
319 if (error != 0) {
320 mutex_exit(&p->p_smutex);
321 goto error;
322 }
323
324 /* Find the LWP(s) */
325 lcnt = 0;
326 lid = SCARG(uap, lid);
327 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
328 if (lid && lid != t->l_lid)
329 continue;
330 lwp_lock(t);
331 if (cpuset) {
332 /* Set the affinity flag and new CPU set */
333 t->l_flag |= LW_AFFINITY;
334 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
335 /* Migrate to another CPU, unlocks LWP */
336 lwp_migrate(t, ci);
337 } else {
338 /* Unset the affinity flag */
339 t->l_flag &= ~LW_AFFINITY;
340 lwp_unlock(t);
341 }
342 lcnt++;
343 }
344 mutex_exit(&p->p_smutex);
345 if (lcnt == 0)
346 error = ESRCH;
347 error:
348 if (cpuset != NULL)
349 kmem_free(cpuset, sizeof(cpuset_t));
350 return error;
351 }
352
353 /*
354 * Get affinity.
355 */
356 int
357 sys__sched_getaffinity(struct lwp *l,
358 const struct sys__sched_getaffinity_args *uap, register_t *retval)
359 {
360 /* {
361 syscallarg(pid_t) pid;
362 syscallarg(lwpid_t) lid;
363 syscallarg(size_t) size;
364 syscallarg(void *) cpuset;
365 } */
366 struct lwp *t;
367 void *cpuset;
368 int error;
369
370 if (SCARG(uap, size) <= 0)
371 return EINVAL;
372 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
373
374 /* Locks the LWP */
375 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
376 if (t == NULL) {
377 kmem_free(cpuset, sizeof(cpuset_t));
378 return ESRCH;
379 }
380 /* Check the permission */
381 if (kauth_authorize_process(l->l_cred,
382 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
383 lwp_unlock(t);
384 kmem_free(cpuset, sizeof(cpuset_t));
385 return EPERM;
386 }
387 if (t->l_flag & LW_AFFINITY)
388 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
389 lwp_unlock(t);
390
391 error = copyout(cpuset, SCARG(uap, cpuset),
392 min(SCARG(uap, size), sizeof(cpuset_t)));
393
394 kmem_free(cpuset, sizeof(cpuset_t));
395 return error;
396 }
397
398 /*
399 * Yield.
400 */
401 int
402 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
403 {
404
405 yield();
406 return 0;
407 }
408
409 /*
410 * Sysctl nodes and initialization.
411 */
412 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
413 {
414 const struct sysctlnode *node = NULL;
415
416 sysctl_createv(clog, 0, NULL, NULL,
417 CTLFLAG_PERMANENT,
418 CTLTYPE_NODE, "kern", NULL,
419 NULL, 0, NULL, 0,
420 CTL_KERN, CTL_EOL);
421 sysctl_createv(clog, 0, NULL, NULL,
422 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
423 CTLTYPE_INT, "posix_sched",
424 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
425 "Process Scheduling option to which the "
426 "system attempts to conform"),
427 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
428 CTL_KERN, CTL_CREATE, CTL_EOL);
429 sysctl_createv(clog, 0, NULL, &node,
430 CTLFLAG_PERMANENT,
431 CTLTYPE_NODE, "sched",
432 SYSCTL_DESCR("Scheduler options"),
433 NULL, 0, NULL, 0,
434 CTL_KERN, CTL_CREATE, CTL_EOL);
435
436 if (node == NULL)
437 return;
438
439 sysctl_createv(clog, 0, &node, NULL,
440 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
441 CTLTYPE_INT, "pri_min",
442 SYSCTL_DESCR("Minimal POSIX real-time priority"),
443 NULL, SCHED_PRI_MIN, NULL, 0,
444 CTL_CREATE, CTL_EOL);
445 sysctl_createv(clog, 0, &node, NULL,
446 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
447 CTLTYPE_INT, "pri_max",
448 SYSCTL_DESCR("Minimal POSIX real-time priority"),
449 NULL, SCHED_PRI_MAX, NULL, 0,
450 CTL_CREATE, CTL_EOL);
451 }
452