sys_sched.c revision 1.7 1 /* $NetBSD: sys_sched.c,v 1.7 2008/01/26 17:55:29 rmind Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * TODO:
31 * - Handle pthread_setschedprio() as defined by POSIX;
32 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.7 2008/01/26 17:55:29 rmind Exp $");
37
38 #include <sys/param.h>
39
40 #include <sys/cpu.h>
41 #include <sys/kauth.h>
42 #include <sys/kmem.h>
43 #include <sys/lwp.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/pset.h>
47 #include <sys/sched.h>
48 #include <sys/syscallargs.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/unistd.h>
53
54 /*
55 * Convert user priority or the in-kernel priority or convert the current
56 * priority to the appropriate range according to the policy change.
57 */
58 static pri_t
59 convert_pri(lwp_t *l, int policy, pri_t pri)
60 {
61 int delta = 0;
62
63 if (policy == SCHED_NONE)
64 policy = l->l_class;
65
66 switch (policy) {
67 case SCHED_OTHER:
68 delta = PRI_USER;
69 break;
70 case SCHED_FIFO:
71 case SCHED_RR:
72 delta = PRI_USER_RT;
73 break;
74 default:
75 panic("upri_to_kpri");
76 }
77
78 if (pri != PRI_NONE) {
79 /* Convert user priority to the in-kernel */
80 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
81 return pri + delta;
82 }
83 if (l->l_class == policy)
84 return l->l_priority;
85
86 /* Change the current priority to the appropriate range */
87 if (l->l_class == SCHED_OTHER) {
88 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
89 return l->l_priority + delta;
90 }
91 if (policy == SCHED_OTHER) {
92 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
93 return l->l_priority - delta;
94 }
95 KASSERT(l->l_class != SCHED_OTHER && policy != SCHED_OTHER);
96 return l->l_class;
97 }
98
99 /*
100 * Set scheduling parameters.
101 */
102 int
103 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
104 register_t *retval)
105 {
106 /* {
107 syscallarg(pid_t) pid;
108 syscallarg(lwpid_t) lid;
109 syscallarg(const struct sched_param *) params;
110 } */
111 struct sched_param *sp;
112 struct proc *p;
113 struct lwp *t;
114 lwpid_t lid;
115 u_int lcnt;
116 int policy;
117 pri_t pri;
118 int error;
119
120 /* Available only for super-user */
121 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
122 return EPERM;
123
124 /* Get the parameters from the user-space */
125 sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
126 error = copyin(SCARG(uap, params), sp, sizeof(struct sched_param));
127 if (error) {
128 kmem_free(sp, sizeof(struct sched_param));
129 return error;
130 }
131 pri = sp->sched_priority;
132 policy = sp->sched_class;
133 kmem_free(sp, sizeof(struct sched_param));
134
135 /* If no parameters specified, just return (this should not happen) */
136 if (pri == PRI_NONE && policy == SCHED_NONE)
137 return 0;
138
139 /* Validate scheduling class */
140 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
141 return EINVAL;
142
143 /* Validate priority */
144 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
145 return EINVAL;
146
147 if (SCARG(uap, pid) != 0) {
148 /* Find the process */
149 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
150 if (p == NULL)
151 return ESRCH;
152 mutex_enter(&p->p_smutex);
153 mutex_exit(&proclist_lock);
154 /* Disallow modification of system processes */
155 if (p->p_flag & PK_SYSTEM) {
156 mutex_exit(&p->p_smutex);
157 return EPERM;
158 }
159 } else {
160 /* Use the calling process */
161 p = l->l_proc;
162 mutex_enter(&p->p_smutex);
163 }
164
165 /* Find the LWP(s) */
166 lcnt = 0;
167 lid = SCARG(uap, lid);
168 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
169 pri_t kpri;
170
171 if (lid && lid != t->l_lid)
172 continue;
173 KASSERT(pri != PRI_NONE || policy != SCHED_NONE);
174 lwp_lock(t);
175
176 /*
177 * Note that, priority may need to be changed to get into
178 * the correct priority range of the new scheduling class.
179 */
180 kpri = convert_pri(t, policy, pri);
181
182 /* Set the scheduling class */
183 if (policy != SCHED_NONE)
184 t->l_class = policy;
185
186 /* Change the priority */
187 if (t->l_priority != kpri)
188 lwp_changepri(t, kpri);
189
190 lwp_unlock(t);
191 lcnt++;
192 }
193 mutex_exit(&p->p_smutex);
194 return (lcnt == 0) ? ESRCH : error;
195 }
196
197 /*
198 * Get scheduling parameters.
199 */
200 int
201 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
202 register_t *retval)
203 {
204 /* {
205 syscallarg(pid_t) pid;
206 syscallarg(lwpid_t) lid;
207 syscallarg(struct sched_param *) params;
208 } */
209 struct sched_param *sp;
210 struct lwp *t;
211 lwpid_t lid;
212 int error;
213
214 sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
215
216 /* If not specified, use the first LWP */
217 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
218
219 if (SCARG(uap, pid) != 0) {
220 /* Locks the LWP */
221 t = lwp_find2(SCARG(uap, pid), lid);
222 } else {
223 struct proc *p = l->l_proc;
224 /* Use the calling process */
225 mutex_enter(&p->p_smutex);
226 t = lwp_find(p, lid);
227 if (t != NULL)
228 lwp_lock(t);
229 mutex_exit(&p->p_smutex);
230 }
231 if (t == NULL) {
232 kmem_free(sp, sizeof(struct sched_param));
233 return ESRCH;
234 }
235 sp->sched_priority = t->l_priority;
236 sp->sched_class = t->l_class;
237 lwp_unlock(t);
238
239 switch (sp->sched_class) {
240 case SCHED_OTHER:
241 sp->sched_priority -= PRI_USER;
242 break;
243 case SCHED_RR:
244 case SCHED_FIFO:
245 sp->sched_priority -= PRI_USER_RT;
246 break;
247 }
248 error = copyout(sp, SCARG(uap, params), sizeof(struct sched_param));
249 kmem_free(sp, sizeof(struct sched_param));
250 return error;
251 }
252
253 /*
254 * Set affinity.
255 */
256 int
257 sys__sched_setaffinity(struct lwp *l,
258 const struct sys__sched_setaffinity_args *uap, register_t *retval)
259 {
260 /* {
261 syscallarg(pid_t) pid;
262 syscallarg(lwpid_t) lid;
263 syscallarg(size_t) size;
264 syscallarg(void *) cpuset;
265 } */
266 cpuset_t *cpuset;
267 struct cpu_info *ci = NULL;
268 struct proc *p;
269 struct lwp *t;
270 CPU_INFO_ITERATOR cii;
271 lwpid_t lid;
272 u_int lcnt;
273 int error;
274
275 /* Available only for super-user */
276 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
277 return EPERM;
278
279 if (SCARG(uap, size) <= 0)
280 return EINVAL;
281
282 /* Allocate the CPU set, and get it from userspace */
283 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
284 error = copyin(SCARG(uap, cpuset), cpuset,
285 min(SCARG(uap, size), sizeof(cpuset_t)));
286 if (error)
287 goto error;
288
289 /* Look for a CPU in the set */
290 for (CPU_INFO_FOREACH(cii, ci))
291 if (CPU_ISSET(cpu_index(ci), cpuset))
292 break;
293 if (ci == NULL) {
294 /* Empty set */
295 kmem_free(cpuset, sizeof(cpuset_t));
296 cpuset = NULL;
297 }
298
299 if (SCARG(uap, pid) != 0) {
300 /* Find the process */
301 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
302 if (p == NULL) {
303 error = ESRCH;
304 goto error;
305 }
306 mutex_enter(&p->p_smutex);
307 mutex_exit(&proclist_lock);
308 } else {
309 /* Use the calling process */
310 p = l->l_proc;
311 mutex_enter(&p->p_smutex);
312 }
313
314 /* Disallow modification of system processes */
315 if (p->p_flag & PK_SYSTEM) {
316 mutex_exit(&p->p_smutex);
317 error = EPERM;
318 goto error;
319 }
320
321 /* Find the LWP(s) */
322 lcnt = 0;
323 lid = SCARG(uap, lid);
324 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
325 if (lid && lid != t->l_lid)
326 continue;
327 lwp_lock(t);
328 if (cpuset) {
329 /* Set the affinity flag and new CPU set */
330 t->l_flag |= LW_AFFINITY;
331 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
332 /* Migrate to another CPU, unlocks LWP */
333 lwp_migrate(t, ci);
334 } else {
335 /* Unset the affinity flag */
336 t->l_flag &= ~LW_AFFINITY;
337 lwp_unlock(t);
338 }
339 lcnt++;
340 }
341 mutex_exit(&p->p_smutex);
342 if (lcnt == 0)
343 error = ESRCH;
344 error:
345 if (cpuset != NULL)
346 kmem_free(cpuset, sizeof(cpuset_t));
347 return error;
348 }
349
350 /*
351 * Get affinity.
352 */
353 int
354 sys__sched_getaffinity(struct lwp *l,
355 const struct sys__sched_getaffinity_args *uap, register_t *retval)
356 {
357 /* {
358 syscallarg(pid_t) pid;
359 syscallarg(lwpid_t) lid;
360 syscallarg(size_t) size;
361 syscallarg(void *) cpuset;
362 } */
363 struct lwp *t;
364 void *cpuset;
365 lwpid_t lid;
366 int error;
367
368 if (SCARG(uap, size) <= 0)
369 return EINVAL;
370
371 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
372
373 /* If not specified, use the first LWP */
374 lid = SCARG(uap, lid) == 0 ? 1 : SCARG(uap, lid);
375
376 if (SCARG(uap, pid) != 0) {
377 /* Locks the LWP */
378 t = lwp_find2(SCARG(uap, pid), lid);
379 } else {
380 struct proc *p = l->l_proc;
381 /* Use the calling process */
382 mutex_enter(&p->p_smutex);
383 t = lwp_find(p, lid);
384 if (t != NULL)
385 lwp_lock(t);
386 mutex_exit(&p->p_smutex);
387 }
388 if (t == NULL) {
389 kmem_free(cpuset, sizeof(cpuset_t));
390 return ESRCH;
391 }
392 if (t->l_flag & LW_AFFINITY)
393 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
394 lwp_unlock(t);
395
396 error = copyout(cpuset, SCARG(uap, cpuset),
397 min(SCARG(uap, size), sizeof(cpuset_t)));
398
399 kmem_free(cpuset, sizeof(cpuset_t));
400 return error;
401 }
402
403 /*
404 * Yield.
405 */
406 int
407 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
408 {
409
410 yield();
411 return 0;
412 }
413
414 /*
415 * Sysctl nodes and initialization.
416 */
417 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
418 {
419 const struct sysctlnode *node = NULL;
420
421 sysctl_createv(clog, 0, NULL, NULL,
422 CTLFLAG_PERMANENT,
423 CTLTYPE_NODE, "kern", NULL,
424 NULL, 0, NULL, 0,
425 CTL_KERN, CTL_EOL);
426 sysctl_createv(clog, 0, NULL, NULL,
427 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
428 CTLTYPE_INT, "posix_sched",
429 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
430 "Process Scheduling option to which the "
431 "system attempts to conform"),
432 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
433 CTL_KERN, CTL_CREATE, CTL_EOL);
434 sysctl_createv(clog, 0, NULL, &node,
435 CTLFLAG_PERMANENT,
436 CTLTYPE_NODE, "sched",
437 SYSCTL_DESCR("Scheduler options"),
438 NULL, 0, NULL, 0,
439 CTL_KERN, CTL_CREATE, CTL_EOL);
440
441 if (node == NULL)
442 return;
443
444 sysctl_createv(clog, 0, &node, NULL,
445 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
446 CTLTYPE_INT, "pri_min",
447 SYSCTL_DESCR("Minimal POSIX real-time priority"),
448 NULL, SCHED_PRI_MIN, NULL, 0,
449 CTL_CREATE, CTL_EOL);
450 sysctl_createv(clog, 0, &node, NULL,
451 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
452 CTLTYPE_INT, "pri_max",
453 SYSCTL_DESCR("Minimal POSIX real-time priority"),
454 NULL, SCHED_PRI_MAX, NULL, 0,
455 CTL_CREATE, CTL_EOL);
456 }
457