/*	$NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $	*/
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * TODO:
31 * - Handle pthread_setschedprio() as defined by POSIX;
32 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $");
37
38 #include <sys/param.h>
39
40 #include <sys/cpu.h>
41 #include <sys/kauth.h>
42 #include <sys/kmem.h>
43 #include <sys/lwp.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/pset.h>
47 #include <sys/sched.h>
48 #include <sys/syscallargs.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/types.h>
52 #include <sys/unistd.h>
53
54 /*
55 * Set scheduling parameters.
56 */
57 int
58 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
59 register_t *retval)
60 {
61 /* {
62 syscallarg(pid_t) pid;
63 syscallarg(lwpid_t) lid;
64 syscallarg(const struct sched_param *) params;
65 } */
66 struct sched_param *sp;
67 struct proc *p;
68 struct lwp *t;
69 pid_t pid;
70 lwpid_t lid;
71 u_int lcnt;
72 pri_t pri;
73 int error;
74
75 /* Available only for super-user */
76 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
77 return EACCES;
78
79 /* Get the parameters from the user-space */
80 sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
81 error = copyin(SCARG(uap, params), sp, sizeof(struct sched_param));
82 if (error)
83 goto error;
84
85 /*
86 * Validate scheduling class and priority.
87 * Convert the user priority to the in-kernel value.
88 */
89 pri = sp->sched_priority;
90 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) {
91 error = EINVAL;
92 goto error;
93 }
94 switch (sp->sched_class) {
95 case SCHED_OTHER:
96 if (pri == PRI_NONE)
97 pri = PRI_USER;
98 else
99 pri += PRI_USER;
100 break;
101 case SCHED_RR:
102 case SCHED_FIFO:
103 if (pri == PRI_NONE)
104 pri = PRI_USER_RT;
105 else
106 pri += PRI_USER_RT;
107 break;
108 case SCHED_NONE:
109 break;
110 default:
111 error = EINVAL;
112 goto error;
113 }
114
115 /* Find the process */
116 pid = SCARG(uap, pid);
117 p = p_find(pid, PFIND_UNLOCK_FAIL);
118 if (p == NULL) {
119 error = ESRCH;
120 goto error;
121 }
122 mutex_enter(&p->p_smutex);
123 mutex_exit(&proclist_lock);
124
125 /* Disallow modification of system processes */
126 if (p->p_flag & PK_SYSTEM) {
127 mutex_exit(&p->p_smutex);
128 error = EACCES;
129 goto error;
130 }
131
132 /* Find the LWP(s) */
133 lcnt = 0;
134 lid = SCARG(uap, lid);
135 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
136 bool chpri;
137
138 if (lid && lid != t->l_lid)
139 continue;
140
141 /* Set the scheduling class */
142 lwp_lock(t);
143 if (sp->sched_class != SCHED_NONE) {
144 /*
145 * Priority must be changed to get into the correct
146 * priority range of the new scheduling class.
147 */
148 chpri = (t->l_class != sp->sched_class);
149 t->l_class = sp->sched_class;
150 } else
151 chpri = false;
152
153 /* Change the priority */
154 if (sp->sched_priority != PRI_NONE || chpri)
155 lwp_changepri(t, pri);
156
157 lwp_unlock(t);
158 lcnt++;
159 }
160 mutex_exit(&p->p_smutex);
161 if (lcnt != 0)
162 *retval = lcnt;
163 else
164 error = ESRCH;
165 error:
166 kmem_free(sp, sizeof(struct sched_param));
167 return error;
168 }
169
170 /*
171 * Get scheduling parameters.
172 */
173 int
174 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
175 register_t *retval)
176 {
177 /* {
178 syscallarg(pid_t) pid;
179 syscallarg(lwpid_t) lid;
180 syscallarg(struct sched_param *) params;
181 } */
182 struct sched_param *sp;
183 struct lwp *t;
184 int error;
185
186 sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP);
187
188 /* Locks the LWP */
189 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
190 if (t == NULL) {
191 kmem_free(sp, sizeof(struct sched_param));
192 return ESRCH;
193 }
194 sp->sched_priority = t->l_priority;
195 sp->sched_class = t->l_class;
196 lwp_unlock(t);
197
198 switch (sp->sched_class) {
199 case SCHED_OTHER:
200 sp->sched_priority -= PRI_USER;
201 break;
202 case SCHED_RR:
203 case SCHED_FIFO:
204 sp->sched_priority -= PRI_USER_RT;
205 break;
206 }
207 error = copyout(sp, SCARG(uap, params), sizeof(struct sched_param));
208 kmem_free(sp, sizeof(struct sched_param));
209 return error;
210 }
211
212 /*
213 * Set affinity.
214 */
215 int
216 sys__sched_setaffinity(struct lwp *l,
217 const struct sys__sched_setaffinity_args *uap, register_t *retval)
218 {
219 /* {
220 syscallarg(pid_t) pid;
221 syscallarg(lwpid_t) lid;
222 syscallarg(size_t) size;
223 syscallarg(void *) cpuset;
224 } */
225 cpuset_t *cpuset;
226 struct cpu_info *ci = NULL;
227 struct proc *p;
228 struct lwp *t;
229 CPU_INFO_ITERATOR cii;
230 lwpid_t lid;
231 u_int lcnt;
232 int error;
233
234 /* Available only for super-user */
235 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
236 return EACCES;
237
238 if (SCARG(uap, size) <= 0)
239 return EINVAL;
240
241 /* Allocate the CPU set, and get it from userspace */
242 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
243 error = copyin(SCARG(uap, cpuset), cpuset,
244 min(SCARG(uap, size), sizeof(cpuset_t)));
245 if (error)
246 goto error;
247
248 /* Look for a CPU in the set */
249 for (CPU_INFO_FOREACH(cii, ci))
250 if (CPU_ISSET(cpu_index(ci), cpuset))
251 break;
252 if (ci == NULL) {
253 /* Empty set */
254 kmem_free(cpuset, sizeof(cpuset_t));
255 cpuset = NULL;
256 }
257
258 /* Find the process */
259 p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL);
260 if (p == NULL) {
261 error = ESRCH;
262 goto error;
263 }
264 mutex_enter(&p->p_smutex);
265 mutex_exit(&proclist_lock);
266
267 /* Disallow modification of system processes */
268 if (p->p_flag & PK_SYSTEM) {
269 mutex_exit(&p->p_smutex);
270 error = EACCES;
271 goto error;
272 }
273
274 /* Find the LWP(s) */
275 lcnt = 0;
276 lid = SCARG(uap, lid);
277 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
278 if (lid && lid != t->l_lid)
279 continue;
280 lwp_lock(t);
281 if (cpuset) {
282 /* Set the affinity flag and new CPU set */
283 t->l_flag |= LW_AFFINITY;
284 memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t));
285 /* Migrate to another CPU, unlocks LWP */
286 lwp_migrate(t, ci);
287 } else {
288 /* Unset the affinity flag */
289 t->l_flag &= ~LW_AFFINITY;
290 lwp_unlock(t);
291 }
292 lcnt++;
293 }
294 mutex_exit(&p->p_smutex);
295 if (lcnt == 0)
296 error = ESRCH;
297 else
298 *retval = lcnt;
299 error:
300 if (cpuset != NULL)
301 kmem_free(cpuset, sizeof(cpuset_t));
302 return error;
303 }
304
305 /*
306 * Get affinity.
307 */
308 int
309 sys__sched_getaffinity(struct lwp *l,
310 const struct sys__sched_getaffinity_args *uap, register_t *retval)
311 {
312 /* {
313 syscallarg(pid_t) pid;
314 syscallarg(lwpid_t) lid;
315 syscallarg(size_t) size;
316 syscallarg(void *) cpuset;
317 } */
318 struct lwp *t;
319 void *cpuset;
320 int error;
321
322 if (SCARG(uap, size) <= 0)
323 return EINVAL;
324
325 cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP);
326
327 /* Locks the LWP */
328 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
329 if (t == NULL) {
330 kmem_free(cpuset, sizeof(cpuset_t));
331 return ESRCH;
332 }
333 if (t->l_flag & LW_AFFINITY)
334 memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t));
335 lwp_unlock(t);
336
337 error = copyout(cpuset, SCARG(uap, cpuset),
338 min(SCARG(uap, size), sizeof(cpuset_t)));
339
340 kmem_free(cpuset, sizeof(cpuset_t));
341 return error;
342 }
343
344 /*
345 * Yield.
346 */
347 int
348 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
349 {
350
351 yield();
352 return 0;
353 }
354
355 /*
356 * Sysctl nodes and initialization.
357 */
358 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
359 {
360 const struct sysctlnode *node = NULL;
361
362 sysctl_createv(clog, 0, NULL, NULL,
363 CTLFLAG_PERMANENT,
364 CTLTYPE_NODE, "kern", NULL,
365 NULL, 0, NULL, 0,
366 CTL_KERN, CTL_EOL);
367 sysctl_createv(clog, 0, NULL, NULL,
368 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
369 CTLTYPE_INT, "posix_sched",
370 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
371 "Process Scheduling option to which the "
372 "system attempts to conform"),
373 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
374 CTL_KERN, CTL_CREATE, CTL_EOL);
375 sysctl_createv(clog, 0, NULL, &node,
376 CTLFLAG_PERMANENT,
377 CTLTYPE_NODE, "sched",
378 SYSCTL_DESCR("Scheduler options"),
379 NULL, 0, NULL, 0,
380 CTL_KERN, CTL_CREATE, CTL_EOL);
381
382 if (node == NULL)
383 return;
384
385 sysctl_createv(clog, 0, &node, NULL,
386 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
387 CTLTYPE_INT, "pri_min",
388 SYSCTL_DESCR("Minimal POSIX real-time priority"),
389 NULL, SCHED_PRI_MIN, NULL, 0,
390 CTL_CREATE, CTL_EOL);
391 sysctl_createv(clog, 0, &node, NULL,
392 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
393 CTLTYPE_INT, "pri_max",
394 SYSCTL_DESCR("Minimal POSIX real-time priority"),
395 NULL, SCHED_PRI_MAX, NULL, 0,
396 CTL_CREATE, CTL_EOL);
397 }
398