sys_pset.c revision 1.5 1 /* $NetBSD: sys_pset.c,v 1.5 2008/04/24 15:35:30 ad Exp $ */
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global psets_lock.  Note that the scheduler may read an LWP's
 *  l_psid value without holding that lock.
 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.5 2008/04/24 15:35:30 ad Exp $");
40
41 #include <sys/param.h>
42
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55
56 static pset_info_t ** psets;
57 static kmutex_t psets_lock;
58 static u_int psets_max;
59 static u_int psets_count;
60
61 static int psets_realloc(int);
62 static int psid_validate(psetid_t, bool);
63 static int kern_pset_create(psetid_t *);
64 static int kern_pset_destroy(psetid_t);
65
66 /*
67 * Initialization of the processor-sets.
68 */
69 void
70 psets_init(void)
71 {
72
73 psets_max = max(MAXCPUS, 32);
74 psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
75 mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE);
76 psets_count = 0;
77 }
78
79 /*
80 * Reallocate the array of the processor-set structures.
81 */
82 static int
83 psets_realloc(int new_psets_max)
84 {
85 pset_info_t **new_psets, **old_psets;
86 const u_int newsize = new_psets_max * sizeof(void *);
87 u_int i, oldsize;
88
89 if (new_psets_max < 1)
90 return EINVAL;
91
92 new_psets = kmem_zalloc(newsize, KM_SLEEP);
93 mutex_enter(&psets_lock);
94 old_psets = psets;
95 oldsize = psets_max * sizeof(void *);
96
97 /* Check if we can lower the size of the array */
98 if (new_psets_max < psets_max) {
99 for (i = new_psets_max; i < psets_max; i++) {
100 if (psets[i] == NULL)
101 continue;
102 mutex_exit(&psets_lock);
103 kmem_free(new_psets, newsize);
104 return EBUSY;
105 }
106 }
107
108 /* Copy all pointers to the new array */
109 memcpy(new_psets, psets, newsize);
110 psets_max = new_psets_max;
111 psets = new_psets;
112 mutex_exit(&psets_lock);
113
114 kmem_free(old_psets, oldsize);
115 return 0;
116 }
117
118 /*
119 * Validate processor-set ID.
120 */
121 static int
122 psid_validate(psetid_t psid, bool chkps)
123 {
124
125 KASSERT(mutex_owned(&psets_lock));
126
127 if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
128 return 0;
129 if (psid <= 0 || psid > psets_max)
130 return EINVAL;
131 if (psets[psid - 1] == NULL)
132 return EINVAL;
133 if (psets[psid - 1]->ps_flags & PSET_BUSY)
134 return EBUSY;
135
136 return 0;
137 }
138
139 /*
140 * Create a processor-set.
141 */
142 static int
143 kern_pset_create(psetid_t *psid)
144 {
145 pset_info_t *pi;
146 u_int i;
147
148 if (psets_count == psets_max)
149 return ENOMEM;
150
151 pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
152
153 mutex_enter(&psets_lock);
154 if (psets_count == psets_max) {
155 mutex_exit(&psets_lock);
156 kmem_free(pi, sizeof(pset_info_t));
157 return ENOMEM;
158 }
159
160 /* Find a free entry in the array */
161 for (i = 0; i < psets_max; i++)
162 if (psets[i] == NULL)
163 break;
164 KASSERT(i != psets_max);
165
166 psets[i] = pi;
167 psets_count++;
168 mutex_exit(&psets_lock);
169
170 *psid = i + 1;
171 return 0;
172 }
173
174 /*
175 * Destroy a processor-set.
176 */
177 static int
178 kern_pset_destroy(psetid_t psid)
179 {
180 struct cpu_info *ci;
181 pset_info_t *pi;
182 struct lwp *l;
183 CPU_INFO_ITERATOR cii;
184 int error;
185
186 mutex_enter(&psets_lock);
187 if (psid == PS_MYID) {
188 /* Use caller's processor-set ID */
189 psid = curlwp->l_psid;
190 }
191 error = psid_validate(psid, false);
192 if (error) {
193 mutex_exit(&psets_lock);
194 return error;
195 }
196
197 /* Release the processor-set from all CPUs */
198 for (CPU_INFO_FOREACH(cii, ci)) {
199 struct schedstate_percpu *spc;
200
201 spc = &ci->ci_schedstate;
202 if (spc->spc_psid != psid)
203 continue;
204 spc->spc_psid = PS_NONE;
205 }
206 /* Mark that processor-set is going to be destroyed */
207 pi = psets[psid - 1];
208 pi->ps_flags |= PSET_BUSY;
209 mutex_exit(&psets_lock);
210
211 /* Unmark the processor-set ID from each thread */
212 mutex_enter(proc_lock);
213 LIST_FOREACH(l, &alllwp, l_list) {
214 /* Safe to check and set without lock held */
215 if (l->l_psid != psid)
216 continue;
217 l->l_psid = PS_NONE;
218 }
219 mutex_exit(proc_lock);
220
221 /* Destroy the processor-set */
222 mutex_enter(&psets_lock);
223 psets[psid - 1] = NULL;
224 psets_count--;
225 mutex_exit(&psets_lock);
226
227 kmem_free(pi, sizeof(pset_info_t));
228 return 0;
229 }
230
231 /*
232 * General system calls for the processor-sets.
233 */
234
235 int
236 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
237 register_t *retval)
238 {
239 /* {
240 syscallarg(psetid_t) *psid;
241 } */
242 psetid_t psid;
243 int error;
244
245 /* Available only for super-user */
246 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
247 KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
248 return EPERM;
249
250 error = kern_pset_create(&psid);
251 if (error)
252 return error;
253
254 error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
255 if (error)
256 (void)kern_pset_destroy(psid);
257
258 return error;
259 }
260
261 int
262 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
263 register_t *retval)
264 {
265 /* {
266 syscallarg(psetid_t) psid;
267 } */
268
269 /* Available only for super-user */
270 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
271 KAUTH_REQ_SYSTEM_PSET_DESTROY,
272 KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
273 return EPERM;
274
275 return kern_pset_destroy(SCARG(uap, psid));
276 }
277
278 int
279 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
280 register_t *retval)
281 {
282 /* {
283 syscallarg(psetid_t) psid;
284 syscallarg(cpuid_t) cpuid;
285 syscallarg(psetid_t) *opsid;
286 } */
287 struct cpu_info *ci;
288 struct schedstate_percpu *spc;
289 psetid_t psid = SCARG(uap, psid), opsid = 0;
290 CPU_INFO_ITERATOR cii;
291 int error = 0;
292
293 /* Available only for super-user, except the case of PS_QUERY */
294 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
295 KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
296 NULL))
297 return EPERM;
298
299 /* Find the target CPU */
300 for (CPU_INFO_FOREACH(cii, ci))
301 if (cpu_index(ci) == SCARG(uap, cpuid))
302 break;
303 if (ci == NULL)
304 return EINVAL;
305 spc = &ci->ci_schedstate;
306
307 mutex_enter(&psets_lock);
308 error = psid_validate(psid, true);
309 if (error) {
310 mutex_exit(&psets_lock);
311 return error;
312 }
313 opsid = spc->spc_psid;
314 switch (psid) {
315 case PS_QUERY:
316 break;
317 case PS_MYID:
318 psid = curlwp->l_psid;
319 default:
320 spc->spc_psid = psid;
321 }
322 mutex_exit(&psets_lock);
323
324 if (SCARG(uap, opsid) != NULL)
325 error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
326
327 return error;
328 }
329
330 int
331 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
332 register_t *retval)
333 {
334 /* {
335 syscallarg(idtype_t) idtype;
336 syscallarg(id_t) first_id;
337 syscallarg(id_t) second_id;
338 syscallarg(psetid_t) psid;
339 syscallarg(psetid_t) *opsid;
340 } */
341 struct cpu_info *ci;
342 struct proc *p;
343 struct lwp *t;
344 id_t id1, id2;
345 pid_t pid = 0;
346 lwpid_t lid = 0;
347 psetid_t psid, opsid;
348 int error = 0, lcnt;
349
350 psid = SCARG(uap, psid);
351
352 /* Available only for super-user, except the case of PS_QUERY */
353 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
354 KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
355 NULL))
356 return EPERM;
357
358 mutex_enter(&psets_lock);
359 error = psid_validate(psid, true);
360 if (error) {
361 mutex_exit(&psets_lock);
362 return error;
363 }
364 if (psid == PS_MYID)
365 psid = curlwp->l_psid;
366 if (psid != PS_QUERY && psid != PS_NONE)
367 psets[psid - 1]->ps_flags |= PSET_BUSY;
368 mutex_exit(&psets_lock);
369
370 /*
371 * Get PID and LID from the ID.
372 */
373 p = l->l_proc;
374 id1 = SCARG(uap, first_id);
375 id2 = SCARG(uap, second_id);
376
377 switch (SCARG(uap, idtype)) {
378 case P_PID:
379 /*
380 * Process:
381 * First ID - PID;
382 * Second ID - ignored;
383 */
384 pid = (id1 == P_MYID) ? p->p_pid : id1;
385 lid = 0;
386 break;
387 case P_LWPID:
388 /*
389 * Thread (LWP):
390 * First ID - LID;
391 * Second ID - PID;
392 */
393 if (id1 == P_MYID) {
394 pid = p->p_pid;
395 lid = l->l_lid;
396 break;
397 }
398 lid = id1;
399 pid = (id2 == P_MYID) ? p->p_pid : id2;
400 break;
401 default:
402 error = EINVAL;
403 goto error;
404 }
405
406 /* Find the process */
407 mutex_enter(proc_lock);
408 p = p_find(pid, PFIND_LOCKED);
409 if (p == NULL) {
410 mutex_exit(proc_lock);
411 error = ESRCH;
412 goto error;
413 }
414 mutex_enter(&p->p_smutex);
415 mutex_exit(proc_lock);
416
417 /* Disallow modification of the system processes */
418 if (p->p_flag & PK_SYSTEM) {
419 mutex_exit(&p->p_smutex);
420 error = EPERM;
421 goto error;
422 }
423
424 /* Find the LWP(s) */
425 lcnt = 0;
426 ci = NULL;
427 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
428 if (lid && lid != t->l_lid)
429 continue;
430 /*
431 * Bind the thread to the processor-set,
432 * take some CPU and migrate.
433 */
434 lwp_lock(t);
435 opsid = t->l_psid;
436 t->l_psid = psid;
437 ci = sched_takecpu(l);
438 /* Unlocks LWP */
439 lwp_migrate(t, ci);
440 lcnt++;
441 }
442 mutex_exit(&p->p_smutex);
443 if (lcnt == 0) {
444 error = ESRCH;
445 goto error;
446 }
447 if (SCARG(uap, opsid))
448 error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
449 error:
450 if (psid != PS_QUERY && psid != PS_NONE) {
451 mutex_enter(&psets_lock);
452 psets[psid - 1]->ps_flags &= ~PSET_BUSY;
453 mutex_exit(&psets_lock);
454 }
455 return error;
456 }
457
458 /*
459 * Sysctl nodes and initialization.
460 */
461
462 static int
463 sysctl_psets_max(SYSCTLFN_ARGS)
464 {
465 struct sysctlnode node;
466 int error, newsize;
467
468 node = *rnode;
469 node.sysctl_data = &newsize;
470
471 newsize = psets_max;
472 error = sysctl_lookup(SYSCTLFN_CALL(&node));
473 if (error || newp == NULL)
474 return error;
475
476 if (newsize <= 0)
477 return EINVAL;
478
479 sysctl_unlock();
480 error = psets_realloc(newsize);
481 sysctl_relock();
482 return error;
483 }
484
485 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
486 {
487 const struct sysctlnode *node = NULL;
488
489 sysctl_createv(clog, 0, NULL, NULL,
490 CTLFLAG_PERMANENT,
491 CTLTYPE_NODE, "kern", NULL,
492 NULL, 0, NULL, 0,
493 CTL_KERN, CTL_EOL);
494 sysctl_createv(clog, 0, NULL, &node,
495 CTLFLAG_PERMANENT,
496 CTLTYPE_NODE, "pset",
497 SYSCTL_DESCR("Processor-set options"),
498 NULL, 0, NULL, 0,
499 CTL_KERN, CTL_CREATE, CTL_EOL);
500
501 if (node == NULL)
502 return;
503
504 sysctl_createv(clog, 0, &node, NULL,
505 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
506 CTLTYPE_INT, "psets_max",
507 SYSCTL_DESCR("Maximal count of the processor-sets"),
508 sysctl_psets_max, 0, &psets_max, 0,
509 CTL_CREATE, CTL_EOL);
510 }
511