/*	$NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $	*/
2
3 /*
4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
/*
 * Implementation of the Processor Sets.
 *
 * Locking
 *  The array of the processor-set structures and its members are protected
 *  by the global psets_lock.  Note that the scheduler may read the l_psid
 *  value of an LWP without holding this lock.
 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $");
40
41 #include <sys/param.h>
42
43 #include <sys/cpu.h>
44 #include <sys/kauth.h>
45 #include <sys/kmem.h>
46 #include <sys/lwp.h>
47 #include <sys/mutex.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/sched.h>
51 #include <sys/syscallargs.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55
56 static pset_info_t ** psets;
57 static kmutex_t psets_lock;
58 static u_int psets_max;
59 static u_int psets_count;
60
61 static int psets_realloc(int);
62 static int psid_validate(psetid_t, bool);
63 static int kern_pset_create(psetid_t *);
64 static int kern_pset_destroy(psetid_t);
65
66 /*
67 * Initialization of the processor-sets.
68 */
69 void
70 psets_init(void)
71 {
72
73 psets_max = max(MAXCPUS, 32);
74 psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
75 mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE);
76 psets_count = 0;
77 }
78
79 /*
80 * Reallocate the array of the processor-set structures.
81 */
82 static int
83 psets_realloc(int new_psets_max)
84 {
85 pset_info_t **new_psets, **old_psets;
86 const u_int newsize = new_psets_max * sizeof(void *);
87 u_int i, oldsize;
88
89 if (new_psets_max < 1)
90 return EINVAL;
91
92 new_psets = kmem_zalloc(newsize, KM_SLEEP);
93 mutex_enter(&psets_lock);
94 old_psets = psets;
95 oldsize = psets_max * sizeof(void *);
96
97 /* Check if we can lower the size of the array */
98 if (new_psets_max < psets_max) {
99 for (i = new_psets_max; i < psets_max; i++) {
100 if (psets[i] == NULL)
101 continue;
102 mutex_exit(&psets_lock);
103 kmem_free(new_psets, newsize);
104 return EBUSY;
105 }
106 }
107
108 /* Copy all pointers to the new array */
109 memcpy(new_psets, psets, newsize);
110 psets_max = new_psets_max;
111 psets = new_psets;
112 mutex_exit(&psets_lock);
113
114 kmem_free(old_psets, oldsize);
115 return 0;
116 }
117
118 /*
119 * Validate processor-set ID.
120 */
121 static int
122 psid_validate(psetid_t psid, bool chkps)
123 {
124
125 KASSERT(mutex_owned(&psets_lock));
126
127 if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
128 return 0;
129 if (psid <= 0 || psid > psets_max)
130 return EINVAL;
131 if (psets[psid - 1] == NULL)
132 return EINVAL;
133 if (psets[psid - 1]->ps_flags & PSET_BUSY)
134 return EBUSY;
135
136 return 0;
137 }
138
139 /*
140 * Create a processor-set.
141 */
142 static int
143 kern_pset_create(psetid_t *psid)
144 {
145 pset_info_t *pi;
146 u_int i;
147
148 if (psets_count == psets_max)
149 return ENOMEM;
150
151 pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
152
153 mutex_enter(&psets_lock);
154 if (psets_count == psets_max) {
155 mutex_exit(&psets_lock);
156 kmem_free(pi, sizeof(pset_info_t));
157 return ENOMEM;
158 }
159
160 /* Find a free entry in the array */
161 for (i = 0; i < psets_max; i++)
162 if (psets[i] == NULL)
163 break;
164 KASSERT(i != psets_max);
165
166 psets[i] = pi;
167 psets_count++;
168 mutex_exit(&psets_lock);
169
170 *psid = i + 1;
171 return 0;
172 }
173
174 /*
175 * Destroy a processor-set.
176 */
177 static int
178 kern_pset_destroy(psetid_t psid)
179 {
180 struct cpu_info *ci;
181 pset_info_t *pi;
182 struct lwp *l;
183 CPU_INFO_ITERATOR cii;
184 int error;
185
186 mutex_enter(&psets_lock);
187 if (psid == PS_MYID) {
188 /* Use caller's processor-set ID */
189 psid = curlwp->l_psid;
190 }
191 error = psid_validate(psid, false);
192 if (error) {
193 mutex_exit(&psets_lock);
194 return error;
195 }
196
197 /* Release the processor-set from all CPUs */
198 for (CPU_INFO_FOREACH(cii, ci)) {
199 struct schedstate_percpu *spc;
200
201 spc = &ci->ci_schedstate;
202 if (spc->spc_psid != psid)
203 continue;
204 spc->spc_psid = PS_NONE;
205 }
206 /* Mark that processor-set is going to be destroyed */
207 pi = psets[psid - 1];
208 pi->ps_flags |= PSET_BUSY;
209 mutex_exit(&psets_lock);
210
211 /* Unmark the processor-set ID from each thread */
212 mutex_enter(&proclist_lock);
213 LIST_FOREACH(l, &alllwp, l_list) {
214 /* Safe to check and set without lock held */
215 if (l->l_psid != psid)
216 continue;
217 l->l_psid = PS_NONE;
218 }
219 mutex_exit(&proclist_lock);
220
221 /* Destroy the processor-set */
222 mutex_enter(&psets_lock);
223 psets[psid - 1] = NULL;
224 psets_count--;
225 mutex_exit(&psets_lock);
226
227 kmem_free(pi, sizeof(pset_info_t));
228 return 0;
229 }
230
231 /*
232 * General system calls for the processor-sets.
233 */
234
235 int
236 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
237 register_t *retval)
238 {
239 /* {
240 syscallarg(psetid_t) *psid;
241 } */
242 psetid_t psid;
243 int error;
244
245 /* Available only for super-user */
246 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
247 return EPERM;
248
249 error = kern_pset_create(&psid);
250 if (error)
251 return error;
252
253 error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
254 if (error)
255 (void)kern_pset_destroy(psid);
256
257 return error;
258 }
259
260 int
261 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
262 register_t *retval)
263 {
264 /* {
265 syscallarg(psetid_t) psid;
266 } */
267
268 /* Available only for super-user */
269 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
270 return EPERM;
271
272 return kern_pset_destroy(SCARG(uap, psid));
273 }
274
275 int
276 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
277 register_t *retval)
278 {
279 /* {
280 syscallarg(psetid_t) psid;
281 syscallarg(cpuid_t) cpuid;
282 syscallarg(psetid_t) *opsid;
283 } */
284 struct cpu_info *ci;
285 struct schedstate_percpu *spc;
286 psetid_t psid = SCARG(uap, psid), opsid = 0;
287 CPU_INFO_ITERATOR cii;
288 int error = 0;
289
290 /* Available only for super-user, except the case of PS_QUERY */
291 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) &&
292 psid != PS_QUERY)
293 return EPERM;
294
295 /* Find the target CPU */
296 for (CPU_INFO_FOREACH(cii, ci))
297 if (cpu_index(ci) == SCARG(uap, cpuid))
298 break;
299 if (ci == NULL)
300 return EINVAL;
301 spc = &ci->ci_schedstate;
302
303 mutex_enter(&psets_lock);
304 error = psid_validate(psid, true);
305 if (error) {
306 mutex_exit(&psets_lock);
307 return error;
308 }
309 opsid = spc->spc_psid;
310 switch (psid) {
311 case PS_QUERY:
312 break;
313 case PS_MYID:
314 psid = curlwp->l_psid;
315 default:
316 spc->spc_psid = psid;
317 }
318 mutex_exit(&psets_lock);
319
320 if (SCARG(uap, opsid) != NULL)
321 error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
322
323 return error;
324 }
325
326 int
327 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
328 register_t *retval)
329 {
330 /* {
331 syscallarg(idtype_t) idtype;
332 syscallarg(id_t) first_id;
333 syscallarg(id_t) second_id;
334 syscallarg(psetid_t) psid;
335 syscallarg(psetid_t) *opsid;
336 } */
337 struct cpu_info *ci;
338 struct proc *p;
339 struct lwp *t;
340 id_t id1, id2;
341 pid_t pid = 0;
342 lwpid_t lid = 0;
343 psetid_t psid, opsid;
344 int error = 0, lcnt;
345
346 psid = SCARG(uap, psid);
347
348 /* Available only for super-user, except the case of PS_QUERY */
349 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) &&
350 psid != PS_QUERY)
351 return EPERM;
352
353 mutex_enter(&psets_lock);
354 error = psid_validate(psid, true);
355 if (error) {
356 mutex_exit(&psets_lock);
357 return error;
358 }
359 if (psid == PS_MYID)
360 psid = curlwp->l_psid;
361 if (psid != PS_QUERY && psid != PS_NONE)
362 psets[psid - 1]->ps_flags |= PSET_BUSY;
363 mutex_exit(&psets_lock);
364
365 /*
366 * Get PID and LID from the ID.
367 */
368 p = l->l_proc;
369 id1 = SCARG(uap, first_id);
370 id2 = SCARG(uap, second_id);
371
372 switch (SCARG(uap, idtype)) {
373 case P_PID:
374 /*
375 * Process:
376 * First ID - PID;
377 * Second ID - ignored;
378 */
379 pid = (id1 == P_MYID) ? p->p_pid : id1;
380 lid = 0;
381 break;
382 case P_LWPID:
383 /*
384 * Thread (LWP):
385 * First ID - LID;
386 * Second ID - PID;
387 */
388 if (id1 == P_MYID) {
389 pid = p->p_pid;
390 lid = l->l_lid;
391 break;
392 }
393 lid = id1;
394 pid = (id2 == P_MYID) ? p->p_pid : id2;
395 break;
396 default:
397 return EINVAL;
398 }
399
400 /* Find the process */
401 p = p_find(pid, PFIND_UNLOCK_FAIL);
402 if (p == NULL) {
403 error = ESRCH;
404 goto error;
405 }
406 mutex_enter(&p->p_smutex);
407 mutex_exit(&proclist_lock);
408
409 /* Disallow modification of the system processes */
410 if (p->p_flag & PK_SYSTEM) {
411 mutex_exit(&p->p_smutex);
412 error = EPERM;
413 goto error;
414 }
415
416 /* Find the LWP(s) */
417 lcnt = 0;
418 ci = NULL;
419 LIST_FOREACH(t, &p->p_lwps, l_sibling) {
420 if (lid && lid != t->l_lid)
421 continue;
422 /*
423 * Bind the thread to the processor-set,
424 * take some CPU and migrate.
425 */
426 lwp_lock(t);
427 opsid = t->l_psid;
428 t->l_psid = psid;
429 ci = sched_takecpu(l);
430 /* Unlocks LWP */
431 lwp_migrate(t, ci);
432 lcnt++;
433 }
434 mutex_exit(&p->p_smutex);
435 if (lcnt == 0) {
436 error = ESRCH;
437 goto error;
438 }
439 *retval = lcnt;
440 if (SCARG(uap, opsid))
441 error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
442 error:
443 if (psid != PS_QUERY && psid != PS_NONE) {
444 mutex_enter(&psets_lock);
445 psets[psid - 1]->ps_flags &= ~PSET_BUSY;
446 mutex_exit(&psets_lock);
447 }
448 return error;
449 }
450
451 /*
452 * Sysctl nodes and initialization.
453 */
454
455 static int
456 sysctl_psets_max(SYSCTLFN_ARGS)
457 {
458 struct sysctlnode node;
459 int error, newsize;
460
461 node = *rnode;
462 node.sysctl_data = &newsize;
463
464 newsize = psets_max;
465 error = sysctl_lookup(SYSCTLFN_CALL(&node));
466 if (error || newp == NULL)
467 return error;
468
469 if (newsize <= 0)
470 return EINVAL;
471
472 sysctl_unlock();
473 error = psets_realloc(newsize);
474 sysctl_relock();
475 return error;
476 }
477
478 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
479 {
480 const struct sysctlnode *node = NULL;
481
482 sysctl_createv(clog, 0, NULL, NULL,
483 CTLFLAG_PERMANENT,
484 CTLTYPE_NODE, "kern", NULL,
485 NULL, 0, NULL, 0,
486 CTL_KERN, CTL_EOL);
487 sysctl_createv(clog, 0, NULL, &node,
488 CTLFLAG_PERMANENT,
489 CTLTYPE_NODE, "pset",
490 SYSCTL_DESCR("Processor-set options"),
491 NULL, 0, NULL, 0,
492 CTL_KERN, CTL_CREATE, CTL_EOL);
493
494 if (node == NULL)
495 return;
496
497 sysctl_createv(clog, 0, &node, NULL,
498 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
499 CTLTYPE_INT, "psets_max",
500 SYSCTL_DESCR("Maximal count of the processor-sets"),
501 sysctl_psets_max, 0, &psets_max, 0,
502 CTL_CREATE, CTL_EOL);
503 }
504