Home | History | Annotate | Line # | Download | only in kern
sys_pset.c revision 1.6
      1 /*	$NetBSD: sys_pset.c,v 1.6 2008/04/24 18:39:24 ad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Implementation of the Processor Sets.
     31  *
     32  * Locking
 *  The array of processor-set structures and its members are protected
 *  by the global psets_lock.  Note that the scheduler may read an LWP's
 *  l_psid value without holding the lock.
     36  */
     37 
     38 #include <sys/cdefs.h>
     39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.6 2008/04/24 18:39:24 ad Exp $");
     40 
     41 #include <sys/param.h>
     42 
     43 #include <sys/cpu.h>
     44 #include <sys/kauth.h>
     45 #include <sys/kmem.h>
     46 #include <sys/lwp.h>
     47 #include <sys/mutex.h>
     48 #include <sys/proc.h>
     49 #include <sys/pset.h>
     50 #include <sys/sched.h>
     51 #include <sys/syscallargs.h>
     52 #include <sys/sysctl.h>
     53 #include <sys/systm.h>
     54 #include <sys/types.h>
     55 
     56 static pset_info_t **	psets;
     57 static kmutex_t		psets_lock;
     58 static u_int		psets_max;
     59 static u_int		psets_count;
     60 
     61 static int	psets_realloc(int);
     62 static int	psid_validate(psetid_t, bool);
     63 static int	kern_pset_create(psetid_t *);
     64 static int	kern_pset_destroy(psetid_t);
     65 
     66 /*
     67  * Initialization of the processor-sets.
     68  */
     69 void
     70 psets_init(void)
     71 {
     72 
     73 	psets_max = max(MAXCPUS, 32);
     74 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
     75 	mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE);
     76 	psets_count = 0;
     77 }
     78 
     79 /*
     80  * Reallocate the array of the processor-set structures.
     81  */
     82 static int
     83 psets_realloc(int new_psets_max)
     84 {
     85 	pset_info_t **new_psets, **old_psets;
     86 	const u_int newsize = new_psets_max * sizeof(void *);
     87 	u_int i, oldsize;
     88 
     89 	if (new_psets_max < 1)
     90 		return EINVAL;
     91 
     92 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
     93 	mutex_enter(&psets_lock);
     94 	old_psets = psets;
     95 	oldsize = psets_max * sizeof(void *);
     96 
     97 	/* Check if we can lower the size of the array */
     98 	if (new_psets_max < psets_max) {
     99 		for (i = new_psets_max; i < psets_max; i++) {
    100 			if (psets[i] == NULL)
    101 				continue;
    102 			mutex_exit(&psets_lock);
    103 			kmem_free(new_psets, newsize);
    104 			return EBUSY;
    105 		}
    106 	}
    107 
    108 	/* Copy all pointers to the new array */
    109 	memcpy(new_psets, psets, newsize);
    110 	psets_max = new_psets_max;
    111 	psets = new_psets;
    112 	mutex_exit(&psets_lock);
    113 
    114 	kmem_free(old_psets, oldsize);
    115 	return 0;
    116 }
    117 
    118 /*
    119  * Validate processor-set ID.
    120  */
    121 static int
    122 psid_validate(psetid_t psid, bool chkps)
    123 {
    124 
    125 	KASSERT(mutex_owned(&psets_lock));
    126 
    127 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
    128 		return 0;
    129 	if (psid <= 0 || psid > psets_max)
    130 		return EINVAL;
    131 	if (psets[psid - 1] == NULL)
    132 		return EINVAL;
    133 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
    134 		return EBUSY;
    135 
    136 	return 0;
    137 }
    138 
    139 /*
    140  * Create a processor-set.
    141  */
    142 static int
    143 kern_pset_create(psetid_t *psid)
    144 {
    145 	pset_info_t *pi;
    146 	u_int i;
    147 
    148 	if (psets_count == psets_max)
    149 		return ENOMEM;
    150 
    151 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
    152 
    153 	mutex_enter(&psets_lock);
    154 	if (psets_count == psets_max) {
    155 		mutex_exit(&psets_lock);
    156 		kmem_free(pi, sizeof(pset_info_t));
    157 		return ENOMEM;
    158 	}
    159 
    160 	/* Find a free entry in the array */
    161 	for (i = 0; i < psets_max; i++)
    162 		if (psets[i] == NULL)
    163 			break;
    164 	KASSERT(i != psets_max);
    165 
    166 	psets[i] = pi;
    167 	psets_count++;
    168 	mutex_exit(&psets_lock);
    169 
    170 	*psid = i + 1;
    171 	return 0;
    172 }
    173 
    174 /*
    175  * Destroy a processor-set.
    176  */
    177 static int
    178 kern_pset_destroy(psetid_t psid)
    179 {
    180 	struct cpu_info *ci;
    181 	pset_info_t *pi;
    182 	struct lwp *l;
    183 	CPU_INFO_ITERATOR cii;
    184 	int error;
    185 
    186 	mutex_enter(&psets_lock);
    187 	if (psid == PS_MYID) {
    188 		/* Use caller's processor-set ID */
    189 		psid = curlwp->l_psid;
    190 	}
    191 	error = psid_validate(psid, false);
    192 	if (error) {
    193 		mutex_exit(&psets_lock);
    194 		return error;
    195 	}
    196 
    197 	/* Release the processor-set from all CPUs */
    198 	for (CPU_INFO_FOREACH(cii, ci)) {
    199 		struct schedstate_percpu *spc;
    200 
    201 		spc = &ci->ci_schedstate;
    202 		if (spc->spc_psid != psid)
    203 			continue;
    204 		spc->spc_psid = PS_NONE;
    205 	}
    206 	/* Mark that processor-set is going to be destroyed */
    207 	pi = psets[psid - 1];
    208 	pi->ps_flags |= PSET_BUSY;
    209 	mutex_exit(&psets_lock);
    210 
    211 	/* Unmark the processor-set ID from each thread */
    212 	mutex_enter(proc_lock);
    213 	LIST_FOREACH(l, &alllwp, l_list) {
    214 		/* Safe to check and set without lock held */
    215 		if (l->l_psid != psid)
    216 			continue;
    217 		l->l_psid = PS_NONE;
    218 	}
    219 	mutex_exit(proc_lock);
    220 
    221 	/* Destroy the processor-set */
    222 	mutex_enter(&psets_lock);
    223 	psets[psid - 1] = NULL;
    224 	psets_count--;
    225 	mutex_exit(&psets_lock);
    226 
    227 	kmem_free(pi, sizeof(pset_info_t));
    228 	return 0;
    229 }
    230 
    231 /*
    232  * General system calls for the processor-sets.
    233  */
    234 
    235 int
    236 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
    237     register_t *retval)
    238 {
    239 	/* {
    240 		syscallarg(psetid_t) *psid;
    241 	} */
    242 	psetid_t psid;
    243 	int error;
    244 
    245 	/* Available only for super-user */
    246 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
    247 	    KAUTH_REQ_SYSTEM_PSET_CREATE, NULL, NULL, NULL))
    248 		return EPERM;
    249 
    250 	error = kern_pset_create(&psid);
    251 	if (error)
    252 		return error;
    253 
    254 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
    255 	if (error)
    256 		(void)kern_pset_destroy(psid);
    257 
    258 	return error;
    259 }
    260 
    261 int
    262 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
    263     register_t *retval)
    264 {
    265 	/* {
    266 		syscallarg(psetid_t) psid;
    267 	} */
    268 
    269 	/* Available only for super-user */
    270 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
    271 	    KAUTH_REQ_SYSTEM_PSET_DESTROY,
    272 	    KAUTH_ARG(SCARG(uap, psid)), NULL, NULL))
    273 		return EPERM;
    274 
    275 	return kern_pset_destroy(SCARG(uap, psid));
    276 }
    277 
    278 int
    279 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
    280     register_t *retval)
    281 {
    282 	/* {
    283 		syscallarg(psetid_t) psid;
    284 		syscallarg(cpuid_t) cpuid;
    285 		syscallarg(psetid_t) *opsid;
    286 	} */
    287 	struct cpu_info *ci;
    288 	struct schedstate_percpu *spc;
    289 	psetid_t psid = SCARG(uap, psid), opsid = 0;
    290 	CPU_INFO_ITERATOR cii;
    291 	int error = 0;
    292 
    293 	/* Available only for super-user, except the case of PS_QUERY */
    294 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
    295 	    KAUTH_REQ_SYSTEM_PSET_ASSIGN, KAUTH_ARG(SCARG(uap, psid)), NULL,
    296 	    NULL))
    297 		return EPERM;
    298 
    299 	/* Find the target CPU */
    300 	for (CPU_INFO_FOREACH(cii, ci))
    301 		if (cpu_index(ci) == SCARG(uap, cpuid))
    302 			break;
    303 	if (ci == NULL)
    304 		return EINVAL;
    305 	spc = &ci->ci_schedstate;
    306 
    307 	mutex_enter(&psets_lock);
    308 	error = psid_validate(psid, true);
    309 	if (error) {
    310 		mutex_exit(&psets_lock);
    311 		return error;
    312 	}
    313 	opsid = spc->spc_psid;
    314 	switch (psid) {
    315 	case PS_QUERY:
    316 		break;
    317 	case PS_MYID:
    318 		psid = curlwp->l_psid;
    319 	default:
    320 		spc->spc_psid = psid;
    321 	}
    322 	mutex_exit(&psets_lock);
    323 
    324 	if (SCARG(uap, opsid) != NULL)
    325 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
    326 
    327 	return error;
    328 }
    329 
    330 int
    331 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
    332     register_t *retval)
    333 {
    334 	/* {
    335 		syscallarg(idtype_t) idtype;
    336 		syscallarg(id_t) first_id;
    337 		syscallarg(id_t) second_id;
    338 		syscallarg(psetid_t) psid;
    339 		syscallarg(psetid_t) *opsid;
    340 	} */
    341 	struct cpu_info *ci;
    342 	struct proc *p;
    343 	struct lwp *t;
    344 	id_t id1, id2;
    345 	pid_t pid = 0;
    346 	lwpid_t lid = 0;
    347 	psetid_t psid, opsid;
    348 	int error = 0, lcnt;
    349 
    350 	psid = SCARG(uap, psid);
    351 
    352 	/* Available only for super-user, except the case of PS_QUERY */
    353 	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_PSET,
    354 	    KAUTH_REQ_SYSTEM_PSET_BIND, KAUTH_ARG(SCARG(uap, psid)), NULL,
    355 	    NULL))
    356 		return EPERM;
    357 
    358 	mutex_enter(&psets_lock);
    359 	error = psid_validate(psid, true);
    360 	if (error) {
    361 		mutex_exit(&psets_lock);
    362 		return error;
    363 	}
    364 	if (psid == PS_MYID)
    365 		psid = curlwp->l_psid;
    366 	if (psid != PS_QUERY && psid != PS_NONE)
    367 		psets[psid - 1]->ps_flags |= PSET_BUSY;
    368 	mutex_exit(&psets_lock);
    369 
    370 	/*
    371 	 * Get PID and LID from the ID.
    372 	 */
    373 	p = l->l_proc;
    374 	id1 = SCARG(uap, first_id);
    375 	id2 = SCARG(uap, second_id);
    376 
    377 	switch (SCARG(uap, idtype)) {
    378 	case P_PID:
    379 		/*
    380 		 * Process:
    381 		 *  First ID	- PID;
    382 		 *  Second ID	- ignored;
    383 		 */
    384 		pid = (id1 == P_MYID) ? p->p_pid : id1;
    385 		lid = 0;
    386 		break;
    387 	case P_LWPID:
    388 		/*
    389 		 * Thread (LWP):
    390 		 *  First ID	- LID;
    391 		 *  Second ID	- PID;
    392 		 */
    393 		if (id1 == P_MYID) {
    394 			pid = p->p_pid;
    395 			lid = l->l_lid;
    396 			break;
    397 		}
    398 		lid = id1;
    399 		pid = (id2 == P_MYID) ? p->p_pid : id2;
    400 		break;
    401 	default:
    402 		error = EINVAL;
    403 		goto error;
    404 	}
    405 
    406 	/* Find the process */
    407 	mutex_enter(proc_lock);
    408 	p = p_find(pid, PFIND_LOCKED);
    409 	if (p == NULL) {
    410 		mutex_exit(proc_lock);
    411 		error = ESRCH;
    412 		goto error;
    413 	}
    414 	mutex_enter(p->p_lock);
    415 	mutex_exit(proc_lock);
    416 
    417 	/* Disallow modification of the system processes */
    418 	if (p->p_flag & PK_SYSTEM) {
    419 		mutex_exit(p->p_lock);
    420 		error = EPERM;
    421 		goto error;
    422 	}
    423 
    424 	/* Find the LWP(s) */
    425 	lcnt = 0;
    426 	ci = NULL;
    427 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    428 		if (lid && lid != t->l_lid)
    429 			continue;
    430 		/*
    431 		 * Bind the thread to the processor-set,
    432 		 * take some CPU and migrate.
    433 		 */
    434 		lwp_lock(t);
    435 		opsid = t->l_psid;
    436 		t->l_psid = psid;
    437 		ci = sched_takecpu(l);
    438 		/* Unlocks LWP */
    439 		lwp_migrate(t, ci);
    440 		lcnt++;
    441 	}
    442 	mutex_exit(p->p_lock);
    443 	if (lcnt == 0) {
    444 		error = ESRCH;
    445 		goto error;
    446 	}
    447 	if (SCARG(uap, opsid))
    448 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
    449 error:
    450 	if (psid != PS_QUERY && psid != PS_NONE) {
    451 		mutex_enter(&psets_lock);
    452 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
    453 		mutex_exit(&psets_lock);
    454 	}
    455 	return error;
    456 }
    457 
    458 /*
    459  * Sysctl nodes and initialization.
    460  */
    461 
    462 static int
    463 sysctl_psets_max(SYSCTLFN_ARGS)
    464 {
    465 	struct sysctlnode node;
    466 	int error, newsize;
    467 
    468 	node = *rnode;
    469 	node.sysctl_data = &newsize;
    470 
    471 	newsize = psets_max;
    472 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    473 	if (error || newp == NULL)
    474 		return error;
    475 
    476 	if (newsize <= 0)
    477 		return EINVAL;
    478 
    479 	sysctl_unlock();
    480 	error = psets_realloc(newsize);
    481 	sysctl_relock();
    482 	return error;
    483 }
    484 
    485 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
    486 {
    487 	const struct sysctlnode *node = NULL;
    488 
    489 	sysctl_createv(clog, 0, NULL, NULL,
    490 		CTLFLAG_PERMANENT,
    491 		CTLTYPE_NODE, "kern", NULL,
    492 		NULL, 0, NULL, 0,
    493 		CTL_KERN, CTL_EOL);
    494 	sysctl_createv(clog, 0, NULL, &node,
    495 		CTLFLAG_PERMANENT,
    496 		CTLTYPE_NODE, "pset",
    497 		SYSCTL_DESCR("Processor-set options"),
    498 		NULL, 0, NULL, 0,
    499 		CTL_KERN, CTL_CREATE, CTL_EOL);
    500 
    501 	if (node == NULL)
    502 		return;
    503 
    504 	sysctl_createv(clog, 0, &node, NULL,
    505 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    506 		CTLTYPE_INT, "psets_max",
    507 		SYSCTL_DESCR("Maximal count of the processor-sets"),
    508 		sysctl_psets_max, 0, &psets_max, 0,
    509 		CTL_CREATE, CTL_EOL);
    510 }
    511