Home | History | Annotate | Line # | Download | only in kern
sys_pset.c revision 1.1
      1 /*	$NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Implementation of the Processor Sets.
     31  *
     32  * Locking
     33  *  The array of the processor-set structures and its members are protected
     34  *  by the global psets_lock.  Note that the scheduler may read an LWP's
     35  *  l_psid value without holding that lock.
     36  */
     37 
     38 #include <sys/cdefs.h>
     39 __KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $");
     40 
     41 #include <sys/param.h>
     42 
     43 #include <sys/cpu.h>
     44 #include <sys/kauth.h>
     45 #include <sys/kmem.h>
     46 #include <sys/lwp.h>
     47 #include <sys/mutex.h>
     48 #include <sys/proc.h>
     49 #include <sys/pset.h>
     50 #include <sys/sched.h>
     51 #include <sys/syscallargs.h>
     52 #include <sys/sysctl.h>
     53 #include <sys/systm.h>
     54 #include <sys/types.h>
     55 
     56 static pset_info_t **	psets;
     57 static kmutex_t		psets_lock;
     58 static u_int		psets_max;
     59 static u_int		psets_count;
     60 
     61 static int	psets_realloc(int);
     62 static int	psid_validate(psetid_t, bool);
     63 static int	kern_pset_create(psetid_t *);
     64 static int	kern_pset_destroy(psetid_t);
     65 
     66 /*
     67  * Initialization of the processor-sets.
     68  */
     69 void
     70 psets_init(void)
     71 {
     72 
     73 	psets_max = max(MAXCPUS, 32);
     74 	psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP);
     75 	mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE);
     76 	psets_count = 0;
     77 }
     78 
     79 /*
     80  * Reallocate the array of the processor-set structures.
     81  */
     82 static int
     83 psets_realloc(int new_psets_max)
     84 {
     85 	pset_info_t **new_psets, **old_psets;
     86 	const u_int newsize = new_psets_max * sizeof(void *);
     87 	u_int i, oldsize;
     88 
     89 	if (new_psets_max < 1)
     90 		return EINVAL;
     91 
     92 	new_psets = kmem_zalloc(newsize, KM_SLEEP);
     93 	mutex_enter(&psets_lock);
     94 	old_psets = psets;
     95 	oldsize = psets_max * sizeof(void *);
     96 
     97 	/* Check if we can lower the size of the array */
     98 	if (new_psets_max < psets_max) {
     99 		for (i = new_psets_max; i < psets_max; i++) {
    100 			if (psets[i] == NULL)
    101 				continue;
    102 			mutex_exit(&psets_lock);
    103 			kmem_free(new_psets, newsize);
    104 			return EBUSY;
    105 		}
    106 	}
    107 
    108 	/* Copy all pointers to the new array */
    109 	memcpy(new_psets, psets, newsize);
    110 	psets_max = new_psets_max;
    111 	psets = new_psets;
    112 	mutex_exit(&psets_lock);
    113 
    114 	kmem_free(old_psets, oldsize);
    115 	return 0;
    116 }
    117 
    118 /*
    119  * Validate processor-set ID.
    120  */
    121 static int
    122 psid_validate(psetid_t psid, bool chkps)
    123 {
    124 
    125 	KASSERT(mutex_owned(&psets_lock));
    126 
    127 	if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID))
    128 		return 0;
    129 	if (psid <= 0 || psid > psets_max)
    130 		return EINVAL;
    131 	if (psets[psid - 1] == NULL)
    132 		return EINVAL;
    133 	if (psets[psid - 1]->ps_flags & PSET_BUSY)
    134 		return EBUSY;
    135 
    136 	return 0;
    137 }
    138 
    139 /*
    140  * Create a processor-set.
    141  */
    142 static int
    143 kern_pset_create(psetid_t *psid)
    144 {
    145 	pset_info_t *pi;
    146 	u_int i;
    147 
    148 	if (psets_count == psets_max)
    149 		return ENOMEM;
    150 
    151 	pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP);
    152 
    153 	mutex_enter(&psets_lock);
    154 	if (psets_count == psets_max) {
    155 		mutex_exit(&psets_lock);
    156 		kmem_free(pi, sizeof(pset_info_t));
    157 		return ENOMEM;
    158 	}
    159 
    160 	/* Find a free entry in the array */
    161 	for (i = 0; i < psets_max; i++)
    162 		if (psets[i] == NULL)
    163 			break;
    164 	KASSERT(i != psets_max);
    165 
    166 	psets[i] = pi;
    167 	psets_count++;
    168 	mutex_exit(&psets_lock);
    169 
    170 	*psid = i + 1;
    171 	return 0;
    172 }
    173 
    174 /*
    175  * Destroy a processor-set.
    176  */
    177 static int
    178 kern_pset_destroy(psetid_t psid)
    179 {
    180 	struct cpu_info *ci;
    181 	pset_info_t *pi;
    182 	struct lwp *l;
    183 	CPU_INFO_ITERATOR cii;
    184 	int error;
    185 
    186 	mutex_enter(&psets_lock);
    187 	if (psid == PS_MYID) {
    188 		/* Use caller's processor-set ID */
    189 		psid = curlwp->l_psid;
    190 	}
    191 	error = psid_validate(psid, false);
    192 	if (error) {
    193 		mutex_exit(&psets_lock);
    194 		return error;
    195 	}
    196 
    197 	/* Release the processor-set from all CPUs */
    198 	for (CPU_INFO_FOREACH(cii, ci)) {
    199 		struct schedstate_percpu *spc;
    200 
    201 		spc = &ci->ci_schedstate;
    202 		if (spc->spc_psid != psid)
    203 			continue;
    204 		spc->spc_psid = PS_NONE;
    205 	}
    206 	/* Mark that processor-set is going to be destroyed */
    207 	pi = psets[psid - 1];
    208 	pi->ps_flags |= PSET_BUSY;
    209 	mutex_exit(&psets_lock);
    210 
    211 	/* Unmark the processor-set ID from each thread */
    212 	mutex_enter(&proclist_lock);
    213 	LIST_FOREACH(l, &alllwp, l_list) {
    214 		/* Safe to check and set without lock held */
    215 		if (l->l_psid != psid)
    216 			continue;
    217 		l->l_psid = PS_NONE;
    218 	}
    219 	mutex_exit(&proclist_lock);
    220 
    221 	/* Destroy the processor-set */
    222 	mutex_enter(&psets_lock);
    223 	psets[psid - 1] = NULL;
    224 	psets_count--;
    225 	mutex_exit(&psets_lock);
    226 
    227 	kmem_free(pi, sizeof(pset_info_t));
    228 	return 0;
    229 }
    230 
    231 /*
    232  * General system calls for the processor-sets.
    233  */
    234 
    235 int
    236 sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap,
    237     register_t *retval)
    238 {
    239 	/* {
    240 		syscallarg(psetid_t) *psid;
    241 	} */
    242 	psetid_t psid;
    243 	int error;
    244 
    245 	/* Available only for super-user */
    246 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
    247 		return EPERM;
    248 
    249 	error = kern_pset_create(&psid);
    250 	if (error)
    251 		return error;
    252 
    253 	error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t));
    254 	if (error)
    255 		(void)kern_pset_destroy(psid);
    256 
    257 	return error;
    258 }
    259 
    260 int
    261 sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap,
    262     register_t *retval)
    263 {
    264 	/* {
    265 		syscallarg(psetid_t) psid;
    266 	} */
    267 
    268 	/* Available only for super-user */
    269 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL))
    270 		return EPERM;
    271 
    272 	return kern_pset_destroy(SCARG(uap, psid));
    273 }
    274 
    275 int
    276 sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap,
    277     register_t *retval)
    278 {
    279 	/* {
    280 		syscallarg(psetid_t) psid;
    281 		syscallarg(cpuid_t) cpuid;
    282 		syscallarg(psetid_t) *opsid;
    283 	} */
    284 	struct cpu_info *ci;
    285 	struct schedstate_percpu *spc;
    286 	psetid_t psid = SCARG(uap, psid), opsid = 0;
    287 	CPU_INFO_ITERATOR cii;
    288 	int error = 0;
    289 
    290 	/* Available only for super-user, except the case of PS_QUERY */
    291 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) &&
    292 	    psid != PS_QUERY)
    293 		return EPERM;
    294 
    295 	/* Find the target CPU */
    296 	for (CPU_INFO_FOREACH(cii, ci))
    297 		if (cpu_index(ci) == SCARG(uap, cpuid))
    298 			break;
    299 	if (ci == NULL)
    300 		return EINVAL;
    301 	spc = &ci->ci_schedstate;
    302 
    303 	mutex_enter(&psets_lock);
    304 	error = psid_validate(psid, true);
    305 	if (error) {
    306 		mutex_exit(&psets_lock);
    307 		return error;
    308 	}
    309 	opsid = spc->spc_psid;
    310 	switch (psid) {
    311 	case PS_QUERY:
    312 		break;
    313 	case PS_MYID:
    314 		psid = curlwp->l_psid;
    315 	default:
    316 		spc->spc_psid = psid;
    317 	}
    318 	mutex_exit(&psets_lock);
    319 
    320 	if (SCARG(uap, opsid) != NULL)
    321 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
    322 
    323 	return error;
    324 }
    325 
    326 int
    327 sys__pset_bind(struct lwp *l, const struct sys__pset_bind_args *uap,
    328     register_t *retval)
    329 {
    330 	/* {
    331 		syscallarg(idtype_t) idtype;
    332 		syscallarg(id_t) first_id;
    333 		syscallarg(id_t) second_id;
    334 		syscallarg(psetid_t) psid;
    335 		syscallarg(psetid_t) *opsid;
    336 	} */
    337 	struct cpu_info *ci;
    338 	struct proc *p;
    339 	struct lwp *t;
    340 	id_t id1, id2;
    341 	pid_t pid = 0;
    342 	lwpid_t lid = 0;
    343 	psetid_t psid, opsid;
    344 	int error = 0, lcnt;
    345 
    346 	psid = SCARG(uap, psid);
    347 
    348 	/* Available only for super-user, except the case of PS_QUERY */
    349 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) &&
    350 	    psid != PS_QUERY)
    351 		return EPERM;
    352 
    353 	mutex_enter(&psets_lock);
    354 	error = psid_validate(psid, true);
    355 	if (error) {
    356 		mutex_exit(&psets_lock);
    357 		return error;
    358 	}
    359 	if (psid == PS_MYID)
    360 		psid = curlwp->l_psid;
    361 	if (psid != PS_QUERY && psid != PS_NONE)
    362 		psets[psid - 1]->ps_flags |= PSET_BUSY;
    363 	mutex_exit(&psets_lock);
    364 
    365 	/*
    366 	 * Get PID and LID from the ID.
    367 	 */
    368 	p = l->l_proc;
    369 	id1 = SCARG(uap, first_id);
    370 	id2 = SCARG(uap, second_id);
    371 
    372 	switch (SCARG(uap, idtype)) {
    373 	case P_PID:
    374 		/*
    375 		 * Process:
    376 		 *  First ID	- PID;
    377 		 *  Second ID	- ignored;
    378 		 */
    379 		pid = (id1 == P_MYID) ? p->p_pid : id1;
    380 		lid = 0;
    381 		break;
    382 	case P_LWPID:
    383 		/*
    384 		 * Thread (LWP):
    385 		 *  First ID	- LID;
    386 		 *  Second ID	- PID;
    387 		 */
    388 		if (id1 == P_MYID) {
    389 			pid = p->p_pid;
    390 			lid = l->l_lid;
    391 			break;
    392 		}
    393 		lid = id1;
    394 		pid = (id2 == P_MYID) ? p->p_pid : id2;
    395 		break;
    396 	default:
    397 		return EINVAL;
    398 	}
    399 
    400 	/* Find the process */
    401 	p = p_find(pid, PFIND_UNLOCK_FAIL);
    402 	if (p == NULL) {
    403 		error = ESRCH;
    404 		goto error;
    405 	}
    406 	mutex_enter(&p->p_smutex);
    407 	mutex_exit(&proclist_lock);
    408 
    409 	/* Disallow modification of the system processes */
    410 	if (p->p_flag & PK_SYSTEM) {
    411 		mutex_exit(&p->p_smutex);
    412 		error = EPERM;
    413 		goto error;
    414 	}
    415 
    416 	/* Find the LWP(s) */
    417 	lcnt = 0;
    418 	ci = NULL;
    419 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
    420 		if (lid && lid != t->l_lid)
    421 			continue;
    422 		/*
    423 		 * Bind the thread to the processor-set,
    424 		 * take some CPU and migrate.
    425 		 */
    426 		lwp_lock(t);
    427 		opsid = t->l_psid;
    428 		t->l_psid = psid;
    429 		ci = sched_takecpu(l);
    430 		/* Unlocks LWP */
    431 		lwp_migrate(t, ci);
    432 		lcnt++;
    433 	}
    434 	mutex_exit(&p->p_smutex);
    435 	if (lcnt == 0) {
    436 		error = ESRCH;
    437 		goto error;
    438 	}
    439 	*retval = lcnt;
    440 	if (SCARG(uap, opsid))
    441 		error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t));
    442 error:
    443 	if (psid != PS_QUERY && psid != PS_NONE) {
    444 		mutex_enter(&psets_lock);
    445 		psets[psid - 1]->ps_flags &= ~PSET_BUSY;
    446 		mutex_exit(&psets_lock);
    447 	}
    448 	return error;
    449 }
    450 
    451 /*
    452  * Sysctl nodes and initialization.
    453  */
    454 
    455 static int
    456 sysctl_psets_max(SYSCTLFN_ARGS)
    457 {
    458 	struct sysctlnode node;
    459 	int error, newsize;
    460 
    461 	node = *rnode;
    462 	node.sysctl_data = &newsize;
    463 
    464 	newsize = psets_max;
    465 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    466 	if (error || newp == NULL)
    467 		return error;
    468 
    469 	if (newsize <= 0)
    470 		return EINVAL;
    471 
    472 	sysctl_unlock();
    473 	error = psets_realloc(newsize);
    474 	sysctl_relock();
    475 	return error;
    476 }
    477 
    478 SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup")
    479 {
    480 	const struct sysctlnode *node = NULL;
    481 
    482 	sysctl_createv(clog, 0, NULL, NULL,
    483 		CTLFLAG_PERMANENT,
    484 		CTLTYPE_NODE, "kern", NULL,
    485 		NULL, 0, NULL, 0,
    486 		CTL_KERN, CTL_EOL);
    487 	sysctl_createv(clog, 0, NULL, &node,
    488 		CTLFLAG_PERMANENT,
    489 		CTLTYPE_NODE, "pset",
    490 		SYSCTL_DESCR("Processor-set options"),
    491 		NULL, 0, NULL, 0,
    492 		CTL_KERN, CTL_CREATE, CTL_EOL);
    493 
    494 	if (node == NULL)
    495 		return;
    496 
    497 	sysctl_createv(clog, 0, &node, NULL,
    498 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    499 		CTLTYPE_INT, "psets_max",
    500 		SYSCTL_DESCR("Maximal count of the processor-sets"),
    501 		sysctl_psets_max, 0, &psets_max, 0,
    502 		CTL_CREATE, CTL_EOL);
    503 }
    504