Home | History | Annotate | Line # | Download | only in kern
sysv_sem.c revision 1.79.8.2
      1 /*	$NetBSD: sysv_sem.c,v 1.79.8.2 2008/11/01 21:22:27 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, and by Andrew Doran.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Implementation of SVID semaphores
     35  *
     36  * Author: Daniel Boulet
     37  *
     38  * This software is provided ``AS IS'' without any warranties of any kind.
     39  */
     40 
     41 #include <sys/cdefs.h>
     42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.79.8.2 2008/11/01 21:22:27 christos Exp $");
     43 
     44 #define SYSVSEM
     45 
     46 #include <sys/param.h>
     47 #include <sys/kernel.h>
     48 #include <sys/sem.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/kmem.h>
     51 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
     52 #include <sys/syscallargs.h>
     53 #include <sys/kauth.h>
     54 
     55 /*
     56  * Memory areas:
     57  *  1st: Pool of semaphore identifiers
     58  *  2nd: Semaphores
     59  *  3rd: Conditional variables
     60  *  4th: Undo structures
     61  */
     62 struct semid_ds		*sema;
     63 static struct __sem	*sem;
     64 static kcondvar_t	*semcv;
     65 static int		*semu;
     66 
     67 static kmutex_t	semlock;
     68 static struct	sem_undo *semu_list;	/* list of active undo structures */
     69 static u_int	semtot = 0;		/* total number of semaphores */
     70 
     71 static u_int	sem_waiters = 0;	/* total number of semop waiters */
     72 static bool	sem_realloc_state;
     73 static kcondvar_t sem_realloc_cv;
     74 
/*
 * Macro to find a particular sem_undo vector: yields a pointer to the
 * ix'th undo structure of the area starting at s, where consecutive
 * structures are seminfo.semusz bytes apart.  Both arguments are
 * parenthesised so that expressions (e.g. "i + 1") index correctly.
 */
#define SEMU(s, ix)	\
	((struct sem_undo *)((uintptr_t)(s) + (ix) * seminfo.semusz))
     77 
/*
 * Debug tracing.  SEM_PRINTF((fmt, ...)) hands its parenthesised argument
 * list to printf when SEM_DEBUG is defined and expands to nothing (so the
 * arguments are not evaluated) otherwise.
 */
#if defined(SEM_DEBUG)
#define	SEM_PRINTF(args)	printf args
#else
#define	SEM_PRINTF(args)
#endif
     83 
/* Undo-structure helpers defined further down in this file. */
struct sem_undo	*semu_alloc(struct proc *p);
int		semundo_adjust(struct proc *p, struct sem_undo **supptr,
		    int semid, int semnum, int adjval);
void		semundo_clear(int semid, int semnum);
     87 
     88 void
     89 seminit(void)
     90 {
     91 	int i, sz;
     92 	vaddr_t v;
     93 
     94 	mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
     95 	cv_init(&sem_realloc_cv, "semrealc");
     96 	sem_realloc_state = false;
     97 
     98 	/* Allocate the wired memory for our structures */
     99 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    100 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    101 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    102 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    103 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    104 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    105 	if (v == 0)
    106 		panic("sysv_sem: cannot allocate memory");
    107 	sema = (void *)v;
    108 	sem = (void *)((uintptr_t)sema +
    109 	    ALIGN(seminfo.semmni * sizeof(struct semid_ds)));
    110 	semcv = (void *)((uintptr_t)sem +
    111 	    ALIGN(seminfo.semmns * sizeof(struct __sem)));
    112 	semu = (void *)((uintptr_t)semcv +
    113 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)));
    114 
    115 	for (i = 0; i < seminfo.semmni; i++) {
    116 		sema[i]._sem_base = 0;
    117 		sema[i].sem_perm.mode = 0;
    118 		cv_init(&semcv[i], "semwait");
    119 	}
    120 	for (i = 0; i < seminfo.semmnu; i++) {
    121 		struct sem_undo *suptr = SEMU(semu, i);
    122 		suptr->un_proc = NULL;
    123 	}
    124 	semu_list = NULL;
    125 	exithook_establish(semexit, NULL);
    126 }
    127 
    128 static int
    129 semrealloc(int newsemmni, int newsemmns, int newsemmnu)
    130 {
    131 	struct semid_ds *new_sema, *old_sema;
    132 	struct __sem *new_sem;
    133 	struct sem_undo *new_semu_list, *suptr, *nsuptr;
    134 	int *new_semu;
    135 	kcondvar_t *new_semcv;
    136 	vaddr_t v;
    137 	int i, j, lsemid, nmnus, sz;
    138 
    139 	if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
    140 		return EINVAL;
    141 
    142 	/* Allocate the wired memory for our structures */
    143 	sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
    144 	    ALIGN(newsemmns * sizeof(struct __sem)) +
    145 	    ALIGN(newsemmni * sizeof(kcondvar_t)) +
    146 	    ALIGN(newsemmnu * seminfo.semusz);
    147 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    148 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    149 	if (v == 0)
    150 		return ENOMEM;
    151 
    152 	mutex_enter(&semlock);
    153 	if (sem_realloc_state) {
    154 		mutex_exit(&semlock);
    155 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    156 		return EBUSY;
    157 	}
    158 	sem_realloc_state = true;
    159 	if (sem_waiters) {
    160 		/*
    161 		 * Mark reallocation state, wake-up all waiters,
    162 		 * and wait while they will all exit.
    163 		 */
    164 		for (i = 0; i < seminfo.semmni; i++)
    165 			cv_broadcast(&semcv[i]);
    166 		while (sem_waiters)
    167 			cv_wait(&sem_realloc_cv, &semlock);
    168 	}
    169 	old_sema = sema;
    170 
    171 	/* Get the number of last slot */
    172 	lsemid = 0;
    173 	for (i = 0; i < seminfo.semmni; i++)
    174 		if (sema[i].sem_perm.mode & SEM_ALLOC)
    175 			lsemid = i;
    176 
    177 	/* Get the number of currently used undo structures */
    178 	nmnus = 0;
    179 	for (i = 0; i < seminfo.semmnu; i++) {
    180 		suptr = SEMU(semu, i);
    181 		if (suptr->un_proc == NULL)
    182 			continue;
    183 		nmnus++;
    184 	}
    185 
    186 	/* We cannot reallocate less memory than we use */
    187 	if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
    188 		mutex_exit(&semlock);
    189 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    190 		return EBUSY;
    191 	}
    192 
    193 	new_sema = (void *)v;
    194 	new_sem = (void *)((uintptr_t)new_sema +
    195 	    ALIGN(newsemmni * sizeof(struct semid_ds)));
    196 	new_semcv = (void *)((uintptr_t)new_sem +
    197 	    ALIGN(newsemmns * sizeof(struct __sem)));
    198 	new_semu = (void *)((uintptr_t)new_semcv +
    199 	    ALIGN(newsemmni * sizeof(kcondvar_t)));
    200 
    201 	/* Initialize all semaphore identifiers and condvars */
    202 	for (i = 0; i < newsemmni; i++) {
    203 		new_sema[i]._sem_base = 0;
    204 		new_sema[i].sem_perm.mode = 0;
    205 		cv_init(&new_semcv[i], "semwait");
    206 	}
    207 	for (i = 0; i < newsemmnu; i++) {
    208 		nsuptr = SEMU(new_semu, i);
    209 		nsuptr->un_proc = NULL;
    210 	}
    211 
    212 	/*
    213 	 * Copy all identifiers, semaphores and list of the
    214 	 * undo structures to the new memory allocation.
    215 	 */
    216 	j = 0;
    217 	for (i = 0; i <= lsemid; i++) {
    218 		if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
    219 			continue;
    220 		memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
    221 		new_sema[i]._sem_base = &new_sem[j];
    222 		memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
    223 		    (sizeof(struct __sem) * sema[i].sem_nsems));
    224 		j += sema[i].sem_nsems;
    225 	}
    226 	KASSERT(j == semtot);
    227 
    228 	j = 0;
    229 	new_semu_list = NULL;
    230 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
    231 		KASSERT(j < newsemmnu);
    232 		nsuptr = SEMU(new_semu, j);
    233 		memcpy(nsuptr, suptr, SEMUSZ);
    234 		nsuptr->un_next = new_semu_list;
    235 		new_semu_list = nsuptr;
    236 		j++;
    237 	}
    238 
    239 	for (i = 0; i < seminfo.semmni; i++) {
    240 		KASSERT(cv_has_waiters(&semcv[i]) == false);
    241 		cv_destroy(&semcv[i]);
    242 	}
    243 
    244 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    245 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    246 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    247 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    248 
    249 	/* Set the pointers and update the new values */
    250 	sema = new_sema;
    251 	sem = new_sem;
    252 	semcv = new_semcv;
    253 	semu = new_semu;
    254 	semu_list = new_semu_list;
    255 
    256 	seminfo.semmni = newsemmni;
    257 	seminfo.semmns = newsemmns;
    258 	seminfo.semmnu = newsemmnu;
    259 
    260 	/* Reallocation completed - notify all waiters, if any */
    261 	sem_realloc_state = false;
    262 	cv_broadcast(&sem_realloc_cv);
    263 	mutex_exit(&semlock);
    264 
    265 	uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
    266 	return 0;
    267 }
    268 
    269 /*
    270  * Placebo.
    271  */
    272 
    273 int
    274 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
    275 {
    276 
    277 	*retval = 0;
    278 	return 0;
    279 }
    280 
    281 /*
    282  * Allocate a new sem_undo structure for a process
    283  * (returns ptr to structure or NULL if no more room)
    284  */
    285 
    286 struct sem_undo *
    287 semu_alloc(struct proc *p)
    288 {
    289 	int i;
    290 	struct sem_undo *suptr;
    291 	struct sem_undo **supptr;
    292 	int attempt;
    293 
    294 	KASSERT(mutex_owned(&semlock));
    295 
    296 	/*
    297 	 * Try twice to allocate something.
    298 	 * (we'll purge any empty structures after the first pass so
    299 	 * two passes are always enough)
    300 	 */
    301 
    302 	for (attempt = 0; attempt < 2; attempt++) {
    303 		/*
    304 		 * Look for a free structure.
    305 		 * Fill it in and return it if we find one.
    306 		 */
    307 
    308 		for (i = 0; i < seminfo.semmnu; i++) {
    309 			suptr = SEMU(semu, i);
    310 			if (suptr->un_proc == NULL) {
    311 				suptr->un_next = semu_list;
    312 				semu_list = suptr;
    313 				suptr->un_cnt = 0;
    314 				suptr->un_proc = p;
    315 				return (suptr);
    316 			}
    317 		}
    318 
    319 		/*
    320 		 * We didn't find a free one, if this is the first attempt
    321 		 * then try to free some structures.
    322 		 */
    323 
    324 		if (attempt == 0) {
    325 			/* All the structures are in use - try to free some */
    326 			int did_something = 0;
    327 
    328 			supptr = &semu_list;
    329 			while ((suptr = *supptr) != NULL) {
    330 				if (suptr->un_cnt == 0)  {
    331 					suptr->un_proc = NULL;
    332 					*supptr = suptr->un_next;
    333 					did_something = 1;
    334 				} else
    335 					supptr = &suptr->un_next;
    336 			}
    337 
    338 			/* If we didn't free anything then just give-up */
    339 			if (!did_something)
    340 				return (NULL);
    341 		} else {
    342 			/*
    343 			 * The second pass failed even though we freed
    344 			 * something after the first pass!
    345 			 * This is IMPOSSIBLE!
    346 			 */
    347 			panic("semu_alloc - second attempt failed");
    348 		}
    349 	}
    350 	return NULL;
    351 }
    352 
    353 /*
    354  * Adjust a particular entry for a particular proc
    355  */
    356 
    357 int
    358 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
    359     int adjval)
    360 {
    361 	struct sem_undo *suptr;
    362 	struct undo *sunptr;
    363 	int i;
    364 
    365 	KASSERT(mutex_owned(&semlock));
    366 
    367 	/*
    368 	 * Look for and remember the sem_undo if the caller doesn't
    369 	 * provide it
    370 	 */
    371 
    372 	suptr = *supptr;
    373 	if (suptr == NULL) {
    374 		for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    375 			if (suptr->un_proc == p)
    376 				break;
    377 
    378 		if (suptr == NULL) {
    379 			suptr = semu_alloc(p);
    380 			if (suptr == NULL)
    381 				return (ENOSPC);
    382 		}
    383 		*supptr = suptr;
    384 	}
    385 
    386 	/*
    387 	 * Look for the requested entry and adjust it (delete if
    388 	 * adjval becomes 0).
    389 	 */
    390 	sunptr = &suptr->un_ent[0];
    391 	for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
    392 		if (sunptr->un_id != semid || sunptr->un_num != semnum)
    393 			continue;
    394 		sunptr->un_adjval += adjval;
    395 		if (sunptr->un_adjval == 0) {
    396 			suptr->un_cnt--;
    397 			if (i < suptr->un_cnt)
    398 				suptr->un_ent[i] =
    399 				    suptr->un_ent[suptr->un_cnt];
    400 		}
    401 		return (0);
    402 	}
    403 
    404 	/* Didn't find the right entry - create it */
    405 	if (suptr->un_cnt == SEMUME)
    406 		return (EINVAL);
    407 
    408 	sunptr = &suptr->un_ent[suptr->un_cnt];
    409 	suptr->un_cnt++;
    410 	sunptr->un_adjval = adjval;
    411 	sunptr->un_id = semid;
    412 	sunptr->un_num = semnum;
    413 	return (0);
    414 }
    415 
    416 void
    417 semundo_clear(int semid, int semnum)
    418 {
    419 	struct sem_undo *suptr;
    420 	struct undo *sunptr, *sunend;
    421 
    422 	KASSERT(mutex_owned(&semlock));
    423 
    424 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    425 		for (sunptr = &suptr->un_ent[0],
    426 		    sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
    427 			if (sunptr->un_id == semid) {
    428 				if (semnum == -1 || sunptr->un_num == semnum) {
    429 					suptr->un_cnt--;
    430 					sunend--;
    431 					if (sunptr != sunend)
    432 						*sunptr = *sunend;
    433 					if (semnum != -1)
    434 						break;
    435 					else
    436 						continue;
    437 				}
    438 			}
    439 			sunptr++;
    440 		}
    441 }
    442 
    443 int
    444 sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap,
    445     register_t *retval)
    446 {
    447 	/* {
    448 		syscallarg(int) semid;
    449 		syscallarg(int) semnum;
    450 		syscallarg(int) cmd;
    451 		syscallarg(union __semun *) arg;
    452 	} */
    453 	struct semid_ds sembuf;
    454 	int cmd, error;
    455 	void *pass_arg;
    456 	union __semun karg;
    457 
    458 	cmd = SCARG(uap, cmd);
    459 
    460 	pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
    461 
    462 	if (pass_arg) {
    463 		error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
    464 		if (error)
    465 			return error;
    466 		if (cmd == IPC_SET) {
    467 			error = copyin(karg.buf, &sembuf, sizeof(sembuf));
    468 			if (error)
    469 				return (error);
    470 		}
    471 	}
    472 
    473 	error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
    474 	    pass_arg, retval);
    475 
    476 	if (error == 0 && cmd == IPC_STAT)
    477 		error = copyout(&sembuf, karg.buf, sizeof(sembuf));
    478 
    479 	return (error);
    480 }
    481 
    482 int
    483 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
    484     register_t *retval)
    485 {
    486 	kauth_cred_t cred = l->l_cred;
    487 	union __semun *arg = v;
    488 	struct semid_ds *sembuf = v, *semaptr;
    489 	int i, error, ix;
    490 
    491 	SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
    492 	    semid, semnum, cmd, v));
    493 
    494 	mutex_enter(&semlock);
    495 
    496 	ix = IPCID_TO_IX(semid);
    497 	if (ix < 0 || ix >= seminfo.semmni) {
    498 		mutex_exit(&semlock);
    499 		return (EINVAL);
    500 	}
    501 
    502 	semaptr = &sema[ix];
    503 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    504 	    semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
    505 		mutex_exit(&semlock);
    506 		return (EINVAL);
    507 	}
    508 
    509 	switch (cmd) {
    510 	case IPC_RMID:
    511 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
    512 			break;
    513 		semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
    514 		semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
    515 		semtot -= semaptr->sem_nsems;
    516 		for (i = semaptr->_sem_base - sem; i < semtot; i++)
    517 			sem[i] = sem[i + semaptr->sem_nsems];
    518 		for (i = 0; i < seminfo.semmni; i++) {
    519 			if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
    520 			    sema[i]._sem_base > semaptr->_sem_base)
    521 				sema[i]._sem_base -= semaptr->sem_nsems;
    522 		}
    523 		semaptr->sem_perm.mode = 0;
    524 		semundo_clear(ix, -1);
    525 		cv_broadcast(&semcv[ix]);
    526 		break;
    527 
    528 	case IPC_SET:
    529 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
    530 			break;
    531 		KASSERT(sembuf != NULL);
    532 		semaptr->sem_perm.uid = sembuf->sem_perm.uid;
    533 		semaptr->sem_perm.gid = sembuf->sem_perm.gid;
    534 		semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
    535 		    (sembuf->sem_perm.mode & 0777);
    536 		semaptr->sem_ctime = time_second;
    537 		break;
    538 
    539 	case IPC_STAT:
    540 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    541 			break;
    542 		KASSERT(sembuf != NULL);
    543 		memcpy(sembuf, semaptr, sizeof(struct semid_ds));
    544 		sembuf->sem_perm.mode &= 0777;
    545 		break;
    546 
    547 	case GETNCNT:
    548 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    549 			break;
    550 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    551 			error = EINVAL;
    552 			break;
    553 		}
    554 		*retval = semaptr->_sem_base[semnum].semncnt;
    555 		break;
    556 
    557 	case GETPID:
    558 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    559 			break;
    560 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    561 			error = EINVAL;
    562 			break;
    563 		}
    564 		*retval = semaptr->_sem_base[semnum].sempid;
    565 		break;
    566 
    567 	case GETVAL:
    568 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    569 			break;
    570 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    571 			error = EINVAL;
    572 			break;
    573 		}
    574 		*retval = semaptr->_sem_base[semnum].semval;
    575 		break;
    576 
    577 	case GETALL:
    578 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    579 			break;
    580 		KASSERT(arg != NULL);
    581 		for (i = 0; i < semaptr->sem_nsems; i++) {
    582 			error = copyout(&semaptr->_sem_base[i].semval,
    583 			    &arg->array[i], sizeof(arg->array[i]));
    584 			if (error != 0)
    585 				break;
    586 		}
    587 		break;
    588 
    589 	case GETZCNT:
    590 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    591 			break;
    592 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    593 			error = EINVAL;
    594 			break;
    595 		}
    596 		*retval = semaptr->_sem_base[semnum].semzcnt;
    597 		break;
    598 
    599 	case SETVAL:
    600 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    601 			break;
    602 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    603 			error = EINVAL;
    604 			break;
    605 		}
    606 		KASSERT(arg != NULL);
    607 		if ((unsigned int)arg->val > seminfo.semvmx) {
    608 			error = ERANGE;
    609 			break;
    610 		}
    611 		semaptr->_sem_base[semnum].semval = arg->val;
    612 		semundo_clear(ix, semnum);
    613 		cv_broadcast(&semcv[ix]);
    614 		break;
    615 
    616 	case SETALL:
    617 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    618 			break;
    619 		KASSERT(arg != NULL);
    620 		for (i = 0; i < semaptr->sem_nsems; i++) {
    621 			unsigned short semval;
    622 			error = copyin(&arg->array[i], &semval,
    623 			    sizeof(arg->array[i]));
    624 			if (error != 0)
    625 				break;
    626 			if ((unsigned int)semval > seminfo.semvmx) {
    627 				error = ERANGE;
    628 				break;
    629 			}
    630 			semaptr->_sem_base[i].semval = semval;
    631 		}
    632 		semundo_clear(ix, -1);
    633 		cv_broadcast(&semcv[ix]);
    634 		break;
    635 
    636 	default:
    637 		error = EINVAL;
    638 		break;
    639 	}
    640 
    641 	mutex_exit(&semlock);
    642 	return (error);
    643 }
    644 
    645 int
    646 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
    647 {
    648 	/* {
    649 		syscallarg(key_t) key;
    650 		syscallarg(int) nsems;
    651 		syscallarg(int) semflg;
    652 	} */
    653 	int semid, error = 0;
    654 	int key = SCARG(uap, key);
    655 	int nsems = SCARG(uap, nsems);
    656 	int semflg = SCARG(uap, semflg);
    657 	kauth_cred_t cred = l->l_cred;
    658 
    659 	SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
    660 
    661 	mutex_enter(&semlock);
    662 
    663 	if (key != IPC_PRIVATE) {
    664 		for (semid = 0; semid < seminfo.semmni; semid++) {
    665 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
    666 			    sema[semid].sem_perm._key == key)
    667 				break;
    668 		}
    669 		if (semid < seminfo.semmni) {
    670 			SEM_PRINTF(("found public key\n"));
    671 			if ((error = ipcperm(cred, &sema[semid].sem_perm,
    672 			    semflg & 0700)))
    673 			    	goto out;
    674 			if (nsems > 0 && sema[semid].sem_nsems < nsems) {
    675 				SEM_PRINTF(("too small\n"));
    676 				error = EINVAL;
    677 				goto out;
    678 			}
    679 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
    680 				SEM_PRINTF(("not exclusive\n"));
    681 				error = EEXIST;
    682 				goto out;
    683 			}
    684 			goto found;
    685 		}
    686 	}
    687 
    688 	SEM_PRINTF(("need to allocate the semid_ds\n"));
    689 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
    690 		if (nsems <= 0 || nsems > seminfo.semmsl) {
    691 			SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
    692 			    seminfo.semmsl));
    693 			error = EINVAL;
    694 			goto out;
    695 		}
    696 		if (nsems > seminfo.semmns - semtot) {
    697 			SEM_PRINTF(("not enough semaphores left "
    698 			    "(need %d, got %d)\n",
    699 			    nsems, seminfo.semmns - semtot));
    700 			error = ENOSPC;
    701 			goto out;
    702 		}
    703 		for (semid = 0; semid < seminfo.semmni; semid++) {
    704 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
    705 				break;
    706 		}
    707 		if (semid == seminfo.semmni) {
    708 			SEM_PRINTF(("no more semid_ds's available\n"));
    709 			error = ENOSPC;
    710 			goto out;
    711 		}
    712 		SEM_PRINTF(("semid %d is available\n", semid));
    713 		sema[semid].sem_perm._key = key;
    714 		sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
    715 		sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
    716 		sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
    717 		sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
    718 		sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
    719 		sema[semid].sem_perm._seq =
    720 		    (sema[semid].sem_perm._seq + 1) & 0x7fff;
    721 		sema[semid].sem_nsems = nsems;
    722 		sema[semid].sem_otime = 0;
    723 		sema[semid].sem_ctime = time_second;
    724 		sema[semid]._sem_base = &sem[semtot];
    725 		semtot += nsems;
    726 		memset(sema[semid]._sem_base, 0,
    727 		    sizeof(sema[semid]._sem_base[0]) * nsems);
    728 		SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
    729 		    &sem[semtot]));
    730 	} else {
    731 		SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
    732 		error = ENOENT;
    733 		goto out;
    734 	}
    735 
    736  found:
    737 	*retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
    738  out:
    739 	mutex_exit(&semlock);
    740 	return (error);
    741 }
    742 
/*
 * Operation vectors of at most this many entries are handled in
 * sys_semop()'s on-stack array; longer ones are allocated with
 * kmem_alloc().
 */
#define	SMALL_SOPS	8
    744 
    745 int
    746 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
    747 {
    748 	/* {
    749 		syscallarg(int) semid;
    750 		syscallarg(struct sembuf *) sops;
    751 		syscallarg(size_t) nsops;
    752 	} */
    753 	struct proc *p = l->l_proc;
    754 	int semid = SCARG(uap, semid), seq;
    755 	size_t nsops = SCARG(uap, nsops);
    756 	struct sembuf small_sops[SMALL_SOPS];
    757 	struct sembuf *sops;
    758 	struct semid_ds *semaptr;
    759 	struct sembuf *sopptr = NULL;
    760 	struct __sem *semptr = NULL;
    761 	struct sem_undo *suptr = NULL;
    762 	kauth_cred_t cred = l->l_cred;
    763 	int i, error;
    764 	int do_wakeup, do_undos;
    765 
    766 	SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
    767 
    768 	if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
    769 		mutex_enter(p->p_lock);
    770 		p->p_flag |= PK_SYSVSEM;
    771 		mutex_exit(p->p_lock);
    772 	}
    773 
    774 restart:
    775 	if (nsops <= SMALL_SOPS) {
    776 		sops = small_sops;
    777 	} else if (nsops <= seminfo.semopm) {
    778 		sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
    779 	} else {
    780 		SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
    781 		    seminfo.semopm, nsops));
    782 		return (E2BIG);
    783 	}
    784 
    785 	error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
    786 	if (error) {
    787 		SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
    788 		    SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
    789 		if (sops != small_sops)
    790 			kmem_free(sops, nsops * sizeof(*sops));
    791 		return error;
    792 	}
    793 
    794 	mutex_enter(&semlock);
    795 	/* In case of reallocation, we will wait for completion */
    796 	while (__predict_false(sem_realloc_state))
    797 		cv_wait(&sem_realloc_cv, &semlock);
    798 
    799 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
    800 	if (semid < 0 || semid >= seminfo.semmni) {
    801 		error = EINVAL;
    802 		goto out;
    803 	}
    804 
    805 	semaptr = &sema[semid];
    806 	seq = IPCID_TO_SEQ(SCARG(uap, semid));
    807 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    808 	    semaptr->sem_perm._seq != seq) {
    809 		error = EINVAL;
    810 		goto out;
    811 	}
    812 
    813 	if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
    814 		SEM_PRINTF(("error = %d from ipaccess\n", error));
    815 		goto out;
    816 	}
    817 
    818 	for (i = 0; i < nsops; i++)
    819 		if (sops[i].sem_num >= semaptr->sem_nsems) {
    820 			error = EFBIG;
    821 			goto out;
    822 		}
    823 
    824 	/*
    825 	 * Loop trying to satisfy the vector of requests.
    826 	 * If we reach a point where we must wait, any requests already
    827 	 * performed are rolled back and we go to sleep until some other
    828 	 * process wakes us up.  At this point, we start all over again.
    829 	 *
    830 	 * This ensures that from the perspective of other tasks, a set
    831 	 * of requests is atomic (never partially satisfied).
    832 	 */
    833 	do_undos = 0;
    834 
    835 	for (;;) {
    836 		do_wakeup = 0;
    837 
    838 		for (i = 0; i < nsops; i++) {
    839 			sopptr = &sops[i];
    840 			semptr = &semaptr->_sem_base[sopptr->sem_num];
    841 
    842 			SEM_PRINTF(("semop:  semaptr=%p, sem_base=%p, "
    843 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
    844 			    semaptr, semaptr->_sem_base, semptr,
    845 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
    846 			    (sopptr->sem_flg & IPC_NOWAIT) ?
    847 			    "nowait" : "wait"));
    848 
    849 			if (sopptr->sem_op < 0) {
    850 				if ((int)(semptr->semval +
    851 				    sopptr->sem_op) < 0) {
    852 					SEM_PRINTF(("semop:  "
    853 					    "can't do it now\n"));
    854 					break;
    855 				} else {
    856 					semptr->semval += sopptr->sem_op;
    857 					if (semptr->semval == 0 &&
    858 					    semptr->semzcnt > 0)
    859 						do_wakeup = 1;
    860 				}
    861 				if (sopptr->sem_flg & SEM_UNDO)
    862 					do_undos = 1;
    863 			} else if (sopptr->sem_op == 0) {
    864 				if (semptr->semval > 0) {
    865 					SEM_PRINTF(("semop:  not zero now\n"));
    866 					break;
    867 				}
    868 			} else {
    869 				if (semptr->semncnt > 0)
    870 					do_wakeup = 1;
    871 				semptr->semval += sopptr->sem_op;
    872 				if (sopptr->sem_flg & SEM_UNDO)
    873 					do_undos = 1;
    874 			}
    875 		}
    876 
    877 		/*
    878 		 * Did we get through the entire vector?
    879 		 */
    880 		if (i >= nsops)
    881 			goto done;
    882 
    883 		/*
    884 		 * No ... rollback anything that we've already done
    885 		 */
    886 		SEM_PRINTF(("semop:  rollback 0 through %d\n", i - 1));
    887 		while (i-- > 0)
    888 			semaptr->_sem_base[sops[i].sem_num].semval -=
    889 			    sops[i].sem_op;
    890 
    891 		/*
    892 		 * If the request that we couldn't satisfy has the
    893 		 * NOWAIT flag set then return with EAGAIN.
    894 		 */
    895 		if (sopptr->sem_flg & IPC_NOWAIT) {
    896 			error = EAGAIN;
    897 			goto out;
    898 		}
    899 
    900 		if (sopptr->sem_op == 0)
    901 			semptr->semzcnt++;
    902 		else
    903 			semptr->semncnt++;
    904 
    905 		sem_waiters++;
    906 		SEM_PRINTF(("semop:  good night!\n"));
    907 		error = cv_wait_sig(&semcv[semid], &semlock);
    908 		SEM_PRINTF(("semop:  good morning (error=%d)!\n", error));
    909 		sem_waiters--;
    910 
    911 		/* Notify reallocator, if it is waiting */
    912 		cv_broadcast(&sem_realloc_cv);
    913 
    914 		/*
    915 		 * Make sure that the semaphore still exists
    916 		 */
    917 		if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    918 		    semaptr->sem_perm._seq != seq) {
    919 			error = EIDRM;
    920 			goto out;
    921 		}
    922 
    923 		/*
    924 		 * The semaphore is still alive.  Readjust the count of
    925 		 * waiting processes.
    926 		 */
    927 		semptr = &semaptr->_sem_base[sopptr->sem_num];
    928 		if (sopptr->sem_op == 0)
    929 			semptr->semzcnt--;
    930 		else
    931 			semptr->semncnt--;
    932 
    933 		/* In case of such state, restart the call */
    934 		if (sem_realloc_state) {
    935 			mutex_exit(&semlock);
    936 			goto restart;
    937 		}
    938 
    939 		/* Is it really morning, or was our sleep interrupted? */
    940 		if (error != 0) {
    941 			error = EINTR;
    942 			goto out;
    943 		}
    944 		SEM_PRINTF(("semop:  good morning!\n"));
    945 	}
    946 
    947 done:
    948 	/*
    949 	 * Process any SEM_UNDO requests.
    950 	 */
    951 	if (do_undos) {
    952 		for (i = 0; i < nsops; i++) {
    953 			/*
    954 			 * We only need to deal with SEM_UNDO's for non-zero
    955 			 * op's.
    956 			 */
    957 			int adjval;
    958 
    959 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
    960 				continue;
    961 			adjval = sops[i].sem_op;
    962 			if (adjval == 0)
    963 				continue;
    964 			error = semundo_adjust(p, &suptr, semid,
    965 			    sops[i].sem_num, -adjval);
    966 			if (error == 0)
    967 				continue;
    968 
    969 			/*
    970 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
    971 			 * Rollback the adjustments to this point and then
    972 			 * rollback the semaphore ups and down so we can return
    973 			 * with an error with all structures restored.  We
    974 			 * rollback the undo's in the exact reverse order that
    975 			 * we applied them.  This guarantees that we won't run
    976 			 * out of space as we roll things back out.
    977 			 */
    978 			while (i-- > 0) {
    979 				if ((sops[i].sem_flg & SEM_UNDO) == 0)
    980 					continue;
    981 				adjval = sops[i].sem_op;
    982 				if (adjval == 0)
    983 					continue;
    984 				if (semundo_adjust(p, &suptr, semid,
    985 				    sops[i].sem_num, adjval) != 0)
    986 					panic("semop - can't undo undos");
    987 			}
    988 
    989 			for (i = 0; i < nsops; i++)
    990 				semaptr->_sem_base[sops[i].sem_num].semval -=
    991 				    sops[i].sem_op;
    992 
    993 			SEM_PRINTF(("error = %d from semundo_adjust\n", error));
    994 			goto out;
    995 		} /* loop through the sops */
    996 	} /* if (do_undos) */
    997 
    998 	/* We're definitely done - set the sempid's */
    999 	for (i = 0; i < nsops; i++) {
   1000 		sopptr = &sops[i];
   1001 		semptr = &semaptr->_sem_base[sopptr->sem_num];
   1002 		semptr->sempid = p->p_pid;
   1003 	}
   1004 
   1005 	/* Update sem_otime */
   1006 	semaptr->sem_otime = time_second;
   1007 
   1008 	/* Do a wakeup if any semaphore was up'd. */
   1009 	if (do_wakeup) {
   1010 		SEM_PRINTF(("semop:  doing wakeup\n"));
   1011 		cv_broadcast(&semcv[semid]);
   1012 		SEM_PRINTF(("semop:  back from wakeup\n"));
   1013 	}
   1014 	SEM_PRINTF(("semop:  done\n"));
   1015 	*retval = 0;
   1016 
   1017  out:
   1018 	mutex_exit(&semlock);
   1019 	if (sops != small_sops)
   1020 		kmem_free(sops, nsops * sizeof(*sops));
   1021 	return error;
   1022 }
   1023 
   1024 /*
   1025  * Go through the undo structures for this process and apply the
   1026  * adjustments to semaphores.
   1027  */
   1028 /*ARGSUSED*/
   1029 void
   1030 semexit(struct proc *p, void *v)
   1031 {
   1032 	struct sem_undo *suptr;
   1033 	struct sem_undo **supptr;
   1034 
   1035 	if ((p->p_flag & PK_SYSVSEM) == 0)
   1036 		return;
   1037 
   1038 	mutex_enter(&semlock);
   1039 
   1040 	/*
   1041 	 * Go through the chain of undo vectors looking for one
   1042 	 * associated with this process.
   1043 	 */
   1044 
   1045 	for (supptr = &semu_list; (suptr = *supptr) != NULL;
   1046 	    supptr = &suptr->un_next) {
   1047 		if (suptr->un_proc == p)
   1048 			break;
   1049 	}
   1050 
   1051 	/*
   1052 	 * If there is no undo vector, skip to the end.
   1053 	 */
   1054 
   1055 	if (suptr == NULL) {
   1056 		mutex_exit(&semlock);
   1057 		return;
   1058 	}
   1059 
   1060 	/*
   1061 	 * We now have an undo vector for this process.
   1062 	 */
   1063 
   1064 	SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
   1065 	    suptr->un_cnt));
   1066 
   1067 	/*
   1068 	 * If there are any active undo elements then process them.
   1069 	 */
   1070 	if (suptr->un_cnt > 0) {
   1071 		int ix;
   1072 
   1073 		for (ix = 0; ix < suptr->un_cnt; ix++) {
   1074 			int semid = suptr->un_ent[ix].un_id;
   1075 			int semnum = suptr->un_ent[ix].un_num;
   1076 			int adjval = suptr->un_ent[ix].un_adjval;
   1077 			struct semid_ds *semaptr;
   1078 
   1079 			semaptr = &sema[semid];
   1080 			if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
   1081 				panic("semexit - semid not allocated");
   1082 			if (semnum >= semaptr->sem_nsems)
   1083 				panic("semexit - semnum out of range");
   1084 
   1085 			SEM_PRINTF(("semexit:  %p id=%d num=%d(adj=%d) ; "
   1086 			    "sem=%d\n",
   1087 			    suptr->un_proc, suptr->un_ent[ix].un_id,
   1088 			    suptr->un_ent[ix].un_num,
   1089 			    suptr->un_ent[ix].un_adjval,
   1090 			    semaptr->_sem_base[semnum].semval));
   1091 
   1092 			if (adjval < 0 &&
   1093 			    semaptr->_sem_base[semnum].semval < -adjval)
   1094 				semaptr->_sem_base[semnum].semval = 0;
   1095 			else
   1096 				semaptr->_sem_base[semnum].semval += adjval;
   1097 
   1098 			cv_broadcast(&semcv[semid]);
   1099 			SEM_PRINTF(("semexit:  back from wakeup\n"));
   1100 		}
   1101 	}
   1102 
   1103 	/*
   1104 	 * Deallocate the undo vector.
   1105 	 */
   1106 	SEM_PRINTF(("removing vector\n"));
   1107 	suptr->un_proc = NULL;
   1108 	*supptr = suptr->un_next;
   1109 	mutex_exit(&semlock);
   1110 }
   1111 
   1112 /*
   1113  * Sysctl initialization and nodes.
   1114  */
   1115 
   1116 static int
   1117 sysctl_ipc_semmni(SYSCTLFN_ARGS)
   1118 {
   1119 	int newsize, error;
   1120 	struct sysctlnode node;
   1121 	node = *rnode;
   1122 	node.sysctl_data = &newsize;
   1123 
   1124 	newsize = seminfo.semmni;
   1125 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1126 	if (error || newp == NULL)
   1127 		return error;
   1128 
   1129 	return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
   1130 }
   1131 
   1132 static int
   1133 sysctl_ipc_semmns(SYSCTLFN_ARGS)
   1134 {
   1135 	int newsize, error;
   1136 	struct sysctlnode node;
   1137 	node = *rnode;
   1138 	node.sysctl_data = &newsize;
   1139 
   1140 	newsize = seminfo.semmns;
   1141 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1142 	if (error || newp == NULL)
   1143 		return error;
   1144 
   1145 	return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
   1146 }
   1147 
   1148 static int
   1149 sysctl_ipc_semmnu(SYSCTLFN_ARGS)
   1150 {
   1151 	int newsize, error;
   1152 	struct sysctlnode node;
   1153 	node = *rnode;
   1154 	node.sysctl_data = &newsize;
   1155 
   1156 	newsize = seminfo.semmnu;
   1157 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1158 	if (error || newp == NULL)
   1159 		return error;
   1160 
   1161 	return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
   1162 }
   1163 
   1164 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
   1165 {
   1166 	const struct sysctlnode *node = NULL;
   1167 
   1168 	sysctl_createv(clog, 0, NULL, NULL,
   1169 		CTLFLAG_PERMANENT,
   1170 		CTLTYPE_NODE, "kern", NULL,
   1171 		NULL, 0, NULL, 0,
   1172 		CTL_KERN, CTL_EOL);
   1173 	sysctl_createv(clog, 0, NULL, &node,
   1174 		CTLFLAG_PERMANENT,
   1175 		CTLTYPE_NODE, "ipc",
   1176 		SYSCTL_DESCR("SysV IPC options"),
   1177 		NULL, 0, NULL, 0,
   1178 		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
   1179 
   1180 	if (node == NULL)
   1181 		return;
   1182 
   1183 	sysctl_createv(clog, 0, &node, NULL,
   1184 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1185 		CTLTYPE_INT, "semmni",
   1186 		SYSCTL_DESCR("Max number of number of semaphore identifiers"),
   1187 		sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
   1188 		CTL_CREATE, CTL_EOL);
   1189 	sysctl_createv(clog, 0, &node, NULL,
   1190 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1191 		CTLTYPE_INT, "semmns",
   1192 		SYSCTL_DESCR("Max number of number of semaphores in system"),
   1193 		sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
   1194 		CTL_CREATE, CTL_EOL);
   1195 	sysctl_createv(clog, 0, &node, NULL,
   1196 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1197 		CTLTYPE_INT, "semmnu",
   1198 		SYSCTL_DESCR("Max number of undo structures in system"),
   1199 		sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
   1200 		CTL_CREATE, CTL_EOL);
   1201 }
   1202