Home | History | Annotate | Line # | Download | only in kern
sysv_sem.c revision 1.83
      1 /*	$NetBSD: sysv_sem.c,v 1.83 2008/05/06 20:25:09 njoly Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, and by Andrew Doran.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Implementation of SVID semaphores
     35  *
     36  * Author: Daniel Boulet
     37  *
     38  * This software is provided ``AS IS'' without any warranties of any kind.
     39  */
     40 
     41 #include <sys/cdefs.h>
     42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.83 2008/05/06 20:25:09 njoly Exp $");
     43 
     44 #define SYSVSEM
     45 
     46 #include <sys/param.h>
     47 #include <sys/kernel.h>
     48 #include <sys/sem.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/kmem.h>
     51 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
     52 #include <sys/syscallargs.h>
     53 #include <sys/kauth.h>
     54 
     55 /*
     56  * Memory areas:
     57  *  1st: Pool of semaphore identifiers
     58  *  2nd: Semaphores
     59  *  3rd: Conditional variables
     60  *  4th: Undo structures
     61  */
     62 struct semid_ds		*sema;
     63 static struct __sem	*sem;
     64 static kcondvar_t	*semcv;
     65 static int		*semu;
     66 
     67 static kmutex_t	semlock;
     68 static struct	sem_undo *semu_list;	/* list of active undo structures */
     69 static u_int	semtot = 0;		/* total number of semaphores */
     70 
     71 static u_int	sem_waiters = 0;	/* total number of semop waiters */
     72 static bool	sem_realloc_state;
     73 static kcondvar_t sem_realloc_cv;
     74 
     75 /* Macro to find a particular sem_undo vector */
     76 #define SEMU(s, ix)	((struct sem_undo *)(((long)s) + ix * seminfo.semusz))
     77 
     78 #ifdef SEM_DEBUG
     79 #define SEM_PRINTF(a) printf a
     80 #else
     81 #define SEM_PRINTF(a)
     82 #endif
     83 
     84 struct sem_undo *semu_alloc(struct proc *);
     85 int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
     86 void semundo_clear(int, int);
     87 
     88 void
     89 seminit(void)
     90 {
     91 	int i, sz;
     92 	vaddr_t v;
     93 
     94 	mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
     95 	cv_init(&sem_realloc_cv, "semrealc");
     96 	sem_realloc_state = false;
     97 
     98 	/* Allocate the wired memory for our structures */
     99 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    100 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    101 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    102 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    103 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    104 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    105 	if (v == 0)
    106 		panic("sysv_sem: cannot allocate memory");
    107 	sema = (void *)v;
    108 	sem = (void *)(ALIGN(sema) +
    109 	    seminfo.semmni * sizeof(struct semid_ds));
    110 	semcv = (void *)(ALIGN(sem) +
    111 	    seminfo.semmns * sizeof(struct __sem));
    112 	semu = (void *)(ALIGN(semcv) +
    113 	    seminfo.semmni * sizeof(kcondvar_t));
    114 
    115 	for (i = 0; i < seminfo.semmni; i++) {
    116 		sema[i]._sem_base = 0;
    117 		sema[i].sem_perm.mode = 0;
    118 		cv_init(&semcv[i], "semwait");
    119 	}
    120 	for (i = 0; i < seminfo.semmnu; i++) {
    121 		struct sem_undo *suptr = SEMU(semu, i);
    122 		suptr->un_proc = NULL;
    123 	}
    124 	semu_list = NULL;
    125 	exithook_establish(semexit, NULL);
    126 }
    127 
    128 static int
    129 semrealloc(int newsemmni, int newsemmns, int newsemmnu)
    130 {
    131 	struct semid_ds *new_sema, *old_sema;
    132 	struct __sem *new_sem;
    133 	struct sem_undo *new_semu_list, *suptr, *nsuptr;
    134 	int *new_semu;
    135 	kcondvar_t *new_semcv;
    136 	vaddr_t v;
    137 	int i, j, lsemid, nmnus, sz;
    138 
    139 	if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
    140 		return EINVAL;
    141 
    142 	/* Allocate the wired memory for our structures */
    143 	sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
    144 	    ALIGN(newsemmns * sizeof(struct __sem)) +
    145 	    ALIGN(newsemmni * sizeof(kcondvar_t)) +
    146 	    ALIGN(newsemmnu * seminfo.semusz);
    147 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    148 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    149 	if (v == 0)
    150 		return ENOMEM;
    151 
    152 	mutex_enter(&semlock);
    153 	if (sem_realloc_state) {
    154 		mutex_exit(&semlock);
    155 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    156 		return EBUSY;
    157 	}
    158 	sem_realloc_state = true;
    159 	if (sem_waiters) {
    160 		/*
    161 		 * Mark reallocation state, wake-up all waiters,
    162 		 * and wait while they will all exit.
    163 		 */
    164 		for (i = 0; i < seminfo.semmni; i++)
    165 			cv_broadcast(&semcv[i]);
    166 		while (sem_waiters)
    167 			cv_wait(&sem_realloc_cv, &semlock);
    168 	}
    169 	old_sema = sema;
    170 
    171 	/* Get the number of last slot */
    172 	lsemid = 0;
    173 	for (i = 0; i < seminfo.semmni; i++)
    174 		if (sema[i].sem_perm.mode & SEM_ALLOC)
    175 			lsemid = i;
    176 
    177 	/* Get the number of currently used undo structures */
    178 	nmnus = 0;
    179 	for (i = 0; i < seminfo.semmnu; i++) {
    180 		suptr = SEMU(semu, i);
    181 		if (suptr->un_proc == NULL)
    182 			continue;
    183 		nmnus++;
    184 	}
    185 
    186 	/* We cannot reallocate less memory than we use */
    187 	if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
    188 		mutex_exit(&semlock);
    189 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    190 		return EBUSY;
    191 	}
    192 
    193 	new_sema = (void *)v;
    194 	new_sem = (void *)(ALIGN(new_sema) +
    195 	    newsemmni * sizeof(struct semid_ds));
    196 	new_semcv = (void *)(ALIGN(new_sem) +
    197 	    newsemmns * sizeof(struct __sem));
    198 	new_semu = (void *)(ALIGN(new_semcv) +
    199 	    newsemmni * sizeof(kcondvar_t));
    200 
    201 	/* Initialize all semaphore identifiers and condvars */
    202 	for (i = 0; i < newsemmni; i++) {
    203 		new_sema[i]._sem_base = 0;
    204 		new_sema[i].sem_perm.mode = 0;
    205 		cv_init(&new_semcv[i], "semwait");
    206 	}
    207 	for (i = 0; i < newsemmnu; i++) {
    208 		nsuptr = SEMU(new_semu, i);
    209 		nsuptr->un_proc = NULL;
    210 	}
    211 
    212 	/*
    213 	 * Copy all identifiers, semaphores and list of the
    214 	 * undo structures to the new memory allocation.
    215 	 */
    216 	j = 0;
    217 	for (i = 0; i <= lsemid; i++) {
    218 		if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
    219 			continue;
    220 		memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
    221 		new_sema[i]._sem_base = &new_sem[j];
    222 		memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
    223 		    (sizeof(struct __sem) * sema[i].sem_nsems));
    224 		j += sema[i].sem_nsems;
    225 	}
    226 	KASSERT(j == semtot);
    227 
    228 	j = 0;
    229 	new_semu_list = NULL;
    230 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
    231 		KASSERT(j < newsemmnu);
    232 		nsuptr = SEMU(new_semu, j);
    233 		memcpy(nsuptr, suptr, SEMUSZ);
    234 		nsuptr->un_next = new_semu_list;
    235 		new_semu_list = nsuptr;
    236 		j++;
    237 	}
    238 
    239 	for (i = 0; i < seminfo.semmni; i++) {
    240 		KASSERT(cv_has_waiters(&semcv[i]) == false);
    241 		cv_destroy(&semcv[i]);
    242 	}
    243 
    244 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    245 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    246 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    247 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    248 
    249 	/* Set the pointers and update the new values */
    250 	sema = new_sema;
    251 	sem = new_sem;
    252 	semcv = new_semcv;
    253 	semu = new_semu;
    254 	semu_list = new_semu_list;
    255 
    256 	seminfo.semmni = newsemmni;
    257 	seminfo.semmns = newsemmns;
    258 	seminfo.semmnu = newsemmnu;
    259 
    260 	/* Reallocation completed - notify all waiters, if any */
    261 	sem_realloc_state = false;
    262 	cv_broadcast(&sem_realloc_cv);
    263 	mutex_exit(&semlock);
    264 
    265 	uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
    266 	return 0;
    267 }
    268 
    269 /*
    270  * Placebo.
    271  */
    272 
    273 int
    274 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
    275 {
    276 
    277 	*retval = 0;
    278 	return 0;
    279 }
    280 
    281 /*
    282  * Allocate a new sem_undo structure for a process
    283  * (returns ptr to structure or NULL if no more room)
    284  */
    285 
    286 struct sem_undo *
    287 semu_alloc(struct proc *p)
    288 {
    289 	int i;
    290 	struct sem_undo *suptr;
    291 	struct sem_undo **supptr;
    292 	int attempt;
    293 
    294 	KASSERT(mutex_owned(&semlock));
    295 
    296 	/*
    297 	 * Try twice to allocate something.
    298 	 * (we'll purge any empty structures after the first pass so
    299 	 * two passes are always enough)
    300 	 */
    301 
    302 	for (attempt = 0; attempt < 2; attempt++) {
    303 		/*
    304 		 * Look for a free structure.
    305 		 * Fill it in and return it if we find one.
    306 		 */
    307 
    308 		for (i = 0; i < seminfo.semmnu; i++) {
    309 			suptr = SEMU(semu, i);
    310 			if (suptr->un_proc == NULL) {
    311 				suptr->un_next = semu_list;
    312 				semu_list = suptr;
    313 				suptr->un_cnt = 0;
    314 				suptr->un_proc = p;
    315 				return (suptr);
    316 			}
    317 		}
    318 
    319 		/*
    320 		 * We didn't find a free one, if this is the first attempt
    321 		 * then try to free some structures.
    322 		 */
    323 
    324 		if (attempt == 0) {
    325 			/* All the structures are in use - try to free some */
    326 			int did_something = 0;
    327 
    328 			supptr = &semu_list;
    329 			while ((suptr = *supptr) != NULL) {
    330 				if (suptr->un_cnt == 0)  {
    331 					suptr->un_proc = NULL;
    332 					*supptr = suptr->un_next;
    333 					did_something = 1;
    334 				} else
    335 					supptr = &suptr->un_next;
    336 			}
    337 
    338 			/* If we didn't free anything then just give-up */
    339 			if (!did_something)
    340 				return (NULL);
    341 		} else {
    342 			/*
    343 			 * The second pass failed even though we freed
    344 			 * something after the first pass!
    345 			 * This is IMPOSSIBLE!
    346 			 */
    347 			panic("semu_alloc - second attempt failed");
    348 		}
    349 	}
    350 	return NULL;
    351 }
    352 
    353 /*
    354  * Adjust a particular entry for a particular proc
    355  */
    356 
    357 int
    358 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
    359     int adjval)
    360 {
    361 	struct sem_undo *suptr;
    362 	struct undo *sunptr;
    363 	int i;
    364 
    365 	KASSERT(mutex_owned(&semlock));
    366 
    367 	/*
    368 	 * Look for and remember the sem_undo if the caller doesn't
    369 	 * provide it
    370 	 */
    371 
    372 	suptr = *supptr;
    373 	if (suptr == NULL) {
    374 		for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    375 			if (suptr->un_proc == p)
    376 				break;
    377 
    378 		if (suptr == NULL) {
    379 			suptr = semu_alloc(p);
    380 			if (suptr == NULL)
    381 				return (ENOSPC);
    382 		}
    383 		*supptr = suptr;
    384 	}
    385 
    386 	/*
    387 	 * Look for the requested entry and adjust it (delete if
    388 	 * adjval becomes 0).
    389 	 */
    390 	sunptr = &suptr->un_ent[0];
    391 	for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
    392 		if (sunptr->un_id != semid || sunptr->un_num != semnum)
    393 			continue;
    394 		sunptr->un_adjval += adjval;
    395 		if (sunptr->un_adjval == 0) {
    396 			suptr->un_cnt--;
    397 			if (i < suptr->un_cnt)
    398 				suptr->un_ent[i] =
    399 				    suptr->un_ent[suptr->un_cnt];
    400 		}
    401 		return (0);
    402 	}
    403 
    404 	/* Didn't find the right entry - create it */
    405 	if (suptr->un_cnt == SEMUME)
    406 		return (EINVAL);
    407 
    408 	sunptr = &suptr->un_ent[suptr->un_cnt];
    409 	suptr->un_cnt++;
    410 	sunptr->un_adjval = adjval;
    411 	sunptr->un_id = semid;
    412 	sunptr->un_num = semnum;
    413 	return (0);
    414 }
    415 
    416 void
    417 semundo_clear(int semid, int semnum)
    418 {
    419 	struct sem_undo *suptr;
    420 	struct undo *sunptr, *sunend;
    421 
    422 	KASSERT(mutex_owned(&semlock));
    423 
    424 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    425 		for (sunptr = &suptr->un_ent[0],
    426 		    sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
    427 			if (sunptr->un_id == semid) {
    428 				if (semnum == -1 || sunptr->un_num == semnum) {
    429 					suptr->un_cnt--;
    430 					sunend--;
    431 					if (sunptr != sunend)
    432 						*sunptr = *sunend;
    433 					if (semnum != -1)
    434 						break;
    435 					else
    436 						continue;
    437 				}
    438 			}
    439 			sunptr++;
    440 		}
    441 }
    442 
    443 int
    444 sys_____semctl13(struct lwp *l, const struct sys_____semctl13_args *uap, register_t *retval)
    445 {
    446 	/* {
    447 		syscallarg(int) semid;
    448 		syscallarg(int) semnum;
    449 		syscallarg(int) cmd;
    450 		syscallarg(union __semun *) arg;
    451 	} */
    452 	struct semid_ds sembuf;
    453 	int cmd, error;
    454 	void *pass_arg;
    455 	union __semun karg;
    456 
    457 	cmd = SCARG(uap, cmd);
    458 
    459 	pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
    460 
    461 	if (pass_arg) {
    462 		error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
    463 		if (error)
    464 			return error;
    465 		if (cmd == IPC_SET) {
    466 			error = copyin(karg.buf, &sembuf, sizeof(sembuf));
    467 			if (error)
    468 				return (error);
    469 		}
    470 	}
    471 
    472 	error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
    473 	    pass_arg, retval);
    474 
    475 	if (error == 0 && cmd == IPC_STAT)
    476 		error = copyout(&sembuf, karg.buf, sizeof(sembuf));
    477 
    478 	return (error);
    479 }
    480 
    481 int
    482 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
    483     register_t *retval)
    484 {
    485 	kauth_cred_t cred = l->l_cred;
    486 	union __semun *arg = v;
    487 	struct semid_ds *sembuf = v, *semaptr;
    488 	int i, error, ix;
    489 
    490 	SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
    491 	    semid, semnum, cmd, v));
    492 
    493 	mutex_enter(&semlock);
    494 
    495 	ix = IPCID_TO_IX(semid);
    496 	if (ix < 0 || ix >= seminfo.semmni) {
    497 		mutex_exit(&semlock);
    498 		return (EINVAL);
    499 	}
    500 
    501 	semaptr = &sema[ix];
    502 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    503 	    semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
    504 		mutex_exit(&semlock);
    505 		return (EINVAL);
    506 	}
    507 
    508 	switch (cmd) {
    509 	case IPC_RMID:
    510 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
    511 			break;
    512 		semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
    513 		semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
    514 		semtot -= semaptr->sem_nsems;
    515 		for (i = semaptr->_sem_base - sem; i < semtot; i++)
    516 			sem[i] = sem[i + semaptr->sem_nsems];
    517 		for (i = 0; i < seminfo.semmni; i++) {
    518 			if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
    519 			    sema[i]._sem_base > semaptr->_sem_base)
    520 				sema[i]._sem_base -= semaptr->sem_nsems;
    521 		}
    522 		semaptr->sem_perm.mode = 0;
    523 		semundo_clear(ix, -1);
    524 		cv_broadcast(&semcv[ix]);
    525 		break;
    526 
    527 	case IPC_SET:
    528 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
    529 			break;
    530 		KASSERT(sembuf != NULL);
    531 		semaptr->sem_perm.uid = sembuf->sem_perm.uid;
    532 		semaptr->sem_perm.gid = sembuf->sem_perm.gid;
    533 		semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
    534 		    (sembuf->sem_perm.mode & 0777);
    535 		semaptr->sem_ctime = time_second;
    536 		break;
    537 
    538 	case IPC_STAT:
    539 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    540 			break;
    541 		KASSERT(sembuf != NULL);
    542 		memcpy(sembuf, semaptr, sizeof(struct semid_ds));
    543 		sembuf->sem_perm.mode &= 0777;
    544 		break;
    545 
    546 	case GETNCNT:
    547 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    548 			break;
    549 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    550 			error = EINVAL;
    551 			break;
    552 		}
    553 		*retval = semaptr->_sem_base[semnum].semncnt;
    554 		break;
    555 
    556 	case GETPID:
    557 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    558 			break;
    559 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    560 			error = EINVAL;
    561 			break;
    562 		}
    563 		*retval = semaptr->_sem_base[semnum].sempid;
    564 		break;
    565 
    566 	case GETVAL:
    567 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    568 			break;
    569 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    570 			error = EINVAL;
    571 			break;
    572 		}
    573 		*retval = semaptr->_sem_base[semnum].semval;
    574 		break;
    575 
    576 	case GETALL:
    577 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    578 			break;
    579 		KASSERT(arg != NULL);
    580 		for (i = 0; i < semaptr->sem_nsems; i++) {
    581 			error = copyout(&semaptr->_sem_base[i].semval,
    582 			    &arg->array[i], sizeof(arg->array[i]));
    583 			if (error != 0)
    584 				break;
    585 		}
    586 		break;
    587 
    588 	case GETZCNT:
    589 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    590 			break;
    591 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    592 			error = EINVAL;
    593 			break;
    594 		}
    595 		*retval = semaptr->_sem_base[semnum].semzcnt;
    596 		break;
    597 
    598 	case SETVAL:
    599 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    600 			break;
    601 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    602 			error = EINVAL;
    603 			break;
    604 		}
    605 		KASSERT(arg != NULL);
    606 		if ((unsigned int)arg->val > seminfo.semvmx) {
    607 			error = ERANGE;
    608 			break;
    609 		}
    610 		semaptr->_sem_base[semnum].semval = arg->val;
    611 		semundo_clear(ix, semnum);
    612 		cv_broadcast(&semcv[ix]);
    613 		break;
    614 
    615 	case SETALL:
    616 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    617 			break;
    618 		KASSERT(arg != NULL);
    619 		for (i = 0; i < semaptr->sem_nsems; i++) {
    620 			unsigned short semval;
    621 			error = copyin(&arg->array[i], &semval,
    622 			    sizeof(arg->array[i]));
    623 			if (error != 0)
    624 				break;
    625 			if ((unsigned int)semval > seminfo.semvmx) {
    626 				error = ERANGE;
    627 				break;
    628 			}
    629 			semaptr->_sem_base[i].semval = semval;
    630 		}
    631 		semundo_clear(ix, -1);
    632 		cv_broadcast(&semcv[ix]);
    633 		break;
    634 
    635 	default:
    636 		error = EINVAL;
    637 		break;
    638 	}
    639 
    640 	mutex_exit(&semlock);
    641 	return (error);
    642 }
    643 
    644 int
    645 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
    646 {
    647 	/* {
    648 		syscallarg(key_t) key;
    649 		syscallarg(int) nsems;
    650 		syscallarg(int) semflg;
    651 	} */
    652 	int semid, error = 0;
    653 	int key = SCARG(uap, key);
    654 	int nsems = SCARG(uap, nsems);
    655 	int semflg = SCARG(uap, semflg);
    656 	kauth_cred_t cred = l->l_cred;
    657 
    658 	SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
    659 
    660 	mutex_enter(&semlock);
    661 
    662 	if (key != IPC_PRIVATE) {
    663 		for (semid = 0; semid < seminfo.semmni; semid++) {
    664 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
    665 			    sema[semid].sem_perm._key == key)
    666 				break;
    667 		}
    668 		if (semid < seminfo.semmni) {
    669 			SEM_PRINTF(("found public key\n"));
    670 			if ((error = ipcperm(cred, &sema[semid].sem_perm,
    671 			    semflg & 0700)))
    672 			    	goto out;
    673 			if (nsems > 0 && sema[semid].sem_nsems < nsems) {
    674 				SEM_PRINTF(("too small\n"));
    675 				error = EINVAL;
    676 				goto out;
    677 			}
    678 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
    679 				SEM_PRINTF(("not exclusive\n"));
    680 				error = EEXIST;
    681 				goto out;
    682 			}
    683 			goto found;
    684 		}
    685 	}
    686 
    687 	SEM_PRINTF(("need to allocate the semid_ds\n"));
    688 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
    689 		if (nsems <= 0 || nsems > seminfo.semmsl) {
    690 			SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
    691 			    seminfo.semmsl));
    692 			error = EINVAL;
    693 			goto out;
    694 		}
    695 		if (nsems > seminfo.semmns - semtot) {
    696 			SEM_PRINTF(("not enough semaphores left "
    697 			    "(need %d, got %d)\n",
    698 			    nsems, seminfo.semmns - semtot));
    699 			error = ENOSPC;
    700 			goto out;
    701 		}
    702 		for (semid = 0; semid < seminfo.semmni; semid++) {
    703 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
    704 				break;
    705 		}
    706 		if (semid == seminfo.semmni) {
    707 			SEM_PRINTF(("no more semid_ds's available\n"));
    708 			error = ENOSPC;
    709 			goto out;
    710 		}
    711 		SEM_PRINTF(("semid %d is available\n", semid));
    712 		sema[semid].sem_perm._key = key;
    713 		sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
    714 		sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
    715 		sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
    716 		sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
    717 		sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
    718 		sema[semid].sem_perm._seq =
    719 		    (sema[semid].sem_perm._seq + 1) & 0x7fff;
    720 		sema[semid].sem_nsems = nsems;
    721 		sema[semid].sem_otime = 0;
    722 		sema[semid].sem_ctime = time_second;
    723 		sema[semid]._sem_base = &sem[semtot];
    724 		semtot += nsems;
    725 		memset(sema[semid]._sem_base, 0,
    726 		    sizeof(sema[semid]._sem_base[0]) * nsems);
    727 		SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
    728 		    &sem[semtot]));
    729 	} else {
    730 		SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
    731 		error = ENOENT;
    732 		goto out;
    733 	}
    734 
    735  found:
    736 	*retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
    737  out:
    738 	mutex_exit(&semlock);
    739 	return (error);
    740 }
    741 
    742 #define SMALL_SOPS 8
    743 
    744 int
    745 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
    746 {
    747 	/* {
    748 		syscallarg(int) semid;
    749 		syscallarg(struct sembuf *) sops;
    750 		syscallarg(size_t) nsops;
    751 	} */
    752 	struct proc *p = l->l_proc;
    753 	int semid = SCARG(uap, semid), seq;
    754 	size_t nsops = SCARG(uap, nsops);
    755 	struct sembuf small_sops[SMALL_SOPS];
    756 	struct sembuf *sops;
    757 	struct semid_ds *semaptr;
    758 	struct sembuf *sopptr = NULL;
    759 	struct __sem *semptr = NULL;
    760 	struct sem_undo *suptr = NULL;
    761 	kauth_cred_t cred = l->l_cred;
    762 	int i, error;
    763 	int do_wakeup, do_undos;
    764 
    765 	SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
    766 
    767 	if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
    768 		mutex_enter(p->p_lock);
    769 		p->p_flag |= PK_SYSVSEM;
    770 		mutex_exit(p->p_lock);
    771 	}
    772 
    773 restart:
    774 	if (nsops <= SMALL_SOPS) {
    775 		sops = small_sops;
    776 	} else if (nsops <= seminfo.semopm) {
    777 		sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
    778 	} else {
    779 		SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
    780 		    seminfo.semopm, nsops));
    781 		return (E2BIG);
    782 	}
    783 
    784 	error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
    785 	if (error) {
    786 		SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
    787 		    SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
    788 		if (sops != small_sops)
    789 			kmem_free(sops, nsops * sizeof(*sops));
    790 		return error;
    791 	}
    792 
    793 	mutex_enter(&semlock);
    794 	/* In case of reallocation, we will wait for completion */
    795 	while (__predict_false(sem_realloc_state))
    796 		cv_wait(&sem_realloc_cv, &semlock);
    797 
    798 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
    799 	if (semid < 0 || semid >= seminfo.semmni) {
    800 		error = EINVAL;
    801 		goto out;
    802 	}
    803 
    804 	semaptr = &sema[semid];
    805 	seq = IPCID_TO_SEQ(SCARG(uap, semid));
    806 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    807 	    semaptr->sem_perm._seq != seq) {
    808 		error = EINVAL;
    809 		goto out;
    810 	}
    811 
    812 	if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
    813 		SEM_PRINTF(("error = %d from ipaccess\n", error));
    814 		goto out;
    815 	}
    816 
    817 	for (i = 0; i < nsops; i++)
    818 		if (sops[i].sem_num >= semaptr->sem_nsems) {
    819 			error = EFBIG;
    820 			goto out;
    821 		}
    822 
    823 	/*
    824 	 * Loop trying to satisfy the vector of requests.
    825 	 * If we reach a point where we must wait, any requests already
    826 	 * performed are rolled back and we go to sleep until some other
    827 	 * process wakes us up.  At this point, we start all over again.
    828 	 *
    829 	 * This ensures that from the perspective of other tasks, a set
    830 	 * of requests is atomic (never partially satisfied).
    831 	 */
    832 	do_undos = 0;
    833 
    834 	for (;;) {
    835 		do_wakeup = 0;
    836 
    837 		for (i = 0; i < nsops; i++) {
    838 			sopptr = &sops[i];
    839 			semptr = &semaptr->_sem_base[sopptr->sem_num];
    840 
    841 			SEM_PRINTF(("semop:  semaptr=%p, sem_base=%p, "
    842 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
    843 			    semaptr, semaptr->_sem_base, semptr,
    844 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
    845 			    (sopptr->sem_flg & IPC_NOWAIT) ?
    846 			    "nowait" : "wait"));
    847 
    848 			if (sopptr->sem_op < 0) {
    849 				if ((int)(semptr->semval +
    850 				    sopptr->sem_op) < 0) {
    851 					SEM_PRINTF(("semop:  "
    852 					    "can't do it now\n"));
    853 					break;
    854 				} else {
    855 					semptr->semval += sopptr->sem_op;
    856 					if (semptr->semval == 0 &&
    857 					    semptr->semzcnt > 0)
    858 						do_wakeup = 1;
    859 				}
    860 				if (sopptr->sem_flg & SEM_UNDO)
    861 					do_undos = 1;
    862 			} else if (sopptr->sem_op == 0) {
    863 				if (semptr->semval > 0) {
    864 					SEM_PRINTF(("semop:  not zero now\n"));
    865 					break;
    866 				}
    867 			} else {
    868 				if (semptr->semncnt > 0)
    869 					do_wakeup = 1;
    870 				semptr->semval += sopptr->sem_op;
    871 				if (sopptr->sem_flg & SEM_UNDO)
    872 					do_undos = 1;
    873 			}
    874 		}
    875 
    876 		/*
    877 		 * Did we get through the entire vector?
    878 		 */
    879 		if (i >= nsops)
    880 			goto done;
    881 
    882 		/*
    883 		 * No ... rollback anything that we've already done
    884 		 */
    885 		SEM_PRINTF(("semop:  rollback 0 through %d\n", i - 1));
    886 		while (i-- > 0)
    887 			semaptr->_sem_base[sops[i].sem_num].semval -=
    888 			    sops[i].sem_op;
    889 
    890 		/*
    891 		 * If the request that we couldn't satisfy has the
    892 		 * NOWAIT flag set then return with EAGAIN.
    893 		 */
    894 		if (sopptr->sem_flg & IPC_NOWAIT) {
    895 			error = EAGAIN;
    896 			goto out;
    897 		}
    898 
    899 		if (sopptr->sem_op == 0)
    900 			semptr->semzcnt++;
    901 		else
    902 			semptr->semncnt++;
    903 
    904 		sem_waiters++;
    905 		SEM_PRINTF(("semop:  good night!\n"));
    906 		error = cv_wait_sig(&semcv[semid], &semlock);
    907 		SEM_PRINTF(("semop:  good morning (error=%d)!\n", error));
    908 		sem_waiters--;
    909 
    910 		/* Notify reallocator, if it is waiting */
    911 		cv_broadcast(&sem_realloc_cv);
    912 
    913 		/*
    914 		 * Make sure that the semaphore still exists
    915 		 */
    916 		if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    917 		    semaptr->sem_perm._seq != seq) {
    918 			error = EIDRM;
    919 			goto out;
    920 		}
    921 
    922 		/*
    923 		 * The semaphore is still alive.  Readjust the count of
    924 		 * waiting processes.
    925 		 */
    926 		semptr = &semaptr->_sem_base[sopptr->sem_num];
    927 		if (sopptr->sem_op == 0)
    928 			semptr->semzcnt--;
    929 		else
    930 			semptr->semncnt--;
    931 
    932 		/* In case of such state, restart the call */
    933 		if (sem_realloc_state) {
    934 			mutex_exit(&semlock);
    935 			goto restart;
    936 		}
    937 
    938 		/* Is it really morning, or was our sleep interrupted? */
    939 		if (error != 0) {
    940 			error = EINTR;
    941 			goto out;
    942 		}
    943 		SEM_PRINTF(("semop:  good morning!\n"));
    944 	}
    945 
    946 done:
    947 	/*
    948 	 * Process any SEM_UNDO requests.
    949 	 */
    950 	if (do_undos) {
    951 		for (i = 0; i < nsops; i++) {
    952 			/*
    953 			 * We only need to deal with SEM_UNDO's for non-zero
    954 			 * op's.
    955 			 */
    956 			int adjval;
    957 
    958 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
    959 				continue;
    960 			adjval = sops[i].sem_op;
    961 			if (adjval == 0)
    962 				continue;
    963 			error = semundo_adjust(p, &suptr, semid,
    964 			    sops[i].sem_num, -adjval);
    965 			if (error == 0)
    966 				continue;
    967 
    968 			/*
    969 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
    970 			 * Rollback the adjustments to this point and then
    971 			 * rollback the semaphore ups and down so we can return
    972 			 * with an error with all structures restored.  We
    973 			 * rollback the undo's in the exact reverse order that
    974 			 * we applied them.  This guarantees that we won't run
    975 			 * out of space as we roll things back out.
    976 			 */
    977 			while (i-- > 0) {
    978 				if ((sops[i].sem_flg & SEM_UNDO) == 0)
    979 					continue;
    980 				adjval = sops[i].sem_op;
    981 				if (adjval == 0)
    982 					continue;
    983 				if (semundo_adjust(p, &suptr, semid,
    984 				    sops[i].sem_num, adjval) != 0)
    985 					panic("semop - can't undo undos");
    986 			}
    987 
    988 			for (i = 0; i < nsops; i++)
    989 				semaptr->_sem_base[sops[i].sem_num].semval -=
    990 				    sops[i].sem_op;
    991 
    992 			SEM_PRINTF(("error = %d from semundo_adjust\n", error));
    993 			goto out;
    994 		} /* loop through the sops */
    995 	} /* if (do_undos) */
    996 
    997 	/* We're definitely done - set the sempid's */
    998 	for (i = 0; i < nsops; i++) {
    999 		sopptr = &sops[i];
   1000 		semptr = &semaptr->_sem_base[sopptr->sem_num];
   1001 		semptr->sempid = p->p_pid;
   1002 	}
   1003 
   1004 	/* Update sem_otime */
   1005 	semaptr->sem_otime = time_second;
   1006 
   1007 	/* Do a wakeup if any semaphore was up'd. */
   1008 	if (do_wakeup) {
   1009 		SEM_PRINTF(("semop:  doing wakeup\n"));
   1010 		cv_broadcast(&semcv[semid]);
   1011 		SEM_PRINTF(("semop:  back from wakeup\n"));
   1012 	}
   1013 	SEM_PRINTF(("semop:  done\n"));
   1014 	*retval = 0;
   1015 
   1016  out:
   1017 	mutex_exit(&semlock);
   1018 	if (sops != small_sops)
   1019 		kmem_free(sops, nsops * sizeof(*sops));
   1020 	return error;
   1021 }
   1022 
   1023 /*
   1024  * Go through the undo structures for this process and apply the
   1025  * adjustments to semaphores.
   1026  */
   1027 /*ARGSUSED*/
   1028 void
   1029 semexit(struct proc *p, void *v)
   1030 {
   1031 	struct sem_undo *suptr;
   1032 	struct sem_undo **supptr;
   1033 
   1034 	if ((p->p_flag & PK_SYSVSEM) == 0)
   1035 		return;
   1036 
   1037 	mutex_enter(&semlock);
   1038 
   1039 	/*
   1040 	 * Go through the chain of undo vectors looking for one
   1041 	 * associated with this process.
   1042 	 */
   1043 
   1044 	for (supptr = &semu_list; (suptr = *supptr) != NULL;
   1045 	    supptr = &suptr->un_next) {
   1046 		if (suptr->un_proc == p)
   1047 			break;
   1048 	}
   1049 
   1050 	/*
   1051 	 * If there is no undo vector, skip to the end.
   1052 	 */
   1053 
   1054 	if (suptr == NULL) {
   1055 		mutex_exit(&semlock);
   1056 		return;
   1057 	}
   1058 
   1059 	/*
   1060 	 * We now have an undo vector for this process.
   1061 	 */
   1062 
   1063 	SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
   1064 	    suptr->un_cnt));
   1065 
   1066 	/*
   1067 	 * If there are any active undo elements then process them.
   1068 	 */
   1069 	if (suptr->un_cnt > 0) {
   1070 		int ix;
   1071 
   1072 		for (ix = 0; ix < suptr->un_cnt; ix++) {
   1073 			int semid = suptr->un_ent[ix].un_id;
   1074 			int semnum = suptr->un_ent[ix].un_num;
   1075 			int adjval = suptr->un_ent[ix].un_adjval;
   1076 			struct semid_ds *semaptr;
   1077 
   1078 			semaptr = &sema[semid];
   1079 			if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
   1080 				panic("semexit - semid not allocated");
   1081 			if (semnum >= semaptr->sem_nsems)
   1082 				panic("semexit - semnum out of range");
   1083 
   1084 			SEM_PRINTF(("semexit:  %p id=%d num=%d(adj=%d) ; "
   1085 			    "sem=%d\n",
   1086 			    suptr->un_proc, suptr->un_ent[ix].un_id,
   1087 			    suptr->un_ent[ix].un_num,
   1088 			    suptr->un_ent[ix].un_adjval,
   1089 			    semaptr->_sem_base[semnum].semval));
   1090 
   1091 			if (adjval < 0 &&
   1092 			    semaptr->_sem_base[semnum].semval < -adjval)
   1093 				semaptr->_sem_base[semnum].semval = 0;
   1094 			else
   1095 				semaptr->_sem_base[semnum].semval += adjval;
   1096 
   1097 			cv_broadcast(&semcv[semid]);
   1098 			SEM_PRINTF(("semexit:  back from wakeup\n"));
   1099 		}
   1100 	}
   1101 
   1102 	/*
   1103 	 * Deallocate the undo vector.
   1104 	 */
   1105 	SEM_PRINTF(("removing vector\n"));
   1106 	suptr->un_proc = NULL;
   1107 	*supptr = suptr->un_next;
   1108 	mutex_exit(&semlock);
   1109 }
   1110 
   1111 /*
   1112  * Sysctl initialization and nodes.
   1113  */
   1114 
   1115 static int
   1116 sysctl_ipc_semmni(SYSCTLFN_ARGS)
   1117 {
   1118 	int newsize, error;
   1119 	struct sysctlnode node;
   1120 	node = *rnode;
   1121 	node.sysctl_data = &newsize;
   1122 
   1123 	newsize = seminfo.semmni;
   1124 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1125 	if (error || newp == NULL)
   1126 		return error;
   1127 
   1128 	return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
   1129 }
   1130 
   1131 static int
   1132 sysctl_ipc_semmns(SYSCTLFN_ARGS)
   1133 {
   1134 	int newsize, error;
   1135 	struct sysctlnode node;
   1136 	node = *rnode;
   1137 	node.sysctl_data = &newsize;
   1138 
   1139 	newsize = seminfo.semmns;
   1140 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1141 	if (error || newp == NULL)
   1142 		return error;
   1143 
   1144 	return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
   1145 }
   1146 
   1147 static int
   1148 sysctl_ipc_semmnu(SYSCTLFN_ARGS)
   1149 {
   1150 	int newsize, error;
   1151 	struct sysctlnode node;
   1152 	node = *rnode;
   1153 	node.sysctl_data = &newsize;
   1154 
   1155 	newsize = seminfo.semmnu;
   1156 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1157 	if (error || newp == NULL)
   1158 		return error;
   1159 
   1160 	return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
   1161 }
   1162 
   1163 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
   1164 {
   1165 	const struct sysctlnode *node = NULL;
   1166 
   1167 	sysctl_createv(clog, 0, NULL, NULL,
   1168 		CTLFLAG_PERMANENT,
   1169 		CTLTYPE_NODE, "kern", NULL,
   1170 		NULL, 0, NULL, 0,
   1171 		CTL_KERN, CTL_EOL);
   1172 	sysctl_createv(clog, 0, NULL, &node,
   1173 		CTLFLAG_PERMANENT,
   1174 		CTLTYPE_NODE, "ipc",
   1175 		SYSCTL_DESCR("SysV IPC options"),
   1176 		NULL, 0, NULL, 0,
   1177 		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
   1178 
   1179 	if (node == NULL)
   1180 		return;
   1181 
   1182 	sysctl_createv(clog, 0, &node, NULL,
   1183 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1184 		CTLTYPE_INT, "semmni",
   1185 		SYSCTL_DESCR("Max number of number of semaphore identifiers"),
   1186 		sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
   1187 		CTL_CREATE, CTL_EOL);
   1188 	sysctl_createv(clog, 0, &node, NULL,
   1189 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1190 		CTLTYPE_INT, "semmns",
   1191 		SYSCTL_DESCR("Max number of number of semaphores in system"),
   1192 		sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
   1193 		CTL_CREATE, CTL_EOL);
   1194 	sysctl_createv(clog, 0, &node, NULL,
   1195 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1196 		CTLTYPE_INT, "semmnu",
   1197 		SYSCTL_DESCR("Max number of undo structures in system"),
   1198 		sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
   1199 		CTL_CREATE, CTL_EOL);
   1200 }
   1201