Home | History | Annotate | Line # | Download | only in kern
sysv_sem.c revision 1.81.2.3
      1 /*	$NetBSD: sysv_sem.c,v 1.81.2.3 2010/03/11 15:04:19 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, and by Andrew Doran.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Implementation of SVID semaphores
     35  *
     36  * Author: Daniel Boulet
     37  *
     38  * This software is provided ``AS IS'' without any warranties of any kind.
     39  */
     40 
     41 #include <sys/cdefs.h>
     42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.81.2.3 2010/03/11 15:04:19 yamt Exp $");
     43 
     44 #define SYSVSEM
     45 
     46 #include <sys/param.h>
     47 #include <sys/kernel.h>
     48 #include <sys/sem.h>
     49 #include <sys/sysctl.h>
     50 #include <sys/kmem.h>
     51 #include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
     52 #include <sys/syscallargs.h>
     53 #include <sys/kauth.h>
     54 
     55 /*
     56  * Memory areas:
     57  *  1st: Pool of semaphore identifiers
     58  *  2nd: Semaphores
     59  *  3rd: Conditional variables
     60  *  4th: Undo structures
     61  */
     62 struct semid_ds		*sema;
     63 static struct __sem	*sem;
     64 static kcondvar_t	*semcv;
     65 static int		*semu;
     66 
     67 static kmutex_t	semlock;
     68 static struct	sem_undo *semu_list;	/* list of active undo structures */
     69 static u_int	semtot = 0;		/* total number of semaphores */
     70 
     71 static u_int	sem_waiters = 0;	/* total number of semop waiters */
     72 static bool	sem_realloc_state;
     73 static kcondvar_t sem_realloc_cv;
     74 
     75 /* Macro to find a particular sem_undo vector */
     76 #define SEMU(s, ix)	((struct sem_undo *)(((long)s) + ix * seminfo.semusz))
     77 
     78 #ifdef SEM_DEBUG
     79 #define SEM_PRINTF(a) printf a
     80 #else
     81 #define SEM_PRINTF(a)
     82 #endif
     83 
     84 struct sem_undo *semu_alloc(struct proc *);
     85 int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
     86 void semundo_clear(int, int);
     87 
     88 void
     89 seminit(void)
     90 {
     91 	int i, sz;
     92 	vaddr_t v;
     93 
     94 	mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE);
     95 	cv_init(&sem_realloc_cv, "semrealc");
     96 	sem_realloc_state = false;
     97 
     98 	/* Allocate the wired memory for our structures */
     99 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    100 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    101 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    102 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    103 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    104 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    105 	if (v == 0)
    106 		panic("sysv_sem: cannot allocate memory");
    107 	sema = (void *)v;
    108 	sem = (void *)((uintptr_t)sema +
    109 	    ALIGN(seminfo.semmni * sizeof(struct semid_ds)));
    110 	semcv = (void *)((uintptr_t)sem +
    111 	    ALIGN(seminfo.semmns * sizeof(struct __sem)));
    112 	semu = (void *)((uintptr_t)semcv +
    113 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)));
    114 
    115 	for (i = 0; i < seminfo.semmni; i++) {
    116 		sema[i]._sem_base = 0;
    117 		sema[i].sem_perm.mode = 0;
    118 		cv_init(&semcv[i], "semwait");
    119 	}
    120 	for (i = 0; i < seminfo.semmnu; i++) {
    121 		struct sem_undo *suptr = SEMU(semu, i);
    122 		suptr->un_proc = NULL;
    123 	}
    124 	semu_list = NULL;
    125 	exithook_establish(semexit, NULL);
    126 }
    127 
    128 static int
    129 semrealloc(int newsemmni, int newsemmns, int newsemmnu)
    130 {
    131 	struct semid_ds *new_sema, *old_sema;
    132 	struct __sem *new_sem;
    133 	struct sem_undo *new_semu_list, *suptr, *nsuptr;
    134 	int *new_semu;
    135 	kcondvar_t *new_semcv;
    136 	vaddr_t v;
    137 	int i, j, lsemid, nmnus, sz;
    138 
    139 	if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1)
    140 		return EINVAL;
    141 
    142 	/* Allocate the wired memory for our structures */
    143 	sz = ALIGN(newsemmni * sizeof(struct semid_ds)) +
    144 	    ALIGN(newsemmns * sizeof(struct __sem)) +
    145 	    ALIGN(newsemmni * sizeof(kcondvar_t)) +
    146 	    ALIGN(newsemmnu * seminfo.semusz);
    147 	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
    148 	    UVM_KMF_WIRED|UVM_KMF_ZERO);
    149 	if (v == 0)
    150 		return ENOMEM;
    151 
    152 	mutex_enter(&semlock);
    153 	if (sem_realloc_state) {
    154 		mutex_exit(&semlock);
    155 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    156 		return EBUSY;
    157 	}
    158 	sem_realloc_state = true;
    159 	if (sem_waiters) {
    160 		/*
    161 		 * Mark reallocation state, wake-up all waiters,
    162 		 * and wait while they will all exit.
    163 		 */
    164 		for (i = 0; i < seminfo.semmni; i++)
    165 			cv_broadcast(&semcv[i]);
    166 		while (sem_waiters)
    167 			cv_wait(&sem_realloc_cv, &semlock);
    168 	}
    169 	old_sema = sema;
    170 
    171 	/* Get the number of last slot */
    172 	lsemid = 0;
    173 	for (i = 0; i < seminfo.semmni; i++)
    174 		if (sema[i].sem_perm.mode & SEM_ALLOC)
    175 			lsemid = i;
    176 
    177 	/* Get the number of currently used undo structures */
    178 	nmnus = 0;
    179 	for (i = 0; i < seminfo.semmnu; i++) {
    180 		suptr = SEMU(semu, i);
    181 		if (suptr->un_proc == NULL)
    182 			continue;
    183 		nmnus++;
    184 	}
    185 
    186 	/* We cannot reallocate less memory than we use */
    187 	if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) {
    188 		mutex_exit(&semlock);
    189 		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
    190 		return EBUSY;
    191 	}
    192 
    193 	new_sema = (void *)v;
    194 	new_sem = (void *)((uintptr_t)new_sema +
    195 	    ALIGN(newsemmni * sizeof(struct semid_ds)));
    196 	new_semcv = (void *)((uintptr_t)new_sem +
    197 	    ALIGN(newsemmns * sizeof(struct __sem)));
    198 	new_semu = (void *)((uintptr_t)new_semcv +
    199 	    ALIGN(newsemmni * sizeof(kcondvar_t)));
    200 
    201 	/* Initialize all semaphore identifiers and condvars */
    202 	for (i = 0; i < newsemmni; i++) {
    203 		new_sema[i]._sem_base = 0;
    204 		new_sema[i].sem_perm.mode = 0;
    205 		cv_init(&new_semcv[i], "semwait");
    206 	}
    207 	for (i = 0; i < newsemmnu; i++) {
    208 		nsuptr = SEMU(new_semu, i);
    209 		nsuptr->un_proc = NULL;
    210 	}
    211 
    212 	/*
    213 	 * Copy all identifiers, semaphores and list of the
    214 	 * undo structures to the new memory allocation.
    215 	 */
    216 	j = 0;
    217 	for (i = 0; i <= lsemid; i++) {
    218 		if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0)
    219 			continue;
    220 		memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds));
    221 		new_sema[i]._sem_base = &new_sem[j];
    222 		memcpy(new_sema[i]._sem_base, sema[i]._sem_base,
    223 		    (sizeof(struct __sem) * sema[i].sem_nsems));
    224 		j += sema[i].sem_nsems;
    225 	}
    226 	KASSERT(j == semtot);
    227 
    228 	j = 0;
    229 	new_semu_list = NULL;
    230 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
    231 		KASSERT(j < newsemmnu);
    232 		nsuptr = SEMU(new_semu, j);
    233 		memcpy(nsuptr, suptr, SEMUSZ);
    234 		nsuptr->un_next = new_semu_list;
    235 		new_semu_list = nsuptr;
    236 		j++;
    237 	}
    238 
    239 	for (i = 0; i < seminfo.semmni; i++) {
    240 		KASSERT(cv_has_waiters(&semcv[i]) == false);
    241 		cv_destroy(&semcv[i]);
    242 	}
    243 
    244 	sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) +
    245 	    ALIGN(seminfo.semmns * sizeof(struct __sem)) +
    246 	    ALIGN(seminfo.semmni * sizeof(kcondvar_t)) +
    247 	    ALIGN(seminfo.semmnu * seminfo.semusz);
    248 
    249 	/* Set the pointers and update the new values */
    250 	sema = new_sema;
    251 	sem = new_sem;
    252 	semcv = new_semcv;
    253 	semu = new_semu;
    254 	semu_list = new_semu_list;
    255 
    256 	seminfo.semmni = newsemmni;
    257 	seminfo.semmns = newsemmns;
    258 	seminfo.semmnu = newsemmnu;
    259 
    260 	/* Reallocation completed - notify all waiters, if any */
    261 	sem_realloc_state = false;
    262 	cv_broadcast(&sem_realloc_cv);
    263 	mutex_exit(&semlock);
    264 
    265 	uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED);
    266 	return 0;
    267 }
    268 
    269 /*
    270  * Placebo.
    271  */
    272 
    273 int
    274 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval)
    275 {
    276 
    277 	*retval = 0;
    278 	return 0;
    279 }
    280 
    281 /*
    282  * Allocate a new sem_undo structure for a process.
    283  * => Returns NULL on failure.
    284  */
    285 struct sem_undo *
    286 semu_alloc(struct proc *p)
    287 {
    288 	struct sem_undo *suptr, **supptr;
    289 	bool attempted = false;
    290 	int i;
    291 
    292 	KASSERT(mutex_owned(&semlock));
    293 again:
    294 	/* Look for a free structure. */
    295 	for (i = 0; i < seminfo.semmnu; i++) {
    296 		suptr = SEMU(semu, i);
    297 		if (suptr->un_proc == NULL) {
    298 			/* Found.  Fill it in and return. */
    299 			suptr->un_next = semu_list;
    300 			semu_list = suptr;
    301 			suptr->un_cnt = 0;
    302 			suptr->un_proc = p;
    303 			return suptr;
    304 		}
    305 	}
    306 
    307 	/* Not found.  Attempt to free some structures. */
    308 	if (!attempted) {
    309 		bool freed = false;
    310 
    311 		attempted = true;
    312 		supptr = &semu_list;
    313 		while ((suptr = *supptr) != NULL) {
    314 			if (suptr->un_cnt == 0)  {
    315 				suptr->un_proc = NULL;
    316 				*supptr = suptr->un_next;
    317 				freed = true;
    318 			} else {
    319 				supptr = &suptr->un_next;
    320 			}
    321 		}
    322 		if (freed) {
    323 			goto again;
    324 		}
    325 	}
    326 	return NULL;
    327 }
    328 
    329 /*
    330  * Adjust a particular entry for a particular proc
    331  */
    332 
    333 int
    334 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
    335     int adjval)
    336 {
    337 	struct sem_undo *suptr;
    338 	struct undo *sunptr;
    339 	int i;
    340 
    341 	KASSERT(mutex_owned(&semlock));
    342 
    343 	/*
    344 	 * Look for and remember the sem_undo if the caller doesn't
    345 	 * provide it
    346 	 */
    347 
    348 	suptr = *supptr;
    349 	if (suptr == NULL) {
    350 		for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    351 			if (suptr->un_proc == p)
    352 				break;
    353 
    354 		if (suptr == NULL) {
    355 			suptr = semu_alloc(p);
    356 			if (suptr == NULL)
    357 				return (ENOSPC);
    358 		}
    359 		*supptr = suptr;
    360 	}
    361 
    362 	/*
    363 	 * Look for the requested entry and adjust it (delete if
    364 	 * adjval becomes 0).
    365 	 */
    366 	sunptr = &suptr->un_ent[0];
    367 	for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
    368 		if (sunptr->un_id != semid || sunptr->un_num != semnum)
    369 			continue;
    370 		sunptr->un_adjval += adjval;
    371 		if (sunptr->un_adjval == 0) {
    372 			suptr->un_cnt--;
    373 			if (i < suptr->un_cnt)
    374 				suptr->un_ent[i] =
    375 				    suptr->un_ent[suptr->un_cnt];
    376 		}
    377 		return (0);
    378 	}
    379 
    380 	/* Didn't find the right entry - create it */
    381 	if (suptr->un_cnt == SEMUME)
    382 		return (EINVAL);
    383 
    384 	sunptr = &suptr->un_ent[suptr->un_cnt];
    385 	suptr->un_cnt++;
    386 	sunptr->un_adjval = adjval;
    387 	sunptr->un_id = semid;
    388 	sunptr->un_num = semnum;
    389 	return (0);
    390 }
    391 
    392 void
    393 semundo_clear(int semid, int semnum)
    394 {
    395 	struct sem_undo *suptr;
    396 	struct undo *sunptr, *sunend;
    397 
    398 	KASSERT(mutex_owned(&semlock));
    399 
    400 	for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next)
    401 		for (sunptr = &suptr->un_ent[0],
    402 		    sunend = sunptr + suptr->un_cnt; sunptr < sunend;) {
    403 			if (sunptr->un_id == semid) {
    404 				if (semnum == -1 || sunptr->un_num == semnum) {
    405 					suptr->un_cnt--;
    406 					sunend--;
    407 					if (sunptr != sunend)
    408 						*sunptr = *sunend;
    409 					if (semnum != -1)
    410 						break;
    411 					else
    412 						continue;
    413 				}
    414 			}
    415 			sunptr++;
    416 		}
    417 }
    418 
    419 int
    420 sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap,
    421     register_t *retval)
    422 {
    423 	/* {
    424 		syscallarg(int) semid;
    425 		syscallarg(int) semnum;
    426 		syscallarg(int) cmd;
    427 		syscallarg(union __semun *) arg;
    428 	} */
    429 	struct semid_ds sembuf;
    430 	int cmd, error;
    431 	void *pass_arg;
    432 	union __semun karg;
    433 
    434 	cmd = SCARG(uap, cmd);
    435 
    436 	pass_arg = get_semctl_arg(cmd, &sembuf, &karg);
    437 
    438 	if (pass_arg) {
    439 		error = copyin(SCARG(uap, arg), &karg, sizeof(karg));
    440 		if (error)
    441 			return error;
    442 		if (cmd == IPC_SET) {
    443 			error = copyin(karg.buf, &sembuf, sizeof(sembuf));
    444 			if (error)
    445 				return (error);
    446 		}
    447 	}
    448 
    449 	error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd,
    450 	    pass_arg, retval);
    451 
    452 	if (error == 0 && cmd == IPC_STAT)
    453 		error = copyout(&sembuf, karg.buf, sizeof(sembuf));
    454 
    455 	return (error);
    456 }
    457 
    458 int
    459 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v,
    460     register_t *retval)
    461 {
    462 	kauth_cred_t cred = l->l_cred;
    463 	union __semun *arg = v;
    464 	struct semid_ds *sembuf = v, *semaptr;
    465 	int i, error, ix;
    466 
    467 	SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n",
    468 	    semid, semnum, cmd, v));
    469 
    470 	mutex_enter(&semlock);
    471 
    472 	ix = IPCID_TO_IX(semid);
    473 	if (ix < 0 || ix >= seminfo.semmni) {
    474 		mutex_exit(&semlock);
    475 		return (EINVAL);
    476 	}
    477 
    478 	semaptr = &sema[ix];
    479 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    480 	    semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) {
    481 		mutex_exit(&semlock);
    482 		return (EINVAL);
    483 	}
    484 
    485 	switch (cmd) {
    486 	case IPC_RMID:
    487 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
    488 			break;
    489 		semaptr->sem_perm.cuid = kauth_cred_geteuid(cred);
    490 		semaptr->sem_perm.uid = kauth_cred_geteuid(cred);
    491 		semtot -= semaptr->sem_nsems;
    492 		for (i = semaptr->_sem_base - sem; i < semtot; i++)
    493 			sem[i] = sem[i + semaptr->sem_nsems];
    494 		for (i = 0; i < seminfo.semmni; i++) {
    495 			if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
    496 			    sema[i]._sem_base > semaptr->_sem_base)
    497 				sema[i]._sem_base -= semaptr->sem_nsems;
    498 		}
    499 		semaptr->sem_perm.mode = 0;
    500 		semundo_clear(ix, -1);
    501 		cv_broadcast(&semcv[ix]);
    502 		break;
    503 
    504 	case IPC_SET:
    505 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
    506 			break;
    507 		KASSERT(sembuf != NULL);
    508 		semaptr->sem_perm.uid = sembuf->sem_perm.uid;
    509 		semaptr->sem_perm.gid = sembuf->sem_perm.gid;
    510 		semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
    511 		    (sembuf->sem_perm.mode & 0777);
    512 		semaptr->sem_ctime = time_second;
    513 		break;
    514 
    515 	case IPC_STAT:
    516 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    517 			break;
    518 		KASSERT(sembuf != NULL);
    519 		memcpy(sembuf, semaptr, sizeof(struct semid_ds));
    520 		sembuf->sem_perm.mode &= 0777;
    521 		break;
    522 
    523 	case GETNCNT:
    524 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    525 			break;
    526 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    527 			error = EINVAL;
    528 			break;
    529 		}
    530 		*retval = semaptr->_sem_base[semnum].semncnt;
    531 		break;
    532 
    533 	case GETPID:
    534 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    535 			break;
    536 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    537 			error = EINVAL;
    538 			break;
    539 		}
    540 		*retval = semaptr->_sem_base[semnum].sempid;
    541 		break;
    542 
    543 	case GETVAL:
    544 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    545 			break;
    546 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    547 			error = EINVAL;
    548 			break;
    549 		}
    550 		*retval = semaptr->_sem_base[semnum].semval;
    551 		break;
    552 
    553 	case GETALL:
    554 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    555 			break;
    556 		KASSERT(arg != NULL);
    557 		for (i = 0; i < semaptr->sem_nsems; i++) {
    558 			error = copyout(&semaptr->_sem_base[i].semval,
    559 			    &arg->array[i], sizeof(arg->array[i]));
    560 			if (error != 0)
    561 				break;
    562 		}
    563 		break;
    564 
    565 	case GETZCNT:
    566 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
    567 			break;
    568 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    569 			error = EINVAL;
    570 			break;
    571 		}
    572 		*retval = semaptr->_sem_base[semnum].semzcnt;
    573 		break;
    574 
    575 	case SETVAL:
    576 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    577 			break;
    578 		if (semnum < 0 || semnum >= semaptr->sem_nsems) {
    579 			error = EINVAL;
    580 			break;
    581 		}
    582 		KASSERT(arg != NULL);
    583 		if ((unsigned int)arg->val > seminfo.semvmx) {
    584 			error = ERANGE;
    585 			break;
    586 		}
    587 		semaptr->_sem_base[semnum].semval = arg->val;
    588 		semundo_clear(ix, semnum);
    589 		cv_broadcast(&semcv[ix]);
    590 		break;
    591 
    592 	case SETALL:
    593 		if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
    594 			break;
    595 		KASSERT(arg != NULL);
    596 		for (i = 0; i < semaptr->sem_nsems; i++) {
    597 			unsigned short semval;
    598 			error = copyin(&arg->array[i], &semval,
    599 			    sizeof(arg->array[i]));
    600 			if (error != 0)
    601 				break;
    602 			if ((unsigned int)semval > seminfo.semvmx) {
    603 				error = ERANGE;
    604 				break;
    605 			}
    606 			semaptr->_sem_base[i].semval = semval;
    607 		}
    608 		semundo_clear(ix, -1);
    609 		cv_broadcast(&semcv[ix]);
    610 		break;
    611 
    612 	default:
    613 		error = EINVAL;
    614 		break;
    615 	}
    616 
    617 	mutex_exit(&semlock);
    618 	return (error);
    619 }
    620 
    621 int
    622 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval)
    623 {
    624 	/* {
    625 		syscallarg(key_t) key;
    626 		syscallarg(int) nsems;
    627 		syscallarg(int) semflg;
    628 	} */
    629 	int semid, error = 0;
    630 	int key = SCARG(uap, key);
    631 	int nsems = SCARG(uap, nsems);
    632 	int semflg = SCARG(uap, semflg);
    633 	kauth_cred_t cred = l->l_cred;
    634 
    635 	SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
    636 
    637 	mutex_enter(&semlock);
    638 
    639 	if (key != IPC_PRIVATE) {
    640 		for (semid = 0; semid < seminfo.semmni; semid++) {
    641 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
    642 			    sema[semid].sem_perm._key == key)
    643 				break;
    644 		}
    645 		if (semid < seminfo.semmni) {
    646 			SEM_PRINTF(("found public key\n"));
    647 			if ((error = ipcperm(cred, &sema[semid].sem_perm,
    648 			    semflg & 0700)))
    649 			    	goto out;
    650 			if (nsems > 0 && sema[semid].sem_nsems < nsems) {
    651 				SEM_PRINTF(("too small\n"));
    652 				error = EINVAL;
    653 				goto out;
    654 			}
    655 			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
    656 				SEM_PRINTF(("not exclusive\n"));
    657 				error = EEXIST;
    658 				goto out;
    659 			}
    660 			goto found;
    661 		}
    662 	}
    663 
    664 	SEM_PRINTF(("need to allocate the semid_ds\n"));
    665 	if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
    666 		if (nsems <= 0 || nsems > seminfo.semmsl) {
    667 			SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
    668 			    seminfo.semmsl));
    669 			error = EINVAL;
    670 			goto out;
    671 		}
    672 		if (nsems > seminfo.semmns - semtot) {
    673 			SEM_PRINTF(("not enough semaphores left "
    674 			    "(need %d, got %d)\n",
    675 			    nsems, seminfo.semmns - semtot));
    676 			error = ENOSPC;
    677 			goto out;
    678 		}
    679 		for (semid = 0; semid < seminfo.semmni; semid++) {
    680 			if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
    681 				break;
    682 		}
    683 		if (semid == seminfo.semmni) {
    684 			SEM_PRINTF(("no more semid_ds's available\n"));
    685 			error = ENOSPC;
    686 			goto out;
    687 		}
    688 		SEM_PRINTF(("semid %d is available\n", semid));
    689 		sema[semid].sem_perm._key = key;
    690 		sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred);
    691 		sema[semid].sem_perm.uid = kauth_cred_geteuid(cred);
    692 		sema[semid].sem_perm.cgid = kauth_cred_getegid(cred);
    693 		sema[semid].sem_perm.gid = kauth_cred_getegid(cred);
    694 		sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
    695 		sema[semid].sem_perm._seq =
    696 		    (sema[semid].sem_perm._seq + 1) & 0x7fff;
    697 		sema[semid].sem_nsems = nsems;
    698 		sema[semid].sem_otime = 0;
    699 		sema[semid].sem_ctime = time_second;
    700 		sema[semid]._sem_base = &sem[semtot];
    701 		semtot += nsems;
    702 		memset(sema[semid]._sem_base, 0,
    703 		    sizeof(sema[semid]._sem_base[0]) * nsems);
    704 		SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base,
    705 		    &sem[semtot]));
    706 	} else {
    707 		SEM_PRINTF(("didn't find it and wasn't asked to create it\n"));
    708 		error = ENOENT;
    709 		goto out;
    710 	}
    711 
    712  found:
    713 	*retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
    714  out:
    715 	mutex_exit(&semlock);
    716 	return (error);
    717 }
    718 
    719 #define SMALL_SOPS 8
    720 
    721 int
    722 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval)
    723 {
    724 	/* {
    725 		syscallarg(int) semid;
    726 		syscallarg(struct sembuf *) sops;
    727 		syscallarg(size_t) nsops;
    728 	} */
    729 	struct proc *p = l->l_proc;
    730 	int semid = SCARG(uap, semid), seq;
    731 	size_t nsops = SCARG(uap, nsops);
    732 	struct sembuf small_sops[SMALL_SOPS];
    733 	struct sembuf *sops;
    734 	struct semid_ds *semaptr;
    735 	struct sembuf *sopptr = NULL;
    736 	struct __sem *semptr = NULL;
    737 	struct sem_undo *suptr = NULL;
    738 	kauth_cred_t cred = l->l_cred;
    739 	int i, error;
    740 	int do_wakeup, do_undos;
    741 
    742 	SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops));
    743 
    744 	if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) {
    745 		mutex_enter(p->p_lock);
    746 		p->p_flag |= PK_SYSVSEM;
    747 		mutex_exit(p->p_lock);
    748 	}
    749 
    750 restart:
    751 	if (nsops <= SMALL_SOPS) {
    752 		sops = small_sops;
    753 	} else if (nsops <= seminfo.semopm) {
    754 		sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP);
    755 	} else {
    756 		SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n",
    757 		    seminfo.semopm, nsops));
    758 		return (E2BIG);
    759 	}
    760 
    761 	error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0]));
    762 	if (error) {
    763 		SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error,
    764 		    SCARG(uap, sops), &sops, nsops * sizeof(sops[0])));
    765 		if (sops != small_sops)
    766 			kmem_free(sops, nsops * sizeof(*sops));
    767 		return error;
    768 	}
    769 
    770 	mutex_enter(&semlock);
    771 	/* In case of reallocation, we will wait for completion */
    772 	while (__predict_false(sem_realloc_state))
    773 		cv_wait(&sem_realloc_cv, &semlock);
    774 
    775 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
    776 	if (semid < 0 || semid >= seminfo.semmni) {
    777 		error = EINVAL;
    778 		goto out;
    779 	}
    780 
    781 	semaptr = &sema[semid];
    782 	seq = IPCID_TO_SEQ(SCARG(uap, semid));
    783 	if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    784 	    semaptr->sem_perm._seq != seq) {
    785 		error = EINVAL;
    786 		goto out;
    787 	}
    788 
    789 	if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
    790 		SEM_PRINTF(("error = %d from ipaccess\n", error));
    791 		goto out;
    792 	}
    793 
    794 	for (i = 0; i < nsops; i++)
    795 		if (sops[i].sem_num >= semaptr->sem_nsems) {
    796 			error = EFBIG;
    797 			goto out;
    798 		}
    799 
    800 	/*
    801 	 * Loop trying to satisfy the vector of requests.
    802 	 * If we reach a point where we must wait, any requests already
    803 	 * performed are rolled back and we go to sleep until some other
    804 	 * process wakes us up.  At this point, we start all over again.
    805 	 *
    806 	 * This ensures that from the perspective of other tasks, a set
    807 	 * of requests is atomic (never partially satisfied).
    808 	 */
    809 	do_undos = 0;
    810 
    811 	for (;;) {
    812 		do_wakeup = 0;
    813 
    814 		for (i = 0; i < nsops; i++) {
    815 			sopptr = &sops[i];
    816 			semptr = &semaptr->_sem_base[sopptr->sem_num];
    817 
    818 			SEM_PRINTF(("semop:  semaptr=%p, sem_base=%p, "
    819 			    "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n",
    820 			    semaptr, semaptr->_sem_base, semptr,
    821 			    sopptr->sem_num, semptr->semval, sopptr->sem_op,
    822 			    (sopptr->sem_flg & IPC_NOWAIT) ?
    823 			    "nowait" : "wait"));
    824 
    825 			if (sopptr->sem_op < 0) {
    826 				if ((int)(semptr->semval +
    827 				    sopptr->sem_op) < 0) {
    828 					SEM_PRINTF(("semop:  "
    829 					    "can't do it now\n"));
    830 					break;
    831 				} else {
    832 					semptr->semval += sopptr->sem_op;
    833 					if (semptr->semval == 0 &&
    834 					    semptr->semzcnt > 0)
    835 						do_wakeup = 1;
    836 				}
    837 				if (sopptr->sem_flg & SEM_UNDO)
    838 					do_undos = 1;
    839 			} else if (sopptr->sem_op == 0) {
    840 				if (semptr->semval > 0) {
    841 					SEM_PRINTF(("semop:  not zero now\n"));
    842 					break;
    843 				}
    844 			} else {
    845 				if (semptr->semncnt > 0)
    846 					do_wakeup = 1;
    847 				semptr->semval += sopptr->sem_op;
    848 				if (sopptr->sem_flg & SEM_UNDO)
    849 					do_undos = 1;
    850 			}
    851 		}
    852 
    853 		/*
    854 		 * Did we get through the entire vector?
    855 		 */
    856 		if (i >= nsops)
    857 			goto done;
    858 
    859 		/*
    860 		 * No ... rollback anything that we've already done
    861 		 */
    862 		SEM_PRINTF(("semop:  rollback 0 through %d\n", i - 1));
    863 		while (i-- > 0)
    864 			semaptr->_sem_base[sops[i].sem_num].semval -=
    865 			    sops[i].sem_op;
    866 
    867 		/*
    868 		 * If the request that we couldn't satisfy has the
    869 		 * NOWAIT flag set then return with EAGAIN.
    870 		 */
    871 		if (sopptr->sem_flg & IPC_NOWAIT) {
    872 			error = EAGAIN;
    873 			goto out;
    874 		}
    875 
    876 		if (sopptr->sem_op == 0)
    877 			semptr->semzcnt++;
    878 		else
    879 			semptr->semncnt++;
    880 
    881 		sem_waiters++;
    882 		SEM_PRINTF(("semop:  good night!\n"));
    883 		error = cv_wait_sig(&semcv[semid], &semlock);
    884 		SEM_PRINTF(("semop:  good morning (error=%d)!\n", error));
    885 		sem_waiters--;
    886 
    887 		/* Notify reallocator, if it is waiting */
    888 		cv_broadcast(&sem_realloc_cv);
    889 
    890 		/*
    891 		 * Make sure that the semaphore still exists
    892 		 */
    893 		if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
    894 		    semaptr->sem_perm._seq != seq) {
    895 			error = EIDRM;
    896 			goto out;
    897 		}
    898 
    899 		/*
    900 		 * The semaphore is still alive.  Readjust the count of
    901 		 * waiting processes.
    902 		 */
    903 		semptr = &semaptr->_sem_base[sopptr->sem_num];
    904 		if (sopptr->sem_op == 0)
    905 			semptr->semzcnt--;
    906 		else
    907 			semptr->semncnt--;
    908 
    909 		/* In case of such state, restart the call */
    910 		if (sem_realloc_state) {
    911 			mutex_exit(&semlock);
    912 			goto restart;
    913 		}
    914 
    915 		/* Is it really morning, or was our sleep interrupted? */
    916 		if (error != 0) {
    917 			error = EINTR;
    918 			goto out;
    919 		}
    920 		SEM_PRINTF(("semop:  good morning!\n"));
    921 	}
    922 
    923 done:
    924 	/*
    925 	 * Process any SEM_UNDO requests.
    926 	 */
    927 	if (do_undos) {
    928 		for (i = 0; i < nsops; i++) {
    929 			/*
    930 			 * We only need to deal with SEM_UNDO's for non-zero
    931 			 * op's.
    932 			 */
    933 			int adjval;
    934 
    935 			if ((sops[i].sem_flg & SEM_UNDO) == 0)
    936 				continue;
    937 			adjval = sops[i].sem_op;
    938 			if (adjval == 0)
    939 				continue;
    940 			error = semundo_adjust(p, &suptr, semid,
    941 			    sops[i].sem_num, -adjval);
    942 			if (error == 0)
    943 				continue;
    944 
    945 			/*
    946 			 * Oh-Oh!  We ran out of either sem_undo's or undo's.
    947 			 * Rollback the adjustments to this point and then
    948 			 * rollback the semaphore ups and down so we can return
    949 			 * with an error with all structures restored.  We
    950 			 * rollback the undo's in the exact reverse order that
    951 			 * we applied them.  This guarantees that we won't run
    952 			 * out of space as we roll things back out.
    953 			 */
    954 			while (i-- > 0) {
    955 				if ((sops[i].sem_flg & SEM_UNDO) == 0)
    956 					continue;
    957 				adjval = sops[i].sem_op;
    958 				if (adjval == 0)
    959 					continue;
    960 				if (semundo_adjust(p, &suptr, semid,
    961 				    sops[i].sem_num, adjval) != 0)
    962 					panic("semop - can't undo undos");
    963 			}
    964 
    965 			for (i = 0; i < nsops; i++)
    966 				semaptr->_sem_base[sops[i].sem_num].semval -=
    967 				    sops[i].sem_op;
    968 
    969 			SEM_PRINTF(("error = %d from semundo_adjust\n", error));
    970 			goto out;
    971 		} /* loop through the sops */
    972 	} /* if (do_undos) */
    973 
    974 	/* We're definitely done - set the sempid's */
    975 	for (i = 0; i < nsops; i++) {
    976 		sopptr = &sops[i];
    977 		semptr = &semaptr->_sem_base[sopptr->sem_num];
    978 		semptr->sempid = p->p_pid;
    979 	}
    980 
    981 	/* Update sem_otime */
    982 	semaptr->sem_otime = time_second;
    983 
    984 	/* Do a wakeup if any semaphore was up'd. */
    985 	if (do_wakeup) {
    986 		SEM_PRINTF(("semop:  doing wakeup\n"));
    987 		cv_broadcast(&semcv[semid]);
    988 		SEM_PRINTF(("semop:  back from wakeup\n"));
    989 	}
    990 	SEM_PRINTF(("semop:  done\n"));
    991 	*retval = 0;
    992 
    993  out:
    994 	mutex_exit(&semlock);
    995 	if (sops != small_sops)
    996 		kmem_free(sops, nsops * sizeof(*sops));
    997 	return error;
    998 }
    999 
   1000 /*
   1001  * Go through the undo structures for this process and apply the
   1002  * adjustments to semaphores.
   1003  */
   1004 /*ARGSUSED*/
   1005 void
   1006 semexit(struct proc *p, void *v)
   1007 {
   1008 	struct sem_undo *suptr;
   1009 	struct sem_undo **supptr;
   1010 
   1011 	if ((p->p_flag & PK_SYSVSEM) == 0)
   1012 		return;
   1013 
   1014 	mutex_enter(&semlock);
   1015 
   1016 	/*
   1017 	 * Go through the chain of undo vectors looking for one
   1018 	 * associated with this process.
   1019 	 */
   1020 
   1021 	for (supptr = &semu_list; (suptr = *supptr) != NULL;
   1022 	    supptr = &suptr->un_next) {
   1023 		if (suptr->un_proc == p)
   1024 			break;
   1025 	}
   1026 
   1027 	/*
   1028 	 * If there is no undo vector, skip to the end.
   1029 	 */
   1030 
   1031 	if (suptr == NULL) {
   1032 		mutex_exit(&semlock);
   1033 		return;
   1034 	}
   1035 
   1036 	/*
   1037 	 * We now have an undo vector for this process.
   1038 	 */
   1039 
   1040 	SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p,
   1041 	    suptr->un_cnt));
   1042 
   1043 	/*
   1044 	 * If there are any active undo elements then process them.
   1045 	 */
   1046 	if (suptr->un_cnt > 0) {
   1047 		int ix;
   1048 
   1049 		for (ix = 0; ix < suptr->un_cnt; ix++) {
   1050 			int semid = suptr->un_ent[ix].un_id;
   1051 			int semnum = suptr->un_ent[ix].un_num;
   1052 			int adjval = suptr->un_ent[ix].un_adjval;
   1053 			struct semid_ds *semaptr;
   1054 
   1055 			semaptr = &sema[semid];
   1056 			if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
   1057 				panic("semexit - semid not allocated");
   1058 			if (semnum >= semaptr->sem_nsems)
   1059 				panic("semexit - semnum out of range");
   1060 
   1061 			SEM_PRINTF(("semexit:  %p id=%d num=%d(adj=%d) ; "
   1062 			    "sem=%d\n",
   1063 			    suptr->un_proc, suptr->un_ent[ix].un_id,
   1064 			    suptr->un_ent[ix].un_num,
   1065 			    suptr->un_ent[ix].un_adjval,
   1066 			    semaptr->_sem_base[semnum].semval));
   1067 
   1068 			if (adjval < 0 &&
   1069 			    semaptr->_sem_base[semnum].semval < -adjval)
   1070 				semaptr->_sem_base[semnum].semval = 0;
   1071 			else
   1072 				semaptr->_sem_base[semnum].semval += adjval;
   1073 
   1074 			cv_broadcast(&semcv[semid]);
   1075 			SEM_PRINTF(("semexit:  back from wakeup\n"));
   1076 		}
   1077 	}
   1078 
   1079 	/*
   1080 	 * Deallocate the undo vector.
   1081 	 */
   1082 	SEM_PRINTF(("removing vector\n"));
   1083 	suptr->un_proc = NULL;
   1084 	*supptr = suptr->un_next;
   1085 	mutex_exit(&semlock);
   1086 }
   1087 
   1088 /*
   1089  * Sysctl initialization and nodes.
   1090  */
   1091 
   1092 static int
   1093 sysctl_ipc_semmni(SYSCTLFN_ARGS)
   1094 {
   1095 	int newsize, error;
   1096 	struct sysctlnode node;
   1097 	node = *rnode;
   1098 	node.sysctl_data = &newsize;
   1099 
   1100 	newsize = seminfo.semmni;
   1101 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1102 	if (error || newp == NULL)
   1103 		return error;
   1104 
   1105 	return semrealloc(newsize, seminfo.semmns, seminfo.semmnu);
   1106 }
   1107 
   1108 static int
   1109 sysctl_ipc_semmns(SYSCTLFN_ARGS)
   1110 {
   1111 	int newsize, error;
   1112 	struct sysctlnode node;
   1113 	node = *rnode;
   1114 	node.sysctl_data = &newsize;
   1115 
   1116 	newsize = seminfo.semmns;
   1117 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1118 	if (error || newp == NULL)
   1119 		return error;
   1120 
   1121 	return semrealloc(seminfo.semmni, newsize, seminfo.semmnu);
   1122 }
   1123 
   1124 static int
   1125 sysctl_ipc_semmnu(SYSCTLFN_ARGS)
   1126 {
   1127 	int newsize, error;
   1128 	struct sysctlnode node;
   1129 	node = *rnode;
   1130 	node.sysctl_data = &newsize;
   1131 
   1132 	newsize = seminfo.semmnu;
   1133 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   1134 	if (error || newp == NULL)
   1135 		return error;
   1136 
   1137 	return semrealloc(seminfo.semmni, seminfo.semmns, newsize);
   1138 }
   1139 
   1140 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup")
   1141 {
   1142 	const struct sysctlnode *node = NULL;
   1143 
   1144 	sysctl_createv(clog, 0, NULL, NULL,
   1145 		CTLFLAG_PERMANENT,
   1146 		CTLTYPE_NODE, "kern", NULL,
   1147 		NULL, 0, NULL, 0,
   1148 		CTL_KERN, CTL_EOL);
   1149 	sysctl_createv(clog, 0, NULL, &node,
   1150 		CTLFLAG_PERMANENT,
   1151 		CTLTYPE_NODE, "ipc",
   1152 		SYSCTL_DESCR("SysV IPC options"),
   1153 		NULL, 0, NULL, 0,
   1154 		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
   1155 
   1156 	if (node == NULL)
   1157 		return;
   1158 
   1159 	sysctl_createv(clog, 0, &node, NULL,
   1160 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1161 		CTLTYPE_INT, "semmni",
   1162 		SYSCTL_DESCR("Max number of number of semaphore identifiers"),
   1163 		sysctl_ipc_semmni, 0, &seminfo.semmni, 0,
   1164 		CTL_CREATE, CTL_EOL);
   1165 	sysctl_createv(clog, 0, &node, NULL,
   1166 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1167 		CTLTYPE_INT, "semmns",
   1168 		SYSCTL_DESCR("Max number of number of semaphores in system"),
   1169 		sysctl_ipc_semmns, 0, &seminfo.semmns, 0,
   1170 		CTL_CREATE, CTL_EOL);
   1171 	sysctl_createv(clog, 0, &node, NULL,
   1172 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
   1173 		CTLTYPE_INT, "semmnu",
   1174 		SYSCTL_DESCR("Max number of undo structures in system"),
   1175 		sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0,
   1176 		CTL_CREATE, CTL_EOL);
   1177 }
   1178