Home | History | Annotate | Line # | Download | only in kern
uipc_sem.c revision 1.24.6.1
      1 /*	$NetBSD: uipc_sem.c,v 1.24.6.1 2008/06/02 13:24:13 mjf Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 #include <sys/cdefs.h>
     59 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.24.6.1 2008/06/02 13:24:13 mjf Exp $");
     60 
     61 #include "opt_posix.h"
     62 
     63 #include <sys/param.h>
     64 #include <sys/systm.h>
     65 #include <sys/kernel.h>
     66 #include <sys/proc.h>
     67 #include <sys/ksem.h>
     68 #include <sys/syscall.h>
     69 #include <sys/stat.h>
     70 #include <sys/kmem.h>
     71 #include <sys/fcntl.h>
     72 #include <sys/kauth.h>
     73 #include <sys/sysctl.h>
     74 
     75 #include <sys/mount.h>
     76 
     77 #include <sys/syscallargs.h>
     78 
/* Initial value of the sem_max limit on the number of semaphores. */
#define SEM_MAX 128
/* Longest accepted semaphore name, not counting the terminating NUL. */
#define SEM_MAX_NAMELEN	14
/* Largest value a semaphore counter may reach; posting beyond it fails. */
#define SEM_VALUE_MAX (~0U)
/* Number of buckets in the ksem_hash[] table, which is indexed by id. */
#define SEM_HASHTBL_SIZE 13

/* Identifier under which a ksem is reported to the caller. */
#define SEM_TO_ID(x)	(((x)->ks_id))
/* Bucket index in ksem_hash[] for the given identifier. */
#define SEM_HASH(id)	((id) % SEM_HASHTBL_SIZE)

/* NOTE(review): M_SEM has no visible user in this part of the file. */
MALLOC_DEFINE(M_SEM, "p1003_1b_sem", "p1003_1b semaphores");
     88 
/*
 * Kernel representation of one semaphore, named or anonymous.
 *
 * Note: to read the ks_name member, you need either the ks_interlock
 * or the ksem_mutex.  To write the ks_name member, you need both.  Make
 * sure the order is ksem_mutex -> ks_interlock.
 */
struct ksem {
	LIST_ENTRY(ksem) ks_entry;	/* global list entry */
	LIST_ENTRY(ksem) ks_hash;	/* hash list entry */
	kmutex_t ks_interlock;		/* lock on this ksem */
	kcondvar_t ks_cv;		/* condition variable */
	unsigned int ks_ref;		/* number of references */
	char *ks_name;			/* if named, this is the name */
	size_t ks_namelen;		/* length of name */
	mode_t ks_mode;			/* protection bits */
	uid_t ks_uid;			/* creator uid */
	gid_t ks_gid;			/* creator gid */
	unsigned int ks_value;		/* current value */
	unsigned int ks_waiters;	/* number of waiters */
	semid_t ks_id;			/* unique identifier */
};
    109 
/* One entry on a process's list of semaphores it holds a reference to. */
struct ksem_ref {
	LIST_ENTRY(ksem_ref) ksr_list;	/* linkage on ksem_proc.kp_ksems */
	struct ksem *ksr_ksem;		/* the referenced semaphore */
};
    114 
/* Per-process semaphore state, stored as process-specific data. */
struct ksem_proc {
	krwlock_t kp_lock;		/* taken around accesses to kp_ksems */
	LIST_HEAD(, ksem_ref) kp_ksems;	/* references held by the process */
};
    119 
LIST_HEAD(ksem_list, ksem);

/*
 * ksem_mutex protects ksem_head and nsems.  Only named semaphores go
 * onto ksem_head.  The id hash table and ksem_counter are also only
 * touched with ksem_mutex held.
 */
static kmutex_t ksem_mutex;
static struct ksem_list ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
static struct ksem_list ksem_hash[SEM_HASHTBL_SIZE];
static u_int sem_max = SEM_MAX;
static int nsems = 0;

/*
 * ksem_counter is the last assigned semid_t.  It needs to be COMPAT_NETBSD32
 * friendly, even though semid_t itself is defined as uintptr_t.
 */
static uint32_t ksem_counter = 1;

/* Key under which each process's struct ksem_proc is stored. */
static specificdata_key_t ksem_specificdata_key;
    139 
    140 static void
    141 ksem_free(struct ksem *ks)
    142 {
    143 
    144 	KASSERT(mutex_owned(&ks->ks_interlock));
    145 
    146 	/*
    147 	 * If the ksem is anonymous (or has been unlinked), then
    148 	 * this is the end if its life.
    149 	 */
    150 	if (ks->ks_name == NULL) {
    151 		mutex_exit(&ks->ks_interlock);
    152 		mutex_destroy(&ks->ks_interlock);
    153 		cv_destroy(&ks->ks_cv);
    154 
    155 		mutex_enter(&ksem_mutex);
    156 		nsems--;
    157 		LIST_REMOVE(ks, ks_hash);
    158 		mutex_exit(&ksem_mutex);
    159 
    160 		kmem_free(ks, sizeof(*ks));
    161 		return;
    162 	}
    163 	mutex_exit(&ks->ks_interlock);
    164 }
    165 
    166 static inline void
    167 ksem_addref(struct ksem *ks)
    168 {
    169 
    170 	KASSERT(mutex_owned(&ks->ks_interlock));
    171 	ks->ks_ref++;
    172 	KASSERT(ks->ks_ref != 0);
    173 }
    174 
    175 static inline void
    176 ksem_delref(struct ksem *ks)
    177 {
    178 
    179 	KASSERT(mutex_owned(&ks->ks_interlock));
    180 	KASSERT(ks->ks_ref != 0);
    181 	if (--ks->ks_ref == 0) {
    182 		ksem_free(ks);
    183 		return;
    184 	}
    185 	mutex_exit(&ks->ks_interlock);
    186 }
    187 
    188 static struct ksem_proc *
    189 ksem_proc_alloc(void)
    190 {
    191 	struct ksem_proc *kp;
    192 
    193 	kp = kmem_alloc(sizeof(*kp), KM_SLEEP);
    194 	rw_init(&kp->kp_lock);
    195 	LIST_INIT(&kp->kp_ksems);
    196 
    197 	return (kp);
    198 }
    199 
    200 static void
    201 ksem_proc_dtor(void *arg)
    202 {
    203 	struct ksem_proc *kp = arg;
    204 	struct ksem_ref *ksr;
    205 
    206 	rw_enter(&kp->kp_lock, RW_WRITER);
    207 
    208 	while ((ksr = LIST_FIRST(&kp->kp_ksems)) != NULL) {
    209 		LIST_REMOVE(ksr, ksr_list);
    210 		mutex_enter(&ksr->ksr_ksem->ks_interlock);
    211 		ksem_delref(ksr->ksr_ksem);
    212 		kmem_free(ksr, sizeof(*ksr));
    213 	}
    214 
    215 	rw_exit(&kp->kp_lock);
    216 	rw_destroy(&kp->kp_lock);
    217 	kmem_free(kp, sizeof(*kp));
    218 }
    219 
    220 static void
    221 ksem_add_proc(struct proc *p, struct ksem *ks)
    222 {
    223 	struct ksem_proc *kp;
    224 	struct ksem_ref *ksr;
    225 
    226 	kp = proc_getspecific(p, ksem_specificdata_key);
    227 	if (kp == NULL) {
    228 		kp = ksem_proc_alloc();
    229 		proc_setspecific(p, ksem_specificdata_key, kp);
    230 	}
    231 
    232 	ksr = kmem_alloc(sizeof(*ksr), KM_SLEEP);
    233 	ksr->ksr_ksem = ks;
    234 
    235 	rw_enter(&kp->kp_lock, RW_WRITER);
    236 	LIST_INSERT_HEAD(&kp->kp_ksems, ksr, ksr_list);
    237 	rw_exit(&kp->kp_lock);
    238 }
    239 
    240 /* We MUST have a write lock on the ksem_proc list! */
    241 static struct ksem_ref *
    242 ksem_drop_proc(struct ksem_proc *kp, struct ksem *ks)
    243 {
    244 	struct ksem_ref *ksr;
    245 
    246 	KASSERT(mutex_owned(&ks->ks_interlock));
    247 	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
    248 		if (ksr->ksr_ksem == ks) {
    249 			ksem_delref(ks);
    250 			LIST_REMOVE(ksr, ksr_list);
    251 			return (ksr);
    252 		}
    253 	}
    254 #ifdef DIAGNOSTIC
    255 	panic("ksem_drop_proc: ksem_proc %p ksem %p", kp, ks);
    256 #endif
    257 	return (NULL);
    258 }
    259 
    260 static int
    261 ksem_perm(struct lwp *l, struct ksem *ks)
    262 {
    263 	kauth_cred_t uc;
    264 
    265 	KASSERT(mutex_owned(&ks->ks_interlock));
    266 	uc = l->l_cred;
    267 	if ((kauth_cred_geteuid(uc) == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
    268 	    (kauth_cred_getegid(uc) == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
    269 	    (ks->ks_mode & S_IWOTH) != 0 ||
    270 	    kauth_authorize_generic(uc, KAUTH_GENERIC_ISSUSER, NULL) == 0)
    271 		return (0);
    272 	return (EPERM);
    273 }
    274 
    275 static struct ksem *
    276 ksem_lookup_byid(semid_t id)
    277 {
    278 	struct ksem *ks;
    279 
    280 	KASSERT(mutex_owned(&ksem_mutex));
    281 	LIST_FOREACH(ks, &ksem_hash[SEM_HASH(id)], ks_hash) {
    282 		if (ks->ks_id == id)
    283 			return ks;
    284 	}
    285 	return NULL;
    286 }
    287 
    288 static struct ksem *
    289 ksem_lookup_byname(const char *name)
    290 {
    291 	struct ksem *ks;
    292 
    293 	KASSERT(mutex_owned(&ksem_mutex));
    294 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    295 		if (strcmp(ks->ks_name, name) == 0) {
    296 			mutex_enter(&ks->ks_interlock);
    297 			return (ks);
    298 		}
    299 	}
    300 	return (NULL);
    301 }
    302 
    303 static int
    304 ksem_create(struct lwp *l, const char *name, struct ksem **ksret,
    305     mode_t mode, unsigned int value)
    306 {
    307 	struct ksem *ret;
    308 	kauth_cred_t uc;
    309 	size_t len;
    310 
    311 	uc = l->l_cred;
    312 	if (value > SEM_VALUE_MAX)
    313 		return (EINVAL);
    314 	ret = kmem_zalloc(sizeof(*ret), KM_SLEEP);
    315 	if (name != NULL) {
    316 		len = strlen(name);
    317 		if (len > SEM_MAX_NAMELEN) {
    318 			kmem_free(ret, sizeof(*ret));
    319 			return (ENAMETOOLONG);
    320 		}
    321 		/* name must start with a '/' but not contain one. */
    322 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    323 			kmem_free(ret, sizeof(*ret));
    324 			return (EINVAL);
    325 		}
    326 		ret->ks_namelen = len + 1;
    327 		ret->ks_name = kmem_alloc(ret->ks_namelen, KM_SLEEP);
    328 		strlcpy(ret->ks_name, name, len + 1);
    329 	} else
    330 		ret->ks_name = NULL;
    331 	ret->ks_mode = mode;
    332 	ret->ks_value = value;
    333 	ret->ks_ref = 1;
    334 	ret->ks_waiters = 0;
    335 	ret->ks_uid = kauth_cred_geteuid(uc);
    336 	ret->ks_gid = kauth_cred_getegid(uc);
    337 	mutex_init(&ret->ks_interlock, MUTEX_DEFAULT, IPL_NONE);
    338 	cv_init(&ret->ks_cv, "psem");
    339 
    340 	mutex_enter(&ksem_mutex);
    341 	if (nsems >= sem_max) {
    342 		mutex_exit(&ksem_mutex);
    343 		if (ret->ks_name != NULL)
    344 			kmem_free(ret->ks_name, ret->ks_namelen);
    345 		kmem_free(ret, sizeof(*ret));
    346 		return (ENFILE);
    347 	}
    348 	nsems++;
    349 	while (ksem_lookup_byid(ksem_counter) != NULL) {
    350 		ksem_counter++;
    351 		/* 0 is a special value for libpthread */
    352 		if (ksem_counter == 0)
    353 			ksem_counter++;
    354 	}
    355 	ret->ks_id = ksem_counter;
    356 	LIST_INSERT_HEAD(&ksem_hash[SEM_HASH(ret->ks_id)], ret, ks_hash);
    357 	mutex_exit(&ksem_mutex);
    358 
    359 	*ksret = ret;
    360 	return (0);
    361 }
    362 
    363 int
    364 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap, register_t *retval)
    365 {
    366 	/* {
    367 		unsigned int value;
    368 		semid_t *idp;
    369 	} */
    370 
    371 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp), copyout);
    372 }
    373 
    374 int
    375 do_ksem_init(struct lwp *l, unsigned int value, semid_t *idp,
    376     copyout_t docopyout)
    377 {
    378 	struct ksem *ks;
    379 	semid_t id;
    380 	int error;
    381 
    382 	/* Note the mode does not matter for anonymous semaphores. */
    383 	error = ksem_create(l, NULL, &ks, 0, value);
    384 	if (error)
    385 		return (error);
    386 	id = SEM_TO_ID(ks);
    387 	error = (*docopyout)(&id, idp, sizeof(id));
    388 	if (error) {
    389 		mutex_enter(&ks->ks_interlock);
    390 		ksem_delref(ks);
    391 		return (error);
    392 	}
    393 
    394 	ksem_add_proc(l->l_proc, ks);
    395 
    396 	return (0);
    397 }
    398 
    399 int
    400 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap, register_t *retval)
    401 {
    402 	/* {
    403 		const char *name;
    404 		int oflag;
    405 		mode_t mode;
    406 		unsigned int value;
    407 		semid_t *idp;
    408 	} */
    409 
    410 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
    411 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
    412 }
    413 
    414 int
    415 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    416      unsigned int value, semid_t *idp, copyout_t docopyout)
    417 {
    418 	char name[SEM_MAX_NAMELEN + 1];
    419 	size_t done;
    420 	int error;
    421 	struct ksem *ksnew, *ks;
    422 	semid_t id;
    423 
    424 	error = copyinstr(semname, name, sizeof(name), &done);
    425 	if (error)
    426 		return (error);
    427 
    428 	ksnew = NULL;
    429 	mutex_enter(&ksem_mutex);
    430 	ks = ksem_lookup_byname(name);
    431 
    432 	/* Found one? */
    433 	if (ks != NULL) {
    434 		/* Check for exclusive create. */
    435 		if (oflag & O_EXCL) {
    436 			mutex_exit(&ks->ks_interlock);
    437 			mutex_exit(&ksem_mutex);
    438 			return (EEXIST);
    439 		}
    440  found_one:
    441 		/*
    442 		 * Verify permissions.  If we can access it, add
    443 		 * this process's reference.
    444 		 */
    445 		KASSERT(mutex_owned(&ks->ks_interlock));
    446 		error = ksem_perm(l, ks);
    447 		if (error == 0)
    448 			ksem_addref(ks);
    449 		mutex_exit(&ks->ks_interlock);
    450 		mutex_exit(&ksem_mutex);
    451 		if (error)
    452 			return (error);
    453 
    454 		id = SEM_TO_ID(ks);
    455 		error = (*docopyout)(&id, idp, sizeof(id));
    456 		if (error) {
    457 			mutex_enter(&ks->ks_interlock);
    458 			ksem_delref(ks);
    459 			return (error);
    460 		}
    461 
    462 		ksem_add_proc(l->l_proc, ks);
    463 
    464 		return (0);
    465 	}
    466 
    467 	/*
    468 	 * didn't ask for creation? error.
    469 	 */
    470 	if ((oflag & O_CREAT) == 0) {
    471 		mutex_exit(&ksem_mutex);
    472 		return (ENOENT);
    473 	}
    474 
    475 	/*
    476 	 * We may block during creation, so drop the lock.
    477 	 */
    478 	mutex_exit(&ksem_mutex);
    479 	error = ksem_create(l, name, &ksnew, mode, value);
    480 	if (error != 0)
    481 		return (error);
    482 
    483 	id = SEM_TO_ID(ksnew);
    484 	error = (*docopyout)(&id, idp, sizeof(id));
    485 	if (error) {
    486 		kmem_free(ksnew->ks_name, ksnew->ks_namelen);
    487 		ksnew->ks_name = NULL;
    488 
    489 		mutex_enter(&ksnew->ks_interlock);
    490 		ksem_delref(ksnew);
    491 		return (error);
    492 	}
    493 
    494 	/*
    495 	 * We need to make sure we haven't lost a race while
    496 	 * allocating during creation.
    497 	 */
    498 	mutex_enter(&ksem_mutex);
    499 	if ((ks = ksem_lookup_byname(name)) != NULL) {
    500 		if (oflag & O_EXCL) {
    501 			mutex_exit(&ks->ks_interlock);
    502 			mutex_exit(&ksem_mutex);
    503 
    504 			kmem_free(ksnew->ks_name, ksnew->ks_namelen);
    505 			ksnew->ks_name = NULL;
    506 
    507 			mutex_enter(&ksnew->ks_interlock);
    508 			ksem_delref(ksnew);
    509 			return (EEXIST);
    510 		}
    511 		goto found_one;
    512 	} else {
    513 		/* ksnew already has its initial reference. */
    514 		LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
    515 		mutex_exit(&ksem_mutex);
    516 
    517 		ksem_add_proc(l->l_proc, ksnew);
    518 	}
    519 	return (error);
    520 }
    521 
    522 /* We must have a read lock on the ksem_proc list! */
    523 static struct ksem *
    524 ksem_lookup_proc(struct ksem_proc *kp, semid_t id)
    525 {
    526 	struct ksem_ref *ksr;
    527 
    528 	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
    529 		if (id == SEM_TO_ID(ksr->ksr_ksem)) {
    530 			mutex_enter(&ksr->ksr_ksem->ks_interlock);
    531 			return (ksr->ksr_ksem);
    532 		}
    533 	}
    534 
    535 	return (NULL);
    536 }
    537 
    538 int
    539 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap, register_t *retval)
    540 {
    541 	/* {
    542 		const char *name;
    543 	} */
    544 	char name[SEM_MAX_NAMELEN + 1], *cp;
    545 	size_t done, len;
    546 	struct ksem *ks;
    547 	int error;
    548 
    549 	error = copyinstr(SCARG(uap, name), name, sizeof(name), &done);
    550 	if (error)
    551 		return error;
    552 
    553 	mutex_enter(&ksem_mutex);
    554 	ks = ksem_lookup_byname(name);
    555 	if (ks == NULL) {
    556 		mutex_exit(&ksem_mutex);
    557 		return (ENOENT);
    558 	}
    559 
    560 	KASSERT(mutex_owned(&ks->ks_interlock));
    561 
    562 	LIST_REMOVE(ks, ks_entry);
    563 	cp = ks->ks_name;
    564 	len = ks->ks_namelen;
    565 	ks->ks_name = NULL;
    566 
    567 	mutex_exit(&ksem_mutex);
    568 
    569 	if (ks->ks_ref == 0)
    570 		ksem_free(ks);
    571 	else
    572 		mutex_exit(&ks->ks_interlock);
    573 
    574 	kmem_free(cp, len);
    575 
    576 	return (0);
    577 }
    578 
    579 int
    580 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap, register_t *retval)
    581 {
    582 	/* {
    583 		semid_t id;
    584 	} */
    585 	struct ksem_proc *kp;
    586 	struct ksem_ref *ksr;
    587 	struct ksem *ks;
    588 
    589 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    590 	if (kp == NULL)
    591 		return (EINVAL);
    592 
    593 	rw_enter(&kp->kp_lock, RW_WRITER);
    594 
    595 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    596 	if (ks == NULL) {
    597 		rw_exit(&kp->kp_lock);
    598 		return (EINVAL);
    599 	}
    600 
    601 	KASSERT(mutex_owned(&ks->ks_interlock));
    602 	if (ks->ks_name == NULL) {
    603 		mutex_exit(&ks->ks_interlock);
    604 		rw_exit(&kp->kp_lock);
    605 		return (EINVAL);
    606 	}
    607 
    608 	ksr = ksem_drop_proc(kp, ks);
    609 	rw_exit(&kp->kp_lock);
    610 	kmem_free(ksr, sizeof(*ksr));
    611 
    612 	return (0);
    613 }
    614 
    615 int
    616 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap, register_t *retval)
    617 {
    618 	/* {
    619 		semid_t id;
    620 	} */
    621 	struct ksem_proc *kp;
    622 	struct ksem *ks;
    623 	int error;
    624 
    625 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    626 	if (kp == NULL)
    627 		return (EINVAL);
    628 
    629 	rw_enter(&kp->kp_lock, RW_READER);
    630 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    631 	rw_exit(&kp->kp_lock);
    632 	if (ks == NULL)
    633 		return (EINVAL);
    634 
    635 	KASSERT(mutex_owned(&ks->ks_interlock));
    636 	if (ks->ks_value == SEM_VALUE_MAX) {
    637 		error = EOVERFLOW;
    638 		goto out;
    639 	}
    640 	++ks->ks_value;
    641 	if (ks->ks_waiters)
    642 		cv_broadcast(&ks->ks_cv);
    643 	error = 0;
    644  out:
    645 	mutex_exit(&ks->ks_interlock);
    646 	return (error);
    647 }
    648 
    649 static int
    650 ksem_wait(struct lwp *l, semid_t id, int tryflag)
    651 {
    652 	struct ksem_proc *kp;
    653 	struct ksem *ks;
    654 	int error;
    655 
    656 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    657 	if (kp == NULL)
    658 		return (EINVAL);
    659 
    660 	rw_enter(&kp->kp_lock, RW_READER);
    661 	ks = ksem_lookup_proc(kp, id);
    662 	rw_exit(&kp->kp_lock);
    663 	if (ks == NULL)
    664 		return (EINVAL);
    665 
    666 	KASSERT(mutex_owned(&ks->ks_interlock));
    667 	ksem_addref(ks);
    668 	while (ks->ks_value == 0) {
    669 		ks->ks_waiters++;
    670 		if (tryflag)
    671 			error = EAGAIN;
    672 		else
    673 			error = cv_wait_sig(&ks->ks_cv, &ks->ks_interlock);
    674 		ks->ks_waiters--;
    675 		if (error)
    676 			goto out;
    677 	}
    678 	ks->ks_value--;
    679 	error = 0;
    680  out:
    681 	ksem_delref(ks);
    682 	return (error);
    683 }
    684 
    685 int
    686 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap, register_t *retval)
    687 {
    688 	/* {
    689 		semid_t id;
    690 	} */
    691 
    692 	return ksem_wait(l, SCARG(uap, id), 0);
    693 }
    694 
    695 int
    696 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap, register_t *retval)
    697 {
    698 	/* {
    699 		semid_t id;
    700 	} */
    701 
    702 	return ksem_wait(l, SCARG(uap, id), 1);
    703 }
    704 
    705 int
    706 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap, register_t *retval)
    707 {
    708 	/* {
    709 		semid_t id;
    710 		unsigned int *value;
    711 	} */
    712 	struct ksem_proc *kp;
    713 	struct ksem *ks;
    714 	unsigned int val;
    715 
    716 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    717 	if (kp == NULL)
    718 		return (EINVAL);
    719 
    720 	rw_enter(&kp->kp_lock, RW_READER);
    721 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    722 	rw_exit(&kp->kp_lock);
    723 	if (ks == NULL)
    724 		return (EINVAL);
    725 
    726 	KASSERT(mutex_owned(&ks->ks_interlock));
    727 	val = ks->ks_value;
    728 	mutex_exit(&ks->ks_interlock);
    729 
    730 	return (copyout(&val, SCARG(uap, value), sizeof(val)));
    731 }
    732 
    733 int
    734 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap, register_t *retval)
    735 {
    736 	/* {
    737 		semid_t id;
    738 	} */
    739 	struct ksem_proc *kp;
    740 	struct ksem_ref *ksr;
    741 	struct ksem *ks;
    742 
    743 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    744 	if (kp == NULL)
    745 		return (EINVAL);
    746 
    747 	rw_enter(&kp->kp_lock, RW_WRITER);
    748 
    749 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    750 	if (ks == NULL) {
    751 		rw_exit(&kp->kp_lock);
    752 		return (EINVAL);
    753 	}
    754 
    755 	KASSERT(mutex_owned(&ks->ks_interlock));
    756 
    757 	/*
    758 	 * XXX This misses named semaphores which have been unlink'd,
    759 	 * XXX but since behavior of destroying a named semaphore is
    760 	 * XXX undefined, this is technically allowed.
    761 	 */
    762 	if (ks->ks_name != NULL) {
    763 		mutex_exit(&ks->ks_interlock);
    764 		rw_exit(&kp->kp_lock);
    765 		return (EINVAL);
    766 	}
    767 
    768 	if (ks->ks_waiters) {
    769 		mutex_exit(&ks->ks_interlock);
    770 		rw_exit(&kp->kp_lock);
    771 		return (EBUSY);
    772 	}
    773 
    774 	ksr = ksem_drop_proc(kp, ks);
    775 	rw_exit(&kp->kp_lock);
    776 	kmem_free(ksr, sizeof(*ksr));
    777 
    778 	return (0);
    779 }
    780 
    781 static void
    782 ksem_forkhook(struct proc *p2, struct proc *p1)
    783 {
    784 	struct ksem_proc *kp1, *kp2;
    785 	struct ksem_ref *ksr, *ksr1;
    786 
    787 	kp1 = proc_getspecific(p1, ksem_specificdata_key);
    788 	if (kp1 == NULL)
    789 		return;
    790 
    791 	kp2 = ksem_proc_alloc();
    792 
    793 	rw_enter(&kp1->kp_lock, RW_READER);
    794 
    795 	if (!LIST_EMPTY(&kp1->kp_ksems)) {
    796 		LIST_FOREACH(ksr, &kp1->kp_ksems, ksr_list) {
    797 			ksr1 = kmem_alloc(sizeof(*ksr), KM_SLEEP);
    798 			ksr1->ksr_ksem = ksr->ksr_ksem;
    799 			mutex_enter(&ksr->ksr_ksem->ks_interlock);
    800 			ksem_addref(ksr->ksr_ksem);
    801 			mutex_exit(&ksr->ksr_ksem->ks_interlock);
    802 			LIST_INSERT_HEAD(&kp2->kp_ksems, ksr1, ksr_list);
    803 		}
    804 	}
    805 
    806 	rw_exit(&kp1->kp_lock);
    807 	proc_setspecific(p2, ksem_specificdata_key, kp2);
    808 }
    809 
    810 static void
    811 ksem_exechook(struct proc *p, void *arg)
    812 {
    813 	struct ksem_proc *kp;
    814 
    815 	kp = proc_getspecific(p, ksem_specificdata_key);
    816 	if (kp != NULL) {
    817 		proc_setspecific(p, ksem_specificdata_key, NULL);
    818 		ksem_proc_dtor(kp);
    819 	}
    820 }
    821 
    822 void
    823 ksem_init(void)
    824 {
    825 	int i, error;
    826 
    827 	mutex_init(&ksem_mutex, MUTEX_DEFAULT, IPL_NONE);
    828 	exechook_establish(ksem_exechook, NULL);
    829 	forkhook_establish(ksem_forkhook);
    830 
    831 	for (i = 0; i < SEM_HASHTBL_SIZE; i++)
    832 		LIST_INIT(&ksem_hash[i]);
    833 
    834 	error = proc_specific_key_create(&ksem_specificdata_key,
    835 					 ksem_proc_dtor);
    836 	KASSERT(error == 0);
    837 }
    838 
    839 /*
    840  * Sysctl initialization and nodes.
    841  */
    842 
    843 SYSCTL_SETUP(sysctl_posix_sem_setup, "sysctl kern.posix subtree setup")
    844 {
    845 	const struct sysctlnode *node = NULL;
    846 
    847 	sysctl_createv(clog, 0, NULL, NULL,
    848 		CTLFLAG_PERMANENT,
    849 		CTLTYPE_NODE, "kern", NULL,
    850 		NULL, 0, NULL, 0,
    851 		CTL_KERN, CTL_EOL);
    852 	sysctl_createv(clog, 0, NULL, &node,
    853 		CTLFLAG_PERMANENT,
    854 		CTLTYPE_NODE, "posix",
    855 		SYSCTL_DESCR("POSIX options"),
    856 		NULL, 0, NULL, 0,
    857 		CTL_KERN, CTL_CREATE, CTL_EOL);
    858 
    859 	if (node == NULL)
    860 		return;
    861 
    862 	sysctl_createv(clog, 0, &node, NULL,
    863 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    864 		CTLTYPE_INT, "semmax",
    865 		SYSCTL_DESCR("Maximal number of semaphores"),
    866 		NULL, 0, &sem_max, 0,
    867 		CTL_CREATE, CTL_EOL);
    868 }
    869