Home | History | Annotate | Line # | Download | only in kern
uipc_sem.c revision 1.22.4.2
      1 /*	$NetBSD: uipc_sem.c,v 1.22.4.2 2008/01/08 22:11:45 bouyer Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     41  * All rights reserved.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  *
     52  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     62  * SUCH DAMAGE.
     63  */
     64 
     65 #include <sys/cdefs.h>
     66 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.22.4.2 2008/01/08 22:11:45 bouyer Exp $");
     67 
     68 #include "opt_posix.h"
     69 
     70 #include <sys/param.h>
     71 #include <sys/systm.h>
     72 #include <sys/kernel.h>
     73 #include <sys/proc.h>
     74 #include <sys/ksem.h>
     75 #include <sys/syscall.h>
     76 #include <sys/stat.h>
     77 #include <sys/kmem.h>
     78 #include <sys/fcntl.h>
     79 #include <sys/kauth.h>
     80 #include <sys/sysctl.h>
     81 
     82 #include <sys/mount.h>
     83 
     84 #include <sys/syscallargs.h>
     85 
/* Initial value of the sem_max limit on the number of semaphores. */
#define SEM_MAX 128
/* Longest accepted semaphore name, not counting the terminating NUL. */
#define SEM_MAX_NAMELEN	14
/* Largest value a semaphore counter may hold. */
#define SEM_VALUE_MAX (~0U)
/* Number of buckets in the ksem_hash id table. */
#define SEM_HASHTBL_SIZE 13

/* Identifier handed to userland for a given ksem. */
#define SEM_TO_ID(x)	(((x)->ks_id))
/* Bucket index in ksem_hash for a given identifier. */
#define SEM_HASH(id)	((id) % SEM_HASHTBL_SIZE)

/*
 * NOTE(review): the allocations visible in this file use kmem_*(), not
 * M_SEM -- confirm whether this malloc type is still needed.
 */
MALLOC_DEFINE(M_SEM, "p1003_1b_sem", "p1003_1b semaphores");
     95 
/*
 * In-kernel state of one POSIX semaphore, named or anonymous.
 *
 * Note: to read the ks_name member, you need either the ks_interlock
 * or the ksem_mutex.  To write the ks_name member, you need both.  Make
 * sure the order is ksem_mutex -> ks_interlock.
 *
 * ks_name is NULL for anonymous semaphores and for named semaphores
 * that have been unlinked.
 */
struct ksem {
	LIST_ENTRY(ksem) ks_entry;	/* global list entry */
	LIST_ENTRY(ksem) ks_hash;	/* hash list entry */
	kmutex_t ks_interlock;		/* lock on this ksem */
	kcondvar_t ks_cv;		/* condition variable */
	unsigned int ks_ref;		/* number of references */
	char *ks_name;			/* if named, this is the name */
	size_t ks_namelen;		/* length of name */
	mode_t ks_mode;			/* protection bits */
	uid_t ks_uid;			/* creator uid */
	gid_t ks_gid;			/* creator gid */
	unsigned int ks_value;		/* current value */
	unsigned int ks_waiters;	/* number of waiters */
	semid_t ks_id;			/* unique identifier */
};
    116 
/* One entry in a process's list of semaphores it has open. */
struct ksem_ref {
	LIST_ENTRY(ksem_ref) ksr_list;	/* linkage on ksem_proc.kp_ksems */
	struct ksem *ksr_ksem;		/* the semaphore referred to */
};
    121 
/*
 * Per-process semaphore state, attached to the process through
 * ksem_specificdata_key.
 */
struct ksem_proc {
	krwlock_t kp_lock;		/* taken around accesses to kp_ksems */
	LIST_HEAD(, ksem_ref) kp_ksems;	/* semaphores this process has open */
};
    126 
LIST_HEAD(ksem_list, ksem);

/*
 * ksem_mutex protects ksem_head and nsems; ksem_hash and ksem_counter
 * are also only touched with it held.  Only named semaphores go onto
 * ksem_head; every semaphore goes onto ksem_hash, keyed by its id.
 */
static kmutex_t ksem_mutex;
static struct ksem_list ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
static struct ksem_list ksem_hash[SEM_HASHTBL_SIZE];
static u_int sem_max = SEM_MAX;	/* limit on nsems; exported as "semmax" */
static int nsems = 0;		/* number of semaphores currently allocated */

/*
 * ksem_counter is the last assigned semid_t.  It needs to be COMPAT_NETBSD32
 * friendly, even though semid_t itself is defined as uintptr_t.
 */
static uint32_t ksem_counter = 1;

/* Key under which each process's struct ksem_proc is stored. */
static specificdata_key_t ksem_specificdata_key;
    146 
    147 static void
    148 ksem_free(struct ksem *ks)
    149 {
    150 
    151 	KASSERT(mutex_owned(&ks->ks_interlock));
    152 
    153 	/*
    154 	 * If the ksem is anonymous (or has been unlinked), then
    155 	 * this is the end if its life.
    156 	 */
    157 	if (ks->ks_name == NULL) {
    158 		mutex_exit(&ks->ks_interlock);
    159 		mutex_destroy(&ks->ks_interlock);
    160 		cv_destroy(&ks->ks_cv);
    161 
    162 		mutex_enter(&ksem_mutex);
    163 		nsems--;
    164 		LIST_REMOVE(ks, ks_hash);
    165 		mutex_exit(&ksem_mutex);
    166 
    167 		kmem_free(ks, sizeof(*ks));
    168 		return;
    169 	}
    170 	mutex_exit(&ks->ks_interlock);
    171 }
    172 
    173 static inline void
    174 ksem_addref(struct ksem *ks)
    175 {
    176 
    177 	KASSERT(mutex_owned(&ks->ks_interlock));
    178 	ks->ks_ref++;
    179 	KASSERT(ks->ks_ref != 0);
    180 }
    181 
    182 static inline void
    183 ksem_delref(struct ksem *ks)
    184 {
    185 
    186 	KASSERT(mutex_owned(&ks->ks_interlock));
    187 	KASSERT(ks->ks_ref != 0);
    188 	if (--ks->ks_ref == 0) {
    189 		ksem_free(ks);
    190 		return;
    191 	}
    192 	mutex_exit(&ks->ks_interlock);
    193 }
    194 
    195 static struct ksem_proc *
    196 ksem_proc_alloc(void)
    197 {
    198 	struct ksem_proc *kp;
    199 
    200 	kp = kmem_alloc(sizeof(*kp), KM_SLEEP);
    201 	rw_init(&kp->kp_lock);
    202 	LIST_INIT(&kp->kp_ksems);
    203 
    204 	return (kp);
    205 }
    206 
    207 static void
    208 ksem_proc_dtor(void *arg)
    209 {
    210 	struct ksem_proc *kp = arg;
    211 	struct ksem_ref *ksr;
    212 
    213 	rw_enter(&kp->kp_lock, RW_WRITER);
    214 
    215 	while ((ksr = LIST_FIRST(&kp->kp_ksems)) != NULL) {
    216 		LIST_REMOVE(ksr, ksr_list);
    217 		mutex_enter(&ksr->ksr_ksem->ks_interlock);
    218 		ksem_delref(ksr->ksr_ksem);
    219 		kmem_free(ksr, sizeof(*ksr));
    220 	}
    221 
    222 	rw_exit(&kp->kp_lock);
    223 	rw_destroy(&kp->kp_lock);
    224 	kmem_free(kp, sizeof(*kp));
    225 }
    226 
    227 static void
    228 ksem_add_proc(struct proc *p, struct ksem *ks)
    229 {
    230 	struct ksem_proc *kp;
    231 	struct ksem_ref *ksr;
    232 
    233 	kp = proc_getspecific(p, ksem_specificdata_key);
    234 	if (kp == NULL) {
    235 		kp = ksem_proc_alloc();
    236 		proc_setspecific(p, ksem_specificdata_key, kp);
    237 	}
    238 
    239 	ksr = kmem_alloc(sizeof(*ksr), KM_SLEEP);
    240 	ksr->ksr_ksem = ks;
    241 
    242 	rw_enter(&kp->kp_lock, RW_WRITER);
    243 	LIST_INSERT_HEAD(&kp->kp_ksems, ksr, ksr_list);
    244 	rw_exit(&kp->kp_lock);
    245 }
    246 
    247 /* We MUST have a write lock on the ksem_proc list! */
    248 static struct ksem_ref *
    249 ksem_drop_proc(struct ksem_proc *kp, struct ksem *ks)
    250 {
    251 	struct ksem_ref *ksr;
    252 
    253 	KASSERT(mutex_owned(&ks->ks_interlock));
    254 	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
    255 		if (ksr->ksr_ksem == ks) {
    256 			ksem_delref(ks);
    257 			LIST_REMOVE(ksr, ksr_list);
    258 			return (ksr);
    259 		}
    260 	}
    261 #ifdef DIAGNOSTIC
    262 	panic("ksem_drop_proc: ksem_proc %p ksem %p", kp, ks);
    263 #endif
    264 	return (NULL);
    265 }
    266 
    267 static int
    268 ksem_perm(struct lwp *l, struct ksem *ks)
    269 {
    270 	kauth_cred_t uc;
    271 
    272 	KASSERT(mutex_owned(&ks->ks_interlock));
    273 	uc = l->l_cred;
    274 	if ((kauth_cred_geteuid(uc) == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
    275 	    (kauth_cred_getegid(uc) == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
    276 	    (ks->ks_mode & S_IWOTH) != 0 ||
    277 	    kauth_authorize_generic(uc, KAUTH_GENERIC_ISSUSER, NULL) == 0)
    278 		return (0);
    279 	return (EPERM);
    280 }
    281 
    282 static struct ksem *
    283 ksem_lookup_byid(semid_t id)
    284 {
    285 	struct ksem *ks;
    286 
    287 	KASSERT(mutex_owned(&ksem_mutex));
    288 	LIST_FOREACH(ks, &ksem_hash[SEM_HASH(id)], ks_hash) {
    289 		if (ks->ks_id == id)
    290 			return ks;
    291 	}
    292 	return NULL;
    293 }
    294 
    295 static struct ksem *
    296 ksem_lookup_byname(const char *name)
    297 {
    298 	struct ksem *ks;
    299 
    300 	KASSERT(mutex_owned(&ksem_mutex));
    301 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    302 		if (strcmp(ks->ks_name, name) == 0) {
    303 			mutex_enter(&ks->ks_interlock);
    304 			return (ks);
    305 		}
    306 	}
    307 	return (NULL);
    308 }
    309 
    310 static int
    311 ksem_create(struct lwp *l, const char *name, struct ksem **ksret,
    312     mode_t mode, unsigned int value)
    313 {
    314 	struct ksem *ret;
    315 	kauth_cred_t uc;
    316 	size_t len;
    317 
    318 	uc = l->l_cred;
    319 	if (value > SEM_VALUE_MAX)
    320 		return (EINVAL);
    321 	ret = kmem_zalloc(sizeof(*ret), KM_SLEEP);
    322 	if (name != NULL) {
    323 		len = strlen(name);
    324 		if (len > SEM_MAX_NAMELEN) {
    325 			kmem_free(ret, sizeof(*ret));
    326 			return (ENAMETOOLONG);
    327 		}
    328 		/* name must start with a '/' but not contain one. */
    329 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    330 			kmem_free(ret, sizeof(*ret));
    331 			return (EINVAL);
    332 		}
    333 		ret->ks_namelen = len + 1;
    334 		ret->ks_name = kmem_alloc(ret->ks_namelen, KM_SLEEP);
    335 		strlcpy(ret->ks_name, name, len + 1);
    336 	} else
    337 		ret->ks_name = NULL;
    338 	ret->ks_mode = mode;
    339 	ret->ks_value = value;
    340 	ret->ks_ref = 1;
    341 	ret->ks_waiters = 0;
    342 	ret->ks_uid = kauth_cred_geteuid(uc);
    343 	ret->ks_gid = kauth_cred_getegid(uc);
    344 	mutex_init(&ret->ks_interlock, MUTEX_DEFAULT, IPL_NONE);
    345 	cv_init(&ret->ks_cv, "psem");
    346 
    347 	mutex_enter(&ksem_mutex);
    348 	if (nsems >= sem_max) {
    349 		mutex_exit(&ksem_mutex);
    350 		if (ret->ks_name != NULL)
    351 			kmem_free(ret->ks_name, ret->ks_namelen);
    352 		kmem_free(ret, sizeof(*ret));
    353 		return (ENFILE);
    354 	}
    355 	nsems++;
    356 	while (ksem_lookup_byid(ksem_counter) != NULL) {
    357 		ksem_counter++;
    358 		/* 0 is a special value for libpthread */
    359 		if (ksem_counter == 0)
    360 			ksem_counter++;
    361 	}
    362 	ret->ks_id = ksem_counter;
    363 	LIST_INSERT_HEAD(&ksem_hash[SEM_HASH(ret->ks_id)], ret, ks_hash);
    364 	mutex_exit(&ksem_mutex);
    365 
    366 	*ksret = ret;
    367 	return (0);
    368 }
    369 
    370 int
    371 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap, register_t *retval)
    372 {
    373 	/* {
    374 		unsigned int value;
    375 		semid_t *idp;
    376 	} */
    377 
    378 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp), copyout);
    379 }
    380 
    381 int
    382 do_ksem_init(struct lwp *l, unsigned int value, semid_t *idp,
    383     copyout_t docopyout)
    384 {
    385 	struct ksem *ks;
    386 	semid_t id;
    387 	int error;
    388 
    389 	/* Note the mode does not matter for anonymous semaphores. */
    390 	error = ksem_create(l, NULL, &ks, 0, value);
    391 	if (error)
    392 		return (error);
    393 	id = SEM_TO_ID(ks);
    394 	error = (*docopyout)(&id, idp, sizeof(id));
    395 	if (error) {
    396 		mutex_enter(&ks->ks_interlock);
    397 		ksem_delref(ks);
    398 		return (error);
    399 	}
    400 
    401 	ksem_add_proc(l->l_proc, ks);
    402 
    403 	return (0);
    404 }
    405 
    406 int
    407 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap, register_t *retval)
    408 {
    409 	/* {
    410 		const char *name;
    411 		int oflag;
    412 		mode_t mode;
    413 		unsigned int value;
    414 		semid_t *idp;
    415 	} */
    416 
    417 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
    418 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
    419 }
    420 
    421 int
    422 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    423      unsigned int value, semid_t *idp, copyout_t docopyout)
    424 {
    425 	char name[SEM_MAX_NAMELEN + 1];
    426 	size_t done;
    427 	int error;
    428 	struct ksem *ksnew, *ks;
    429 	semid_t id;
    430 
    431 	error = copyinstr(semname, name, sizeof(name), &done);
    432 	if (error)
    433 		return (error);
    434 
    435 	ksnew = NULL;
    436 	mutex_enter(&ksem_mutex);
    437 	ks = ksem_lookup_byname(name);
    438 
    439 	/* Found one? */
    440 	if (ks != NULL) {
    441 		/* Check for exclusive create. */
    442 		if (oflag & O_EXCL) {
    443 			mutex_exit(&ks->ks_interlock);
    444 			mutex_exit(&ksem_mutex);
    445 			return (EEXIST);
    446 		}
    447  found_one:
    448 		/*
    449 		 * Verify permissions.  If we can access it, add
    450 		 * this process's reference.
    451 		 */
    452 		KASSERT(mutex_owned(&ks->ks_interlock));
    453 		error = ksem_perm(l, ks);
    454 		if (error == 0)
    455 			ksem_addref(ks);
    456 		mutex_exit(&ks->ks_interlock);
    457 		mutex_exit(&ksem_mutex);
    458 		if (error)
    459 			return (error);
    460 
    461 		id = SEM_TO_ID(ks);
    462 		error = (*docopyout)(&id, idp, sizeof(id));
    463 		if (error) {
    464 			mutex_enter(&ks->ks_interlock);
    465 			ksem_delref(ks);
    466 			return (error);
    467 		}
    468 
    469 		ksem_add_proc(l->l_proc, ks);
    470 
    471 		return (0);
    472 	}
    473 
    474 	/*
    475 	 * didn't ask for creation? error.
    476 	 */
    477 	if ((oflag & O_CREAT) == 0) {
    478 		mutex_exit(&ksem_mutex);
    479 		return (ENOENT);
    480 	}
    481 
    482 	/*
    483 	 * We may block during creation, so drop the lock.
    484 	 */
    485 	mutex_exit(&ksem_mutex);
    486 	error = ksem_create(l, name, &ksnew, mode, value);
    487 	if (error != 0)
    488 		return (error);
    489 
    490 	id = SEM_TO_ID(ksnew);
    491 	error = (*docopyout)(&id, idp, sizeof(id));
    492 	if (error) {
    493 		kmem_free(ksnew->ks_name, ksnew->ks_namelen);
    494 		ksnew->ks_name = NULL;
    495 
    496 		mutex_enter(&ksnew->ks_interlock);
    497 		ksem_delref(ksnew);
    498 		return (error);
    499 	}
    500 
    501 	/*
    502 	 * We need to make sure we haven't lost a race while
    503 	 * allocating during creation.
    504 	 */
    505 	mutex_enter(&ksem_mutex);
    506 	if ((ks = ksem_lookup_byname(name)) != NULL) {
    507 		if (oflag & O_EXCL) {
    508 			mutex_exit(&ks->ks_interlock);
    509 			mutex_exit(&ksem_mutex);
    510 
    511 			kmem_free(ksnew->ks_name, ksnew->ks_namelen);
    512 			ksnew->ks_name = NULL;
    513 
    514 			mutex_enter(&ksnew->ks_interlock);
    515 			ksem_delref(ksnew);
    516 			return (EEXIST);
    517 		}
    518 		goto found_one;
    519 	} else {
    520 		/* ksnew already has its initial reference. */
    521 		LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
    522 		mutex_exit(&ksem_mutex);
    523 
    524 		ksem_add_proc(l->l_proc, ksnew);
    525 	}
    526 	return (error);
    527 }
    528 
    529 /* We must have a read lock on the ksem_proc list! */
    530 static struct ksem *
    531 ksem_lookup_proc(struct ksem_proc *kp, semid_t id)
    532 {
    533 	struct ksem_ref *ksr;
    534 
    535 	LIST_FOREACH(ksr, &kp->kp_ksems, ksr_list) {
    536 		if (id == SEM_TO_ID(ksr->ksr_ksem)) {
    537 			mutex_enter(&ksr->ksr_ksem->ks_interlock);
    538 			return (ksr->ksr_ksem);
    539 		}
    540 	}
    541 
    542 	return (NULL);
    543 }
    544 
    545 int
    546 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap, register_t *retval)
    547 {
    548 	/* {
    549 		const char *name;
    550 	} */
    551 	char name[SEM_MAX_NAMELEN + 1], *cp;
    552 	size_t done, len;
    553 	struct ksem *ks;
    554 	int error;
    555 
    556 	error = copyinstr(SCARG(uap, name), name, sizeof(name), &done);
    557 	if (error)
    558 		return error;
    559 
    560 	mutex_enter(&ksem_mutex);
    561 	ks = ksem_lookup_byname(name);
    562 	if (ks == NULL) {
    563 		mutex_exit(&ksem_mutex);
    564 		return (ENOENT);
    565 	}
    566 
    567 	KASSERT(mutex_owned(&ks->ks_interlock));
    568 
    569 	LIST_REMOVE(ks, ks_entry);
    570 	cp = ks->ks_name;
    571 	len = ks->ks_namelen;
    572 	ks->ks_name = NULL;
    573 
    574 	mutex_exit(&ksem_mutex);
    575 
    576 	if (ks->ks_ref == 0)
    577 		ksem_free(ks);
    578 	else
    579 		mutex_exit(&ks->ks_interlock);
    580 
    581 	kmem_free(cp, len);
    582 
    583 	return (0);
    584 }
    585 
    586 int
    587 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap, register_t *retval)
    588 {
    589 	/* {
    590 		semid_t id;
    591 	} */
    592 	struct ksem_proc *kp;
    593 	struct ksem_ref *ksr;
    594 	struct ksem *ks;
    595 
    596 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    597 	if (kp == NULL)
    598 		return (EINVAL);
    599 
    600 	rw_enter(&kp->kp_lock, RW_WRITER);
    601 
    602 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    603 	if (ks == NULL) {
    604 		rw_exit(&kp->kp_lock);
    605 		return (EINVAL);
    606 	}
    607 
    608 	KASSERT(mutex_owned(&ks->ks_interlock));
    609 	if (ks->ks_name == NULL) {
    610 		mutex_exit(&ks->ks_interlock);
    611 		rw_exit(&kp->kp_lock);
    612 		return (EINVAL);
    613 	}
    614 
    615 	ksr = ksem_drop_proc(kp, ks);
    616 	rw_exit(&kp->kp_lock);
    617 	kmem_free(ksr, sizeof(*ksr));
    618 
    619 	return (0);
    620 }
    621 
    622 int
    623 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap, register_t *retval)
    624 {
    625 	/* {
    626 		semid_t id;
    627 	} */
    628 	struct ksem_proc *kp;
    629 	struct ksem *ks;
    630 	int error;
    631 
    632 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    633 	if (kp == NULL)
    634 		return (EINVAL);
    635 
    636 	rw_enter(&kp->kp_lock, RW_READER);
    637 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    638 	rw_exit(&kp->kp_lock);
    639 	if (ks == NULL)
    640 		return (EINVAL);
    641 
    642 	KASSERT(mutex_owned(&ks->ks_interlock));
    643 	if (ks->ks_value == SEM_VALUE_MAX) {
    644 		error = EOVERFLOW;
    645 		goto out;
    646 	}
    647 	++ks->ks_value;
    648 	if (ks->ks_waiters)
    649 		cv_broadcast(&ks->ks_cv);
    650 	error = 0;
    651  out:
    652 	mutex_exit(&ks->ks_interlock);
    653 	return (error);
    654 }
    655 
    656 static int
    657 ksem_wait(struct lwp *l, semid_t id, int tryflag)
    658 {
    659 	struct ksem_proc *kp;
    660 	struct ksem *ks;
    661 	int error;
    662 
    663 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    664 	if (kp == NULL)
    665 		return (EINVAL);
    666 
    667 	rw_enter(&kp->kp_lock, RW_READER);
    668 	ks = ksem_lookup_proc(kp, id);
    669 	rw_exit(&kp->kp_lock);
    670 	if (ks == NULL)
    671 		return (EINVAL);
    672 
    673 	KASSERT(mutex_owned(&ks->ks_interlock));
    674 	ksem_addref(ks);
    675 	while (ks->ks_value == 0) {
    676 		ks->ks_waiters++;
    677 		if (tryflag)
    678 			error = EAGAIN;
    679 		else
    680 			error = cv_wait_sig(&ks->ks_cv, &ks->ks_interlock);
    681 		ks->ks_waiters--;
    682 		if (error)
    683 			goto out;
    684 	}
    685 	ks->ks_value--;
    686 	error = 0;
    687  out:
    688 	ksem_delref(ks);
    689 	return (error);
    690 }
    691 
    692 int
    693 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap, register_t *retval)
    694 {
    695 	/* {
    696 		semid_t id;
    697 	} */
    698 
    699 	return ksem_wait(l, SCARG(uap, id), 0);
    700 }
    701 
    702 int
    703 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap, register_t *retval)
    704 {
    705 	/* {
    706 		semid_t id;
    707 	} */
    708 
    709 	return ksem_wait(l, SCARG(uap, id), 1);
    710 }
    711 
    712 int
    713 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap, register_t *retval)
    714 {
    715 	/* {
    716 		semid_t id;
    717 		unsigned int *value;
    718 	} */
    719 	struct ksem_proc *kp;
    720 	struct ksem *ks;
    721 	unsigned int val;
    722 
    723 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    724 	if (kp == NULL)
    725 		return (EINVAL);
    726 
    727 	rw_enter(&kp->kp_lock, RW_READER);
    728 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    729 	rw_exit(&kp->kp_lock);
    730 	if (ks == NULL)
    731 		return (EINVAL);
    732 
    733 	KASSERT(mutex_owned(&ks->ks_interlock));
    734 	val = ks->ks_value;
    735 	mutex_exit(&ks->ks_interlock);
    736 
    737 	return (copyout(&val, SCARG(uap, value), sizeof(val)));
    738 }
    739 
    740 int
    741 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap, register_t *retval)
    742 {
    743 	/* {
    744 		semid_t id;
    745 	} */
    746 	struct ksem_proc *kp;
    747 	struct ksem_ref *ksr;
    748 	struct ksem *ks;
    749 
    750 	kp = proc_getspecific(l->l_proc, ksem_specificdata_key);
    751 	if (kp == NULL)
    752 		return (EINVAL);
    753 
    754 	rw_enter(&kp->kp_lock, RW_WRITER);
    755 
    756 	ks = ksem_lookup_proc(kp, SCARG(uap, id));
    757 	if (ks == NULL) {
    758 		rw_exit(&kp->kp_lock);
    759 		return (EINVAL);
    760 	}
    761 
    762 	KASSERT(mutex_owned(&ks->ks_interlock));
    763 
    764 	/*
    765 	 * XXX This misses named semaphores which have been unlink'd,
    766 	 * XXX but since behavior of destroying a named semaphore is
    767 	 * XXX undefined, this is technically allowed.
    768 	 */
    769 	if (ks->ks_name != NULL) {
    770 		mutex_exit(&ks->ks_interlock);
    771 		rw_exit(&kp->kp_lock);
    772 		return (EINVAL);
    773 	}
    774 
    775 	if (ks->ks_waiters) {
    776 		mutex_exit(&ks->ks_interlock);
    777 		rw_exit(&kp->kp_lock);
    778 		return (EBUSY);
    779 	}
    780 
    781 	ksr = ksem_drop_proc(kp, ks);
    782 	rw_exit(&kp->kp_lock);
    783 	kmem_free(ksr, sizeof(*ksr));
    784 
    785 	return (0);
    786 }
    787 
    788 static void
    789 ksem_forkhook(struct proc *p2, struct proc *p1)
    790 {
    791 	struct ksem_proc *kp1, *kp2;
    792 	struct ksem_ref *ksr, *ksr1;
    793 
    794 	kp1 = proc_getspecific(p1, ksem_specificdata_key);
    795 	if (kp1 == NULL)
    796 		return;
    797 
    798 	kp2 = ksem_proc_alloc();
    799 
    800 	rw_enter(&kp1->kp_lock, RW_READER);
    801 
    802 	if (!LIST_EMPTY(&kp1->kp_ksems)) {
    803 		LIST_FOREACH(ksr, &kp1->kp_ksems, ksr_list) {
    804 			ksr1 = kmem_alloc(sizeof(*ksr), KM_SLEEP);
    805 			ksr1->ksr_ksem = ksr->ksr_ksem;
    806 			mutex_enter(&ksr->ksr_ksem->ks_interlock);
    807 			ksem_addref(ksr->ksr_ksem);
    808 			mutex_exit(&ksr->ksr_ksem->ks_interlock);
    809 			LIST_INSERT_HEAD(&kp2->kp_ksems, ksr1, ksr_list);
    810 		}
    811 	}
    812 
    813 	rw_exit(&kp1->kp_lock);
    814 	proc_setspecific(p2, ksem_specificdata_key, kp2);
    815 }
    816 
    817 static void
    818 ksem_exechook(struct proc *p, void *arg)
    819 {
    820 	struct ksem_proc *kp;
    821 
    822 	kp = proc_getspecific(p, ksem_specificdata_key);
    823 	if (kp != NULL) {
    824 		proc_setspecific(p, ksem_specificdata_key, NULL);
    825 		ksem_proc_dtor(kp);
    826 	}
    827 }
    828 
    829 void
    830 ksem_init(void)
    831 {
    832 	int i, error;
    833 
    834 	mutex_init(&ksem_mutex, MUTEX_DEFAULT, IPL_NONE);
    835 	exechook_establish(ksem_exechook, NULL);
    836 	forkhook_establish(ksem_forkhook);
    837 
    838 	for (i = 0; i < SEM_HASHTBL_SIZE; i++)
    839 		LIST_INIT(&ksem_hash[i]);
    840 
    841 	error = proc_specific_key_create(&ksem_specificdata_key,
    842 					 ksem_proc_dtor);
    843 	KASSERT(error == 0);
    844 }
    845 
    846 /*
    847  * Sysctl initialization and nodes.
    848  */
    849 
    850 SYSCTL_SETUP(sysctl_posix_sem_setup, "sysctl kern.posix subtree setup")
    851 {
    852 	const struct sysctlnode *node = NULL;
    853 
    854 	sysctl_createv(clog, 0, NULL, NULL,
    855 		CTLFLAG_PERMANENT,
    856 		CTLTYPE_NODE, "kern", NULL,
    857 		NULL, 0, NULL, 0,
    858 		CTL_KERN, CTL_EOL);
    859 	sysctl_createv(clog, 0, NULL, &node,
    860 		CTLFLAG_PERMANENT,
    861 		CTLTYPE_NODE, "posix",
    862 		SYSCTL_DESCR("POSIX options"),
    863 		NULL, 0, NULL, 0,
    864 		CTL_KERN, CTL_CREATE, CTL_EOL);
    865 
    866 	if (node == NULL)
    867 		return;
    868 
    869 	sysctl_createv(clog, 0, &node, NULL,
    870 		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    871 		CTLTYPE_INT, "semmax",
    872 		SYSCTL_DESCR("Maximal number of semaphores"),
    873 		NULL, 0, &sem_max, 0,
    874 		CTL_CREATE, CTL_EOL);
    875 }
    876