Home | History | Annotate | Line # | Download | only in kern
      1 /*	$NetBSD: uipc_sem.c,v 1.62 2024/12/06 18:44:00 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 /*
     59  * Implementation of POSIX semaphore.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.62 2024/12/06 18:44:00 riastradh Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/types.h>
     67 
     68 #include <sys/atomic.h>
     69 #include <sys/cprng.h>
     70 #include <sys/fcntl.h>
     71 #include <sys/file.h>
     72 #include <sys/filedesc.h>
     73 #include <sys/kauth.h>
     74 #include <sys/kernel.h>
     75 #include <sys/kmem.h>
     76 #include <sys/ksem.h>
     77 #include <sys/lwp.h>
     78 #include <sys/module.h>
     79 #include <sys/mount.h>
     80 #include <sys/mutex.h>
     81 #include <sys/proc.h>
     82 #include <sys/rwlock.h>
     83 #include <sys/sdt.h>
     84 #include <sys/semaphore.h>
     85 #include <sys/stat.h>
     86 #include <sys/syscall.h>
     87 #include <sys/syscallargs.h>
     88 #include <sys/syscallvar.h>
     89 #include <sys/sysctl.h>
     90 #include <sys/uidinfo.h>
     91 /* Declare the "ksem" kernel module, class MODULE_CLASS_MISC. */
     92 MODULE(MODULE_CLASS_MISC, ksem, NULL);
     93 /* Size of the kernel buffer name_copyin() uses for a semaphore name. */
     94 #define	SEM_MAX_NAMELEN		NAME_MAX
     95 /* ks_flags bit: named semaphore was unlinked while still referenced. */
     96 #define	KS_UNLINKED		0x01
     97 /* Named semaphores live on ksem_head; the list and nsems change under ksem_lock. */
     98 static kmutex_t		ksem_lock	__cacheline_aligned;
     99 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
    100 static u_int		nsems_total	__cacheline_aligned;	/* ksem_t structures allocated */
    101 static u_int		nsems		__cacheline_aligned;	/* semaphores on ksem_head */
    102 /* Process-shared semaphores: hash table of ksem_t, guarded by ksem_pshared_lock. */
    103 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
    104 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
    105 static u_long		ksem_pshared_hashmask __read_mostly;
    106 /* Size passed to hashinit() when the pshared hash table is created. */
    107 #define	KSEM_PSHARED_HASHSIZE	32
    108 /* Handle from kauth_listen_scope(); handed back to kauth_unlisten_scope(). */
    109 static kauth_listener_t	ksem_listener;
    110 /* Forward declarations for functions that are referenced before they are defined. */
    111 static int		ksem_sysinit(void);
    112 static int		ksem_sysfini(bool);
    113 static int		ksem_modcmd(modcmd_t, void *);
    114 static void		ksem_release(ksem_t *, int);
    115 static int		ksem_close_fop(file_t *);
    116 static int		ksem_stat_fop(file_t *, struct stat *);
    117 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
    118     kauth_cred_t, int);
    119 /* File operations for semaphore descriptors; only read, stat and close are semaphore-specific. */
    120 static const struct fileops semops = {
    121 	.fo_name = "sem",
    122 	.fo_read = ksem_read_fop,
    123 	.fo_write = fbadop_write,
    124 	.fo_ioctl = fbadop_ioctl,
    125 	.fo_fcntl = fnullop_fcntl,
    126 	.fo_poll = fnullop_poll,
    127 	.fo_stat = ksem_stat_fop,
    128 	.fo_close = ksem_close_fop,
    129 	.fo_kqfilter = fnullop_kqfilter,
    130 	.fo_restart = fnullop_restart,
    131 };
    132 /* System calls installed by ksem_sysinit() and removed by ksem_sysfini(); ends with an all-zero entry. */
    133 static const struct syscall_package ksem_syscalls[] = {
    134 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
    135 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
    136 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
    137 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
    138 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
    139 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
    140 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
    141 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
    142 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
    143 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
    144 	{ 0, 0, NULL },
    145 };
    146 /* Sysctl state: the log passed to sysctl_createv()/sysctl_teardown(), and the limit exported as "semmax". */
    147 struct sysctllog *ksem_clog;
    148 int ksem_max = KSEM_MAX;
    149 
    150 static int
    151 name_copyin(const char *uname, char **name)
    152 {
    153 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
    154 
    155 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
    156 	if (error)
    157 		kmem_free(*name, SEM_MAX_NAMELEN);
    158 
    159 	return error;
    160 }
    161 
    162 static void
    163 name_destroy(char **name)
    164 {
    165 	if (!*name)
    166 		return;
    167 
    168 	kmem_free(*name, SEM_MAX_NAMELEN);
    169 	*name = NULL;
    170 }
    171 
    172 static int
    173 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    174     void *arg0, void *arg1, void *arg2, void *arg3)
    175 {
    176 	ksem_t *ks;
    177 	mode_t mode;
    178 
    179 	if (action != KAUTH_SYSTEM_SEMAPHORE)
    180 		return KAUTH_RESULT_DEFER;
    181 
    182 	ks = arg1;
    183 	mode = ks->ks_mode;
    184 
    185 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
    186 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
    187 	    (mode & S_IWOTH) != 0)
    188 		return KAUTH_RESULT_ALLOW;
    189 
    190 	return KAUTH_RESULT_DEFER;
    191 }
    192 
    193 static int
    194 ksem_sysinit(void)
    195 {
    196 	int error;
    197 	const struct sysctlnode *rnode;
    198 
    199 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
    200 	LIST_INIT(&ksem_head);
    201 	nsems_total = 0;
    202 	nsems = 0;
    203 
    204 	rw_init(&ksem_pshared_lock);
    205 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
    206 	    true, &ksem_pshared_hashmask);
    207 	KASSERT(ksem_pshared_hashtab != NULL);
    208 
    209 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
    210 	    ksem_listener_cb, NULL);
    211 
    212 	/* Define module-specific sysctl tree */
    213 
    214 	ksem_clog = NULL;
    215 
    216 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
    217 			CTLFLAG_PERMANENT,
    218 			CTLTYPE_NODE, "posix",
    219 			SYSCTL_DESCR("POSIX options"),
    220 			NULL, 0, NULL, 0,
    221 			CTL_KERN, CTL_CREATE, CTL_EOL);
    222 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    223 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    224 			CTLTYPE_INT, "semmax",
    225 			SYSCTL_DESCR("Maximal number of semaphores"),
    226 			NULL, 0, &ksem_max, 0,
    227 			CTL_CREATE, CTL_EOL);
    228 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    229 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
    230 			CTLTYPE_INT, "semcnt",
    231 			SYSCTL_DESCR("Current number of semaphores"),
    232 			NULL, 0, &nsems, 0,
    233 			CTL_CREATE, CTL_EOL);
    234 
    235 	error = syscall_establish(NULL, ksem_syscalls);
    236 	if (error) {
    237 		(void)ksem_sysfini(false);
    238 	}
    239 
    240 	return error;
    241 }
    242 
    243 static int
    244 ksem_sysfini(bool interface)
    245 {
    246 	int error;
    247 
    248 	if (interface) {
    249 		error = syscall_disestablish(NULL, ksem_syscalls);
    250 		if (error != 0) {
    251 			return error;
    252 		}
    253 		/*
    254 		 * Make sure that no semaphores are in use.  Note: semops
    255 		 * must be unused at this point.
    256 		 */
    257 		if (nsems_total) {
    258 			error = syscall_establish(NULL, ksem_syscalls);
    259 			KASSERT(error == 0);
    260 			return SET_ERROR(EBUSY);
    261 		}
    262 	}
    263 	kauth_unlisten_scope(ksem_listener);
    264 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
    265 	rw_destroy(&ksem_pshared_lock);
    266 	mutex_destroy(&ksem_lock);
    267 	sysctl_teardown(&ksem_clog);
    268 	return 0;
    269 }
    270 
    271 static int
    272 ksem_modcmd(modcmd_t cmd, void *arg)
    273 {
    274 
    275 	switch (cmd) {
    276 	case MODULE_CMD_INIT:
    277 		return ksem_sysinit();
    278 
    279 	case MODULE_CMD_FINI:
    280 		return ksem_sysfini(true);
    281 
    282 	default:
    283 		return SET_ERROR(ENOTTY);
    284 	}
    285 }
    286 
    287 static ksem_t *
    288 ksem_lookup(const char *name)
    289 {
    290 	ksem_t *ks;
    291 
    292 	KASSERT(mutex_owned(&ksem_lock));
    293 
    294 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    295 		if (strcmp(ks->ks_name, name) == 0) {
    296 			mutex_enter(&ks->ks_lock);
    297 			return ks;
    298 		}
    299 	}
    300 	return NULL;
    301 }
    302 
    303 static int
    304 ksem_perm(lwp_t *l, ksem_t *ks)
    305 {
    306 	kauth_cred_t uc = l->l_cred;
    307 
    308 	KASSERT(mutex_owned(&ks->ks_lock));
    309 
    310 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
    311 		return SET_ERROR(EACCES);
    312 
    313 	return 0;
    314 }
    315 
    316 /*
    317  * Hash a pshared ID to a bucket index.  Bits 1..23 are random; drop bit 0,
    318  * mask with ksem_pshared_hashmask, and assume a good distribution.
    319  */
    320 #define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
    321 
    322 static void
    323 ksem_remove_pshared(ksem_t *ksem)
    324 {
    325 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    326 	LIST_REMOVE(ksem, ks_entry);
    327 	rw_exit(&ksem_pshared_lock);
    328 }
    329 
    330 static ksem_t *
    331 ksem_lookup_pshared_locked(intptr_t id)
    332 {
    333 	u_long bucket = KSEM_PSHARED_HASH(id);
    334 	ksem_t *ksem = NULL;
    335 
    336 	/* ksem_t is locked and referenced upon return. */
    337 
    338 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
    339 		if (ksem->ks_pshared_id == id) {
    340 			mutex_enter(&ksem->ks_lock);
    341 			if (ksem->ks_pshared_proc == NULL) {
    342 				/*
    343 				 * This entry is dead, and in the process
    344 				 * of being torn down; skip it.
    345 				 */
    346 				mutex_exit(&ksem->ks_lock);
    347 				continue;
    348 			}
    349 			ksem->ks_ref++;
    350 			KASSERT(ksem->ks_ref != 0);
    351 			return ksem;
    352 		}
    353 	}
    354 
    355 	return NULL;
    356 }
    357 
    358 static ksem_t *
    359 ksem_lookup_pshared(intptr_t id)
    360 {
    361 	rw_enter(&ksem_pshared_lock, RW_READER);
    362 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
    363 	rw_exit(&ksem_pshared_lock);
    364 	return ksem;
    365 }
    366 
    367 static void
    368 ksem_alloc_pshared_id(ksem_t *ksem)
    369 {
    370 	ksem_t *ksem0;
    371 	uint32_t try;
    372 
    373 	KASSERT(ksem->ks_pshared_proc != NULL);
    374 
    375 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    376 	for (;;) {
    377 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
    378 		    KSEM_PSHARED_MARKER;
    379 
    380 		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
    381 			/* Got it! */
    382 			break;
    383 		}
    384 		ksem_release(ksem0, -1);
    385 	}
    386 	ksem->ks_pshared_id = try;
    387 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
    388 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
    389 	rw_exit(&ksem_pshared_lock);
    390 }
    391 
    392 /*
    393  * ksem_get: get the semaphore from the descriptor.
    394  *
    395  * => locks the semaphore, if found, and holds an extra reference.
    396  * => holds a reference on the file descriptor.
    397  */
    398 static int
    399 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
    400 {
    401 	ksem_t *ks;
    402 	int fd;
    403 
    404 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
    405 		/*
    406 		 * ksem_lookup_pshared() returns the ksem_t *
    407 		 * locked and referenced.
    408 		 */
    409 		ks = ksem_lookup_pshared(id);
    410 		if (ks == NULL)
    411 			return SET_ERROR(EINVAL);
    412 		KASSERT(ks->ks_pshared_id == id);
    413 		KASSERT(ks->ks_pshared_proc != NULL);
    414 		fd = -1;
    415 	} else if (id <= INT_MAX) {
    416 		fd = (int)id;
    417 		file_t *fp = fd_getfile(fd);
    418 
    419 		if (__predict_false(fp == NULL))
    420 			return SET_ERROR(EINVAL);
    421 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
    422 			fd_putfile(fd);
    423 			return SET_ERROR(EINVAL);
    424 		}
    425 		ks = fp->f_ksem;
    426 		mutex_enter(&ks->ks_lock);
    427 		ks->ks_ref++;
    428 	} else {
    429 		return SET_ERROR(EINVAL);
    430 	}
    431 
    432 	*ksret = ks;
    433 	*fdp = fd;
    434 	return 0;
    435 }
    436 
    437 /*
    438  * ksem_create: allocate and setup a new semaphore structure.
    439  */
    440 static int
    441 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
    442 {
    443 	ksem_t *ks;
    444 	kauth_cred_t uc;
    445 	char *kname;
    446 	size_t len;
    447 
    448 	/* Pre-check for the limit. */
    449 	if (nsems >= ksem_max) {
    450 		return SET_ERROR(ENFILE);
    451 	}
    452 
    453 	if (val > SEM_VALUE_MAX) {
    454 		return SET_ERROR(EINVAL);
    455 	}
    456 
    457 	if (name != NULL) {
    458 		len = strlen(name);
    459 		if (len > SEM_MAX_NAMELEN) {
    460 			return SET_ERROR(ENAMETOOLONG);
    461 		}
    462 		/* Name must start with a '/' but not contain one. */
    463 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    464 			return SET_ERROR(EINVAL);
    465 		}
    466 		kname = kmem_alloc(++len, KM_SLEEP);
    467 		strlcpy(kname, name, len);
    468 	} else {
    469 		kname = NULL;
    470 		len = 0;
    471 	}
    472 
    473 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
    474 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
    475 	cv_init(&ks->ks_cv, "psem");
    476 	ks->ks_name = kname;
    477 	ks->ks_namelen = len;
    478 	ks->ks_mode = mode;
    479 	ks->ks_value = val;
    480 	ks->ks_ref = 1;
    481 
    482 	uc = l->l_cred;
    483 	ks->ks_uid = kauth_cred_geteuid(uc);
    484 	ks->ks_gid = kauth_cred_getegid(uc);
    485 	chgsemcnt(ks->ks_uid, 1);
    486 	atomic_inc_uint(&nsems_total);
    487 
    488 	*ksret = ks;
    489 	return 0;
    490 }
    491 
    492 static void
    493 ksem_free(ksem_t *ks)
    494 {
    495 
    496 	KASSERT(!cv_has_waiters(&ks->ks_cv));
    497 
    498 	chgsemcnt(ks->ks_uid, -1);
    499 	atomic_dec_uint(&nsems_total);
    500 
    501 	if (ks->ks_pshared_id) {
    502 		KASSERT(ks->ks_pshared_proc == NULL);
    503 		ksem_remove_pshared(ks);
    504 	}
    505 	if (ks->ks_name) {
    506 		KASSERT(ks->ks_namelen > 0);
    507 		kmem_free(ks->ks_name, ks->ks_namelen);
    508 	}
    509 	mutex_destroy(&ks->ks_lock);
    510 	cv_destroy(&ks->ks_cv);
    511 	kmem_free(ks, sizeof(ksem_t));
    512 }
    513 /* True when the ID carries the pshared marker, i.e. it is not a file descriptor number. */
    514 #define	KSEM_ID_IS_PSHARED(id)		\
    515 	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
    516 
    517 static void
    518 ksem_release(ksem_t *ksem, int fd)
    519 {
    520 	bool destroy = false;
    521 
    522 	KASSERT(mutex_owned(&ksem->ks_lock));
    523 
    524 	KASSERT(ksem->ks_ref > 0);
    525 	if (--ksem->ks_ref == 0) {
    526 		/*
    527 		 * Destroy if the last reference and semaphore is unnamed,
    528 		 * or unlinked (for named semaphore).
    529 		 */
    530 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
    531 		    (ksem->ks_name == NULL);
    532 	}
    533 	mutex_exit(&ksem->ks_lock);
    534 
    535 	if (destroy) {
    536 		ksem_free(ksem);
    537 	}
    538 	if (fd != -1) {
    539 		fd_putfile(fd);
    540 	}
    541 }
    542 
    543 int
    544 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
    545     register_t *retval)
    546 {
    547 	/* {
    548 		unsigned int value;
    549 		intptr_t *idp;
    550 	} */
    551 
    552 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
    553 	    copyin, copyout);
    554 }
    555 
    556 int
    557 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
    558     copyout_t docopyout)
    559 {
    560 	proc_t *p = l->l_proc;
    561 	ksem_t *ks;
    562 	file_t *fp;
    563 	intptr_t id, arg;
    564 	int fd, error;
    565 
    566 	/*
    567 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
    568 	 * indicate that a pshared semaphore is wanted.  In that case we
    569 	 * allocate globally unique ID and return that, rather than the
    570 	 * process-scoped file descriptor ID.
    571 	 */
    572 	error = (*docopyin)(idp, &arg, sizeof(*idp));
    573 	if (error) {
    574 		return error;
    575 	}
    576 
    577 	error = fd_allocfile(&fp, &fd);
    578 	if (error) {
    579 		return error;
    580 	}
    581 	fp->f_type = DTYPE_SEM;
    582 	fp->f_flag = FREAD | FWRITE;
    583 	fp->f_ops = &semops;
    584 
    585 	if (fd >= KSEM_MARKER_MIN) {
    586 		/*
    587 		 * This is super-unlikely, but we check for it anyway
    588 		 * because potential collisions with the pshared marker
    589 		 * would be bad.
    590 		 */
    591 		fd_abort(p, fp, fd);
    592 		return SET_ERROR(EMFILE);
    593 	}
    594 
    595 	/* Note the mode does not matter for anonymous semaphores. */
    596 	error = ksem_create(l, NULL, &ks, 0, val);
    597 	if (error) {
    598 		fd_abort(p, fp, fd);
    599 		return error;
    600 	}
    601 
    602 	if (arg == KSEM_PSHARED) {
    603 		ks->ks_pshared_proc = curproc;
    604 		ks->ks_pshared_fd = fd;
    605 		ksem_alloc_pshared_id(ks);
    606 		id = ks->ks_pshared_id;
    607 	} else {
    608 		id = (intptr_t)fd;
    609 	}
    610 
    611 	error = (*docopyout)(&id, idp, sizeof(*idp));
    612 	if (error) {
    613 		ksem_free(ks);
    614 		fd_abort(p, fp, fd);
    615 		return error;
    616 	}
    617 
    618 	fp->f_ksem = ks;
    619 	fd_affix(p, fp, fd);
    620 	return error;
    621 }
    622 
    623 int
    624 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
    625     register_t *retval)
    626 {
    627 	/* {
    628 		const char *name;
    629 		int oflag;
    630 		mode_t mode;
    631 		unsigned int value;
    632 		intptr_t *idp;
    633 	} */
    634 
    635 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
    636 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
    637 }
    638 
    639 int
    640 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    641      unsigned int value, intptr_t *idp, copyout_t docopyout)
    642 {
    643 	char *name;
    644 	proc_t *p = l->l_proc;
    645 	ksem_t *ksnew = NULL, *ks;
    646 	file_t *fp;
    647 	intptr_t id;
    648 	int fd, error;
    649 
    650 	error = name_copyin(semname, &name);
    651 	if (error) {
    652 		return error;
    653 	}
    654 	error = fd_allocfile(&fp, &fd);
    655 	if (error) {
    656 		name_destroy(&name);
    657 		return error;
    658 	}
    659 	fp->f_type = DTYPE_SEM;
    660 	fp->f_flag = FREAD | FWRITE;
    661 	fp->f_ops = &semops;
    662 
    663 	if (fd >= KSEM_MARKER_MIN) {
    664 		/*
    665 		 * This is super-unlikely, but we check for it anyway
    666 		 * because potential collisions with the pshared marker
    667 		 * would be bad.
    668 		 */
    669 		fd_abort(p, fp, fd);
    670 		return SET_ERROR(EMFILE);
    671 	}
    672 
    673 	/*
    674 	 * The ID (file descriptor number) can be stored early.
    675 	 * Note that zero is a special value for libpthread.
    676 	 */
    677 	id = (intptr_t)fd;
    678 	error = (*docopyout)(&id, idp, sizeof(*idp));
    679 	if (error) {
    680 		goto err;
    681 	}
    682 
    683 	if (oflag & O_CREAT) {
    684 		/* Create a new semaphore. */
    685 		error = ksem_create(l, name, &ksnew, mode, value);
    686 		if (error) {
    687 			goto err;
    688 		}
    689 		KASSERT(ksnew != NULL);
    690 	}
    691 
    692 	/* Lookup for a semaphore with such name. */
    693 	mutex_enter(&ksem_lock);
    694 	ks = ksem_lookup(name);
    695 	name_destroy(&name);
    696 	if (ks) {
    697 		KASSERT(mutex_owned(&ks->ks_lock));
    698 		mutex_exit(&ksem_lock);
    699 
    700 		/* Check for exclusive create. */
    701 		if (oflag & O_EXCL) {
    702 			mutex_exit(&ks->ks_lock);
    703 			error = SET_ERROR(EEXIST);
    704 			goto err;
    705 		}
    706 		/*
    707 		 * Verify permissions.  If we can access it,
    708 		 * add the reference of this thread.
    709 		 */
    710 		error = ksem_perm(l, ks);
    711 		if (error == 0) {
    712 			ks->ks_ref++;
    713 		}
    714 		mutex_exit(&ks->ks_lock);
    715 		if (error) {
    716 			goto err;
    717 		}
    718 	} else {
    719 		/* Fail if not found and not creating. */
    720 		if ((oflag & O_CREAT) == 0) {
    721 			mutex_exit(&ksem_lock);
    722 			KASSERT(ksnew == NULL);
    723 			error = SET_ERROR(ENOENT);
    724 			goto err;
    725 		}
    726 
    727 		/* Check for the limit locked. */
    728 		if (nsems >= ksem_max) {
    729 			mutex_exit(&ksem_lock);
    730 			error = SET_ERROR(ENFILE);
    731 			goto err;
    732 		}
    733 
    734 		/*
    735 		 * Finally, insert semaphore into the list.
    736 		 * Note: it already has the initial reference.
    737 		 */
    738 		ks = ksnew;
    739 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
    740 		nsems++;
    741 		mutex_exit(&ksem_lock);
    742 
    743 		ksnew = NULL;
    744 	}
    745 	KASSERT(ks != NULL);
    746 	fp->f_ksem = ks;
    747 	fd_affix(p, fp, fd);
    748 err:
    749 	name_destroy(&name);
    750 	if (error) {
    751 		fd_abort(p, fp, fd);
    752 	}
    753 	if (ksnew) {
    754 		ksem_free(ksnew);
    755 	}
    756 	return error;
    757 }
    758 
    759 int
    760 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
    761     register_t *retval)
    762 {
    763 	/* {
    764 		intptr_t id;
    765 	} */
    766 	intptr_t id = SCARG(uap, id);
    767 	int fd, error;
    768 	ksem_t *ks;
    769 
    770 	error = ksem_get(id, &ks, &fd);
    771 	if (error) {
    772 		return error;
    773 	}
    774 
    775 	/* This is only for named semaphores. */
    776 	if (ks->ks_name == NULL) {
    777 		error = SET_ERROR(EINVAL);
    778 	}
    779 	ksem_release(ks, -1);
    780 	if (error) {
    781 		if (fd != -1)
    782 			fd_putfile(fd);
    783 		return error;
    784 	}
    785 	return fd_close(fd);
    786 }
    787 
    788 static int
    789 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    790     int flags)
    791 {
    792 	size_t len;
    793 	char *name;
    794 	ksem_t *ks = fp->f_ksem;
    795 
    796 	mutex_enter(&ks->ks_lock);
    797 	len = ks->ks_namelen;
    798 	name = ks->ks_name;
    799 	mutex_exit(&ks->ks_lock);
    800 	if (name == NULL || len == 0)
    801 		return 0;
    802 	return uiomove(name, len, uio);
    803 }
    804 
    805 static int
    806 ksem_stat_fop(file_t *fp, struct stat *ub)
    807 {
    808 	ksem_t *ks = fp->f_ksem;
    809 
    810 	mutex_enter(&ks->ks_lock);
    811 
    812 	memset(ub, 0, sizeof(*ub));
    813 
    814 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
    815 	    ? _S_IFLNK : _S_IFREG);
    816 	ub->st_uid = ks->ks_uid;
    817 	ub->st_gid = ks->ks_gid;
    818 	ub->st_size = ks->ks_value;
    819 	ub->st_blocks = (ub->st_size) ? 1 : 0;
    820 	ub->st_nlink = ks->ks_ref;
    821 	ub->st_blksize = 4096;
    822 
    823 	nanotime(&ub->st_atimespec);
    824 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
    825 	    ub->st_atimespec;
    826 
    827 	/*
    828 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
    829 	 * XXX (st_dev, st_ino) should be unique.
    830 	 */
    831 	mutex_exit(&ks->ks_lock);
    832 	return 0;
    833 }
    834 
    835 static int
    836 ksem_close_fop(file_t *fp)
    837 {
    838 	ksem_t *ks = fp->f_ksem;
    839 
    840 	mutex_enter(&ks->ks_lock);
    841 
    842 	if (ks->ks_pshared_id) {
    843 		if (ks->ks_pshared_proc != curproc) {
    844 			/* Do nothing if this is not the creator. */
    845 			mutex_exit(&ks->ks_lock);
    846 			return 0;
    847 		}
    848 		/* Mark this semaphore as dead. */
    849 		ks->ks_pshared_proc = NULL;
    850 	}
    851 
    852 	ksem_release(ks, -1);
    853 	return 0;
    854 }
    855 
    856 int
    857 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
    858     register_t *retval)
    859 {
    860 	/* {
    861 		const char *name;
    862 	} */
    863 	char *name;
    864 	ksem_t *ks;
    865 	u_int refcnt;
    866 	int error;
    867 
    868 	error = name_copyin(SCARG(uap, name), &name);
    869 	if (error)
    870 		return error;
    871 
    872 	mutex_enter(&ksem_lock);
    873 	ks = ksem_lookup(name);
    874 	name_destroy(&name);
    875 	if (ks == NULL) {
    876 		mutex_exit(&ksem_lock);
    877 		return SET_ERROR(ENOENT);
    878 	}
    879 	KASSERT(mutex_owned(&ks->ks_lock));
    880 
    881 	/* Verify permissions. */
    882 	error = ksem_perm(l, ks);
    883 	if (error) {
    884 		mutex_exit(&ks->ks_lock);
    885 		mutex_exit(&ksem_lock);
    886 		return error;
    887 	}
    888 
    889 	/* Remove from the global list. */
    890 	LIST_REMOVE(ks, ks_entry);
    891 	nsems--;
    892 	mutex_exit(&ksem_lock);
    893 
    894 	refcnt = ks->ks_ref;
    895 	if (refcnt) {
    896 		/* Mark as unlinked, if there are references. */
    897 		ks->ks_flags |= KS_UNLINKED;
    898 	}
    899 	mutex_exit(&ks->ks_lock);
    900 
    901 	if (refcnt == 0) {
    902 		ksem_free(ks);
    903 	}
    904 	return 0;
    905 }
    906 
    907 int
    908 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
    909     register_t *retval)
    910 {
    911 	/* {
    912 		intptr_t id;
    913 	} */
    914 	int fd, error;
    915 	ksem_t *ks;
    916 
    917 	error = ksem_get(SCARG(uap, id), &ks, &fd);
    918 	if (error) {
    919 		return error;
    920 	}
    921 	KASSERT(mutex_owned(&ks->ks_lock));
    922 	if (ks->ks_value == SEM_VALUE_MAX) {
    923 		error = SET_ERROR(EOVERFLOW);
    924 		goto out;
    925 	}
    926 	ks->ks_value++;
    927 	if (ks->ks_waiters) {
    928 		cv_broadcast(&ks->ks_cv);
    929 	}
    930 out:
    931 	ksem_release(ks, fd);
    932 	return error;
    933 }
    934 
    935 int
    936 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
    937 {
    938 	int fd, error, timeo;
    939 	ksem_t *ks;
    940 
    941 	error = ksem_get(id, &ks, &fd);
    942 	if (error) {
    943 		return error;
    944 	}
    945 	KASSERT(mutex_owned(&ks->ks_lock));
    946 	while (ks->ks_value == 0) {
    947 		ks->ks_waiters++;
    948 		if (!try_p && abstime != NULL) {
    949 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
    950 			    &timeo, NULL);
    951 			if (error != 0)
    952 				goto out;
    953 		} else {
    954 			timeo = 0;
    955 		}
    956 		error = try_p ? SET_ERROR(EAGAIN) : cv_timedwait_sig(&ks->ks_cv,
    957 		    &ks->ks_lock, timeo);
    958 		ks->ks_waiters--;
    959 		if (error)
    960 			goto out;
    961 	}
    962 	ks->ks_value--;
    963 out:
    964 	ksem_release(ks, fd);
    965 	return error;
    966 }
    967 
    968 int
    969 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
    970     register_t *retval)
    971 {
    972 	/* {
    973 		intptr_t id;
    974 	} */
    975 
    976 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
    977 }
    978 
    979 int
    980 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
    981     register_t *retval)
    982 {
    983 	/* {
    984 		intptr_t id;
    985 		const struct timespec *abstime;
    986 	} */
    987 	struct timespec ts;
    988 	int error;
    989 
    990 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
    991 	if (error != 0)
    992 		return error;
    993 
    994 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
    995 		return SET_ERROR(EINVAL);
    996 
    997 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
    998 	if (error == EWOULDBLOCK)
    999 		error = SET_ERROR(ETIMEDOUT);
   1000 	return error;
   1001 }
   1002 
   1003 int
   1004 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
   1005     register_t *retval)
   1006 {
   1007 	/* {
   1008 		intptr_t id;
   1009 	} */
   1010 
   1011 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
   1012 }
   1013 
   1014 int
   1015 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
   1016     register_t *retval)
   1017 {
   1018 	/* {
   1019 		intptr_t id;
   1020 		unsigned int *value;
   1021 	} */
   1022 	int fd, error;
   1023 	ksem_t *ks;
   1024 	unsigned int val;
   1025 
   1026 	error = ksem_get(SCARG(uap, id), &ks, &fd);
   1027 	if (error) {
   1028 		return error;
   1029 	}
   1030 	KASSERT(mutex_owned(&ks->ks_lock));
   1031 	val = ks->ks_value;
   1032 	ksem_release(ks, fd);
   1033 
   1034 	return copyout(&val, SCARG(uap, value), sizeof(val));
   1035 }
   1036 
   1037 int
   1038 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
   1039     register_t *retval)
   1040 {
   1041 	/* {
   1042 		intptr_t id;
   1043 	} */
   1044 	int fd, error;
   1045 	ksem_t *ks;
   1046 
   1047 	intptr_t id = SCARG(uap, id);
   1048 
   1049 	error = ksem_get(id, &ks, &fd);
   1050 	if (error) {
   1051 		return error;
   1052 	}
   1053 	KASSERT(mutex_owned(&ks->ks_lock));
   1054 
   1055 	/* Operation is only for unnamed semaphores. */
   1056 	if (ks->ks_name != NULL) {
   1057 		error = SET_ERROR(EINVAL);
   1058 		goto out;
   1059 	}
   1060 	/* Cannot destroy if there are waiters. */
   1061 	if (ks->ks_waiters) {
   1062 		error = SET_ERROR(EBUSY);
   1063 		goto out;
   1064 	}
   1065 	if (KSEM_ID_IS_PSHARED(id)) {
   1066 		/* Cannot destroy if we did't create it. */
   1067 		KASSERT(fd == -1);
   1068 		KASSERT(ks->ks_pshared_proc != NULL);
   1069 		if (ks->ks_pshared_proc != curproc) {
   1070 			error = SET_ERROR(EINVAL);
   1071 			goto out;
   1072 		}
   1073 		fd = ks->ks_pshared_fd;
   1074 
   1075 		/* Mark it dead so subsequent lookups fail. */
   1076 		ks->ks_pshared_proc = NULL;
   1077 
   1078 		/* Do an fd_getfile() to for the benefit of fd_close(). */
   1079 		file_t *fp __diagused = fd_getfile(fd);
   1080 		KASSERT(fp != NULL);
   1081 		KASSERT(fp->f_ksem == ks);
   1082 	}
   1083 out:
   1084 	ksem_release(ks, -1);
   1085 	if (error) {
   1086 		if (!KSEM_ID_IS_PSHARED(id))
   1087 			fd_putfile(fd);
   1088 		return error;
   1089 	}
   1090 	return fd_close(fd);
   1091 }
   1092