Home | History | Annotate | Line # | Download | only in kern
uipc_sem.c revision 1.57
      1 /*	$NetBSD: uipc_sem.c,v 1.57 2019/12/17 18:10:36 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 /*
     59  * Implementation of POSIX semaphore.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.57 2019/12/17 18:10:36 ad Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/kernel.h>
     67 
     68 #include <sys/atomic.h>
     69 #include <sys/proc.h>
     70 #include <sys/lwp.h>
     71 #include <sys/ksem.h>
     72 #include <sys/syscall.h>
     73 #include <sys/stat.h>
     74 #include <sys/kmem.h>
     75 #include <sys/fcntl.h>
     76 #include <sys/file.h>
     77 #include <sys/filedesc.h>
     78 #include <sys/kauth.h>
     79 #include <sys/module.h>
     80 #include <sys/mount.h>
     81 #include <sys/mutex.h>
     82 #include <sys/rwlock.h>
     83 #include <sys/semaphore.h>
     84 #include <sys/syscall.h>
     85 #include <sys/syscallargs.h>
     86 #include <sys/syscallvar.h>
     87 #include <sys/sysctl.h>
     88 #include <sys/uidinfo.h>
     89 #include <sys/cprng.h>
     90 
     91 MODULE(MODULE_CLASS_MISC, ksem, NULL);
     92 
     93 #define	SEM_MAX_NAMELEN		NAME_MAX
     94 
     95 #define	KS_UNLINKED		0x01
     96 
     97 static kmutex_t		ksem_lock	__cacheline_aligned;
     98 static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
     99 static u_int		nsems_total	__cacheline_aligned;
    100 static u_int		nsems		__cacheline_aligned;
    101 
    102 static krwlock_t	ksem_pshared_lock __cacheline_aligned;
    103 static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
    104 static u_long		ksem_pshared_hashmask __read_mostly;
    105 
    106 #define	KSEM_PSHARED_HASHSIZE	32
    107 
    108 static kauth_listener_t	ksem_listener;
    109 
    110 static int		ksem_sysinit(void);
    111 static int		ksem_sysfini(bool);
    112 static int		ksem_modcmd(modcmd_t, void *);
    113 static int		ksem_close_fop(file_t *);
    114 static int		ksem_stat_fop(file_t *, struct stat *);
    115 static int		ksem_read_fop(file_t *, off_t *, struct uio *,
    116     kauth_cred_t, int);
    117 
    118 static const struct fileops semops = {
    119 	.fo_name = "sem",
    120 	.fo_read = ksem_read_fop,
    121 	.fo_write = fbadop_write,
    122 	.fo_ioctl = fbadop_ioctl,
    123 	.fo_fcntl = fnullop_fcntl,
    124 	.fo_poll = fnullop_poll,
    125 	.fo_stat = ksem_stat_fop,
    126 	.fo_close = ksem_close_fop,
    127 	.fo_kqfilter = fnullop_kqfilter,
    128 	.fo_restart = fnullop_restart,
    129 };
    130 
    131 static const struct syscall_package ksem_syscalls[] = {
    132 	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
    133 	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
    134 	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
    135 	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
    136 	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
    137 	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
    138 	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
    139 	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
    140 	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
    141 	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
    142 	{ 0, 0, NULL },
    143 };
    144 
    145 struct sysctllog *ksem_clog;
    146 int ksem_max = KSEM_MAX;
    147 
    148 static int
    149 name_copyin(const char *uname, char **name)
    150 {
    151 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
    152 
    153 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
    154 	if (error)
    155 		kmem_free(*name, SEM_MAX_NAMELEN);
    156 
    157 	return error;
    158 }
    159 
    160 static void
    161 name_destroy(char **name)
    162 {
    163 	if (!*name)
    164 		return;
    165 
    166 	kmem_free(*name, SEM_MAX_NAMELEN);
    167 	*name = NULL;
    168 }
    169 
    170 static int
    171 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    172     void *arg0, void *arg1, void *arg2, void *arg3)
    173 {
    174 	ksem_t *ks;
    175 	mode_t mode;
    176 
    177 	if (action != KAUTH_SYSTEM_SEMAPHORE)
    178 		return KAUTH_RESULT_DEFER;
    179 
    180 	ks = arg1;
    181 	mode = ks->ks_mode;
    182 
    183 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
    184 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
    185 	    (mode & S_IWOTH) != 0)
    186 		return KAUTH_RESULT_ALLOW;
    187 
    188 	return KAUTH_RESULT_DEFER;
    189 }
    190 
    191 static int
    192 ksem_sysinit(void)
    193 {
    194 	int error;
    195 	const struct sysctlnode *rnode;
    196 
    197 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
    198 	LIST_INIT(&ksem_head);
    199 	nsems_total = 0;
    200 	nsems = 0;
    201 
    202 	rw_init(&ksem_pshared_lock);
    203 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
    204 	    true, &ksem_pshared_hashmask);
    205 	KASSERT(ksem_pshared_hashtab != NULL);
    206 
    207 	error = syscall_establish(NULL, ksem_syscalls);
    208 	if (error) {
    209 		(void)ksem_sysfini(false);
    210 	}
    211 
    212 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
    213 	    ksem_listener_cb, NULL);
    214 
    215 	/* Define module-specific sysctl tree */
    216 
    217 	ksem_clog = NULL;
    218 
    219 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
    220 			CTLFLAG_PERMANENT,
    221 			CTLTYPE_NODE, "posix",
    222 			SYSCTL_DESCR("POSIX options"),
    223 			NULL, 0, NULL, 0,
    224 			CTL_KERN, CTL_CREATE, CTL_EOL);
    225 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    226 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    227 			CTLTYPE_INT, "semmax",
    228 			SYSCTL_DESCR("Maximal number of semaphores"),
    229 			NULL, 0, &ksem_max, 0,
    230 			CTL_CREATE, CTL_EOL);
    231 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    232 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
    233 			CTLTYPE_INT, "semcnt",
    234 			SYSCTL_DESCR("Current number of semaphores"),
    235 			NULL, 0, &nsems, 0,
    236 			CTL_CREATE, CTL_EOL);
    237 
    238 	return error;
    239 }
    240 
    241 static int
    242 ksem_sysfini(bool interface)
    243 {
    244 	int error;
    245 
    246 	if (interface) {
    247 		error = syscall_disestablish(NULL, ksem_syscalls);
    248 		if (error != 0) {
    249 			return error;
    250 		}
    251 		/*
    252 		 * Make sure that no semaphores are in use.  Note: semops
    253 		 * must be unused at this point.
    254 		 */
    255 		if (nsems_total) {
    256 			error = syscall_establish(NULL, ksem_syscalls);
    257 			KASSERT(error == 0);
    258 			return EBUSY;
    259 		}
    260 	}
    261 	kauth_unlisten_scope(ksem_listener);
    262 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
    263 	rw_destroy(&ksem_pshared_lock);
    264 	mutex_destroy(&ksem_lock);
    265 	sysctl_teardown(&ksem_clog);
    266 	return 0;
    267 }
    268 
    269 static int
    270 ksem_modcmd(modcmd_t cmd, void *arg)
    271 {
    272 
    273 	switch (cmd) {
    274 	case MODULE_CMD_INIT:
    275 		return ksem_sysinit();
    276 
    277 	case MODULE_CMD_FINI:
    278 		return ksem_sysfini(true);
    279 
    280 	default:
    281 		return ENOTTY;
    282 	}
    283 }
    284 
    285 static ksem_t *
    286 ksem_lookup(const char *name)
    287 {
    288 	ksem_t *ks;
    289 
    290 	KASSERT(mutex_owned(&ksem_lock));
    291 
    292 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    293 		if (strcmp(ks->ks_name, name) == 0) {
    294 			mutex_enter(&ks->ks_lock);
    295 			return ks;
    296 		}
    297 	}
    298 	return NULL;
    299 }
    300 
    301 static int
    302 ksem_perm(lwp_t *l, ksem_t *ks)
    303 {
    304 	kauth_cred_t uc = l->l_cred;
    305 
    306 	KASSERT(mutex_owned(&ks->ks_lock));
    307 
    308 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
    309 		return EACCES;
    310 
    311 	return 0;
    312 }
    313 
    314 /*
    315  * Bits 1..23 are random, just pluck a few of those and assume the
    316  * distribution is going to be pretty good.
    317  */
    318 #define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
    319 
    320 static void
    321 ksem_remove_pshared(ksem_t *ksem)
    322 {
    323 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    324 	LIST_REMOVE(ksem, ks_entry);
    325 	rw_exit(&ksem_pshared_lock);
    326 }
    327 
    328 static ksem_t *
    329 ksem_lookup_pshared_locked(intptr_t id)
    330 {
    331 	u_long bucket = KSEM_PSHARED_HASH(id);
    332 	ksem_t *ksem = NULL;
    333 
    334 	/* ksem_t is locked and referenced upon return. */
    335 
    336 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
    337 		if (ksem->ks_pshared_id == id) {
    338 			mutex_enter(&ksem->ks_lock);
    339 			if (ksem->ks_pshared_proc == NULL) {
    340 				/*
    341 				 * This entry is dead, and in the process
    342 				 * of being torn down; skip it.
    343 				 */
    344 				mutex_exit(&ksem->ks_lock);
    345 				continue;
    346 			}
    347 			ksem->ks_ref++;
    348 			KASSERT(ksem->ks_ref != 0);
    349 			return ksem;
    350 		}
    351 	}
    352 
    353 	return NULL;
    354 }
    355 
    356 static ksem_t *
    357 ksem_lookup_pshared(intptr_t id)
    358 {
    359 	rw_enter(&ksem_pshared_lock, RW_READER);
    360 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
    361 	rw_exit(&ksem_pshared_lock);
    362 	return ksem;
    363 }
    364 
    365 static void
    366 ksem_alloc_pshared_id(ksem_t *ksem)
    367 {
    368 	uint32_t try;
    369 
    370 	KASSERT(ksem->ks_pshared_proc != NULL);
    371 
    372 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    373 	for (;;) {
    374 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
    375 		    KSEM_PSHARED_MARKER;
    376 
    377 		if (ksem_lookup_pshared_locked(try) == NULL) {
    378 			/* Got it! */
    379 			break;
    380 		}
    381 	}
    382 	ksem->ks_pshared_id = try;
    383 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
    384 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
    385 	rw_exit(&ksem_pshared_lock);
    386 }
    387 
    388 /*
    389  * ksem_get: get the semaphore from the descriptor.
    390  *
    391  * => locks the semaphore, if found, and holds an extra reference.
    392  * => holds a reference on the file descriptor.
    393  */
    394 static int
    395 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
    396 {
    397 	ksem_t *ks;
    398 	int fd;
    399 
    400 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
    401 		/*
    402 		 * ksem_lookup_pshared() returns the ksem_t *
    403 		 * locked and referenced.
    404 		 */
    405 		ks = ksem_lookup_pshared(id);
    406 		if (ks == NULL)
    407 			return EINVAL;
    408 		KASSERT(ks->ks_pshared_id == id);
    409 		KASSERT(ks->ks_pshared_proc != NULL);
    410 		fd = -1;
    411 	} else if (id <= INT_MAX) {
    412 		fd = (int)id;
    413 		file_t *fp = fd_getfile(fd);
    414 
    415 		if (__predict_false(fp == NULL))
    416 			return EINVAL;
    417 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
    418 			fd_putfile(fd);
    419 			return EINVAL;
    420 		}
    421 		ks = fp->f_ksem;
    422 		mutex_enter(&ks->ks_lock);
    423 		ks->ks_ref++;
    424 	} else {
    425 		return EINVAL;
    426 	}
    427 
    428 	*ksret = ks;
    429 	*fdp = fd;
    430 	return 0;
    431 }
    432 
    433 /*
    434  * ksem_create: allocate and setup a new semaphore structure.
    435  */
    436 static int
    437 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
    438 {
    439 	ksem_t *ks;
    440 	kauth_cred_t uc;
    441 	char *kname;
    442 	size_t len;
    443 
    444 	/* Pre-check for the limit. */
    445 	if (nsems >= ksem_max) {
    446 		return ENFILE;
    447 	}
    448 
    449 	if (val > SEM_VALUE_MAX) {
    450 		return EINVAL;
    451 	}
    452 
    453 	if (name != NULL) {
    454 		len = strlen(name);
    455 		if (len > SEM_MAX_NAMELEN) {
    456 			return ENAMETOOLONG;
    457 		}
    458 		/* Name must start with a '/' but not contain one. */
    459 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    460 			return EINVAL;
    461 		}
    462 		kname = kmem_alloc(++len, KM_SLEEP);
    463 		strlcpy(kname, name, len);
    464 	} else {
    465 		kname = NULL;
    466 		len = 0;
    467 	}
    468 
    469 	chgsemcnt(kauth_cred_getuid(l->l_cred), 1);
    470 
    471 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
    472 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
    473 	cv_init(&ks->ks_cv, "psem");
    474 	ks->ks_name = kname;
    475 	ks->ks_namelen = len;
    476 	ks->ks_mode = mode;
    477 	ks->ks_value = val;
    478 	ks->ks_ref = 1;
    479 
    480 	uc = l->l_cred;
    481 	ks->ks_uid = kauth_cred_geteuid(uc);
    482 	ks->ks_gid = kauth_cred_getegid(uc);
    483 
    484 	atomic_inc_uint(&nsems_total);
    485 	*ksret = ks;
    486 	return 0;
    487 }
    488 
    489 static void
    490 ksem_free(ksem_t *ks)
    491 {
    492 
    493 	KASSERT(!cv_has_waiters(&ks->ks_cv));
    494 
    495 	if (ks->ks_pshared_id) {
    496 		KASSERT(ks->ks_pshared_proc == NULL);
    497 		ksem_remove_pshared(ks);
    498 	}
    499 	if (ks->ks_name) {
    500 		KASSERT(ks->ks_namelen > 0);
    501 		kmem_free(ks->ks_name, ks->ks_namelen);
    502 	}
    503 	mutex_destroy(&ks->ks_lock);
    504 	cv_destroy(&ks->ks_cv);
    505 	kmem_free(ks, sizeof(ksem_t));
    506 
    507 	atomic_dec_uint(&nsems_total);
    508 	chgsemcnt(kauth_cred_getuid(curproc->p_cred), -1);
    509 }
    510 
    511 #define	KSEM_ID_IS_PSHARED(id)		\
    512 	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
    513 
    514 static void
    515 ksem_release(ksem_t *ksem, int fd)
    516 {
    517 	bool destroy = false;
    518 
    519 	KASSERT(mutex_owned(&ksem->ks_lock));
    520 
    521 	KASSERT(ksem->ks_ref > 0);
    522 	if (--ksem->ks_ref == 0) {
    523 		/*
    524 		 * Destroy if the last reference and semaphore is unnamed,
    525 		 * or unlinked (for named semaphore).
    526 		 */
    527 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
    528 		    (ksem->ks_name == NULL);
    529 	}
    530 	mutex_exit(&ksem->ks_lock);
    531 
    532 	if (destroy) {
    533 		ksem_free(ksem);
    534 	}
    535 	if (fd != -1) {
    536 		fd_putfile(fd);
    537 	}
    538 }
    539 
    540 int
    541 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
    542     register_t *retval)
    543 {
    544 	/* {
    545 		unsigned int value;
    546 		intptr_t *idp;
    547 	} */
    548 
    549 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
    550 	    copyin, copyout);
    551 }
    552 
    553 int
    554 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
    555     copyout_t docopyout)
    556 {
    557 	proc_t *p = l->l_proc;
    558 	ksem_t *ks;
    559 	file_t *fp;
    560 	intptr_t id, arg;
    561 	int fd, error;
    562 
    563 	/*
    564 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
    565 	 * indicate that a pshared semaphore is wanted.  In that case we
    566 	 * allocate globally unique ID and return that, rather than the
    567 	 * process-scoped file descriptor ID.
    568 	 */
    569 	error = (*docopyin)(idp, &arg, sizeof(*idp));
    570 	if (error) {
    571 		return error;
    572 	}
    573 
    574 	error = fd_allocfile(&fp, &fd);
    575 	if (error) {
    576 		return error;
    577 	}
    578 	fp->f_type = DTYPE_SEM;
    579 	fp->f_flag = FREAD | FWRITE;
    580 	fp->f_ops = &semops;
    581 
    582 	if (fd >= KSEM_MARKER_MIN) {
    583 		/*
    584 		 * This is super-unlikely, but we check for it anyway
    585 		 * because potential collisions with the pshared marker
    586 		 * would be bad.
    587 		 */
    588 		fd_abort(p, fp, fd);
    589 		return EMFILE;
    590 	}
    591 
    592 	/* Note the mode does not matter for anonymous semaphores. */
    593 	error = ksem_create(l, NULL, &ks, 0, val);
    594 	if (error) {
    595 		fd_abort(p, fp, fd);
    596 		return error;
    597 	}
    598 
    599 	if (arg == KSEM_PSHARED) {
    600 		ks->ks_pshared_proc = curproc;
    601 		ks->ks_pshared_fd = fd;
    602 		ksem_alloc_pshared_id(ks);
    603 		id = ks->ks_pshared_id;
    604 	} else {
    605 		id = (intptr_t)fd;
    606 	}
    607 
    608 	error = (*docopyout)(&id, idp, sizeof(*idp));
    609 	if (error) {
    610 		ksem_free(ks);
    611 		fd_abort(p, fp, fd);
    612 		return error;
    613 	}
    614 
    615 	fp->f_ksem = ks;
    616 	fd_affix(p, fp, fd);
    617 	return error;
    618 }
    619 
    620 int
    621 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
    622     register_t *retval)
    623 {
    624 	/* {
    625 		const char *name;
    626 		int oflag;
    627 		mode_t mode;
    628 		unsigned int value;
    629 		intptr_t *idp;
    630 	} */
    631 
    632 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
    633 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
    634 }
    635 
    636 int
    637 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    638      unsigned int value, intptr_t *idp, copyout_t docopyout)
    639 {
    640 	char *name;
    641 	proc_t *p = l->l_proc;
    642 	ksem_t *ksnew = NULL, *ks;
    643 	file_t *fp;
    644 	intptr_t id;
    645 	int fd, error;
    646 
    647 	error = name_copyin(semname, &name);
    648 	if (error) {
    649 		return error;
    650 	}
    651 	error = fd_allocfile(&fp, &fd);
    652 	if (error) {
    653 		name_destroy(&name);
    654 		return error;
    655 	}
    656 	fp->f_type = DTYPE_SEM;
    657 	fp->f_flag = FREAD | FWRITE;
    658 	fp->f_ops = &semops;
    659 
    660 	if (fd >= KSEM_MARKER_MIN) {
    661 		/*
    662 		 * This is super-unlikely, but we check for it anyway
    663 		 * because potential collisions with the pshared marker
    664 		 * would be bad.
    665 		 */
    666 		fd_abort(p, fp, fd);
    667 		return EMFILE;
    668 	}
    669 
    670 	/*
    671 	 * The ID (file descriptor number) can be stored early.
    672 	 * Note that zero is a special value for libpthread.
    673 	 */
    674 	id = (intptr_t)fd;
    675 	error = (*docopyout)(&id, idp, sizeof(*idp));
    676 	if (error) {
    677 		goto err;
    678 	}
    679 
    680 	if (oflag & O_CREAT) {
    681 		/* Create a new semaphore. */
    682 		error = ksem_create(l, name, &ksnew, mode, value);
    683 		if (error) {
    684 			goto err;
    685 		}
    686 		KASSERT(ksnew != NULL);
    687 	}
    688 
    689 	/* Lookup for a semaphore with such name. */
    690 	mutex_enter(&ksem_lock);
    691 	ks = ksem_lookup(name);
    692 	name_destroy(&name);
    693 	if (ks) {
    694 		KASSERT(mutex_owned(&ks->ks_lock));
    695 		mutex_exit(&ksem_lock);
    696 
    697 		/* Check for exclusive create. */
    698 		if (oflag & O_EXCL) {
    699 			mutex_exit(&ks->ks_lock);
    700 			error = EEXIST;
    701 			goto err;
    702 		}
    703 		/*
    704 		 * Verify permissions.  If we can access it,
    705 		 * add the reference of this thread.
    706 		 */
    707 		error = ksem_perm(l, ks);
    708 		if (error == 0) {
    709 			ks->ks_ref++;
    710 		}
    711 		mutex_exit(&ks->ks_lock);
    712 		if (error) {
    713 			goto err;
    714 		}
    715 	} else {
    716 		/* Fail if not found and not creating. */
    717 		if ((oflag & O_CREAT) == 0) {
    718 			mutex_exit(&ksem_lock);
    719 			KASSERT(ksnew == NULL);
    720 			error = ENOENT;
    721 			goto err;
    722 		}
    723 
    724 		/* Check for the limit locked. */
    725 		if (nsems >= ksem_max) {
    726 			mutex_exit(&ksem_lock);
    727 			error = ENFILE;
    728 			goto err;
    729 		}
    730 
    731 		/*
    732 		 * Finally, insert semaphore into the list.
    733 		 * Note: it already has the initial reference.
    734 		 */
    735 		ks = ksnew;
    736 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
    737 		nsems++;
    738 		mutex_exit(&ksem_lock);
    739 
    740 		ksnew = NULL;
    741 	}
    742 	KASSERT(ks != NULL);
    743 	fp->f_ksem = ks;
    744 	fd_affix(p, fp, fd);
    745 err:
    746 	name_destroy(&name);
    747 	if (error) {
    748 		fd_abort(p, fp, fd);
    749 	}
    750 	if (ksnew) {
    751 		ksem_free(ksnew);
    752 	}
    753 	return error;
    754 }
    755 
    756 int
    757 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
    758     register_t *retval)
    759 {
    760 	/* {
    761 		intptr_t id;
    762 	} */
    763 	intptr_t id = SCARG(uap, id);
    764 	int fd, error;
    765 	ksem_t *ks;
    766 
    767 	error = ksem_get(id, &ks, &fd);
    768 	if (error) {
    769 		return error;
    770 	}
    771 
    772 	/* This is only for named semaphores. */
    773 	if (ks->ks_name == NULL) {
    774 		error = EINVAL;
    775 	}
    776 	ksem_release(ks, -1);
    777 	if (error) {
    778 		if (fd != -1)
    779 			fd_putfile(fd);
    780 		return error;
    781 	}
    782 	return fd_close(fd);
    783 }
    784 
    785 static int
    786 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    787     int flags)
    788 {
    789 	size_t len;
    790 	char *name;
    791 	ksem_t *ks = fp->f_ksem;
    792 
    793 	mutex_enter(&ks->ks_lock);
    794 	len = ks->ks_namelen;
    795 	name = ks->ks_name;
    796 	mutex_exit(&ks->ks_lock);
    797 	if (name == NULL || len == 0)
    798 		return 0;
    799 	return uiomove(name, len, uio);
    800 }
    801 
    802 static int
    803 ksem_stat_fop(file_t *fp, struct stat *ub)
    804 {
    805 	ksem_t *ks = fp->f_ksem;
    806 
    807 	mutex_enter(&ks->ks_lock);
    808 
    809 	memset(ub, 0, sizeof(*ub));
    810 
    811 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
    812 	    ? _S_IFLNK : _S_IFREG);
    813 	ub->st_uid = ks->ks_uid;
    814 	ub->st_gid = ks->ks_gid;
    815 	ub->st_size = ks->ks_value;
    816 	ub->st_blocks = (ub->st_size) ? 1 : 0;
    817 	ub->st_nlink = ks->ks_ref;
    818 	ub->st_blksize = 4096;
    819 
    820 	nanotime(&ub->st_atimespec);
    821 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
    822 	    ub->st_atimespec;
    823 
    824 	/*
    825 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
    826 	 * XXX (st_dev, st_ino) should be unique.
    827 	 */
    828 	mutex_exit(&ks->ks_lock);
    829 	return 0;
    830 }
    831 
    832 static int
    833 ksem_close_fop(file_t *fp)
    834 {
    835 	ksem_t *ks = fp->f_ksem;
    836 
    837 	mutex_enter(&ks->ks_lock);
    838 
    839 	if (ks->ks_pshared_id) {
    840 		if (ks->ks_pshared_proc != curproc) {
    841 			/* Do nothing if this is not the creator. */
    842 			mutex_exit(&ks->ks_lock);
    843 			return 0;
    844 		}
    845 		/* Mark this semaphore as dead. */
    846 		ks->ks_pshared_proc = NULL;
    847 	}
    848 
    849 	ksem_release(ks, -1);
    850 	return 0;
    851 }
    852 
    853 int
    854 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
    855     register_t *retval)
    856 {
    857 	/* {
    858 		const char *name;
    859 	} */
    860 	char *name;
    861 	ksem_t *ks;
    862 	u_int refcnt;
    863 	int error;
    864 
    865 	error = name_copyin(SCARG(uap, name), &name);
    866 	if (error)
    867 		return error;
    868 
    869 	mutex_enter(&ksem_lock);
    870 	ks = ksem_lookup(name);
    871 	name_destroy(&name);
    872 	if (ks == NULL) {
    873 		mutex_exit(&ksem_lock);
    874 		return ENOENT;
    875 	}
    876 	KASSERT(mutex_owned(&ks->ks_lock));
    877 
    878 	/* Verify permissions. */
    879 	error = ksem_perm(l, ks);
    880 	if (error) {
    881 		mutex_exit(&ks->ks_lock);
    882 		mutex_exit(&ksem_lock);
    883 		return error;
    884 	}
    885 
    886 	/* Remove from the global list. */
    887 	LIST_REMOVE(ks, ks_entry);
    888 	nsems--;
    889 	mutex_exit(&ksem_lock);
    890 
    891 	refcnt = ks->ks_ref;
    892 	if (refcnt) {
    893 		/* Mark as unlinked, if there are references. */
    894 		ks->ks_flags |= KS_UNLINKED;
    895 	}
    896 	mutex_exit(&ks->ks_lock);
    897 
    898 	if (refcnt == 0) {
    899 		ksem_free(ks);
    900 	}
    901 	return 0;
    902 }
    903 
    904 int
    905 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
    906     register_t *retval)
    907 {
    908 	/* {
    909 		intptr_t id;
    910 	} */
    911 	int fd, error;
    912 	ksem_t *ks;
    913 
    914 	error = ksem_get(SCARG(uap, id), &ks, &fd);
    915 	if (error) {
    916 		return error;
    917 	}
    918 	KASSERT(mutex_owned(&ks->ks_lock));
    919 	if (ks->ks_value == SEM_VALUE_MAX) {
    920 		error = EOVERFLOW;
    921 		goto out;
    922 	}
    923 	ks->ks_value++;
    924 	if (ks->ks_waiters) {
    925 		cv_broadcast(&ks->ks_cv);
    926 	}
    927 out:
    928 	ksem_release(ks, fd);
    929 	return error;
    930 }
    931 
    932 int
    933 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
    934 {
    935 	int fd, error, timeo;
    936 	ksem_t *ks;
    937 
    938 	error = ksem_get(id, &ks, &fd);
    939 	if (error) {
    940 		return error;
    941 	}
    942 	KASSERT(mutex_owned(&ks->ks_lock));
    943 	while (ks->ks_value == 0) {
    944 		ks->ks_waiters++;
    945 		if (!try_p && abstime != NULL) {
    946 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
    947 			    &timeo, NULL);
    948 			if (error != 0)
    949 				goto out;
    950 		} else {
    951 			timeo = 0;
    952 		}
    953 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
    954 		    &ks->ks_lock, timeo);
    955 		ks->ks_waiters--;
    956 		if (error)
    957 			goto out;
    958 	}
    959 	ks->ks_value--;
    960 out:
    961 	ksem_release(ks, fd);
    962 	return error;
    963 }
    964 
    965 int
    966 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
    967     register_t *retval)
    968 {
    969 	/* {
    970 		intptr_t id;
    971 	} */
    972 
    973 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
    974 }
    975 
    976 int
    977 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
    978     register_t *retval)
    979 {
    980 	/* {
    981 		intptr_t id;
    982 		const struct timespec *abstime;
    983 	} */
    984 	struct timespec ts;
    985 	int error;
    986 
    987 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
    988 	if (error != 0)
    989 		return error;
    990 
    991 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
    992 		return EINVAL;
    993 
    994 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
    995 	if (error == EWOULDBLOCK)
    996 		error = ETIMEDOUT;
    997 	return error;
    998 }
    999 
   1000 int
   1001 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
   1002     register_t *retval)
   1003 {
   1004 	/* {
   1005 		intptr_t id;
   1006 	} */
   1007 
   1008 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
   1009 }
   1010 
   1011 int
   1012 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
   1013     register_t *retval)
   1014 {
   1015 	/* {
   1016 		intptr_t id;
   1017 		unsigned int *value;
   1018 	} */
   1019 	int fd, error;
   1020 	ksem_t *ks;
   1021 	unsigned int val;
   1022 
   1023 	error = ksem_get(SCARG(uap, id), &ks, &fd);
   1024 	if (error) {
   1025 		return error;
   1026 	}
   1027 	KASSERT(mutex_owned(&ks->ks_lock));
   1028 	val = ks->ks_value;
   1029 	ksem_release(ks, fd);
   1030 
   1031 	return copyout(&val, SCARG(uap, value), sizeof(val));
   1032 }
   1033 
   1034 int
   1035 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
   1036     register_t *retval)
   1037 {
   1038 	/* {
   1039 		intptr_t id;
   1040 	} */
   1041 	int fd, error;
   1042 	ksem_t *ks;
   1043 
   1044 	intptr_t id = SCARG(uap, id);
   1045 
   1046 	error = ksem_get(id, &ks, &fd);
   1047 	if (error) {
   1048 		return error;
   1049 	}
   1050 	KASSERT(mutex_owned(&ks->ks_lock));
   1051 
   1052 	/* Operation is only for unnamed semaphores. */
   1053 	if (ks->ks_name != NULL) {
   1054 		error = EINVAL;
   1055 		goto out;
   1056 	}
   1057 	/* Cannot destroy if there are waiters. */
   1058 	if (ks->ks_waiters) {
   1059 		error = EBUSY;
   1060 		goto out;
   1061 	}
   1062 	if (KSEM_ID_IS_PSHARED(id)) {
   1063 		/* Cannot destroy if we did't create it. */
   1064 		KASSERT(fd == -1);
   1065 		KASSERT(ks->ks_pshared_proc != NULL);
   1066 		if (ks->ks_pshared_proc != curproc) {
   1067 			error = EINVAL;
   1068 			goto out;
   1069 		}
   1070 		fd = ks->ks_pshared_fd;
   1071 
   1072 		/* Mark it dead so subsequent lookups fail. */
   1073 		ks->ks_pshared_proc = NULL;
   1074 
   1075 		/* Do an fd_getfile() to for the benefit of fd_close(). */
   1076 		file_t *fp __diagused = fd_getfile(fd);
   1077 		KASSERT(fp != NULL);
   1078 		KASSERT(fp->f_ksem == ks);
   1079 	}
   1080 out:
   1081 	ksem_release(ks, -1);
   1082 	if (error) {
   1083 		if (!KSEM_ID_IS_PSHARED(id))
   1084 			fd_putfile(fd);
   1085 		return error;
   1086 	}
   1087 	return fd_close(fd);
   1088 }
   1089