Home | History | Annotate | Line # | Download | only in kern
uipc_sem.c revision 1.60
      1 /*	$NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 /*
     59  * Implementation of POSIX semaphore.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.60 2020/12/14 23:12:12 chs Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/kernel.h>
     67 
     68 #include <sys/atomic.h>
     69 #include <sys/proc.h>
     70 #include <sys/lwp.h>
     71 #include <sys/ksem.h>
     72 #include <sys/syscall.h>
     73 #include <sys/stat.h>
     74 #include <sys/kmem.h>
     75 #include <sys/fcntl.h>
     76 #include <sys/file.h>
     77 #include <sys/filedesc.h>
     78 #include <sys/kauth.h>
     79 #include <sys/module.h>
     80 #include <sys/mount.h>
     81 #include <sys/mutex.h>
     82 #include <sys/rwlock.h>
     83 #include <sys/semaphore.h>
     84 #include <sys/syscall.h>
     85 #include <sys/syscallargs.h>
     86 #include <sys/syscallvar.h>
     87 #include <sys/sysctl.h>
     88 #include <sys/uidinfo.h>
     89 #include <sys/cprng.h>
     90 
/* Declare the "ksem" kernel module, class MODULE_CLASS_MISC. */
MODULE(MODULE_CLASS_MISC, ksem, NULL);

/* Size of the kernel buffer that name_copyin() allocates for a name. */
#define	SEM_MAX_NAMELEN		NAME_MAX

/* ks_flags bit set by sys__ksem_unlink() on a still-referenced semaphore. */
#define	KS_UNLINKED		0x01

/*
 * Named semaphores are kept on ksem_head; the list and the nsems counter
 * are modified with ksem_lock held.  nsems_total counts every semaphore
 * created by ksem_create() and is updated with atomic operations.
 */
static kmutex_t		ksem_lock	__cacheline_aligned;
static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
static u_int		nsems_total	__cacheline_aligned;
static u_int		nsems		__cacheline_aligned;

/*
 * Process-shared semaphores are kept in a hash table indexed through
 * KSEM_PSHARED_HASH(); the table is accessed with ksem_pshared_lock held.
 */
static krwlock_t	ksem_pshared_lock __cacheline_aligned;
static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
static u_long		ksem_pshared_hashmask __read_mostly;

/* Element count passed to hashinit() for the pshared hash table. */
#define	KSEM_PSHARED_HASHSIZE	32

/* Handle returned by kauth_listen_scope(); released in ksem_sysfini(). */
static kauth_listener_t	ksem_listener;
    109 
/* Forward declarations for functions referenced before their definition. */
static int		ksem_sysinit(void);
static int		ksem_sysfini(bool);
static int		ksem_modcmd(modcmd_t, void *);
static void		ksem_release(ksem_t *, int);
static int		ksem_close_fop(file_t *);
static int		ksem_stat_fop(file_t *, struct stat *);
static int		ksem_read_fop(file_t *, off_t *, struct uio *,
    kauth_cred_t, int);
    118 
/*
 * File operations installed on every semaphore descriptor (f_ops).
 * Only read, stat and close are implemented in this file; the remaining
 * slots use the generic fbadop_ and fnullop_ handlers.
 */
static const struct fileops semops = {
	.fo_name = "sem",
	.fo_read = ksem_read_fop,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = ksem_stat_fop,
	.fo_close = ksem_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};
    131 
/*
 * System calls provided by this module.  The table is passed to
 * syscall_establish() in ksem_sysinit() and to syscall_disestablish()
 * in ksem_sysfini(); the all-zero entry terminates it.
 */
static const struct syscall_package ksem_syscalls[] = {
	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
	{ 0, 0, NULL },
};
    145 
/* Sysctl log for the nodes created in ksem_sysinit(); torn down in ksem_sysfini(). */
struct sysctllog *ksem_clog;
/* Limit compared against nsems; exported read-write as the "semmax" sysctl. */
int ksem_max = KSEM_MAX;
    148 
    149 static int
    150 name_copyin(const char *uname, char **name)
    151 {
    152 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
    153 
    154 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
    155 	if (error)
    156 		kmem_free(*name, SEM_MAX_NAMELEN);
    157 
    158 	return error;
    159 }
    160 
    161 static void
    162 name_destroy(char **name)
    163 {
    164 	if (!*name)
    165 		return;
    166 
    167 	kmem_free(*name, SEM_MAX_NAMELEN);
    168 	*name = NULL;
    169 }
    170 
    171 static int
    172 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    173     void *arg0, void *arg1, void *arg2, void *arg3)
    174 {
    175 	ksem_t *ks;
    176 	mode_t mode;
    177 
    178 	if (action != KAUTH_SYSTEM_SEMAPHORE)
    179 		return KAUTH_RESULT_DEFER;
    180 
    181 	ks = arg1;
    182 	mode = ks->ks_mode;
    183 
    184 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
    185 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
    186 	    (mode & S_IWOTH) != 0)
    187 		return KAUTH_RESULT_ALLOW;
    188 
    189 	return KAUTH_RESULT_DEFER;
    190 }
    191 
    192 static int
    193 ksem_sysinit(void)
    194 {
    195 	int error;
    196 	const struct sysctlnode *rnode;
    197 
    198 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
    199 	LIST_INIT(&ksem_head);
    200 	nsems_total = 0;
    201 	nsems = 0;
    202 
    203 	rw_init(&ksem_pshared_lock);
    204 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
    205 	    true, &ksem_pshared_hashmask);
    206 	KASSERT(ksem_pshared_hashtab != NULL);
    207 
    208 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
    209 	    ksem_listener_cb, NULL);
    210 
    211 	/* Define module-specific sysctl tree */
    212 
    213 	ksem_clog = NULL;
    214 
    215 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
    216 			CTLFLAG_PERMANENT,
    217 			CTLTYPE_NODE, "posix",
    218 			SYSCTL_DESCR("POSIX options"),
    219 			NULL, 0, NULL, 0,
    220 			CTL_KERN, CTL_CREATE, CTL_EOL);
    221 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    222 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    223 			CTLTYPE_INT, "semmax",
    224 			SYSCTL_DESCR("Maximal number of semaphores"),
    225 			NULL, 0, &ksem_max, 0,
    226 			CTL_CREATE, CTL_EOL);
    227 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    228 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
    229 			CTLTYPE_INT, "semcnt",
    230 			SYSCTL_DESCR("Current number of semaphores"),
    231 			NULL, 0, &nsems, 0,
    232 			CTL_CREATE, CTL_EOL);
    233 
    234 	error = syscall_establish(NULL, ksem_syscalls);
    235 	if (error) {
    236 		(void)ksem_sysfini(false);
    237 	}
    238 
    239 	return error;
    240 }
    241 
    242 static int
    243 ksem_sysfini(bool interface)
    244 {
    245 	int error;
    246 
    247 	if (interface) {
    248 		error = syscall_disestablish(NULL, ksem_syscalls);
    249 		if (error != 0) {
    250 			return error;
    251 		}
    252 		/*
    253 		 * Make sure that no semaphores are in use.  Note: semops
    254 		 * must be unused at this point.
    255 		 */
    256 		if (nsems_total) {
    257 			error = syscall_establish(NULL, ksem_syscalls);
    258 			KASSERT(error == 0);
    259 			return EBUSY;
    260 		}
    261 	}
    262 	kauth_unlisten_scope(ksem_listener);
    263 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
    264 	rw_destroy(&ksem_pshared_lock);
    265 	mutex_destroy(&ksem_lock);
    266 	sysctl_teardown(&ksem_clog);
    267 	return 0;
    268 }
    269 
    270 static int
    271 ksem_modcmd(modcmd_t cmd, void *arg)
    272 {
    273 
    274 	switch (cmd) {
    275 	case MODULE_CMD_INIT:
    276 		return ksem_sysinit();
    277 
    278 	case MODULE_CMD_FINI:
    279 		return ksem_sysfini(true);
    280 
    281 	default:
    282 		return ENOTTY;
    283 	}
    284 }
    285 
    286 static ksem_t *
    287 ksem_lookup(const char *name)
    288 {
    289 	ksem_t *ks;
    290 
    291 	KASSERT(mutex_owned(&ksem_lock));
    292 
    293 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    294 		if (strcmp(ks->ks_name, name) == 0) {
    295 			mutex_enter(&ks->ks_lock);
    296 			return ks;
    297 		}
    298 	}
    299 	return NULL;
    300 }
    301 
    302 static int
    303 ksem_perm(lwp_t *l, ksem_t *ks)
    304 {
    305 	kauth_cred_t uc = l->l_cred;
    306 
    307 	KASSERT(mutex_owned(&ks->ks_lock));
    308 
    309 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
    310 		return EACCES;
    311 
    312 	return 0;
    313 }
    314 
/*
 * Map a pshared semaphore ID to a bucket index in ksem_pshared_hashtab:
 * drop bit 0 and mask with ksem_pshared_hashmask.
 *
 * Bits 1..23 are random, just pluck a few of those and assume the
 * distribution is going to be pretty good.
 */
#define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
    320 
/*
 * ksem_remove_pshared: unlink a semaphore from the pshared hash table,
 * taking ksem_pshared_lock as writer around the list removal.
 */
static void
ksem_remove_pshared(ksem_t *ksem)
{
	rw_enter(&ksem_pshared_lock, RW_WRITER);
	LIST_REMOVE(ksem, ks_entry);
	rw_exit(&ksem_pshared_lock);
}
    328 
    329 static ksem_t *
    330 ksem_lookup_pshared_locked(intptr_t id)
    331 {
    332 	u_long bucket = KSEM_PSHARED_HASH(id);
    333 	ksem_t *ksem = NULL;
    334 
    335 	/* ksem_t is locked and referenced upon return. */
    336 
    337 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
    338 		if (ksem->ks_pshared_id == id) {
    339 			mutex_enter(&ksem->ks_lock);
    340 			if (ksem->ks_pshared_proc == NULL) {
    341 				/*
    342 				 * This entry is dead, and in the process
    343 				 * of being torn down; skip it.
    344 				 */
    345 				mutex_exit(&ksem->ks_lock);
    346 				continue;
    347 			}
    348 			ksem->ks_ref++;
    349 			KASSERT(ksem->ks_ref != 0);
    350 			return ksem;
    351 		}
    352 	}
    353 
    354 	return NULL;
    355 }
    356 
/*
 * ksem_lookup_pshared: look up a pshared semaphore by ID, holding
 * ksem_pshared_lock as reader for the duration of the search.
 *
 * => returns whatever ksem_lookup_pshared_locked() returns: the
 *    semaphore locked and referenced, or NULL.
 */
static ksem_t *
ksem_lookup_pshared(intptr_t id)
{
	rw_enter(&ksem_pshared_lock, RW_READER);
	ksem_t *ksem = ksem_lookup_pshared_locked(id);
	rw_exit(&ksem_pshared_lock);
	return ksem;
}
    365 
    366 static void
    367 ksem_alloc_pshared_id(ksem_t *ksem)
    368 {
    369 	ksem_t *ksem0;
    370 	uint32_t try;
    371 
    372 	KASSERT(ksem->ks_pshared_proc != NULL);
    373 
    374 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    375 	for (;;) {
    376 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
    377 		    KSEM_PSHARED_MARKER;
    378 
    379 		if ((ksem0 = ksem_lookup_pshared_locked(try)) == NULL) {
    380 			/* Got it! */
    381 			break;
    382 		}
    383 		ksem_release(ksem0, -1);
    384 	}
    385 	ksem->ks_pshared_id = try;
    386 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
    387 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
    388 	rw_exit(&ksem_pshared_lock);
    389 }
    390 
    391 /*
    392  * ksem_get: get the semaphore from the descriptor.
    393  *
    394  * => locks the semaphore, if found, and holds an extra reference.
    395  * => holds a reference on the file descriptor.
    396  */
    397 static int
    398 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
    399 {
    400 	ksem_t *ks;
    401 	int fd;
    402 
    403 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
    404 		/*
    405 		 * ksem_lookup_pshared() returns the ksem_t *
    406 		 * locked and referenced.
    407 		 */
    408 		ks = ksem_lookup_pshared(id);
    409 		if (ks == NULL)
    410 			return EINVAL;
    411 		KASSERT(ks->ks_pshared_id == id);
    412 		KASSERT(ks->ks_pshared_proc != NULL);
    413 		fd = -1;
    414 	} else if (id <= INT_MAX) {
    415 		fd = (int)id;
    416 		file_t *fp = fd_getfile(fd);
    417 
    418 		if (__predict_false(fp == NULL))
    419 			return EINVAL;
    420 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
    421 			fd_putfile(fd);
    422 			return EINVAL;
    423 		}
    424 		ks = fp->f_ksem;
    425 		mutex_enter(&ks->ks_lock);
    426 		ks->ks_ref++;
    427 	} else {
    428 		return EINVAL;
    429 	}
    430 
    431 	*ksret = ks;
    432 	*fdp = fd;
    433 	return 0;
    434 }
    435 
    436 /*
    437  * ksem_create: allocate and setup a new semaphore structure.
    438  */
    439 static int
    440 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
    441 {
    442 	ksem_t *ks;
    443 	kauth_cred_t uc;
    444 	char *kname;
    445 	size_t len;
    446 
    447 	/* Pre-check for the limit. */
    448 	if (nsems >= ksem_max) {
    449 		return ENFILE;
    450 	}
    451 
    452 	if (val > SEM_VALUE_MAX) {
    453 		return EINVAL;
    454 	}
    455 
    456 	if (name != NULL) {
    457 		len = strlen(name);
    458 		if (len > SEM_MAX_NAMELEN) {
    459 			return ENAMETOOLONG;
    460 		}
    461 		/* Name must start with a '/' but not contain one. */
    462 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    463 			return EINVAL;
    464 		}
    465 		kname = kmem_alloc(++len, KM_SLEEP);
    466 		strlcpy(kname, name, len);
    467 	} else {
    468 		kname = NULL;
    469 		len = 0;
    470 	}
    471 
    472 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
    473 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
    474 	cv_init(&ks->ks_cv, "psem");
    475 	ks->ks_name = kname;
    476 	ks->ks_namelen = len;
    477 	ks->ks_mode = mode;
    478 	ks->ks_value = val;
    479 	ks->ks_ref = 1;
    480 
    481 	uc = l->l_cred;
    482 	ks->ks_uid = kauth_cred_geteuid(uc);
    483 	ks->ks_gid = kauth_cred_getegid(uc);
    484 	chgsemcnt(ks->ks_uid, 1);
    485 	atomic_inc_uint(&nsems_total);
    486 
    487 	*ksret = ks;
    488 	return 0;
    489 }
    490 
    491 static void
    492 ksem_free(ksem_t *ks)
    493 {
    494 
    495 	KASSERT(!cv_has_waiters(&ks->ks_cv));
    496 
    497 	chgsemcnt(ks->ks_uid, -1);
    498 	atomic_dec_uint(&nsems_total);
    499 
    500 	if (ks->ks_pshared_id) {
    501 		KASSERT(ks->ks_pshared_proc == NULL);
    502 		ksem_remove_pshared(ks);
    503 	}
    504 	if (ks->ks_name) {
    505 		KASSERT(ks->ks_namelen > 0);
    506 		kmem_free(ks->ks_name, ks->ks_namelen);
    507 	}
    508 	mutex_destroy(&ks->ks_lock);
    509 	cv_destroy(&ks->ks_cv);
    510 	kmem_free(ks, sizeof(ksem_t));
    511 }
    512 
/* True if the ID carries the pshared marker, i.e. it is not a descriptor number. */
#define	KSEM_ID_IS_PSHARED(id)		\
	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
    515 
    516 static void
    517 ksem_release(ksem_t *ksem, int fd)
    518 {
    519 	bool destroy = false;
    520 
    521 	KASSERT(mutex_owned(&ksem->ks_lock));
    522 
    523 	KASSERT(ksem->ks_ref > 0);
    524 	if (--ksem->ks_ref == 0) {
    525 		/*
    526 		 * Destroy if the last reference and semaphore is unnamed,
    527 		 * or unlinked (for named semaphore).
    528 		 */
    529 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
    530 		    (ksem->ks_name == NULL);
    531 	}
    532 	mutex_exit(&ksem->ks_lock);
    533 
    534 	if (destroy) {
    535 		ksem_free(ksem);
    536 	}
    537 	if (fd != -1) {
    538 		fd_putfile(fd);
    539 	}
    540 }
    541 
/*
 * sys__ksem_init: system call wrapper around do_ksem_init(), using the
 * native copyin/copyout routines to access *idp.
 */
int
sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
    register_t *retval)
{
	/* {
		unsigned int value;
		intptr_t *idp;
	} */

	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
	    copyin, copyout);
}
    554 
    555 int
    556 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
    557     copyout_t docopyout)
    558 {
    559 	proc_t *p = l->l_proc;
    560 	ksem_t *ks;
    561 	file_t *fp;
    562 	intptr_t id, arg;
    563 	int fd, error;
    564 
    565 	/*
    566 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
    567 	 * indicate that a pshared semaphore is wanted.  In that case we
    568 	 * allocate globally unique ID and return that, rather than the
    569 	 * process-scoped file descriptor ID.
    570 	 */
    571 	error = (*docopyin)(idp, &arg, sizeof(*idp));
    572 	if (error) {
    573 		return error;
    574 	}
    575 
    576 	error = fd_allocfile(&fp, &fd);
    577 	if (error) {
    578 		return error;
    579 	}
    580 	fp->f_type = DTYPE_SEM;
    581 	fp->f_flag = FREAD | FWRITE;
    582 	fp->f_ops = &semops;
    583 
    584 	if (fd >= KSEM_MARKER_MIN) {
    585 		/*
    586 		 * This is super-unlikely, but we check for it anyway
    587 		 * because potential collisions with the pshared marker
    588 		 * would be bad.
    589 		 */
    590 		fd_abort(p, fp, fd);
    591 		return EMFILE;
    592 	}
    593 
    594 	/* Note the mode does not matter for anonymous semaphores. */
    595 	error = ksem_create(l, NULL, &ks, 0, val);
    596 	if (error) {
    597 		fd_abort(p, fp, fd);
    598 		return error;
    599 	}
    600 
    601 	if (arg == KSEM_PSHARED) {
    602 		ks->ks_pshared_proc = curproc;
    603 		ks->ks_pshared_fd = fd;
    604 		ksem_alloc_pshared_id(ks);
    605 		id = ks->ks_pshared_id;
    606 	} else {
    607 		id = (intptr_t)fd;
    608 	}
    609 
    610 	error = (*docopyout)(&id, idp, sizeof(*idp));
    611 	if (error) {
    612 		ksem_free(ks);
    613 		fd_abort(p, fp, fd);
    614 		return error;
    615 	}
    616 
    617 	fp->f_ksem = ks;
    618 	fd_affix(p, fp, fd);
    619 	return error;
    620 }
    621 
/*
 * sys__ksem_open: system call wrapper around do_ksem_open(), using the
 * native copyout routine to store the new ID in *idp.
 */
int
sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
    register_t *retval)
{
	/* {
		const char *name;
		int oflag;
		mode_t mode;
		unsigned int value;
		intptr_t *idp;
	} */

	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
}
    637 
    638 int
    639 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    640      unsigned int value, intptr_t *idp, copyout_t docopyout)
    641 {
    642 	char *name;
    643 	proc_t *p = l->l_proc;
    644 	ksem_t *ksnew = NULL, *ks;
    645 	file_t *fp;
    646 	intptr_t id;
    647 	int fd, error;
    648 
    649 	error = name_copyin(semname, &name);
    650 	if (error) {
    651 		return error;
    652 	}
    653 	error = fd_allocfile(&fp, &fd);
    654 	if (error) {
    655 		name_destroy(&name);
    656 		return error;
    657 	}
    658 	fp->f_type = DTYPE_SEM;
    659 	fp->f_flag = FREAD | FWRITE;
    660 	fp->f_ops = &semops;
    661 
    662 	if (fd >= KSEM_MARKER_MIN) {
    663 		/*
    664 		 * This is super-unlikely, but we check for it anyway
    665 		 * because potential collisions with the pshared marker
    666 		 * would be bad.
    667 		 */
    668 		fd_abort(p, fp, fd);
    669 		return EMFILE;
    670 	}
    671 
    672 	/*
    673 	 * The ID (file descriptor number) can be stored early.
    674 	 * Note that zero is a special value for libpthread.
    675 	 */
    676 	id = (intptr_t)fd;
    677 	error = (*docopyout)(&id, idp, sizeof(*idp));
    678 	if (error) {
    679 		goto err;
    680 	}
    681 
    682 	if (oflag & O_CREAT) {
    683 		/* Create a new semaphore. */
    684 		error = ksem_create(l, name, &ksnew, mode, value);
    685 		if (error) {
    686 			goto err;
    687 		}
    688 		KASSERT(ksnew != NULL);
    689 	}
    690 
    691 	/* Lookup for a semaphore with such name. */
    692 	mutex_enter(&ksem_lock);
    693 	ks = ksem_lookup(name);
    694 	name_destroy(&name);
    695 	if (ks) {
    696 		KASSERT(mutex_owned(&ks->ks_lock));
    697 		mutex_exit(&ksem_lock);
    698 
    699 		/* Check for exclusive create. */
    700 		if (oflag & O_EXCL) {
    701 			mutex_exit(&ks->ks_lock);
    702 			error = EEXIST;
    703 			goto err;
    704 		}
    705 		/*
    706 		 * Verify permissions.  If we can access it,
    707 		 * add the reference of this thread.
    708 		 */
    709 		error = ksem_perm(l, ks);
    710 		if (error == 0) {
    711 			ks->ks_ref++;
    712 		}
    713 		mutex_exit(&ks->ks_lock);
    714 		if (error) {
    715 			goto err;
    716 		}
    717 	} else {
    718 		/* Fail if not found and not creating. */
    719 		if ((oflag & O_CREAT) == 0) {
    720 			mutex_exit(&ksem_lock);
    721 			KASSERT(ksnew == NULL);
    722 			error = ENOENT;
    723 			goto err;
    724 		}
    725 
    726 		/* Check for the limit locked. */
    727 		if (nsems >= ksem_max) {
    728 			mutex_exit(&ksem_lock);
    729 			error = ENFILE;
    730 			goto err;
    731 		}
    732 
    733 		/*
    734 		 * Finally, insert semaphore into the list.
    735 		 * Note: it already has the initial reference.
    736 		 */
    737 		ks = ksnew;
    738 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
    739 		nsems++;
    740 		mutex_exit(&ksem_lock);
    741 
    742 		ksnew = NULL;
    743 	}
    744 	KASSERT(ks != NULL);
    745 	fp->f_ksem = ks;
    746 	fd_affix(p, fp, fd);
    747 err:
    748 	name_destroy(&name);
    749 	if (error) {
    750 		fd_abort(p, fp, fd);
    751 	}
    752 	if (ksnew) {
    753 		ksem_free(ksnew);
    754 	}
    755 	return error;
    756 }
    757 
    758 int
    759 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
    760     register_t *retval)
    761 {
    762 	/* {
    763 		intptr_t id;
    764 	} */
    765 	intptr_t id = SCARG(uap, id);
    766 	int fd, error;
    767 	ksem_t *ks;
    768 
    769 	error = ksem_get(id, &ks, &fd);
    770 	if (error) {
    771 		return error;
    772 	}
    773 
    774 	/* This is only for named semaphores. */
    775 	if (ks->ks_name == NULL) {
    776 		error = EINVAL;
    777 	}
    778 	ksem_release(ks, -1);
    779 	if (error) {
    780 		if (fd != -1)
    781 			fd_putfile(fd);
    782 		return error;
    783 	}
    784 	return fd_close(fd);
    785 }
    786 
    787 static int
    788 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    789     int flags)
    790 {
    791 	size_t len;
    792 	char *name;
    793 	ksem_t *ks = fp->f_ksem;
    794 
    795 	mutex_enter(&ks->ks_lock);
    796 	len = ks->ks_namelen;
    797 	name = ks->ks_name;
    798 	mutex_exit(&ks->ks_lock);
    799 	if (name == NULL || len == 0)
    800 		return 0;
    801 	return uiomove(name, len, uio);
    802 }
    803 
    804 static int
    805 ksem_stat_fop(file_t *fp, struct stat *ub)
    806 {
    807 	ksem_t *ks = fp->f_ksem;
    808 
    809 	mutex_enter(&ks->ks_lock);
    810 
    811 	memset(ub, 0, sizeof(*ub));
    812 
    813 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
    814 	    ? _S_IFLNK : _S_IFREG);
    815 	ub->st_uid = ks->ks_uid;
    816 	ub->st_gid = ks->ks_gid;
    817 	ub->st_size = ks->ks_value;
    818 	ub->st_blocks = (ub->st_size) ? 1 : 0;
    819 	ub->st_nlink = ks->ks_ref;
    820 	ub->st_blksize = 4096;
    821 
    822 	nanotime(&ub->st_atimespec);
    823 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
    824 	    ub->st_atimespec;
    825 
    826 	/*
    827 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
    828 	 * XXX (st_dev, st_ino) should be unique.
    829 	 */
    830 	mutex_exit(&ks->ks_lock);
    831 	return 0;
    832 }
    833 
    834 static int
    835 ksem_close_fop(file_t *fp)
    836 {
    837 	ksem_t *ks = fp->f_ksem;
    838 
    839 	mutex_enter(&ks->ks_lock);
    840 
    841 	if (ks->ks_pshared_id) {
    842 		if (ks->ks_pshared_proc != curproc) {
    843 			/* Do nothing if this is not the creator. */
    844 			mutex_exit(&ks->ks_lock);
    845 			return 0;
    846 		}
    847 		/* Mark this semaphore as dead. */
    848 		ks->ks_pshared_proc = NULL;
    849 	}
    850 
    851 	ksem_release(ks, -1);
    852 	return 0;
    853 }
    854 
    855 int
    856 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
    857     register_t *retval)
    858 {
    859 	/* {
    860 		const char *name;
    861 	} */
    862 	char *name;
    863 	ksem_t *ks;
    864 	u_int refcnt;
    865 	int error;
    866 
    867 	error = name_copyin(SCARG(uap, name), &name);
    868 	if (error)
    869 		return error;
    870 
    871 	mutex_enter(&ksem_lock);
    872 	ks = ksem_lookup(name);
    873 	name_destroy(&name);
    874 	if (ks == NULL) {
    875 		mutex_exit(&ksem_lock);
    876 		return ENOENT;
    877 	}
    878 	KASSERT(mutex_owned(&ks->ks_lock));
    879 
    880 	/* Verify permissions. */
    881 	error = ksem_perm(l, ks);
    882 	if (error) {
    883 		mutex_exit(&ks->ks_lock);
    884 		mutex_exit(&ksem_lock);
    885 		return error;
    886 	}
    887 
    888 	/* Remove from the global list. */
    889 	LIST_REMOVE(ks, ks_entry);
    890 	nsems--;
    891 	mutex_exit(&ksem_lock);
    892 
    893 	refcnt = ks->ks_ref;
    894 	if (refcnt) {
    895 		/* Mark as unlinked, if there are references. */
    896 		ks->ks_flags |= KS_UNLINKED;
    897 	}
    898 	mutex_exit(&ks->ks_lock);
    899 
    900 	if (refcnt == 0) {
    901 		ksem_free(ks);
    902 	}
    903 	return 0;
    904 }
    905 
    906 int
    907 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
    908     register_t *retval)
    909 {
    910 	/* {
    911 		intptr_t id;
    912 	} */
    913 	int fd, error;
    914 	ksem_t *ks;
    915 
    916 	error = ksem_get(SCARG(uap, id), &ks, &fd);
    917 	if (error) {
    918 		return error;
    919 	}
    920 	KASSERT(mutex_owned(&ks->ks_lock));
    921 	if (ks->ks_value == SEM_VALUE_MAX) {
    922 		error = EOVERFLOW;
    923 		goto out;
    924 	}
    925 	ks->ks_value++;
    926 	if (ks->ks_waiters) {
    927 		cv_broadcast(&ks->ks_cv);
    928 	}
    929 out:
    930 	ksem_release(ks, fd);
    931 	return error;
    932 }
    933 
    934 int
    935 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
    936 {
    937 	int fd, error, timeo;
    938 	ksem_t *ks;
    939 
    940 	error = ksem_get(id, &ks, &fd);
    941 	if (error) {
    942 		return error;
    943 	}
    944 	KASSERT(mutex_owned(&ks->ks_lock));
    945 	while (ks->ks_value == 0) {
    946 		ks->ks_waiters++;
    947 		if (!try_p && abstime != NULL) {
    948 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
    949 			    &timeo, NULL);
    950 			if (error != 0)
    951 				goto out;
    952 		} else {
    953 			timeo = 0;
    954 		}
    955 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
    956 		    &ks->ks_lock, timeo);
    957 		ks->ks_waiters--;
    958 		if (error)
    959 			goto out;
    960 	}
    961 	ks->ks_value--;
    962 out:
    963 	ksem_release(ks, fd);
    964 	return error;
    965 }
    966 
/*
 * sys__ksem_wait: blocking wait with no timeout; calls do_ksem_wait()
 * with try_p false and no deadline.
 */
int
sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
    register_t *retval)
{
	/* {
		intptr_t id;
	} */

	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
}
    977 
    978 int
    979 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
    980     register_t *retval)
    981 {
    982 	/* {
    983 		intptr_t id;
    984 		const struct timespec *abstime;
    985 	} */
    986 	struct timespec ts;
    987 	int error;
    988 
    989 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
    990 	if (error != 0)
    991 		return error;
    992 
    993 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
    994 		return EINVAL;
    995 
    996 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
    997 	if (error == EWOULDBLOCK)
    998 		error = ETIMEDOUT;
    999 	return error;
   1000 }
   1001 
/*
 * sys__ksem_trywait: non-blocking wait; calls do_ksem_wait() with
 * try_p true, which yields EAGAIN when the semaphore value is zero.
 */
int
sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
    register_t *retval)
{
	/* {
		intptr_t id;
	} */

	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
}
   1012 
   1013 int
   1014 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
   1015     register_t *retval)
   1016 {
   1017 	/* {
   1018 		intptr_t id;
   1019 		unsigned int *value;
   1020 	} */
   1021 	int fd, error;
   1022 	ksem_t *ks;
   1023 	unsigned int val;
   1024 
   1025 	error = ksem_get(SCARG(uap, id), &ks, &fd);
   1026 	if (error) {
   1027 		return error;
   1028 	}
   1029 	KASSERT(mutex_owned(&ks->ks_lock));
   1030 	val = ks->ks_value;
   1031 	ksem_release(ks, fd);
   1032 
   1033 	return copyout(&val, SCARG(uap, value), sizeof(val));
   1034 }
   1035 
   1036 int
   1037 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
   1038     register_t *retval)
   1039 {
   1040 	/* {
   1041 		intptr_t id;
   1042 	} */
   1043 	int fd, error;
   1044 	ksem_t *ks;
   1045 
   1046 	intptr_t id = SCARG(uap, id);
   1047 
   1048 	error = ksem_get(id, &ks, &fd);
   1049 	if (error) {
   1050 		return error;
   1051 	}
   1052 	KASSERT(mutex_owned(&ks->ks_lock));
   1053 
   1054 	/* Operation is only for unnamed semaphores. */
   1055 	if (ks->ks_name != NULL) {
   1056 		error = EINVAL;
   1057 		goto out;
   1058 	}
   1059 	/* Cannot destroy if there are waiters. */
   1060 	if (ks->ks_waiters) {
   1061 		error = EBUSY;
   1062 		goto out;
   1063 	}
   1064 	if (KSEM_ID_IS_PSHARED(id)) {
   1065 		/* Cannot destroy if we did't create it. */
   1066 		KASSERT(fd == -1);
   1067 		KASSERT(ks->ks_pshared_proc != NULL);
   1068 		if (ks->ks_pshared_proc != curproc) {
   1069 			error = EINVAL;
   1070 			goto out;
   1071 		}
   1072 		fd = ks->ks_pshared_fd;
   1073 
   1074 		/* Mark it dead so subsequent lookups fail. */
   1075 		ks->ks_pshared_proc = NULL;
   1076 
   1077 		/* Do an fd_getfile() to for the benefit of fd_close(). */
   1078 		file_t *fp __diagused = fd_getfile(fd);
   1079 		KASSERT(fp != NULL);
   1080 		KASSERT(fp->f_ksem == ks);
   1081 	}
   1082 out:
   1083 	ksem_release(ks, -1);
   1084 	if (error) {
   1085 		if (!KSEM_ID_IS_PSHARED(id))
   1086 			fd_putfile(fd);
   1087 		return error;
   1088 	}
   1089 	return fd_close(fd);
   1090 }
   1091