Home | History | Annotate | Line # | Download | only in kern
uipc_sem.c revision 1.53
      1 /*	$NetBSD: uipc_sem.c,v 1.53 2019/02/05 07:14:32 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2011, 2019 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Mindaugas Rasiukevicius and Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 2002 Alfred Perlstein <alfred (at) FreeBSD.org>
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 /*
     59  * Implementation of POSIX semaphore.
     60  */
     61 
     62 #include <sys/cdefs.h>
     63 __KERNEL_RCSID(0, "$NetBSD: uipc_sem.c,v 1.53 2019/02/05 07:14:32 thorpej Exp $");
     64 
     65 #include <sys/param.h>
     66 #include <sys/kernel.h>
     67 
     68 #include <sys/atomic.h>
     69 #include <sys/proc.h>
     70 #include <sys/lwp.h>
     71 #include <sys/ksem.h>
     72 #include <sys/syscall.h>
     73 #include <sys/stat.h>
     74 #include <sys/kmem.h>
     75 #include <sys/fcntl.h>
     76 #include <sys/file.h>
     77 #include <sys/filedesc.h>
     78 #include <sys/kauth.h>
     79 #include <sys/module.h>
     80 #include <sys/mount.h>
     81 #include <sys/mutex.h>
     82 #include <sys/rwlock.h>
     83 #include <sys/semaphore.h>
     84 #include <sys/syscall.h>
     85 #include <sys/syscallargs.h>
     86 #include <sys/syscallvar.h>
     87 #include <sys/sysctl.h>
     88 #include <sys/cprng.h>
     89 
MODULE(MODULE_CLASS_MISC, ksem, NULL);

/* Size of the kernel buffer used when copying a semaphore name in. */
#define	SEM_MAX_NAMELEN		NAME_MAX

/* Per-process cap, compared against p_nsems in ksem_create(). */
#define	SEM_NSEMS_MAX		256
/* ks_flags bit: the named semaphore was unlinked; free on last release. */
#define	KS_UNLINKED		0x01

/*
 * Named semaphores: ksem_head lists them and nsems counts the entries
 * on that list; both are modified with ksem_lock held.  nsems_total
 * counts every allocated ksem_t and is updated with atomic ops.
 */
static kmutex_t		ksem_lock	__cacheline_aligned;
static LIST_HEAD(,ksem)	ksem_head	__cacheline_aligned;
static u_int		nsems_total	__cacheline_aligned;
static u_int		nsems		__cacheline_aligned;

/*
 * Process-shared semaphores: a hash table indexed by
 * KSEM_PSHARED_HASH(ks_pshared_id), protected by ksem_pshared_lock.
 * The table and its mask are set up by hashinit() in ksem_sysinit().
 */
static krwlock_t	ksem_pshared_lock __cacheline_aligned;
static LIST_HEAD(, ksem) *ksem_pshared_hashtab __cacheline_aligned;
static u_long		ksem_pshared_hashmask __read_mostly;

/* Element count handed to hashinit() for the pshared table. */
#define	KSEM_PSHARED_HASHSIZE	32

/* Handle from kauth_listen_scope(); needed to unlisten at teardown. */
static kauth_listener_t	ksem_listener;

/* Forward declarations: module glue and the semops file operations. */
static int		ksem_sysinit(void);
static int		ksem_sysfini(bool);
static int		ksem_modcmd(modcmd_t, void *);
static int		ksem_close_fop(file_t *);
static int		ksem_stat_fop(file_t *, struct stat *);
static int		ksem_read_fop(file_t *, off_t *, struct uio *,
    kauth_cred_t, int);
    117 
/*
 * File operations installed on semaphore descriptors (f_type DTYPE_SEM).
 * Only read, stat and close are implemented in this file; every other
 * operation is routed to a generic fbadop_ or fnullop_ handler.
 */
static const struct fileops semops = {
	.fo_name = "sem",
	.fo_read = ksem_read_fop,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = fnullop_poll,
	.fo_stat = ksem_stat_fop,
	.fo_close = ksem_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};
    130 
/*
 * System calls provided by this module.  The table is passed to
 * syscall_establish() in ksem_sysinit() and to syscall_disestablish()
 * in ksem_sysfini(); the all-zero entry terminates it.
 */
static const struct syscall_package ksem_syscalls[] = {
	{ SYS__ksem_init, 0, (sy_call_t *)sys__ksem_init },
	{ SYS__ksem_open, 0, (sy_call_t *)sys__ksem_open },
	{ SYS__ksem_unlink, 0, (sy_call_t *)sys__ksem_unlink },
	{ SYS__ksem_close, 0, (sy_call_t *)sys__ksem_close },
	{ SYS__ksem_post, 0, (sy_call_t *)sys__ksem_post },
	{ SYS__ksem_wait, 0, (sy_call_t *)sys__ksem_wait },
	{ SYS__ksem_trywait, 0, (sy_call_t *)sys__ksem_trywait },
	{ SYS__ksem_getvalue, 0, (sy_call_t *)sys__ksem_getvalue },
	{ SYS__ksem_destroy, 0, (sy_call_t *)sys__ksem_destroy },
	{ SYS__ksem_timedwait, 0, (sy_call_t *)sys__ksem_timedwait },
	{ 0, 0, NULL },
};
    144 
/* Log of the sysctl nodes created in ksem_sysinit(), for teardown. */
struct sysctllog *ksem_clog;
/* Limit that nsems is checked against; set to KSEM_MAX at init, "semmax" sysctl. */
int ksem_max;
    147 
    148 static int
    149 name_copyin(const char *uname, char **name)
    150 {
    151 	*name = kmem_alloc(SEM_MAX_NAMELEN, KM_SLEEP);
    152 
    153 	int error = copyinstr(uname, *name, SEM_MAX_NAMELEN, NULL);
    154 	if (error)
    155 		kmem_free(*name, SEM_MAX_NAMELEN);
    156 
    157 	return error;
    158 }
    159 
    160 static void
    161 name_destroy(char **name)
    162 {
    163 	if (!*name)
    164 		return;
    165 
    166 	kmem_free(*name, SEM_MAX_NAMELEN);
    167 	*name = NULL;
    168 }
    169 
    170 static int
    171 ksem_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    172     void *arg0, void *arg1, void *arg2, void *arg3)
    173 {
    174 	ksem_t *ks;
    175 	mode_t mode;
    176 
    177 	if (action != KAUTH_SYSTEM_SEMAPHORE)
    178 		return KAUTH_RESULT_DEFER;
    179 
    180 	ks = arg1;
    181 	mode = ks->ks_mode;
    182 
    183 	if ((kauth_cred_geteuid(cred) == ks->ks_uid && (mode & S_IWUSR) != 0) ||
    184 	    (kauth_cred_getegid(cred) == ks->ks_gid && (mode & S_IWGRP) != 0) ||
    185 	    (mode & S_IWOTH) != 0)
    186 		return KAUTH_RESULT_ALLOW;
    187 
    188 	return KAUTH_RESULT_DEFER;
    189 }
    190 
    191 static int
    192 ksem_sysinit(void)
    193 {
    194 	int error;
    195 	const struct sysctlnode *rnode;
    196 
    197 	mutex_init(&ksem_lock, MUTEX_DEFAULT, IPL_NONE);
    198 	LIST_INIT(&ksem_head);
    199 	nsems_total = 0;
    200 	nsems = 0;
    201 
    202 	rw_init(&ksem_pshared_lock);
    203 	ksem_pshared_hashtab = hashinit(KSEM_PSHARED_HASHSIZE, HASH_LIST,
    204 	    true, &ksem_pshared_hashmask);
    205 	KASSERT(ksem_pshared_hashtab != NULL);
    206 
    207 	error = syscall_establish(NULL, ksem_syscalls);
    208 	if (error) {
    209 		(void)ksem_sysfini(false);
    210 	}
    211 
    212 	ksem_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
    213 	    ksem_listener_cb, NULL);
    214 
    215 	/* Define module-specific sysctl tree */
    216 
    217 	ksem_max = KSEM_MAX;
    218 	ksem_clog = NULL;
    219 
    220 	sysctl_createv(&ksem_clog, 0, NULL, &rnode,
    221 			CTLFLAG_PERMANENT,
    222 			CTLTYPE_NODE, "posix",
    223 			SYSCTL_DESCR("POSIX options"),
    224 			NULL, 0, NULL, 0,
    225 			CTL_KERN, CTL_CREATE, CTL_EOL);
    226 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    227 			CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
    228 			CTLTYPE_INT, "semmax",
    229 			SYSCTL_DESCR("Maximal number of semaphores"),
    230 			NULL, 0, &ksem_max, 0,
    231 			CTL_CREATE, CTL_EOL);
    232 	sysctl_createv(&ksem_clog, 0, &rnode, NULL,
    233 			CTLFLAG_PERMANENT | CTLFLAG_READONLY,
    234 			CTLTYPE_INT, "semcnt",
    235 			SYSCTL_DESCR("Current number of semaphores"),
    236 			NULL, 0, &nsems, 0,
    237 			CTL_CREATE, CTL_EOL);
    238 
    239 	return error;
    240 }
    241 
    242 static int
    243 ksem_sysfini(bool interface)
    244 {
    245 	int error;
    246 
    247 	if (interface) {
    248 		error = syscall_disestablish(NULL, ksem_syscalls);
    249 		if (error != 0) {
    250 			return error;
    251 		}
    252 		/*
    253 		 * Make sure that no semaphores are in use.  Note: semops
    254 		 * must be unused at this point.
    255 		 */
    256 		if (nsems_total) {
    257 			error = syscall_establish(NULL, ksem_syscalls);
    258 			KASSERT(error == 0);
    259 			return EBUSY;
    260 		}
    261 	}
    262 	kauth_unlisten_scope(ksem_listener);
    263 	hashdone(ksem_pshared_hashtab, HASH_LIST, ksem_pshared_hashmask);
    264 	rw_destroy(&ksem_pshared_lock);
    265 	mutex_destroy(&ksem_lock);
    266 	sysctl_teardown(&ksem_clog);
    267 	return 0;
    268 }
    269 
    270 static int
    271 ksem_modcmd(modcmd_t cmd, void *arg)
    272 {
    273 
    274 	switch (cmd) {
    275 	case MODULE_CMD_INIT:
    276 		return ksem_sysinit();
    277 
    278 	case MODULE_CMD_FINI:
    279 		return ksem_sysfini(true);
    280 
    281 	default:
    282 		return ENOTTY;
    283 	}
    284 }
    285 
    286 static ksem_t *
    287 ksem_lookup(const char *name)
    288 {
    289 	ksem_t *ks;
    290 
    291 	KASSERT(mutex_owned(&ksem_lock));
    292 
    293 	LIST_FOREACH(ks, &ksem_head, ks_entry) {
    294 		if (strcmp(ks->ks_name, name) == 0) {
    295 			mutex_enter(&ks->ks_lock);
    296 			return ks;
    297 		}
    298 	}
    299 	return NULL;
    300 }
    301 
    302 static int
    303 ksem_perm(lwp_t *l, ksem_t *ks)
    304 {
    305 	kauth_cred_t uc = l->l_cred;
    306 
    307 	KASSERT(mutex_owned(&ks->ks_lock));
    308 
    309 	if (kauth_authorize_system(uc, KAUTH_SYSTEM_SEMAPHORE, 0, ks, NULL, NULL) != 0)
    310 		return EACCES;
    311 
    312 	return 0;
    313 }
    314 
/*
 * Map a pshared semaphore ID to its bucket index in
 * ksem_pshared_hashtab: bit 0 is dropped and the result is masked
 * with ksem_pshared_hashmask.
 *
 * Bits 1..23 are random, just pluck a few of those and assume the
 * distribution is going to be pretty good.
 */
#define	KSEM_PSHARED_HASH(id)	(((id) >> 1) & ksem_pshared_hashmask)
    320 
    321 static void
    322 ksem_remove_pshared(ksem_t *ksem)
    323 {
    324 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    325 	LIST_REMOVE(ksem, ks_entry);
    326 	rw_exit(&ksem_pshared_lock);
    327 }
    328 
    329 static ksem_t *
    330 ksem_lookup_pshared_locked(intptr_t id)
    331 {
    332 	u_long bucket = KSEM_PSHARED_HASH(id);
    333 	ksem_t *ksem = NULL;
    334 
    335 	/* ksem_t is locked and referenced upon return. */
    336 
    337 	LIST_FOREACH(ksem, &ksem_pshared_hashtab[bucket], ks_entry) {
    338 		if (ksem->ks_pshared_id == id) {
    339 			mutex_enter(&ksem->ks_lock);
    340 			if (ksem->ks_pshared_proc == NULL) {
    341 				/*
    342 				 * This entry is dead, and in the process
    343 				 * of being torn down; skip it.
    344 				 */
    345 				mutex_exit(&ksem->ks_lock);
    346 				continue;
    347 			}
    348 			ksem->ks_ref++;
    349 			KASSERT(ksem->ks_ref != 0);
    350 			return ksem;
    351 		}
    352 	}
    353 
    354 	return NULL;
    355 }
    356 
    357 static ksem_t *
    358 ksem_lookup_pshared(intptr_t id)
    359 {
    360 	rw_enter(&ksem_pshared_lock, RW_READER);
    361 	ksem_t *ksem = ksem_lookup_pshared_locked(id);
    362 	rw_exit(&ksem_pshared_lock);
    363 	return ksem;
    364 }
    365 
    366 static void
    367 ksem_alloc_pshared_id(ksem_t *ksem)
    368 {
    369 	uint32_t try;
    370 
    371 	KASSERT(ksem->ks_pshared_proc != NULL);
    372 
    373 	rw_enter(&ksem_pshared_lock, RW_WRITER);
    374 	for (;;) {
    375 		try = (cprng_fast32() & ~KSEM_MARKER_MASK) |
    376 		    KSEM_PSHARED_MARKER;
    377 
    378 		if (ksem_lookup_pshared_locked(try) == NULL) {
    379 			/* Got it! */
    380 			break;
    381 		}
    382 	}
    383 	ksem->ks_pshared_id = try;
    384 	u_long bucket = KSEM_PSHARED_HASH(ksem->ks_pshared_id);
    385 	LIST_INSERT_HEAD(&ksem_pshared_hashtab[bucket], ksem, ks_entry);
    386 	rw_exit(&ksem_pshared_lock);
    387 }
    388 
    389 /*
    390  * ksem_get: get the semaphore from the descriptor.
    391  *
    392  * => locks the semaphore, if found, and holds an extra reference.
    393  * => holds a reference on the file descriptor.
    394  */
    395 static int
    396 ksem_get(intptr_t id, ksem_t **ksret, int *fdp)
    397 {
    398 	ksem_t *ks;
    399 	int fd;
    400 
    401 	if ((id & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER) {
    402 		/*
    403 		 * ksem_lookup_pshared() returns the ksem_t *
    404 		 * locked and referenced.
    405 		 */
    406 		ks = ksem_lookup_pshared(id);
    407 		if (ks == NULL)
    408 			return EINVAL;
    409 		KASSERT(ks->ks_pshared_id == id);
    410 		KASSERT(ks->ks_pshared_proc != NULL);
    411 		fd = -1;
    412 	} else if (id <= INT_MAX) {
    413 		fd = (int)id;
    414 		file_t *fp = fd_getfile(fd);
    415 
    416 		if (__predict_false(fp == NULL))
    417 			return EINVAL;
    418 		if (__predict_false(fp->f_type != DTYPE_SEM)) {
    419 			fd_putfile(fd);
    420 			return EINVAL;
    421 		}
    422 		ks = fp->f_ksem;
    423 		mutex_enter(&ks->ks_lock);
    424 		ks->ks_ref++;
    425 	} else {
    426 		return EINVAL;
    427 	}
    428 
    429 	*ksret = ks;
    430 	*fdp = fd;
    431 	return 0;
    432 }
    433 
    434 /*
    435  * ksem_create: allocate and setup a new semaphore structure.
    436  */
    437 static int
    438 ksem_create(lwp_t *l, const char *name, ksem_t **ksret, mode_t mode, u_int val)
    439 {
    440 	ksem_t *ks;
    441 	kauth_cred_t uc;
    442 	char *kname;
    443 	size_t len;
    444 
    445 	/* Pre-check for the limit. */
    446 	if (nsems >= ksem_max) {
    447 		return ENFILE;
    448 	}
    449 
    450 	if (val > SEM_VALUE_MAX) {
    451 		return EINVAL;
    452 	}
    453 
    454 	if (name != NULL) {
    455 		len = strlen(name);
    456 		if (len > SEM_MAX_NAMELEN) {
    457 			return ENAMETOOLONG;
    458 		}
    459 		/* Name must start with a '/' but not contain one. */
    460 		if (*name != '/' || len < 2 || strchr(name + 1, '/') != NULL) {
    461 			return EINVAL;
    462 		}
    463 		kname = kmem_alloc(++len, KM_SLEEP);
    464 		strlcpy(kname, name, len);
    465 	} else {
    466 		kname = NULL;
    467 		len = 0;
    468 	}
    469 
    470 	if (atomic_inc_uint_nv(&l->l_proc->p_nsems) > SEM_NSEMS_MAX) {
    471 		atomic_dec_uint(&l->l_proc->p_nsems);
    472 		if (kname != NULL)
    473 			kmem_free(kname, len);
    474 		return -1;
    475 	}
    476 
    477 	ks = kmem_zalloc(sizeof(ksem_t), KM_SLEEP);
    478 	mutex_init(&ks->ks_lock, MUTEX_DEFAULT, IPL_NONE);
    479 	cv_init(&ks->ks_cv, "psem");
    480 	ks->ks_name = kname;
    481 	ks->ks_namelen = len;
    482 	ks->ks_mode = mode;
    483 	ks->ks_value = val;
    484 	ks->ks_ref = 1;
    485 
    486 	uc = l->l_cred;
    487 	ks->ks_uid = kauth_cred_geteuid(uc);
    488 	ks->ks_gid = kauth_cred_getegid(uc);
    489 
    490 	atomic_inc_uint(&nsems_total);
    491 	*ksret = ks;
    492 	return 0;
    493 }
    494 
    495 static void
    496 ksem_free(ksem_t *ks)
    497 {
    498 
    499 	KASSERT(!cv_has_waiters(&ks->ks_cv));
    500 
    501 	if (ks->ks_pshared_id) {
    502 		KASSERT(ks->ks_pshared_proc == NULL);
    503 		ksem_remove_pshared(ks);
    504 	}
    505 	if (ks->ks_name) {
    506 		KASSERT(ks->ks_namelen > 0);
    507 		kmem_free(ks->ks_name, ks->ks_namelen);
    508 	}
    509 	mutex_destroy(&ks->ks_lock);
    510 	cv_destroy(&ks->ks_cv);
    511 	kmem_free(ks, sizeof(ksem_t));
    512 
    513 	atomic_dec_uint(&nsems_total);
    514  	atomic_dec_uint(&curproc->p_nsems);
    515 }
    516 
/* True if the marker bits of a userland semaphore ID tag it as pshared. */
#define	KSEM_ID_IS_PSHARED(id)		\
	(((id) & KSEM_MARKER_MASK) == KSEM_PSHARED_MARKER)
    519 
    520 static void
    521 ksem_release(ksem_t *ksem, int fd)
    522 {
    523 	bool destroy = false;
    524 
    525 	KASSERT(mutex_owned(&ksem->ks_lock));
    526 
    527 	KASSERT(ksem->ks_ref > 0);
    528 	if (--ksem->ks_ref == 0) {
    529 		/*
    530 		 * Destroy if the last reference and semaphore is unnamed,
    531 		 * or unlinked (for named semaphore).
    532 		 */
    533 		destroy = (ksem->ks_flags & KS_UNLINKED) ||
    534 		    (ksem->ks_name == NULL);
    535 	}
    536 	mutex_exit(&ksem->ks_lock);
    537 
    538 	if (destroy) {
    539 		ksem_free(ksem);
    540 	}
    541 	if (fd != -1) {
    542 		fd_putfile(fd);
    543 	}
    544 }
    545 
    546 int
    547 sys__ksem_init(struct lwp *l, const struct sys__ksem_init_args *uap,
    548     register_t *retval)
    549 {
    550 	/* {
    551 		unsigned int value;
    552 		intptr_t *idp;
    553 	} */
    554 
    555 	return do_ksem_init(l, SCARG(uap, value), SCARG(uap, idp),
    556 	    copyin, copyout);
    557 }
    558 
    559 int
    560 do_ksem_init(lwp_t *l, u_int val, intptr_t *idp, copyin_t docopyin,
    561     copyout_t docopyout)
    562 {
    563 	proc_t *p = l->l_proc;
    564 	ksem_t *ks;
    565 	file_t *fp;
    566 	intptr_t id, arg;
    567 	int fd, error;
    568 
    569 	/*
    570 	 * Newer versions of librt / libpthread pass us 'PSRD' in *idp to
    571 	 * indicate that a pshared semaphore is wanted.  In that case we
    572 	 * allocate globally unique ID and return that, rather than the
    573 	 * process-scoped file descriptor ID.
    574 	 */
    575 	error = (*docopyin)(idp, &arg, sizeof(*idp));
    576 	if (error) {
    577 		return error;
    578 	}
    579 
    580 	error = fd_allocfile(&fp, &fd);
    581 	if (error) {
    582 		return error;
    583 	}
    584 	fp->f_type = DTYPE_SEM;
    585 	fp->f_flag = FREAD | FWRITE;
    586 	fp->f_ops = &semops;
    587 
    588 	if (fd >= KSEM_MARKER_MIN) {
    589 		/*
    590 		 * This is super-unlikely, but we check for it anyway
    591 		 * because potential collisions with the pshared marker
    592 		 * would be bad.
    593 		 */
    594 		fd_abort(p, fp, fd);
    595 		return EMFILE;
    596 	}
    597 
    598 	/* Note the mode does not matter for anonymous semaphores. */
    599 	error = ksem_create(l, NULL, &ks, 0, val);
    600 	if (error) {
    601 		fd_abort(p, fp, fd);
    602 		return error;
    603 	}
    604 
    605 	if (arg == KSEM_PSHARED) {
    606 		ks->ks_pshared_proc = curproc;
    607 		ks->ks_pshared_fd = fd;
    608 		ksem_alloc_pshared_id(ks);
    609 		id = ks->ks_pshared_id;
    610 	} else {
    611 		id = (intptr_t)fd;
    612 	}
    613 
    614 	error = (*docopyout)(&id, idp, sizeof(*idp));
    615 	if (error) {
    616 		ksem_free(ks);
    617 		fd_abort(p, fp, fd);
    618 		return error;
    619 	}
    620 
    621 	fp->f_ksem = ks;
    622 	fd_affix(p, fp, fd);
    623 	return error;
    624 }
    625 
    626 int
    627 sys__ksem_open(struct lwp *l, const struct sys__ksem_open_args *uap,
    628     register_t *retval)
    629 {
    630 	/* {
    631 		const char *name;
    632 		int oflag;
    633 		mode_t mode;
    634 		unsigned int value;
    635 		intptr_t *idp;
    636 	} */
    637 
    638 	return do_ksem_open(l, SCARG(uap, name), SCARG(uap, oflag),
    639 	    SCARG(uap, mode), SCARG(uap, value), SCARG(uap, idp), copyout);
    640 }
    641 
    642 int
    643 do_ksem_open(struct lwp *l, const char *semname, int oflag, mode_t mode,
    644      unsigned int value, intptr_t *idp, copyout_t docopyout)
    645 {
    646 	char *name;
    647 	proc_t *p = l->l_proc;
    648 	ksem_t *ksnew = NULL, *ks;
    649 	file_t *fp;
    650 	intptr_t id;
    651 	int fd, error;
    652 
    653 	error = name_copyin(semname, &name);
    654 	if (error) {
    655 		return error;
    656 	}
    657 	error = fd_allocfile(&fp, &fd);
    658 	if (error) {
    659 		name_destroy(&name);
    660 		return error;
    661 	}
    662 	fp->f_type = DTYPE_SEM;
    663 	fp->f_flag = FREAD | FWRITE;
    664 	fp->f_ops = &semops;
    665 
    666 	if (fd >= KSEM_MARKER_MIN) {
    667 		/*
    668 		 * This is super-unlikely, but we check for it anyway
    669 		 * because potential collisions with the pshared marker
    670 		 * would be bad.
    671 		 */
    672 		fd_abort(p, fp, fd);
    673 		return EMFILE;
    674 	}
    675 
    676 	/*
    677 	 * The ID (file descriptor number) can be stored early.
    678 	 * Note that zero is a special value for libpthread.
    679 	 */
    680 	id = (intptr_t)fd;
    681 	error = (*docopyout)(&id, idp, sizeof(*idp));
    682 	if (error) {
    683 		goto err;
    684 	}
    685 
    686 	if (oflag & O_CREAT) {
    687 		/* Create a new semaphore. */
    688 		error = ksem_create(l, name, &ksnew, mode, value);
    689 		if (error) {
    690 			goto err;
    691 		}
    692 		KASSERT(ksnew != NULL);
    693 	}
    694 
    695 	/* Lookup for a semaphore with such name. */
    696 	mutex_enter(&ksem_lock);
    697 	ks = ksem_lookup(name);
    698 	name_destroy(&name);
    699 	if (ks) {
    700 		KASSERT(mutex_owned(&ks->ks_lock));
    701 		mutex_exit(&ksem_lock);
    702 
    703 		/* Check for exclusive create. */
    704 		if (oflag & O_EXCL) {
    705 			mutex_exit(&ks->ks_lock);
    706 			error = EEXIST;
    707 			goto err;
    708 		}
    709 		/*
    710 		 * Verify permissions.  If we can access it,
    711 		 * add the reference of this thread.
    712 		 */
    713 		error = ksem_perm(l, ks);
    714 		if (error == 0) {
    715 			ks->ks_ref++;
    716 		}
    717 		mutex_exit(&ks->ks_lock);
    718 		if (error) {
    719 			goto err;
    720 		}
    721 	} else {
    722 		/* Fail if not found and not creating. */
    723 		if ((oflag & O_CREAT) == 0) {
    724 			mutex_exit(&ksem_lock);
    725 			KASSERT(ksnew == NULL);
    726 			error = ENOENT;
    727 			goto err;
    728 		}
    729 
    730 		/* Check for the limit locked. */
    731 		if (nsems >= ksem_max) {
    732 			mutex_exit(&ksem_lock);
    733 			error = ENFILE;
    734 			goto err;
    735 		}
    736 
    737 		/*
    738 		 * Finally, insert semaphore into the list.
    739 		 * Note: it already has the initial reference.
    740 		 */
    741 		ks = ksnew;
    742 		LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
    743 		nsems++;
    744 		mutex_exit(&ksem_lock);
    745 
    746 		ksnew = NULL;
    747 	}
    748 	KASSERT(ks != NULL);
    749 	fp->f_ksem = ks;
    750 	fd_affix(p, fp, fd);
    751 err:
    752 	name_destroy(&name);
    753 	if (error) {
    754 		fd_abort(p, fp, fd);
    755 	}
    756 	if (ksnew) {
    757 		ksem_free(ksnew);
    758 	}
    759 	return error;
    760 }
    761 
    762 int
    763 sys__ksem_close(struct lwp *l, const struct sys__ksem_close_args *uap,
    764     register_t *retval)
    765 {
    766 	/* {
    767 		intptr_t id;
    768 	} */
    769 	intptr_t id = SCARG(uap, id);
    770 	int fd, error;
    771 	ksem_t *ks;
    772 
    773 	error = ksem_get(id, &ks, &fd);
    774 	if (error) {
    775 		return error;
    776 	}
    777 
    778 	/* This is only for named semaphores. */
    779 	if (ks->ks_name == NULL) {
    780 		error = EINVAL;
    781 	}
    782 	ksem_release(ks, -1);
    783 	if (error) {
    784 		if (fd != -1)
    785 			fd_putfile(fd);
    786 		return error;
    787 	}
    788 	return fd_close(fd);
    789 }
    790 
    791 static int
    792 ksem_read_fop(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    793     int flags)
    794 {
    795 	size_t len;
    796 	char *name;
    797 	ksem_t *ks = fp->f_ksem;
    798 
    799 	mutex_enter(&ks->ks_lock);
    800 	len = ks->ks_namelen;
    801 	name = ks->ks_name;
    802 	mutex_exit(&ks->ks_lock);
    803 	if (name == NULL || len == 0)
    804 		return 0;
    805 	return uiomove(name, len, uio);
    806 }
    807 
    808 static int
    809 ksem_stat_fop(file_t *fp, struct stat *ub)
    810 {
    811 	ksem_t *ks = fp->f_ksem;
    812 
    813 	mutex_enter(&ks->ks_lock);
    814 
    815 	memset(ub, 0, sizeof(*ub));
    816 
    817 	ub->st_mode = ks->ks_mode | ((ks->ks_name && ks->ks_namelen)
    818 	    ? _S_IFLNK : _S_IFREG);
    819 	ub->st_uid = ks->ks_uid;
    820 	ub->st_gid = ks->ks_gid;
    821 	ub->st_size = ks->ks_value;
    822 	ub->st_blocks = (ub->st_size) ? 1 : 0;
    823 	ub->st_nlink = ks->ks_ref;
    824 	ub->st_blksize = 4096;
    825 
    826 	nanotime(&ub->st_atimespec);
    827 	ub->st_mtimespec = ub->st_ctimespec = ub->st_birthtimespec =
    828 	    ub->st_atimespec;
    829 
    830 	/*
    831 	 * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen.
    832 	 * XXX (st_dev, st_ino) should be unique.
    833 	 */
    834 	mutex_exit(&ks->ks_lock);
    835 	return 0;
    836 }
    837 
    838 static int
    839 ksem_close_fop(file_t *fp)
    840 {
    841 	ksem_t *ks = fp->f_ksem;
    842 
    843 	mutex_enter(&ks->ks_lock);
    844 
    845 	if (ks->ks_pshared_id) {
    846 		if (ks->ks_pshared_proc != curproc) {
    847 			/* Do nothing if this is not the creator. */
    848 			mutex_exit(&ks->ks_lock);
    849 			return 0;
    850 		}
    851 		/* Mark this semaphore as dead. */
    852 		ks->ks_pshared_proc = NULL;
    853 	}
    854 
    855 	ksem_release(ks, -1);
    856 	return 0;
    857 }
    858 
    859 int
    860 sys__ksem_unlink(struct lwp *l, const struct sys__ksem_unlink_args *uap,
    861     register_t *retval)
    862 {
    863 	/* {
    864 		const char *name;
    865 	} */
    866 	char *name;
    867 	ksem_t *ks;
    868 	u_int refcnt;
    869 	int error;
    870 
    871 	error = name_copyin(SCARG(uap, name), &name);
    872 	if (error)
    873 		return error;
    874 
    875 	mutex_enter(&ksem_lock);
    876 	ks = ksem_lookup(name);
    877 	name_destroy(&name);
    878 	if (ks == NULL) {
    879 		mutex_exit(&ksem_lock);
    880 		return ENOENT;
    881 	}
    882 	KASSERT(mutex_owned(&ks->ks_lock));
    883 
    884 	/* Verify permissions. */
    885 	error = ksem_perm(l, ks);
    886 	if (error) {
    887 		mutex_exit(&ks->ks_lock);
    888 		mutex_exit(&ksem_lock);
    889 		return error;
    890 	}
    891 
    892 	/* Remove from the global list. */
    893 	LIST_REMOVE(ks, ks_entry);
    894 	nsems--;
    895 	mutex_exit(&ksem_lock);
    896 
    897 	refcnt = ks->ks_ref;
    898 	if (refcnt) {
    899 		/* Mark as unlinked, if there are references. */
    900 		ks->ks_flags |= KS_UNLINKED;
    901 	}
    902 	mutex_exit(&ks->ks_lock);
    903 
    904 	if (refcnt == 0) {
    905 		ksem_free(ks);
    906 	}
    907 	return 0;
    908 }
    909 
    910 int
    911 sys__ksem_post(struct lwp *l, const struct sys__ksem_post_args *uap,
    912     register_t *retval)
    913 {
    914 	/* {
    915 		intptr_t id;
    916 	} */
    917 	int fd, error;
    918 	ksem_t *ks;
    919 
    920 	error = ksem_get(SCARG(uap, id), &ks, &fd);
    921 	if (error) {
    922 		return error;
    923 	}
    924 	KASSERT(mutex_owned(&ks->ks_lock));
    925 	if (ks->ks_value == SEM_VALUE_MAX) {
    926 		error = EOVERFLOW;
    927 		goto out;
    928 	}
    929 	ks->ks_value++;
    930 	if (ks->ks_waiters) {
    931 		cv_broadcast(&ks->ks_cv);
    932 	}
    933 out:
    934 	ksem_release(ks, fd);
    935 	return error;
    936 }
    937 
    938 int
    939 do_ksem_wait(lwp_t *l, intptr_t id, bool try_p, struct timespec *abstime)
    940 {
    941 	int fd, error, timeo;
    942 	ksem_t *ks;
    943 
    944 	error = ksem_get(id, &ks, &fd);
    945 	if (error) {
    946 		return error;
    947 	}
    948 	KASSERT(mutex_owned(&ks->ks_lock));
    949 	while (ks->ks_value == 0) {
    950 		ks->ks_waiters++;
    951 		if (!try_p && abstime != NULL) {
    952 			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, abstime,
    953 			    &timeo, NULL);
    954 			if (error != 0)
    955 				goto out;
    956 		} else {
    957 			timeo = 0;
    958 		}
    959 		error = try_p ? EAGAIN : cv_timedwait_sig(&ks->ks_cv,
    960 		    &ks->ks_lock, timeo);
    961 		ks->ks_waiters--;
    962 		if (error)
    963 			goto out;
    964 	}
    965 	ks->ks_value--;
    966 out:
    967 	ksem_release(ks, fd);
    968 	return error;
    969 }
    970 
    971 int
    972 sys__ksem_wait(struct lwp *l, const struct sys__ksem_wait_args *uap,
    973     register_t *retval)
    974 {
    975 	/* {
    976 		intptr_t id;
    977 	} */
    978 
    979 	return do_ksem_wait(l, SCARG(uap, id), false, NULL);
    980 }
    981 
    982 int
    983 sys__ksem_timedwait(struct lwp *l, const struct sys__ksem_timedwait_args *uap,
    984     register_t *retval)
    985 {
    986 	/* {
    987 		intptr_t id;
    988 		const struct timespec *abstime;
    989 	} */
    990 	struct timespec ts;
    991 	int error;
    992 
    993 	error = copyin(SCARG(uap, abstime), &ts, sizeof(ts));
    994 	if (error != 0)
    995 		return error;
    996 
    997 	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
    998 		return EINVAL;
    999 
   1000 	error = do_ksem_wait(l, SCARG(uap, id), false, &ts);
   1001 	if (error == EWOULDBLOCK)
   1002 		error = ETIMEDOUT;
   1003 	return error;
   1004 }
   1005 
   1006 int
   1007 sys__ksem_trywait(struct lwp *l, const struct sys__ksem_trywait_args *uap,
   1008     register_t *retval)
   1009 {
   1010 	/* {
   1011 		intptr_t id;
   1012 	} */
   1013 
   1014 	return do_ksem_wait(l, SCARG(uap, id), true, NULL);
   1015 }
   1016 
   1017 int
   1018 sys__ksem_getvalue(struct lwp *l, const struct sys__ksem_getvalue_args *uap,
   1019     register_t *retval)
   1020 {
   1021 	/* {
   1022 		intptr_t id;
   1023 		unsigned int *value;
   1024 	} */
   1025 	int fd, error;
   1026 	ksem_t *ks;
   1027 	unsigned int val;
   1028 
   1029 	error = ksem_get(SCARG(uap, id), &ks, &fd);
   1030 	if (error) {
   1031 		return error;
   1032 	}
   1033 	KASSERT(mutex_owned(&ks->ks_lock));
   1034 	val = ks->ks_value;
   1035 	ksem_release(ks, fd);
   1036 
   1037 	return copyout(&val, SCARG(uap, value), sizeof(val));
   1038 }
   1039 
   1040 int
   1041 sys__ksem_destroy(struct lwp *l, const struct sys__ksem_destroy_args *uap,
   1042     register_t *retval)
   1043 {
   1044 	/* {
   1045 		intptr_t id;
   1046 	} */
   1047 	int fd, error;
   1048 	ksem_t *ks;
   1049 
   1050 	intptr_t id = SCARG(uap, id);
   1051 
   1052 	error = ksem_get(id, &ks, &fd);
   1053 	if (error) {
   1054 		return error;
   1055 	}
   1056 	KASSERT(mutex_owned(&ks->ks_lock));
   1057 
   1058 	/* Operation is only for unnamed semaphores. */
   1059 	if (ks->ks_name != NULL) {
   1060 		error = EINVAL;
   1061 		goto out;
   1062 	}
   1063 	/* Cannot destroy if there are waiters. */
   1064 	if (ks->ks_waiters) {
   1065 		error = EBUSY;
   1066 		goto out;
   1067 	}
   1068 	if (KSEM_ID_IS_PSHARED(id)) {
   1069 		/* Cannot destroy if we did't create it. */
   1070 		KASSERT(fd == -1);
   1071 		KASSERT(ks->ks_pshared_proc != NULL);
   1072 		if (ks->ks_pshared_proc != curproc) {
   1073 			error = EINVAL;
   1074 			goto out;
   1075 		}
   1076 		fd = ks->ks_pshared_fd;
   1077 
   1078 		/* Mark it dead so subsequent lookups fail. */
   1079 		ks->ks_pshared_proc = NULL;
   1080 
   1081 		/* Do an fd_getfile() to for the benefit of fd_close(). */
   1082 		file_t *fp __diagused = fd_getfile(fd);
   1083 		KASSERT(fp != NULL);
   1084 		KASSERT(fp->f_ksem == ks);
   1085 	}
   1086 out:
   1087 	ksem_release(ks, -1);
   1088 	if (error) {
   1089 		if (!KSEM_ID_IS_PSHARED(id))
   1090 			fd_putfile(fd);
   1091 		return error;
   1092 	}
   1093 	return fd_close(fd);
   1094 }
   1095