Home | History | Annotate | Line # | Download | only in kern
sys_generic.c revision 1.100.2.8
      1 /*	$NetBSD: sys_generic.c,v 1.100.2.8 2007/07/15 13:27:45 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*-
     40  * Copyright (c) 2007 The NetBSD Foundation, Inc.
     41  * All rights reserved.
     42  *
     43  * This code is derived from software contributed to The NetBSD Foundation
     44  * by Andrew Doran.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. All advertising materials mentioning features or use of this software
     55  *    must display the following acknowledgement:
     56  *	This product includes software developed by the NetBSD
     57  *	Foundation, Inc. and its contributors.
     58  * 4. Neither the name of The NetBSD Foundation nor the names of its
     59  *    contributors may be used to endorse or promote products derived
     60  *    from this software without specific prior written permission.
     61  *
     62  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     63  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     64  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     65  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     66  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     72  * POSSIBILITY OF SUCH DAMAGE.
     73  */
     74 
     75 /*
     76  * Copyright (c) 1982, 1986, 1989, 1993
     77  *	The Regents of the University of California.  All rights reserved.
     78  * (c) UNIX System Laboratories, Inc.
     79  * All or some portions of this file are derived from material licensed
     80  * to the University of California by American Telephone and Telegraph
     81  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     82  * the permission of UNIX System Laboratories, Inc.
     83  *
     84  * Redistribution and use in source and binary forms, with or without
     85  * modification, are permitted provided that the following conditions
     86  * are met:
     87  * 1. Redistributions of source code must retain the above copyright
     88  *    notice, this list of conditions and the following disclaimer.
     89  * 2. Redistributions in binary form must reproduce the above copyright
     90  *    notice, this list of conditions and the following disclaimer in the
     91  *    documentation and/or other materials provided with the distribution.
     92  * 3. Neither the name of the University nor the names of its contributors
     93  *    may be used to endorse or promote products derived from this software
     94  *    without specific prior written permission.
     95  *
     96  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     97  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     98  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     99  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    100  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    101  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    102  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    103  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    104  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    105  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    106  * SUCH DAMAGE.
    107  *
    108  *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
    109  */
    110 
    111 /*
    112  * System calls relating to files.
    113  */
    114 
    115 /*
    116  * System calls relating to files.
    117  */
    118 
    119 #include <sys/cdefs.h>
    120 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.100.2.8 2007/07/15 13:27:45 ad Exp $");
    121 
    122 #include "opt_ktrace.h"
    123 
    124 #include <sys/param.h>
    125 #include <sys/systm.h>
    126 #include <sys/filedesc.h>
    127 #include <sys/ioctl.h>
    128 #include <sys/file.h>
    129 #include <sys/proc.h>
    130 #include <sys/socketvar.h>
    131 #include <sys/signalvar.h>
    132 #include <sys/uio.h>
    133 #include <sys/kernel.h>
    134 #include <sys/stat.h>
    135 #include <sys/kmem.h>
    136 #include <sys/poll.h>
    137 #include <sys/mount.h>
    138 #include <sys/syscallargs.h>
    139 #ifdef KTRACE
    140 #include <sys/ktrace.h>
    141 #endif
    142 
    143 #include <uvm/uvm_extern.h>
    144 
    145 /* Flags for lwp::l_selflag. */
    146 #define	SEL_RESET	0	/* awoken, interrupted, or not yet polling */
    147 #define	SEL_SCANNING	1	/* polling descriptors */
    148 #define	SEL_BLOCKING	2	/* about to block on select_cv */
    149 
    150 static int	selscan(lwp_t *, fd_mask *, fd_mask *, int, register_t *);
    151 static int	pollscan(lwp_t *, struct pollfd *, int, register_t *);
    152 static void	selclear(void);
    153 
    154 /* Global state for select()/poll(). */
    155 kmutex_t	select_lock;
    156 kcondvar_t	select_cv;
    157 int		nselcoll;
    158 
    159 /*
    160  * Read system call.
    161  */
    162 /* ARGSUSED */
    163 int
    164 sys_read(lwp_t *l, void *v, register_t *retval)
    165 {
    166 	struct sys_read_args /* {
    167 		syscallarg(int)		fd;
    168 		syscallarg(void *)	buf;
    169 		syscallarg(size_t)	nbyte;
    170 	} */ *uap = v;
    171 	int		fd;
    172 	struct file	*fp;
    173 	proc_t		*p;
    174 	struct filedesc	*fdp;
    175 
    176 	fd = SCARG(uap, fd);
    177 	p = l->l_proc;
    178 	fdp = p->p_fd;
    179 
    180 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    181 		return (EBADF);
    182 
    183 	if ((fp->f_flag & FREAD) == 0) {
    184 		mutex_exit(&fp->f_lock);
    185 		return (EBADF);
    186 	}
    187 
    188 	FILE_USE(fp);
    189 
    190 	/* dofileread() will unuse the descriptor for us */
    191 	return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
    192 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
    193 }
    194 
    195 int
    196 dofileread(lwp_t *l, int fd, struct file *fp, void *buf, size_t nbyte,
    197 	off_t *offset, int flags, register_t *retval)
    198 {
    199 	struct iovec aiov;
    200 	struct uio auio;
    201 	proc_t *p;
    202 	struct vmspace *vm;
    203 	size_t cnt;
    204 	int error;
    205 #ifdef KTRACE
    206 	struct iovec	ktriov;
    207 #endif
    208 	p = l->l_proc;
    209 
    210 	error = proc_vmspace_getref(p, &vm);
    211 	if (error) {
    212 		goto out;
    213 	}
    214 
    215 	aiov.iov_base = (void *)buf;
    216 	aiov.iov_len = nbyte;
    217 	auio.uio_iov = &aiov;
    218 	auio.uio_iovcnt = 1;
    219 	auio.uio_resid = nbyte;
    220 	auio.uio_rw = UIO_READ;
    221 	auio.uio_vmspace = vm;
    222 
    223 	/*
    224 	 * Reads return ssize_t because -1 is returned on error.  Therefore
    225 	 * we must restrict the length to SSIZE_MAX to avoid garbage return
    226 	 * values.
    227 	 */
    228 	if (auio.uio_resid > SSIZE_MAX) {
    229 		error = EINVAL;
    230 		goto out;
    231 	}
    232 
    233 #ifdef KTRACE
    234 	/* In case we are tracing, save a copy of iovec */
    235 	ktriov = aiov;
    236 #endif
    237 	cnt = auio.uio_resid;
    238 	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
    239 	if (error)
    240 		if (auio.uio_resid != cnt && (error == ERESTART ||
    241 		    error == EINTR || error == EWOULDBLOCK))
    242 			error = 0;
    243 	cnt -= auio.uio_resid;
    244 #ifdef KTRACE
    245 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
    246 		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
    247 #endif
    248 	*retval = cnt;
    249  out:
    250 	FILE_UNUSE(fp, l);
    251 	uvmspace_free(vm);
    252 	return (error);
    253 }
    254 
    255 /*
    256  * Scatter read system call.
    257  */
    258 int
    259 sys_readv(lwp_t *l, void *v, register_t *retval)
    260 {
    261 	struct sys_readv_args /* {
    262 		syscallarg(int)				fd;
    263 		syscallarg(const struct iovec *)	iovp;
    264 		syscallarg(int)				iovcnt;
    265 	} */ *uap = v;
    266 	struct filedesc	*fdp;
    267 	struct file *fp;
    268 	proc_t *p;
    269 	int fd;
    270 
    271 	fd = SCARG(uap, fd);
    272 	p = l->l_proc;
    273 	fdp = p->p_fd;
    274 
    275 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    276 		return (EBADF);
    277 
    278 	if ((fp->f_flag & FREAD) == 0) {
    279 		mutex_exit(&fp->f_lock);
    280 		return (EBADF);
    281 	}
    282 
    283 	FILE_USE(fp);
    284 
    285 	/* dofilereadv() will unuse the descriptor for us */
    286 	return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
    287 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
    288 }
    289 
    290 int
    291 dofilereadv(lwp_t *l, int fd, struct file *fp, const struct iovec *iovp,
    292 	int iovcnt, off_t *offset, int flags, register_t *retval)
    293 {
    294 	proc_t		*p;
    295 	struct uio	auio;
    296 	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
    297 	struct vmspace	*vm;
    298 	int		i, error;
    299 	size_t		cnt;
    300 	u_int		iovlen;
    301 #ifdef KTRACE
    302 	struct iovec	*ktriov;
    303 #endif
    304 
    305 	p = l->l_proc;
    306 	error = proc_vmspace_getref(p, &vm);
    307 	if (error) {
    308 		goto out;
    309 	}
    310 
    311 #ifdef KTRACE
    312 	ktriov = NULL;
    313 #endif
    314 	/* note: can't use iovlen until iovcnt is validated */
    315 	iovlen = iovcnt * sizeof(struct iovec);
    316 	if ((u_int)iovcnt > UIO_SMALLIOV) {
    317 		if ((u_int)iovcnt > IOV_MAX) {
    318 			error = EINVAL;
    319 			goto out;
    320 		}
    321 		iov = kmem_alloc(iovlen, KM_SLEEP);
    322 		needfree = iov;
    323 	} else if ((u_int)iovcnt > 0) {
    324 		iov = aiov;
    325 		needfree = NULL;
    326 	} else {
    327 		error = EINVAL;
    328 		goto out;
    329 	}
    330 
    331 	auio.uio_iov = iov;
    332 	auio.uio_iovcnt = iovcnt;
    333 	auio.uio_rw = UIO_READ;
    334 	auio.uio_vmspace = vm;
    335 	error = copyin(iovp, iov, iovlen);
    336 	if (error)
    337 		goto done;
    338 	auio.uio_resid = 0;
    339 	for (i = 0; i < iovcnt; i++) {
    340 		auio.uio_resid += iov->iov_len;
    341 		/*
    342 		 * Reads return ssize_t because -1 is returned on error.
    343 		 * Therefore we must restrict the length to SSIZE_MAX to
    344 		 * avoid garbage return values.
    345 		 */
    346 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
    347 			error = EINVAL;
    348 			goto done;
    349 		}
    350 		iov++;
    351 	}
    352 #ifdef KTRACE
    353 	/*
    354 	 * if tracing, save a copy of iovec
    355 	 */
    356 	if (KTRPOINT(p, KTR_GENIO))  {
    357 		ktriov = kmem_alloc(iovlen, KM_SLEEP);
    358 		memcpy((void *)ktriov, (void *)auio.uio_iov, iovlen);
    359 	}
    360 #endif
    361 	cnt = auio.uio_resid;
    362 	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
    363 	if (error)
    364 		if (auio.uio_resid != cnt && (error == ERESTART ||
    365 		    error == EINTR || error == EWOULDBLOCK))
    366 			error = 0;
    367 	cnt -= auio.uio_resid;
    368 #ifdef KTRACE
    369 	if (ktriov != NULL) {
    370 		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
    371 			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
    372 		kmem_free(ktriov, iovlen);
    373 	}
    374 #endif
    375 	*retval = cnt;
    376  done:
    377 	if (needfree)
    378 		kmem_free(needfree, iovlen);
    379  out:
    380 	FILE_UNUSE(fp, l);
    381 	uvmspace_free(vm);
    382 	return (error);
    383 }
    384 
    385 /*
    386  * Write system call
    387  */
    388 int
    389 sys_write(lwp_t *l, void *v, register_t *retval)
    390 {
    391 	struct sys_write_args /* {
    392 		syscallarg(int)			fd;
    393 		syscallarg(const void *)	buf;
    394 		syscallarg(size_t)		nbyte;
    395 	} */ *uap = v;
    396 	int		fd;
    397 	struct file	*fp;
    398 	proc_t		*p;
    399 	struct filedesc	*fdp;
    400 
    401 	fd = SCARG(uap, fd);
    402 	p = l->l_proc;
    403 	fdp = p->p_fd;
    404 
    405 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    406 		return (EBADF);
    407 
    408 	if ((fp->f_flag & FWRITE) == 0) {
    409 		mutex_exit(&fp->f_lock);
    410 		return (EBADF);
    411 	}
    412 
    413 	FILE_USE(fp);
    414 
    415 	/* dofilewrite() will unuse the descriptor for us */
    416 	return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
    417 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
    418 }
    419 
    420 int
    421 dofilewrite(lwp_t *l, int fd, struct file *fp, const void *buf,
    422 	size_t nbyte, off_t *offset, int flags, register_t *retval)
    423 {
    424 	struct iovec aiov;
    425 	struct uio auio;
    426 	proc_t *p;
    427 	struct vmspace *vm;
    428 	size_t cnt;
    429 	int error;
    430 #ifdef KTRACE
    431 	struct iovec	ktriov;
    432 #endif
    433 
    434 	p = l->l_proc;
    435 	error = proc_vmspace_getref(p, &vm);
    436 	if (error) {
    437 		goto out;
    438 	}
    439 	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
    440 	aiov.iov_len = nbyte;
    441 	auio.uio_iov = &aiov;
    442 	auio.uio_iovcnt = 1;
    443 	auio.uio_resid = nbyte;
    444 	auio.uio_rw = UIO_WRITE;
    445 	auio.uio_vmspace = vm;
    446 
    447 	/*
    448 	 * Writes return ssize_t because -1 is returned on error.  Therefore
    449 	 * we must restrict the length to SSIZE_MAX to avoid garbage return
    450 	 * values.
    451 	 */
    452 	if (auio.uio_resid > SSIZE_MAX) {
    453 		error = EINVAL;
    454 		goto out;
    455 	}
    456 
    457 #ifdef KTRACE
    458 	/* In case we are tracing, save a copy of iovec */
    459 	ktriov = aiov;
    460 #endif
    461 	cnt = auio.uio_resid;
    462 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
    463 	if (error) {
    464 		if (auio.uio_resid != cnt && (error == ERESTART ||
    465 		    error == EINTR || error == EWOULDBLOCK))
    466 			error = 0;
    467 		if (error == EPIPE) {
    468 			mutex_enter(&proclist_mutex);
    469 			psignal(p, SIGPIPE);
    470 			mutex_exit(&proclist_mutex);
    471 		}
    472 	}
    473 	cnt -= auio.uio_resid;
    474 #ifdef KTRACE
    475 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
    476 		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
    477 #endif
    478 	*retval = cnt;
    479  out:
    480 	FILE_UNUSE(fp, l);
    481 	uvmspace_free(vm);
    482 	return (error);
    483 }
    484 
    485 /*
    486  * Gather write system call
    487  */
    488 int
    489 sys_writev(lwp_t *l, void *v, register_t *retval)
    490 {
    491 	struct sys_writev_args /* {
    492 		syscallarg(int)				fd;
    493 		syscallarg(const struct iovec *)	iovp;
    494 		syscallarg(int)				iovcnt;
    495 	} */ *uap = v;
    496 	int		fd;
    497 	struct file	*fp;
    498 	proc_t		*p;
    499 	struct filedesc	*fdp;
    500 
    501 	fd = SCARG(uap, fd);
    502 	p = l->l_proc;
    503 	fdp = p->p_fd;
    504 
    505 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    506 		return (EBADF);
    507 
    508 	if ((fp->f_flag & FWRITE) == 0) {
    509 		mutex_exit(&fp->f_lock);
    510 		return (EBADF);
    511 	}
    512 
    513 	FILE_USE(fp);
    514 
    515 	/* dofilewritev() will unuse the descriptor for us */
    516 	return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
    517 	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
    518 }
    519 
    520 int
    521 dofilewritev(lwp_t *l, int fd, struct file *fp, const struct iovec *iovp,
    522 	int iovcnt, off_t *offset, int flags, register_t *retval)
    523 {
    524 	proc_t		*p;
    525 	struct uio	auio;
    526 	struct iovec	*iov, *needfree, aiov[UIO_SMALLIOV];
    527 	struct vmspace	*vm;
    528 	int		i, error;
    529 	size_t		cnt;
    530 	u_int		iovlen;
    531 #ifdef KTRACE
    532 	struct iovec	*ktriov;
    533 #endif
    534 
    535 	p = l->l_proc;
    536 	error = proc_vmspace_getref(p, &vm);
    537 	if (error) {
    538 		goto out;
    539 	}
    540 #ifdef KTRACE
    541 	ktriov = NULL;
    542 #endif
    543 	/* note: can't use iovlen until iovcnt is validated */
    544 	iovlen = iovcnt * sizeof(struct iovec);
    545 	if ((u_int)iovcnt > UIO_SMALLIOV) {
    546 		if ((u_int)iovcnt > IOV_MAX) {
    547 			error = EINVAL;
    548 			goto out;
    549 		}
    550 		iov = kmem_alloc(iovlen, KM_SLEEP);
    551 		needfree = iov;
    552 	} else if ((u_int)iovcnt > 0) {
    553 		iov = aiov;
    554 		needfree = NULL;
    555 	} else {
    556 		error = EINVAL;
    557 		goto out;
    558 	}
    559 
    560 	auio.uio_iov = iov;
    561 	auio.uio_iovcnt = iovcnt;
    562 	auio.uio_rw = UIO_WRITE;
    563 	auio.uio_vmspace = vm;
    564 	error = copyin(iovp, iov, iovlen);
    565 	if (error)
    566 		goto done;
    567 	auio.uio_resid = 0;
    568 	for (i = 0; i < iovcnt; i++) {
    569 		auio.uio_resid += iov->iov_len;
    570 		/*
    571 		 * Writes return ssize_t because -1 is returned on error.
    572 		 * Therefore we must restrict the length to SSIZE_MAX to
    573 		 * avoid garbage return values.
    574 		 */
    575 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
    576 			error = EINVAL;
    577 			goto done;
    578 		}
    579 		iov++;
    580 	}
    581 #ifdef KTRACE
    582 	/*
    583 	 * if tracing, save a copy of iovec
    584 	 */
    585 	if (KTRPOINT(p, KTR_GENIO))  {
    586 		ktriov = kmem_alloc(iovlen, KM_SLEEP);
    587 		memcpy((void *)ktriov, (void *)auio.uio_iov, iovlen);
    588 	}
    589 #endif
    590 	cnt = auio.uio_resid;
    591 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
    592 	if (error) {
    593 		if (auio.uio_resid != cnt && (error == ERESTART ||
    594 		    error == EINTR || error == EWOULDBLOCK))
    595 			error = 0;
    596 		if (error == EPIPE) {
    597 			mutex_enter(&proclist_mutex);
    598 			psignal(p, SIGPIPE);
    599 			mutex_exit(&proclist_mutex);
    600 		}
    601 	}
    602 	cnt -= auio.uio_resid;
    603 #ifdef KTRACE
    604 	if (ktriov != NULL) {
    605 		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
    606 			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
    607 		kmem_free(ktriov, iovlen);
    608 	}
    609 #endif
    610 	*retval = cnt;
    611  done:
    612 	if (needfree)
    613 		kmem_free(needfree, iovlen);
    614  out:
    615 	FILE_UNUSE(fp, l);
    616 	uvmspace_free(vm);
    617 	return (error);
    618 }
    619 
    620 /*
    621  * Ioctl system call
    622  */
    623 /* ARGSUSED */
    624 int
    625 sys_ioctl(lwp_t *l, void *v, register_t *retval)
    626 {
    627 	struct sys_ioctl_args /* {
    628 		syscallarg(int)		fd;
    629 		syscallarg(u_long)	com;
    630 		syscallarg(void *)	data;
    631 	} */ *uap = v;
    632 	struct file	*fp;
    633 	proc_t		*p;
    634 	struct filedesc	*fdp;
    635 	u_long		com;
    636 	int		error;
    637 	u_int		size;
    638 	void 		*data, *memp;
    639 #define	STK_PARAMS	128
    640 	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];
    641 
    642 	error = 0;
    643 	p = l->l_proc;
    644 	fdp = p->p_fd;
    645 
    646 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
    647 		return (EBADF);
    648 
    649 	FILE_USE(fp);
    650 
    651 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
    652 		error = EBADF;
    653 		com = 0;
    654 		goto out;
    655 	}
    656 
    657 	switch (com = SCARG(uap, com)) {
    658 	case FIONCLEX:
    659 		rw_enter(&fdp->fd_lock, RW_WRITER);
    660 		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
    661 		rw_exit(&fdp->fd_lock);
    662 		goto out;
    663 
    664 	case FIOCLEX:
    665 		rw_enter(&fdp->fd_lock, RW_WRITER);
    666 		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
    667 		rw_exit(&fdp->fd_lock);
    668 		goto out;
    669 	}
    670 
    671 	/*
    672 	 * Interpret high order word to find amount of data to be
    673 	 * copied to/from the user's address space.
    674 	 */
    675 	size = IOCPARM_LEN(com);
    676 	if (size > IOCPARM_MAX) {
    677 		error = ENOTTY;
    678 		goto out;
    679 	}
    680 	memp = NULL;
    681 	if (size > sizeof(stkbuf)) {
    682 		memp = kmem_alloc(size, KM_SLEEP);
    683 		data = memp;
    684 	} else
    685 		data = (void *)stkbuf;
    686 	if (com&IOC_IN) {
    687 		if (size) {
    688 			error = copyin(SCARG(uap, data), data, size);
    689 			if (error) {
    690 				if (memp)
    691 					kmem_free(memp, size);
    692 				goto out;
    693 			}
    694 #ifdef KTRACE
    695 			if (KTRPOINT(p, KTR_GENIO)) {
    696 				struct iovec iov;
    697 				iov.iov_base = SCARG(uap, data);
    698 				iov.iov_len = size;
    699 				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
    700 					size, 0);
    701 			}
    702 #endif
    703 		} else
    704 			*(void **)data = SCARG(uap, data);
    705 	} else if ((com&IOC_OUT) && size)
    706 		/*
    707 		 * Zero the buffer so the user always
    708 		 * gets back something deterministic.
    709 		 */
    710 		memset(data, 0, size);
    711 	else if (com&IOC_VOID)
    712 		*(void **)data = SCARG(uap, data);
    713 
    714 	switch (com) {
    715 
    716 	case FIONBIO:
    717 		mutex_enter(&fp->f_lock);
    718 		if (*(int *)data != 0)
    719 			fp->f_flag |= FNONBLOCK;
    720 		else
    721 			fp->f_flag &= ~FNONBLOCK;
    722 		mutex_exit(&fp->f_lock);
    723 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
    724 		break;
    725 
    726 	case FIOASYNC:
    727 		mutex_enter(&fp->f_lock);
    728 		if (*(int *)data != 0)
    729 			fp->f_flag |= FASYNC;
    730 		else
    731 			fp->f_flag &= ~FASYNC;
    732 		mutex_exit(&fp->f_lock);
    733 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
    734 		break;
    735 
    736 	default:
    737 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
    738 		/*
    739 		 * Copy any data to user, size was
    740 		 * already set and checked above.
    741 		 */
    742 		if (error == 0 && (com&IOC_OUT) && size) {
    743 			error = copyout(data, SCARG(uap, data), size);
    744 #ifdef KTRACE
    745 			if (KTRPOINT(p, KTR_GENIO)) {
    746 				struct iovec iov;
    747 				iov.iov_base = SCARG(uap, data);
    748 				iov.iov_len = size;
    749 				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
    750 					size, error);
    751 			}
    752 #endif
    753 		}
    754 		break;
    755 	}
    756 	if (memp)
    757 		kmem_free(memp, size);
    758  out:
    759 	FILE_UNUSE(fp, l);
    760 	switch (error) {
    761 	case -1:
    762 		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
    763 		    "pid=%d comm=%s\n",
    764 		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
    765 		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
    766 		    p->p_pid, p->p_comm);
    767 		/* FALLTHROUGH */
    768 	case EPASSTHROUGH:
    769 		error = ENOTTY;
    770 		/* FALLTHROUGH */
    771 	default:
    772 		return (error);
    773 	}
    774 }
    775 
    776 /*
    777  * Select system call.
    778  */
    779 int
    780 sys_pselect(lwp_t *l, void *v, register_t *retval)
    781 {
    782 	struct sys_pselect_args /* {
    783 		syscallarg(int)				nd;
    784 		syscallarg(fd_set *)			in;
    785 		syscallarg(fd_set *)			ou;
    786 		syscallarg(fd_set *)			ex;
    787 		syscallarg(const struct timespec *)	ts;
    788 		syscallarg(sigset_t *)			mask;
    789 	} */ * const uap = v;
    790 	struct timespec	ats;
    791 	struct timeval	atv, *tv = NULL;
    792 	sigset_t	amask, *mask = NULL;
    793 	int		error;
    794 
    795 	if (SCARG(uap, ts)) {
    796 		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
    797 		if (error)
    798 			return error;
    799 		atv.tv_sec = ats.tv_sec;
    800 		atv.tv_usec = ats.tv_nsec / 1000;
    801 		tv = &atv;
    802 	}
    803 	if (SCARG(uap, mask) != NULL) {
    804 		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
    805 		if (error)
    806 			return error;
    807 		mask = &amask;
    808 	}
    809 
    810 	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
    811 	    SCARG(uap, ou), SCARG(uap, ex), tv, mask);
    812 }
    813 
/*
 * Validate a timeout and record the monotonic start time for later
 * gettimeleft() calls.  Returns 0 on success, -1 on a bad timeout.
 */
int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	/* Reject out-of-range timeouts up front. */
	if (itimerfix(tv) != 0)
		return -1;
	getmicrouptime(sleeptv);
	return 0;
}
    822 
    823 int
    824 gettimeleft(struct timeval *tv, struct timeval *sleeptv)
    825 {
    826 	/*
    827 	 * We have to recalculate the timeout on every retry.
    828 	 */
    829 	struct timeval slepttv;
    830 	/*
    831 	 * reduce tv by elapsed time
    832 	 * based on monotonic time scale
    833 	 */
    834 	getmicrouptime(&slepttv);
    835 	timeradd(tv, sleeptv, tv);
    836 	timersub(tv, &slepttv, tv);
    837 	*sleeptv = slepttv;
    838 	return tvtohz(tv);
    839 }
    840 
    841 int
    842 sys_select(lwp_t *l, void *v, register_t *retval)
    843 {
    844 	struct sys_select_args /* {
    845 		syscallarg(int)			nd;
    846 		syscallarg(fd_set *)		in;
    847 		syscallarg(fd_set *)		ou;
    848 		syscallarg(fd_set *)		ex;
    849 		syscallarg(struct timeval *)	tv;
    850 	} */ * const uap = v;
    851 	struct timeval atv, *tv = NULL;
    852 	int error;
    853 
    854 	if (SCARG(uap, tv)) {
    855 		error = copyin(SCARG(uap, tv), (void *)&atv,
    856 			sizeof(atv));
    857 		if (error)
    858 			return error;
    859 		tv = &atv;
    860 	}
    861 
    862 	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
    863 	    SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
    864 }
    865 
/*
 * Common code for select()/pselect(): copy in the descriptor sets,
 * scan them until a descriptor is ready, the timeout expires, or a
 * signal arrives, then copy the result sets back out.
 */
int
selcommon(lwp_t *l, register_t *retval, int nd, fd_set *u_in,
	  fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	/*
	 * 'bits' holds six consecutive areas of 'ni' bytes: the input
	 * in/ou/ex sets at offsets 0-2 and the result sets at 3-5.
	 */
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	proc_t		* const p = l->l_proc;
	char 		*bits;
	int		ncoll, error, timo;
	size_t		ni;
	sigset_t	oldmask;
	struct timeval  sleeptv;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = kmem_alloc(ni * 6, KM_SLEEP);
	else
		bits = smallbits;

	/* Copy in each supplied set; a NULL pointer means all-zero. */
#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/* Install the temporary signal mask, if one was supplied. */
	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

	mutex_enter(&select_lock);
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		/* Mark ourselves scanning; a wakeup resets l_selflag. */
	 	l->l_selflag = SEL_SCANNING;
		ncoll = nselcoll;
 		mutex_exit(&select_lock);

		error = selscan(l, (fd_mask *)(bits + ni * 0),
		    (fd_mask *)(bits + ni * 3), nd, retval);

		mutex_enter(&select_lock);
		if (error || *retval)
			break;
		if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
			break;
		/* Rescan if a wakeup or a collision raced with the scan. */
		if (l->l_selflag != SEL_SCANNING || ncoll != nselcoll)
			continue;
		l->l_selflag = SEL_BLOCKING;
		error = cv_timedwait_sig(&select_cv, &select_lock, timo);
		if (error != 0)
			break;
	}
	selclear();
	mutex_exit(&select_lock);

	/* Restore the caller's original signal mask. */
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}

 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0 && u_in != NULL)
		error = copyout(bits + ni * 3, u_in, ni);
	if (error == 0 && u_ou != NULL)
		error = copyout(bits + ni * 4, u_ou, ni);
	if (error == 0 && u_ex != NULL)
		error = copyout(bits + ni * 5, u_ex, ni);
	if (bits != smallbits)
		kmem_free(bits, ni * 6);
	return (error);
}
    966 
    967 int
    968 selscan(lwp_t *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
    969 	register_t *retval)
    970 {
    971 	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
    972 			       POLLWRNORM | POLLHUP | POLLERR,
    973 			       POLLRDBAND };
    974 	proc_t *p = l->l_proc;
    975 	struct filedesc	*fdp;
    976 	int msk, i, j, fd, n;
    977 	fd_mask ibits, obits;
    978 	struct file *fp;
    979 
    980 	fdp = p->p_fd;
    981 	n = 0;
    982 	for (msk = 0; msk < 3; msk++) {
    983 		for (i = 0; i < nfd; i += NFDBITS) {
    984 			ibits = *ibitp++;
    985 			obits = 0;
    986 			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
    987 				ibits &= ~(1 << j);
    988 				if ((fp = fd_getfile(fdp, fd)) == NULL)
    989 					return (EBADF);
    990 				FILE_USE(fp);
    991 				if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
    992 					obits |= (1 << j);
    993 					n++;
    994 				}
    995 				FILE_UNUSE(fp, l);
    996 			}
    997 			*obitp++ = obits;
    998 		}
    999 	}
   1000 	*retval = n;
   1001 	return (0);
   1002 }
   1003 
   1004 /*
   1005  * Poll system call.
   1006  */
   1007 int
   1008 sys_poll(lwp_t *l, void *v, register_t *retval)
   1009 {
   1010 	struct sys_poll_args /* {
   1011 		syscallarg(struct pollfd *)	fds;
   1012 		syscallarg(u_int)		nfds;
   1013 		syscallarg(int)			timeout;
   1014 	} */ * const uap = v;
   1015 	struct timeval	atv, *tv = NULL;
   1016 
   1017 	if (SCARG(uap, timeout) != INFTIM) {
   1018 		atv.tv_sec = SCARG(uap, timeout) / 1000;
   1019 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
   1020 		tv = &atv;
   1021 	}
   1022 
   1023 	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
   1024 		tv, NULL);
   1025 }
   1026 
   1027 /*
   1028  * Poll system call.
   1029  */
   1030 int
   1031 sys_pollts(lwp_t *l, void *v, register_t *retval)
   1032 {
   1033 	struct sys_pollts_args /* {
   1034 		syscallarg(struct pollfd *)		fds;
   1035 		syscallarg(u_int)			nfds;
   1036 		syscallarg(const struct timespec *)	ts;
   1037 		syscallarg(const sigset_t *)		mask;
   1038 	} */ * const uap = v;
   1039 	struct timespec	ats;
   1040 	struct timeval	atv, *tv = NULL;
   1041 	sigset_t	amask, *mask = NULL;
   1042 	int		error;
   1043 
   1044 	if (SCARG(uap, ts)) {
   1045 		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
   1046 		if (error)
   1047 			return error;
   1048 		atv.tv_sec = ats.tv_sec;
   1049 		atv.tv_usec = ats.tv_nsec / 1000;
   1050 		tv = &atv;
   1051 	}
   1052 	if (SCARG(uap, mask)) {
   1053 		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
   1054 		if (error)
   1055 			return error;
   1056 		mask = &amask;
   1057 	}
   1058 
   1059 	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
   1060 		tv, mask);
   1061 }
   1062 
/*
 * Common code for poll(2) and pollts(2).
 *
 * Copies the pollfd array in from userspace, optionally installs a
 * temporary signal mask (pollts), then scans the descriptors in a loop
 * until an event is found, the timeout expires, or the wait is
 * interrupted by a signal.  On success the updated pollfd array is
 * copied back out to the caller.
 */
int
pollcommon(lwp_t *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	char		smallbits[32 * sizeof(struct pollfd)];
	proc_t		* const p = l->l_proc;
	void *		bits;
	sigset_t	oldmask;
	int		ncoll, error, timo;
	size_t		ni;
	struct timeval	sleeptv;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	/* Small requests fit in the on-stack buffer; otherwise allocate. */
	if (ni > sizeof(smallbits))
		bits = kmem_alloc(ni, KM_SLEEP);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	/*
	 * timo == 0 presumably means "no timeout" to cv_timedwait_sig()
	 * -- confirm against condvar(9).
	 */
	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Install the caller-supplied signal mask for the duration of
	 * the wait, remembering the old one so it can be restored below.
	 */
	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

	mutex_enter(&select_lock);
	SLIST_INIT(&l->l_selwait);
	for (;;) {
		/*
		 * Sample the collision counter and mark this LWP as
		 * scanning before dropping the lock.  selwakeup() sets
		 * l_selflag to SEL_RESET and/or bumps nselcoll if an
		 * event fires while we scan; either condition below
		 * forces another pass instead of blocking.
		 */
		ncoll = nselcoll;
		l->l_selflag = SEL_SCANNING;
		mutex_exit(&select_lock);

		error = pollscan(l, (struct pollfd *)bits, nfds, retval);

		mutex_enter(&select_lock);
		if (error || *retval)
			break;
		if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
			break;
		if (l->l_selflag != SEL_SCANNING || nselcoll != ncoll)
			continue;
		l->l_selflag = SEL_BLOCKING;
		error = cv_timedwait_sig(&select_cv, &select_lock, timo);
		if (error != 0)
			break;
	}
	/* Detach from every selinfo record we registered with. */
	selclear();
	mutex_exit(&select_lock);

	/* Restore the original signal mask. */
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}
 done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0)
		error = copyout(bits, u_fds, ni);
	if (bits != smallbits)
		kmem_free(bits, ni);
	return (error);
}
   1146 
   1147 int
   1148 pollscan(lwp_t *l, struct pollfd *fds, int nfd, register_t *retval)
   1149 {
   1150 	proc_t		*p = l->l_proc;
   1151 	struct filedesc	*fdp;
   1152 	int		i, n;
   1153 	struct file	*fp;
   1154 
   1155 	fdp = p->p_fd;
   1156 	n = 0;
   1157 	for (i = 0; i < nfd; i++, fds++) {
   1158 		if (fds->fd >= fdp->fd_nfiles) {
   1159 			fds->revents = POLLNVAL;
   1160 			n++;
   1161 		} else if (fds->fd < 0) {
   1162 			fds->revents = 0;
   1163 		} else {
   1164 			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
   1165 				fds->revents = POLLNVAL;
   1166 				n++;
   1167 			} else {
   1168 				FILE_USE(fp);
   1169 				fds->revents = (*fp->f_ops->fo_poll)(fp,
   1170 				    fds->events | POLLERR | POLLHUP, l);
   1171 				if (fds->revents != 0)
   1172 					n++;
   1173 				FILE_UNUSE(fp, l);
   1174 			}
   1175 		}
   1176 	}
   1177 	*retval = n;
   1178 	return (0);
   1179 }
   1180 
   1181 /*ARGSUSED*/
   1182 int
   1183 seltrue(dev_t dev, int events, lwp_t *l)
   1184 {
   1185 
   1186 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
   1187 }
   1188 
   1189 /*
   1190  * Record a select request.
   1191  */
   1192 void
   1193 selrecord(lwp_t *selector, struct selinfo *sip)
   1194 {
   1195 
   1196 	mutex_enter(&select_lock);
   1197 	if (sip->sel_lwp == NULL) {
   1198 		/* First named waiter, although there may be more. */
   1199 		sip->sel_lwp = selector;
   1200 		SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain);
   1201 	} else if (sip->sel_lwp != selector) {
   1202 		/* Multiple waiters. */
   1203 		sip->sel_collision = true;
   1204 	}
   1205 	mutex_exit(&select_lock);
   1206 }
   1207 
   1208 /*
   1209  * Do a wakeup when a selectable event occurs.
   1210  */
   1211 void
   1212 selwakeup(struct selinfo *sip)
   1213 {
   1214 	lwp_t *l;
   1215 
   1216 	mutex_enter(&select_lock);
   1217 	if (sip->sel_collision) {
   1218 		/* Multiple waiters - just notify everybody. */
   1219 		nselcoll++;
   1220 		sip->sel_collision = false;
   1221 		cv_broadcast(&select_cv);
   1222 	} else if (sip->sel_lwp != NULL) {
   1223 		/* Only one LWP waiting. */
   1224 		l = sip->sel_lwp;
   1225 		if (l->l_selflag == SEL_BLOCKING) {
   1226 			/*
   1227 			 * If it's sleeping, wake it up.  If not, it's
   1228 			 * already awake but hasn't yet removed itself
   1229 			 * from the selector.  We reset the state below
   1230 			 * so that we only attempt to do this once.
   1231 			 */
   1232 			lwp_lock(l);
   1233 			if (l->l_wchan == &select_cv) {
   1234 				/* lwp_unsleep() releases the LWP lock. */
   1235 				lwp_unsleep(l);
   1236 			} else
   1237 				lwp_unlock(l);
   1238 		} else {
   1239 			/*
   1240 			 * Not yet asleep.  Reset its state below so that
   1241 			 * it will go around again.
   1242 			 */
   1243 		}
   1244 		l->l_selflag = SEL_RESET;
   1245 	}
   1246 	mutex_exit(&select_lock);
   1247 }
   1248 
/*
 * Notify waiters of an event on the object: wake any select()/poll()
 * waiters recorded in sip, then post the hint to registered knotes.
 */
void
selnotify(struct selinfo *sip, long knhint)
{

	selwakeup(sip);
	KNOTE(&sip->sel_klist, knhint);
}
   1256 
   1257 /*
   1258  * Remove an LWP from all objects that it is waiting for.
   1259  */
   1260 static void
   1261 selclear(void)
   1262 {
   1263 	struct selinfo *sip;
   1264 	lwp_t *l = curlwp;
   1265 
   1266 	KASSERT(mutex_owned(&select_lock));
   1267 
   1268 	SLIST_FOREACH(sip, &l->l_selwait, sel_chain) {
   1269 		KASSERT(sip->sel_lwp == l);
   1270 		sip->sel_lwp = NULL;
   1271 	}
   1272 }
   1273 
   1274 /*
   1275  * Initialize the select/poll system calls.
   1276  */
   1277 void
   1278 selsysinit(void)
   1279 {
   1280 
   1281 	mutex_init(&select_lock, MUTEX_DRIVER, IPL_VM);
   1282 	cv_init(&select_cv, "select");
   1283 }
   1284