Home | History | Annotate | Line # | Download | only in kern
sys_descrip.c revision 1.35
      1 /*	$NetBSD: sys_descrip.c,v 1.35 2019/09/15 16:25:57 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
     31  *	The Regents of the University of California.  All rights reserved.
     32  * (c) UNIX System Laboratories, Inc.
     33  * All or some portions of this file are derived from material licensed
     34  * to the University of California by American Telephone and Telegraph
     35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     36  * the permission of UNIX System Laboratories, Inc.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  * 3. Neither the name of the University nor the names of its contributors
     47  *    may be used to endorse or promote products derived from this software
     48  *    without specific prior written permission.
     49  *
     50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     60  * SUCH DAMAGE.
     61  *
     62  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
     63  */
     64 
     65 /*
     66  * System calls on descriptors.
     67  */
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.35 2019/09/15 16:25:57 christos Exp $");
     71 
     72 #include <sys/param.h>
     73 #include <sys/systm.h>
     74 #include <sys/filedesc.h>
     75 #include <sys/kernel.h>
     76 #include <sys/vnode.h>
     77 #include <sys/proc.h>
     78 #include <sys/file.h>
     79 #include <sys/namei.h>
     80 #include <sys/socket.h>
     81 #include <sys/socketvar.h>
     82 #include <sys/stat.h>
     83 #include <sys/ioctl.h>
     84 #include <sys/fcntl.h>
     85 #include <sys/kmem.h>
     86 #include <sys/pool.h>
     87 #include <sys/syslog.h>
     88 #include <sys/unistd.h>
     89 #include <sys/resourcevar.h>
     90 #include <sys/conf.h>
     91 #include <sys/event.h>
     92 #include <sys/kauth.h>
     93 #include <sys/atomic.h>
     94 #include <sys/mount.h>
     95 #include <sys/syscallargs.h>
     96 
     97 #include <uvm/uvm_readahead.h>
     98 
     99 /*
    100  * Duplicate a file descriptor.
    101  */
    102 int
    103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
    104 {
    105 	/* {
    106 		syscallarg(int)	fd;
    107 	} */
    108 	int error, newfd, oldfd;
    109 	file_t *fp;
    110 
    111 	oldfd = SCARG(uap, fd);
    112 
    113 	if ((fp = fd_getfile(oldfd)) == NULL) {
    114 		return EBADF;
    115 	}
    116 	error = fd_dup(fp, 0, &newfd, false);
    117 	fd_putfile(oldfd);
    118 	*retval = newfd;
    119 	return error;
    120 }
    121 
    122 /*
    123  * Duplicate a file descriptor to a particular value.
    124  */
    125 int
    126 dodup(struct lwp *l, int from, int to, int flags, register_t *retval)
    127 {
    128 	int error;
    129 	file_t *fp;
    130 
    131 	if ((fp = fd_getfile(from)) == NULL)
    132 		return EBADF;
    133 	mutex_enter(&fp->f_lock);
    134 	fp->f_count++;
    135 	mutex_exit(&fp->f_lock);
    136 	fd_putfile(from);
    137 
    138 	if ((u_int)to >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
    139 	    (u_int)to >= maxfiles)
    140 		error = EBADF;
    141 	else if (from == to)
    142 		error = 0;
    143 	else
    144 		error = fd_dup2(fp, to, flags);
    145 	closef(fp);
    146 	*retval = to;
    147 
    148 	return error;
    149 }
    150 
    151 int
    152 sys_dup3(struct lwp *l, const struct sys_dup3_args *uap, register_t *retval)
    153 {
    154 	/* {
    155 		syscallarg(int)	from;
    156 		syscallarg(int)	to;
    157 		syscallarg(int)	flags;
    158 	} */
    159 	return dodup(l, SCARG(uap, from), SCARG(uap, to), SCARG(uap, flags),
    160 	    retval);
    161 }
    162 
    163 int
    164 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
    165 {
    166 	/* {
    167 		syscallarg(int)	from;
    168 		syscallarg(int)	to;
    169 	} */
    170 	return dodup(l, SCARG(uap, from), SCARG(uap, to), 0, retval);
    171 }
    172 
    173 /*
    174  * fcntl call which is being passed to the file's fs.
    175  */
    176 static int
    177 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
    178 {
    179 	int		error;
    180 	u_int		size;
    181 	void		*data, *memp;
    182 #define STK_PARAMS	128
    183 	char		stkbuf[STK_PARAMS];
    184 
    185 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
    186 		return (EBADF);
    187 
    188 	/*
    189 	 * Interpret high order word to find amount of data to be
    190 	 * copied to/from the user's address space.
    191 	 */
    192 	size = (size_t)F_PARAM_LEN(cmd);
    193 	if (size > F_PARAM_MAX)
    194 		return (EINVAL);
    195 	memp = NULL;
    196 	if (size > sizeof(stkbuf)) {
    197 		memp = kmem_alloc(size, KM_SLEEP);
    198 		data = memp;
    199 	} else
    200 		data = stkbuf;
    201 	if (cmd & F_FSIN) {
    202 		if (size) {
    203 			error = copyin(arg, data, size);
    204 			if (error) {
    205 				if (memp)
    206 					kmem_free(memp, size);
    207 				return (error);
    208 			}
    209 		} else
    210 			*(void **)data = arg;
    211 	} else if ((cmd & F_FSOUT) != 0 && size != 0) {
    212 		/*
    213 		 * Zero the buffer so the user always
    214 		 * gets back something deterministic.
    215 		 */
    216 		memset(data, 0, size);
    217 	} else if (cmd & F_FSVOID)
    218 		*(void **)data = arg;
    219 
    220 
    221 	error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
    222 
    223 	/*
    224 	 * Copy any data to user, size was
    225 	 * already set and checked above.
    226 	 */
    227 	if (error == 0 && (cmd & F_FSOUT) && size)
    228 		error = copyout(data, arg, size);
    229 	if (memp)
    230 		kmem_free(memp, size);
    231 	return (error);
    232 }
    233 
    234 int
    235 do_fcntl_lock(int fd, int cmd, struct flock *fl)
    236 {
    237 	file_t *fp;
    238 	vnode_t *vp;
    239 	proc_t *p;
    240 	int error, flg;
    241 
    242 	if ((error = fd_getvnode(fd, &fp)) != 0)
    243 		return error;
    244 
    245 	vp = fp->f_vnode;
    246 	if (fl->l_whence == SEEK_CUR)
    247 		fl->l_start += fp->f_offset;
    248 
    249 	flg = F_POSIX;
    250 	p = curproc;
    251 
    252 	switch (cmd) {
    253 	case F_SETLKW:
    254 		flg |= F_WAIT;
    255 		/* Fall into F_SETLK */
    256 
    257 		/* FALLTHROUGH */
    258 	case F_SETLK:
    259 		switch (fl->l_type) {
    260 		case F_RDLCK:
    261 			if ((fp->f_flag & FREAD) == 0) {
    262 				error = EBADF;
    263 				break;
    264 			}
    265 			if ((p->p_flag & PK_ADVLOCK) == 0) {
    266 				mutex_enter(p->p_lock);
    267 				p->p_flag |= PK_ADVLOCK;
    268 				mutex_exit(p->p_lock);
    269 			}
    270 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
    271 			break;
    272 
    273 		case F_WRLCK:
    274 			if ((fp->f_flag & FWRITE) == 0) {
    275 				error = EBADF;
    276 				break;
    277 			}
    278 			if ((p->p_flag & PK_ADVLOCK) == 0) {
    279 				mutex_enter(p->p_lock);
    280 				p->p_flag |= PK_ADVLOCK;
    281 				mutex_exit(p->p_lock);
    282 			}
    283 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
    284 			break;
    285 
    286 		case F_UNLCK:
    287 			error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
    288 			break;
    289 
    290 		default:
    291 			error = EINVAL;
    292 			break;
    293 		}
    294 		break;
    295 
    296 	case F_GETLK:
    297 		if (fl->l_type != F_RDLCK &&
    298 		    fl->l_type != F_WRLCK &&
    299 		    fl->l_type != F_UNLCK) {
    300 			error = EINVAL;
    301 			break;
    302 		}
    303 		error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
    304 		break;
    305 
    306 	default:
    307 		error = EINVAL;
    308 		break;
    309 	}
    310 
    311 	fd_putfile(fd);
    312 	return error;
    313 }
    314 
    315 static int
    316 do_fcntl_getpath(struct lwp *l, file_t *fp, char *upath)
    317 {
    318 	char *kpath;
    319 	int error;
    320 
    321 	if (fp->f_type != DTYPE_VNODE)
    322 		return EOPNOTSUPP;
    323 
    324 	kpath = PNBUF_GET();
    325 
    326 	error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, l, l->l_proc);
    327 	if (!error)
    328 		error = copyoutstr(kpath, upath, MAXPATHLEN, NULL);
    329 
    330 	PNBUF_PUT(kpath);
    331 
    332 	return error;
    333 }
    334 
    335 /*
    336  * The file control system call.
    337  */
    338 int
    339 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
    340 {
    341 	/* {
    342 		syscallarg(int)		fd;
    343 		syscallarg(int)		cmd;
    344 		syscallarg(void *)	arg;
    345 	} */
    346 	int fd, i, tmp, error, cmd, newmin;
    347 	filedesc_t *fdp;
    348 	file_t *fp;
    349 	struct flock fl;
    350 	bool cloexec = false;
    351 
    352 	fd = SCARG(uap, fd);
    353 	cmd = SCARG(uap, cmd);
    354 	fdp = l->l_fd;
    355 	error = 0;
    356 
    357 	switch (cmd) {
    358 	case F_CLOSEM:
    359 		if (fd < 0)
    360 			return EBADF;
    361 		while ((i = fdp->fd_lastfile) >= fd) {
    362 			if (fd_getfile(i) == NULL) {
    363 				/* Another thread has updated. */
    364 				continue;
    365 			}
    366 			fd_close(i);
    367 		}
    368 		return 0;
    369 
    370 	case F_MAXFD:
    371 		*retval = fdp->fd_lastfile;
    372 		return 0;
    373 
    374 	case F_SETLKW:
    375 	case F_SETLK:
    376 	case F_GETLK:
    377 		error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
    378 		if (error)
    379 			return error;
    380 		error = do_fcntl_lock(fd, cmd, &fl);
    381 		if (cmd == F_GETLK && error == 0)
    382 			error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
    383 		return error;
    384 
    385 	default:
    386 		/* Handled below */
    387 		break;
    388 	}
    389 
    390 	if ((fp = fd_getfile(fd)) == NULL)
    391 		return EBADF;
    392 
    393 	if ((cmd & F_FSCTL)) {
    394 		error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
    395 		fd_putfile(fd);
    396 		return error;
    397 	}
    398 
    399 	switch (cmd) {
    400 	case F_DUPFD_CLOEXEC:
    401 		cloexec = true;
    402 		/*FALLTHROUGH*/
    403 	case F_DUPFD:
    404 		newmin = (long)SCARG(uap, arg);
    405 		if ((u_int)newmin >=
    406 		    l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
    407 		    (u_int)newmin >= maxfiles) {
    408 			fd_putfile(fd);
    409 			return EINVAL;
    410 		}
    411 		error = fd_dup(fp, newmin, &i, cloexec);
    412 		*retval = i;
    413 		break;
    414 
    415 	case F_GETFD:
    416 		*retval = fdp->fd_dt->dt_ff[fd]->ff_exclose;
    417 		break;
    418 
    419 	case F_SETFD:
    420 		fd_set_exclose(l, fd,
    421 		    ((long)SCARG(uap, arg) & FD_CLOEXEC) != 0);
    422 		break;
    423 
    424 	case F_GETNOSIGPIPE:
    425 		*retval = (fp->f_flag & FNOSIGPIPE) != 0;
    426 		break;
    427 
    428 	case F_SETNOSIGPIPE:
    429 		if (SCARG(uap, arg))
    430 			atomic_or_uint(&fp->f_flag, FNOSIGPIPE);
    431 		else
    432 			atomic_and_uint(&fp->f_flag, ~FNOSIGPIPE);
    433 		*retval = 0;
    434 		break;
    435 
    436 	case F_GETFL:
    437 		*retval = OFLAGS(fp->f_flag);
    438 		break;
    439 
    440 	case F_SETFL:
    441 		/* XXX not guaranteed to be atomic. */
    442 		tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
    443 		error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
    444 		if (error)
    445 			break;
    446 		i = tmp ^ fp->f_flag;
    447 		if (i & FNONBLOCK) {
    448 			int flgs = tmp & FNONBLOCK;
    449 			error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
    450 			if (error) {
    451 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
    452 				    &fp->f_flag);
    453 				break;
    454 			}
    455 		}
    456 		if (i & FASYNC) {
    457 			int flgs = tmp & FASYNC;
    458 			error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
    459 			if (error) {
    460 				if (i & FNONBLOCK) {
    461 					tmp = fp->f_flag & FNONBLOCK;
    462 					(void)(*fp->f_ops->fo_ioctl)(fp,
    463 						FIONBIO, &tmp);
    464 				}
    465 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
    466 				    &fp->f_flag);
    467 				break;
    468 			}
    469 		}
    470 		fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
    471 		break;
    472 
    473 	case F_GETOWN:
    474 		error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
    475 		*retval = tmp;
    476 		break;
    477 
    478 	case F_SETOWN:
    479 		tmp = (int)(uintptr_t) SCARG(uap, arg);
    480 		error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
    481 		break;
    482 
    483 	case F_GETPATH:
    484 		error = do_fcntl_getpath(l, fp, SCARG(uap, arg));
    485 		break;
    486 
    487 	default:
    488 		error = EINVAL;
    489 	}
    490 
    491 	fd_putfile(fd);
    492 	return (error);
    493 }
    494 
    495 /*
    496  * Close a file descriptor.
    497  */
    498 int
    499 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
    500 {
    501 	/* {
    502 		syscallarg(int)	fd;
    503 	} */
    504 	int error;
    505 	int fd = SCARG(uap, fd);
    506 
    507 	if (fd_getfile(fd) == NULL) {
    508 		return EBADF;
    509 	}
    510 
    511 	error = fd_close(fd);
    512 	if (error == ERESTART) {
    513 #ifdef DIAGNOSTIC
    514 		printf("%s[%d]: close(%d) returned ERESTART\n",
    515 		    l->l_proc->p_comm, (int)l->l_proc->p_pid, fd);
    516 #endif
    517 		error = EINTR;
    518 	}
    519 
    520 	return error;
    521 }
    522 
    523 /*
    524  * Return status information about a file descriptor.
    525  * Common function for compat code.
    526  */
    527 int
    528 do_sys_fstat(int fd, struct stat *sb)
    529 {
    530 	file_t *fp;
    531 	int error;
    532 
    533 	if ((fp = fd_getfile(fd)) == NULL) {
    534 		return EBADF;
    535 	}
    536 	error = (*fp->f_ops->fo_stat)(fp, sb);
    537 	fd_putfile(fd);
    538 
    539 	return error;
    540 }
    541 
    542 /*
    543  * Return status information about a file descriptor.
    544  */
    545 int
    546 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap,
    547 	      register_t *retval)
    548 {
    549 	/* {
    550 		syscallarg(int)			fd;
    551 		syscallarg(struct stat *)	sb;
    552 	} */
    553 	struct stat sb;
    554 	int error;
    555 
    556 	error = do_sys_fstat(SCARG(uap, fd), &sb);
    557 	if (error == 0) {
    558 		error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
    559 	}
    560 	return error;
    561 }
    562 
    563 /*
    564  * Return pathconf information about a file descriptor.
    565  */
    566 int
    567 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
    568 	      register_t *retval)
    569 {
    570 	/* {
    571 		syscallarg(int)	fd;
    572 		syscallarg(int)	name;
    573 	} */
    574 	int fd, error;
    575 	file_t *fp;
    576 
    577 	fd = SCARG(uap, fd);
    578 	error = 0;
    579 
    580 	if ((fp = fd_getfile(fd)) == NULL) {
    581 		return (EBADF);
    582 	}
    583 	switch (fp->f_type) {
    584 	case DTYPE_SOCKET:
    585 	case DTYPE_PIPE:
    586 		if (SCARG(uap, name) != _PC_PIPE_BUF)
    587 			error = EINVAL;
    588 		else
    589 			*retval = PIPE_BUF;
    590 		break;
    591 
    592 	case DTYPE_VNODE:
    593 		error = VOP_PATHCONF(fp->f_vnode, SCARG(uap, name), retval);
    594 		break;
    595 
    596 	case DTYPE_KQUEUE:
    597 		error = EINVAL;
    598 		break;
    599 
    600 	default:
    601 		error = EOPNOTSUPP;
    602 		break;
    603 	}
    604 
    605 	fd_putfile(fd);
    606 	return (error);
    607 }
    608 
    609 /*
    610  * Apply an advisory lock on a file descriptor.
    611  *
    612  * Just attempt to get a record lock of the requested type on
    613  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
    614  */
    615 /* ARGSUSED */
    616 int
    617 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
    618 {
    619 	/* {
    620 		syscallarg(int)	fd;
    621 		syscallarg(int)	how;
    622 	} */
    623 	int fd, how, error;
    624 	file_t *fp;
    625 	vnode_t	*vp;
    626 	struct flock lf;
    627 
    628 	fd = SCARG(uap, fd);
    629 	how = SCARG(uap, how);
    630 
    631 	if ((error = fd_getvnode(fd, &fp)) != 0)
    632 		return error == EINVAL ? EOPNOTSUPP : error;
    633 
    634 	vp = fp->f_vnode;
    635 	lf.l_whence = SEEK_SET;
    636 	lf.l_start = 0;
    637 	lf.l_len = 0;
    638 
    639 	switch (how & ~LOCK_NB) {
    640 	case LOCK_UN:
    641 		lf.l_type = F_UNLCK;
    642 		atomic_and_uint(&fp->f_flag, ~FHASLOCK);
    643 		error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
    644 		fd_putfile(fd);
    645 		return error;
    646 	case LOCK_EX:
    647 		lf.l_type = F_WRLCK;
    648 		break;
    649 	case LOCK_SH:
    650 		lf.l_type = F_RDLCK;
    651 		break;
    652 	default:
    653 		fd_putfile(fd);
    654 		return EINVAL;
    655 	}
    656 
    657 	atomic_or_uint(&fp->f_flag, FHASLOCK);
    658 	if (how & LOCK_NB) {
    659 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
    660 	} else {
    661 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
    662 	}
    663 	fd_putfile(fd);
    664 	return error;
    665 }
    666 
    667 int
    668 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
    669 {
    670 	file_t *fp;
    671 	vnode_t *vp;
    672 	off_t endoffset;
    673 	int error;
    674 
    675 	CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
    676 	CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
    677 	CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
    678 
    679 	if (offset < 0) {
    680 		return EINVAL;
    681 	}
    682 	if (len == 0) {
    683 		endoffset = INT64_MAX;
    684 	} else if (len > 0 && (INT64_MAX - offset) >= len) {
    685 		endoffset = offset + len;
    686 	} else {
    687 		return EINVAL;
    688 	}
    689 	if ((fp = fd_getfile(fd)) == NULL) {
    690 		return EBADF;
    691 	}
    692 	if (fp->f_type != DTYPE_VNODE) {
    693 		if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
    694 			error = ESPIPE;
    695 		} else {
    696 			error = EOPNOTSUPP;
    697 		}
    698 		fd_putfile(fd);
    699 		return error;
    700 	}
    701 
    702 	switch (advice) {
    703 	case POSIX_FADV_WILLNEED:
    704 	case POSIX_FADV_DONTNEED:
    705 		vp = fp->f_vnode;
    706 		if (vp->v_type != VREG && vp->v_type != VBLK) {
    707 			fd_putfile(fd);
    708 			return 0;
    709 		}
    710 		break;
    711 	}
    712 
    713 	switch (advice) {
    714 	case POSIX_FADV_NORMAL:
    715 	case POSIX_FADV_RANDOM:
    716 	case POSIX_FADV_SEQUENTIAL:
    717 		/*
    718 		 * We ignore offset and size.  Must lock the file to
    719 		 * do this, as f_advice is sub-word sized.
    720 		 */
    721 		mutex_enter(&fp->f_lock);
    722 		fp->f_advice = (u_char)advice;
    723 		mutex_exit(&fp->f_lock);
    724 		error = 0;
    725 		break;
    726 
    727 	case POSIX_FADV_WILLNEED:
    728 		vp = fp->f_vnode;
    729 		error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
    730 		break;
    731 
    732 	case POSIX_FADV_DONTNEED:
    733 		vp = fp->f_vnode;
    734 		/*
    735 		 * Align the region to page boundaries as VOP_PUTPAGES expects
    736 		 * by shrinking it.  We shrink instead of expand because we
    737 		 * do not want to deactivate cache outside of the requested
    738 		 * region.  It means that if the specified region is smaller
    739 		 * than PAGE_SIZE, we do nothing.
    740 		 */
    741 		if (round_page(offset) < trunc_page(endoffset) &&
    742 		    offset <= round_page(offset)) {
    743 			mutex_enter(vp->v_interlock);
    744 			error = VOP_PUTPAGES(vp,
    745 			    round_page(offset), trunc_page(endoffset),
    746 			    PGO_DEACTIVATE | PGO_CLEANIT);
    747 		} else {
    748 			error = 0;
    749 		}
    750 		break;
    751 
    752 	case POSIX_FADV_NOREUSE:
    753 		/* Not implemented yet. */
    754 		error = 0;
    755 		break;
    756 	default:
    757 		error = EINVAL;
    758 		break;
    759 	}
    760 
    761 	fd_putfile(fd);
    762 	return error;
    763 }
    764 
/*
 * posix_fadvise(2) entry point.
 *
 * The status from do_posix_fadvise() is delivered to the caller as the
 * system call's return value (*retval); the system call itself always
 * completes with 0.
 */
int
sys___posix_fadvise50(struct lwp *l,
		      const struct sys___posix_fadvise50_args *uap,
		      register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(off_t) len;
		syscallarg(int) advice;
	} */
	const int fd = SCARG(uap, fd);
	const off_t offset = SCARG(uap, offset);
	const off_t len = SCARG(uap, len);
	const int advice = SCARG(uap, advice);

	*retval = do_posix_fadvise(fd, offset, len, advice);

	return 0;
}
    783 
    784 int
    785 sys_pipe(struct lwp *l, const void *v, register_t *retval)
    786 {
    787 	int fd[2], error;
    788 
    789 	if ((error = pipe1(l, fd, 0)) != 0)
    790 		return error;
    791 
    792 	retval[0] = fd[0];
    793 	retval[1] = fd[1];
    794 
    795 	return 0;
    796 }
    797 
    798 int
    799 sys_pipe2(struct lwp *l, const struct sys_pipe2_args *uap, register_t *retval)
    800 {
    801 	/* {
    802 		syscallarg(int[2]) fildes;
    803 		syscallarg(int) flags;
    804 	} */
    805 	int fd[2], error;
    806 
    807 	if ((error = pipe1(l, fd, SCARG(uap, flags))) != 0)
    808 		return error;
    809 
    810 	if ((error = copyout(fd, SCARG(uap, fildes), sizeof(fd))) != 0)
    811 		return error;
    812 	retval[0] = 0;
    813 	return 0;
    814 }
    815