/*	$NetBSD: vfs_vnops.c,v 1.169.2.4 2010/11/06 08:08:44 uebayasi Exp $	*/

/*-
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.169.2.4 2010/11/06 08:08:44 uebayasi Exp $");

#include "veriexec.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/poll.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/wapbl.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_readahead.h>

#ifdef UNION
#include <fs/union/union.h>
#endif

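/*
 * Hook used by vn_readdir() below: when set (by the union file system),
 * it is called after a directory read returns no entries so that the
 * read can continue in the layer below a union mount.
 */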
int (*vn_union_readdir_hook) (struct vnode **, struct file *, struct lwp *);

#include <sys/verified_exec.h>

static int vn_read(file_t *fp, off_t *offset, struct uio *uio,
	    kauth_cred_t cred, int flags);
static int vn_write(file_t *fp, off_t *offset, struct uio *uio,
	    kauth_cred_t cred, int flags);
static int vn_closefile(file_t *fp);
static int vn_poll(file_t *fp, int events);
static int vn_fcntl(file_t *fp, u_int com, void *data);
static int vn_statfile(file_t *fp, struct stat *sb);
static int vn_ioctl(file_t *fp, u_long com, void *data);

const struct fileops vnops = {
	.fo_read = vn_read,
	.fo_write = vn_write,
	.fo_ioctl = vn_ioctl,
	.fo_fcntl = vn_fcntl,
	.fo_poll = vn_poll,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_kqfilter = vn_kqfilter,
	.fo_restart = fnullop_restart,
};

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	struct vnode *vp;
	struct lwp *l = curlwp;
	kauth_cred_t cred = l->l_cred;
	struct vattr va;
	int error;
	char *path;

	if ((fmode & (O_CREAT | O_DIRECTORY)) == (O_CREAT | O_DIRECTORY))
		return EINVAL;

	ndp->ni_cnd.cn_flags &= TRYEMULROOT | NOCHROOT;

	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 &&
		    ((fmode & O_NOFOLLOW) == 0))
			ndp->ni_cnd.cn_flags |= FOLLOW;
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags |= LOCKLEAF;
		if ((fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
	}

	VERIEXEC_PATH_GET(ndp->ni_dirp, ndp->ni_segflg, ndp->ni_dirp, path);

	error = namei(ndp);
	if (error)
		goto out;

	vp = ndp->ni_vp;

#if NVERIEXEC > 0
	error = veriexec_openchk(l, ndp->ni_vp, ndp->ni_dirp, fmode);
	if (error)
		goto bad;
#endif /* NVERIEXEC > 0 */

	if (fmode & O_CREAT) {
		if (ndp->ni_vp == NULL) {
			vattr_null(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			if (fmode & O_EXCL)
				va.va_vaflags |= VA_EXCLUSIVE;
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &va);
			if (error)
				goto out;
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (ndp->ni_vp->v_type == VLNK) {
		error = EFTYPE;
		goto bad;
	}

	if ((fmode & O_CREAT) == 0) {
		error = vn_openchk(vp, cred, fmode);
		if (error != 0)
			goto bad;
	}

	if (fmode & O_TRUNC) {
		vattr_null(&va);
		va.va_size = 0;
		error = VOP_SETATTR(vp, &va, cred);
		if (error != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred)) != 0)
		goto bad;
	if (fmode & FWRITE) {
		mutex_enter(&vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(&vp->v_interlock);
	}

bad:
	if (error)
		vput(vp);
out:
	VERIEXEC_PATH_PUT(path);
	return (error);
}
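
/*
 * A minimal usage sketch (not from this file): a typical in-kernel caller
 * fills in a nameidata with NDINIT() and lets vn_open() do the lookup,
 * permission checks and VOP_OPEN.  On success the vnode is returned locked
 * and referenced in nd.ni_vp; the caller unlocks it and later releases it
 * with vn_close().  The exact NDINIT() argument list varies between
 * releases, so treat the call below as illustrative only.
 *
 *	struct nameidata nd;
 *	struct vnode *vp;
 *	int error;
 *
 *	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/some/path");
 *	error = vn_open(&nd, FREAD, 0);
 *	if (error == 0) {
 *		vp = nd.ni_vp;
 *		VOP_UNLOCK(vp);
 *		... use vp ...
 *		vn_close(vp, FREAD, kauth_cred_get());
 *	}
 */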

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{

	/*
	 * If the vnode is in use as a process's text,
	 * we can't allow writing.
	 */
	if (vp->v_iflag & VI_TEXT)
		return (ETXTBSY);
	return (0);
}

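/*
 * Check whether an existing vnode may be opened with the given open flags:
 * reject O_DIRECTORY on non-directories and writes to directories or busy
 * text images, then ask VOP_ACCESS() for the corresponding permission bits.
 */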
int
vn_openchk(struct vnode *vp, kauth_cred_t cred, int fflags)
{
	int permbits = 0;
	int error;

	if ((fflags & O_DIRECTORY) != 0 && vp->v_type != VDIR)
		return ENOTDIR;

	if ((fflags & FREAD) != 0) {
		permbits = VREAD;
	}
	if ((fflags & (FWRITE | O_TRUNC)) != 0) {
		permbits |= VWRITE;
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error != 0)
			goto bad;
	}
	error = VOP_ACCESS(vp, permbits, cred);
bad:
	return error;
}

/*
 * Mark a vnode as having executable mappings.
 */
void
vn_markexec(struct vnode *vp)
{

	if ((vp->v_iflag & VI_EXECMAP) != 0) {
		/* Safe unlocked, as long as caller holds a reference. */
		return;
	}

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
		vp->v_iflag |= VI_EXECMAP;
	}
	mutex_exit(&vp->v_interlock);
}

/*
 * Mark a vnode as being the text of a process.
 * Fail if the vnode is currently writable.
 */
int
vn_marktext(struct vnode *vp)
{

	if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP)) == (VI_TEXT|VI_EXECMAP)) {
		/* Safe unlocked, as long as caller holds a reference. */
		return (0);
	}

	mutex_enter(&vp->v_interlock);
	if (vp->v_writecount != 0) {
		KASSERT((vp->v_iflag & VI_TEXT) == 0);
		mutex_exit(&vp->v_interlock);
		return (ETXTBSY);
	}
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag |= (VI_TEXT | VI_EXECMAP);
	mutex_exit(&vp->v_interlock);
	return (0);
}

/*
 * Vnode close call
 *
 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node.
 */
int
vn_close(struct vnode *vp, int flags, kauth_cred_t cred)
{
	int error;

	if (flags & FWRITE) {
		mutex_enter(&vp->v_interlock);
		vp->v_writecount--;
		mutex_exit(&vp->v_interlock);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_CLOSE(vp, flags, cred);
	vput(vp);
	return (error);
}

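/*
 * Enforce the process's RLIMIT_FSIZE soft limit on a write to a regular
 * file: if the write would extend the file beyond the limit, post SIGXFSZ
 * to the process and return EFBIG without transferring any data.
 */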
static int
enforce_rlimit_fsize(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct lwp *l = curlwp;
	off_t testoff;

	if (uio->uio_rw != UIO_WRITE || vp->v_type != VREG)
		return 0;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		testoff = vp->v_size;
	else
		testoff = uio->uio_offset;

	if (testoff + uio->uio_resid >
	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		mutex_enter(proc_lock);
		psignal(l->l_proc, SIGXFSZ);
		mutex_exit(proc_lock);
		return EFBIG;
	}

	return 0;
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
    enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid,
    struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if (rw == UIO_READ) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
		} else /* UIO_WRITE */ {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		}
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_rw = rw;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}

	if ((error = enforce_rlimit_fsize(vp, &auio, ioflg)) != 0)
		goto out;

	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}

	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;

 out:
	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}
	return (error);
}
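
/*
 * A minimal usage sketch (not from this file): reading the first bytes of
 * a file through a referenced, unlocked vnode.  With IO_NODELOCKED clear,
 * vn_rdwr() takes and drops the vnode lock itself; on return,
 * sizeof(buf) - resid bytes have been copied into buf.
 *
 *	char buf[512];
 *	size_t resid;
 *	int error;
 *
 *	error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), 0, UIO_SYSSPACE,
 *	    0, kauth_cred_get(), &resid, curlwp);
 */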

int
vn_readdir(file_t *fp, char *bf, int segflg, u_int count, int *done,
    struct lwp *l, off_t **cookies, int *ncookies)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	struct iovec aiov;
	struct uio auio;
	int error, eofflag;

	/* Limit the size on any kernel buffers used by VOP_READDIR */
	count = min(MAXBSIZE, count);

unionread:
	if (vp->v_type != VDIR)
		return (EINVAL);
	aiov.iov_base = bf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		KASSERT(l == curlwp);
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}
	auio.uio_resid = count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
	    ncookies);
	mutex_enter(&fp->f_lock);
	fp->f_offset = auio.uio_offset;
	mutex_exit(&fp->f_lock);
	VOP_UNLOCK(vp);
	if (error)
		return (error);

	if (count == auio.uio_resid && vn_union_readdir_hook) {
		struct vnode *ovp = vp;

		error = (*vn_union_readdir_hook)(&vp, fp, l);
		if (error)
			return (error);
		if (vp != ovp)
			goto unionread;
	}

	if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		vref(vp);
		mutex_enter(&fp->f_lock);
		fp->f_data = vp;
		fp->f_offset = 0;
		mutex_exit(&fp->f_lock);
		vrele(tvp);
		goto unionread;
	}
	*done = count - auio.uio_resid;
	return error;
}

/*
 * File table vnode read routine.
 */
static int
vn_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	int count, error, ioflag, fflag;

	ioflag = IO_ADV_ENCODE(fp->f_advice);
	fflag = fp->f_flag;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fflag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC))
		ioflag |= IO_SYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	uio->uio_offset = *offset;
	count = uio->uio_resid;
	error = VOP_READ(vp, uio, ioflag, cred);
	if (flags & FOF_UPDATE_OFFSET)
		*offset += count - uio->uio_resid;
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	int count, error, ioflag, fflag;

	ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT;
	fflag = fp->f_flag;
	if (vp->v_type == VREG && (fflag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fflag & FFSYNC ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	else if (fflag & FDSYNC)
		ioflag |= IO_DSYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	uio->uio_offset = *offset;
	count = uio->uio_resid;

	if ((error = enforce_rlimit_fsize(vp, uio, ioflag)) != 0)
		goto out;

	error = VOP_WRITE(vp, uio, ioflag, cred);

	if (flags & FOF_UPDATE_OFFSET) {
		if (ioflag & IO_APPEND) {
			/*
			 * SUSv3 describes the behaviour for count = 0 as
			 * follows: "Before any action ... is taken, and if
			 * nbyte is zero and the file is a regular file, the
			 * write() function ... in the absence of errors ...
			 * shall return zero and have no other results."
			 */
			if (count)
				*offset = uio->uio_offset;
		} else
			*offset += count - uio->uio_resid;
	}

 out:
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(file_t *fp, struct stat *sb)
{
	struct vnode *vp = fp->f_data;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = vn_stat(vp, sb);
	VOP_UNLOCK(vp);
	return error;
}

int
vn_stat(struct vnode *vp, struct stat *sb)
{
	struct vattr va;
	int error;
	mode_t mode;

	memset(&va, 0, sizeof(va));
	error = VOP_GETATTR(vp, &va, kauth_cred_get());
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	memset(sb, 0, sizeof(*sb));
	sb->st_dev = va.va_fsid;
	sb->st_ino = va.va_fileid;
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	};
	sb->st_mode = mode;
	sb->st_nlink = va.va_nlink;
	sb->st_uid = va.va_uid;
	sb->st_gid = va.va_gid;
	sb->st_rdev = va.va_rdev;
	sb->st_size = va.va_size;
	sb->st_atimespec = va.va_atime;
	sb->st_mtimespec = va.va_mtime;
	sb->st_ctimespec = va.va_ctime;
	sb->st_birthtimespec = va.va_birthtime;
	sb->st_blksize = va.va_blocksize;
	sb->st_flags = va.va_flags;
	sb->st_gen = 0;
	sb->st_blocks = va.va_bytes / S_BLKSIZE;
	return (0);
}

/*
 * File table vnode fcntl routine.
 */
static int
vn_fcntl(file_t *fp, u_int com, void *data)
{
	struct vnode *vp = fp->f_data;
	int error;

	error = VOP_FCNTL(vp, com, data, fp->f_flag, kauth_cred_get());
	return (error);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(file_t *fp, u_long com, void *data)
{
	struct vnode *vp = fp->f_data, *ovp;
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			error = VOP_GETATTR(vp, &vattr,
			    kauth_cred_get());
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if ((com == FIONWRITE) || (com == FIONSPACE)) {
			/*
			 * Files don't have send queues, so there never
			 * are any bytes in them, nor is there any
			 * open space in them.
			 */
			*(int *)data = 0;
			return (0);
		}
		if (com == FIOGETBMAP) {
			daddr_t *block;

			if (*(daddr_t *)data < 0)
				return (EINVAL);
			block = (daddr_t *)data;
			return (VOP_BMAP(vp, *block, NULL, block, NULL));
		}
		if (com == OFIOGETBMAP) {
			daddr_t ibn, obn;

			if (*(int32_t *)data < 0)
				return (EINVAL);
			ibn = (daddr_t)*(int32_t *)data;
			error = VOP_BMAP(vp, ibn, NULL, &obn, NULL);
			*(int32_t *)data = (int32_t)obn;
			return error;
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */
	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag,
		    kauth_cred_get());
		if (error == 0 && com == TIOCSCTTY) {
			vref(vp);
			mutex_enter(proc_lock);
			ovp = curproc->p_session->s_ttyvp;
			curproc->p_session->s_ttyvp = vp;
			mutex_exit(proc_lock);
			if (ovp != NULL)
				vrele(ovp);
		}
		return (error);

	default:
		return (EPASSTHROUGH);
	}
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(file_t *fp, int events)
{

	return (VOP_POLL(fp->f_data, events));
}

/*
 * File table vnode kqfilter routine.
 */
int
vn_kqfilter(file_t *fp, struct knote *kn)
{

	return (VOP_KQFILTER(fp->f_data, kn));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags)
{
	int error;

#if 0
	KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
#endif
	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
	KASSERT(!mutex_owned(&vp->v_interlock));

#ifdef DIAGNOSTIC
	if (wapbl_vphaswapbl(vp))
		WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif

	do {
		/*
		 * XXX PR 37706 forced unmount of file systems is unsafe.
		 * The race between vclean() and this code is the remaining
		 * problem.
		 */
		mutex_enter(&vp->v_interlock);
		if (vp->v_iflag & VI_XLOCK) {
			if (flags & LK_NOWAIT) {
				mutex_exit(&vp->v_interlock);
				return EBUSY;
			}
			vwait(vp, VI_XLOCK);
			mutex_exit(&vp->v_interlock);
			error = ENOENT;
		} else {
			mutex_exit(&vp->v_interlock);
			error = VOP_LOCK(vp, (flags & ~LK_RETRY));
			if (error == 0 || error == EDEADLK || error == EBUSY)
				return (error);
		}
	} while (flags & LK_RETRY);
	return (error);
}
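
/*
 * Usage note (summarizing the loop above): without LK_RETRY, ENOENT is
 * returned if the vnode is being cleaned, and EBUSY if LK_NOWAIT is set
 * and the lock cannot be taken immediately.  With LK_RETRY the attempt is
 * repeated until VOP_LOCK() succeeds or fails with EDEADLK or EBUSY.
 * Callers in this file pair it with VOP_UNLOCK() or vput(), e.g.
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	error = VOP_CLOSE(vp, flags, cred);
 *	vput(vp);
 */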

/*
 * File table vnode close routine.
 */
static int
vn_closefile(file_t *fp)
{

	return vn_close(fp->f_data, fp->f_flag, fp->f_cred);
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * Both calls pass in a NULL credential, authorizing a "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, size_t *buflen, void *bf, struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_len = *buflen;
	aiov.iov_base = bf;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;
	auio.uio_resid = *buflen;
	UIO_SETUP_SYSSPACE(&auio);

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL);

	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp);

	if (error == 0)
		*buflen = *buflen - auio.uio_resid;

	return (error);
}
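
/*
 * A minimal usage sketch (not from this file): fetching a system-namespace
 * attribute into a kernel buffer; the attribute name is a placeholder.
 * With ioflg 0 the vnode is locked and unlocked internally, and on success
 * len is updated to the number of bytes actually returned.
 *
 *	char buf[64];
 *	size_t len = sizeof(buf);
 *	int error;
 *
 *	error = vn_extattr_get(vp, 0, EXTATTR_NAMESPACE_SYSTEM,
 *	    "some.attribute", &len, buf, curlwp);
 */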

/*
 * XXX Failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, size_t buflen, const void *bf, struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_len = buflen;
	aiov.iov_base = __UNCONST(bf);		/* XXXUNCONST kills const */

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;
	UIO_SETUP_SYSSPACE(&auio);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL);

	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}

	return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct lwp *l)
{
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL);
	if (error == EOPNOTSUPP)
		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
		    NULL);

	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}

	return (error);
}

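/*
 * Allocate a read-ahead context for a regular file vnode if it does not
 * already have one.  Called with the vnode interlock held; the interlock
 * is dropped and re-taken around the allocation, so another thread may
 * install a context in the meantime, in which case the new one is freed.
 */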
void
vn_ra_allocctx(struct vnode *vp)
{
	struct uvm_ractx *ra = NULL;

	KASSERT(mutex_owned(&vp->v_interlock));

	if (vp->v_type != VREG) {
		return;
	}
	if (vp->v_ractx != NULL) {
		return;
	}
	if (vp->v_ractx == NULL) {
		mutex_exit(&vp->v_interlock);
		ra = uvm_ra_allocctx();
		mutex_enter(&vp->v_interlock);
		if (ra != NULL && vp->v_ractx == NULL) {
			vp->v_ractx = ra;
			ra = NULL;
		}
	}
	if (ra != NULL) {
		uvm_ra_freectx(ra);
	}
}

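/*
 * Pass a vnode operation straight through to the fifofs vnode operations
 * vector; used by file systems that let fifofs implement their FIFO vnodes.
 */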
int
vn_fifo_bypass(void *v)
{
	struct vop_generic_args *ap = v;

	return VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, v);
}