/*	$NetBSD: vfs_vnops.c,v 1.187 2014/01/17 10:55:02 hannken Exp $	*/

/*-
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.187 2014/01/17 10:55:02 hannken Exp $");

#include "veriexec.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/poll.h>
#include <sys/kauth.h>
#include <sys/syslog.h>
#include <sys/fstrans.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/wapbl.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_readahead.h>

#ifdef UNION
#include <fs/union/union.h>
#endif

int (*vn_union_readdir_hook)(struct vnode **, struct file *, struct lwp *);

#include <sys/verified_exec.h>

static int vn_read(file_t *fp, off_t *offset, struct uio *uio,
	    kauth_cred_t cred, int flags);
static int vn_write(file_t *fp, off_t *offset, struct uio *uio,
	    kauth_cred_t cred, int flags);
static int vn_closefile(file_t *fp);
static int vn_poll(file_t *fp, int events);
static int vn_fcntl(file_t *fp, u_int com, void *data);
static int vn_statfile(file_t *fp, struct stat *sb);
static int vn_ioctl(file_t *fp, u_long com, void *data);

const struct fileops vnops = {
	.fo_read = vn_read,
	.fo_write = vn_write,
	.fo_ioctl = vn_ioctl,
	.fo_fcntl = vn_fcntl,
	.fo_poll = vn_poll,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_kqfilter = vn_kqfilter,
	.fo_restart = fnullop_restart,
};
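
/*
 * Illustrative sketch (not part of this file): the file descriptor layer
 * dispatches through this table rather than calling the vn_* routines
 * directly, so a read on a vnode-backed descriptor ends up roughly as
 * the following (simplified from the generic file read path):
 *
 *	error = (*fp->f_ops->fo_read)(fp, &fp->f_offset, &auio,
 *	    fp->f_cred, FOF_UPDATE_OFFSET);
 *
 * For descriptors created by open(2) on a file system object, f_ops
 * points at the vnops table above, so fo_read resolves to vn_read().
 */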

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	struct vnode *vp;
	struct lwp *l = curlwp;
	kauth_cred_t cred = l->l_cred;
	struct vattr va;
	int error;
	const char *pathstring;

	if ((fmode & (O_CREAT | O_DIRECTORY)) == (O_CREAT | O_DIRECTORY))
		return EINVAL;

	ndp->ni_cnd.cn_flags &= TRYEMULROOT | NOCHROOT;

	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 &&
		    ((fmode & O_NOFOLLOW) == 0))
			ndp->ni_cnd.cn_flags |= FOLLOW;
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags |= LOCKLEAF;
		if ((fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
	}

	pathstring = pathbuf_stringcopy_get(ndp->ni_pathbuf);
	if (pathstring == NULL) {
		return ENOMEM;
	}

	error = namei(ndp);
	if (error)
		goto out;

	vp = ndp->ni_vp;

#if NVERIEXEC > 0
	error = veriexec_openchk(l, ndp->ni_vp, pathstring, fmode);
	if (error)
		goto bad;
#endif /* NVERIEXEC > 0 */

	if (fmode & O_CREAT) {
		if (ndp->ni_vp == NULL) {
			vattr_null(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			if (fmode & O_EXCL)
				va.va_vaflags |= VA_EXCLUSIVE;
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
			    &ndp->ni_cnd, &va);
			vput(ndp->ni_dvp);
			if (error)
				goto out;
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	if (ndp->ni_vp->v_type == VLNK) {
		error = EFTYPE;
		goto bad;
	}

	if ((fmode & O_CREAT) == 0) {
		error = vn_openchk(vp, cred, fmode);
		if (error != 0)
			goto bad;
	}

	if (fmode & O_TRUNC) {
		vattr_null(&va);
		va.va_size = 0;
		error = VOP_SETATTR(vp, &va, cred);
		if (error != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred)) != 0)
		goto bad;
	if (fmode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

bad:
	if (error)
		vput(vp);
out:
	pathbuf_stringcopy_put(ndp->ni_pathbuf, pathstring);
	return (error);
}
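
/*
 * Illustrative sketch (not part of this file): a typical in-kernel caller
 * sets up a nameidata with a pathbuf, lets vn_open() resolve and open the
 * vnode, and later releases it with vn_close().  The path "/some/file"
 * and the surrounding error handling are hypothetical.
 *
 *	struct pathbuf *pb;
 *	struct nameidata nd;
 *	struct vnode *vp;
 *	int error;
 *
 *	pb = pathbuf_create("/some/file");
 *	if (pb == NULL)
 *		return ENOMEM;
 *	NDINIT(&nd, LOOKUP, FOLLOW, pb);
 *	error = vn_open(&nd, FREAD, 0);
 *	pathbuf_destroy(pb);
 *	if (error != 0)
 *		return error;
 *	vp = nd.ni_vp;
 *	VOP_UNLOCK(vp);			// vn_open() returns the vnode locked
 *	// ... use vp, e.g. via vn_rdwr() ...
 *	vn_close(vp, FREAD, curlwp->l_cred);
 */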

/*
 * Check for write permissions on the specified vnode.
 * Prototype text segments cannot be written.
 */
int
vn_writechk(struct vnode *vp)
{

	/*
	 * If the vnode is in use as a process's text,
	 * we can't allow writing.
	 */
	if (vp->v_iflag & VI_TEXT)
		return (ETXTBSY);
	return (0);
}

int
vn_openchk(struct vnode *vp, kauth_cred_t cred, int fflags)
{
	int permbits = 0;
	int error;

	if ((fflags & O_DIRECTORY) != 0 && vp->v_type != VDIR)
		return ENOTDIR;

	if ((fflags & FREAD) != 0) {
		permbits = VREAD;
	}
	if ((fflags & (FWRITE | O_TRUNC)) != 0) {
		permbits |= VWRITE;
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error != 0)
			goto bad;
	}
	error = VOP_ACCESS(vp, permbits, cred);
bad:
	return error;
}

/*
 * Mark a vnode as having executable mappings.
 */
void
vn_markexec(struct vnode *vp)
{

	if ((vp->v_iflag & VI_EXECMAP) != 0) {
		/* Safe unlocked, as long as caller holds a reference. */
		return;
	}

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
		vp->v_iflag |= VI_EXECMAP;
	}
	mutex_exit(vp->v_interlock);
}

/*
 * Mark a vnode as being the text of a process.
 * Fail if the vnode is currently writable.
 */
int
vn_marktext(struct vnode *vp)
{

	if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP)) == (VI_TEXT|VI_EXECMAP)) {
		/* Safe unlocked, as long as caller holds a reference. */
		return (0);
	}

	mutex_enter(vp->v_interlock);
	if (vp->v_writecount != 0) {
		KASSERT((vp->v_iflag & VI_TEXT) == 0);
		mutex_exit(vp->v_interlock);
		return (ETXTBSY);
	}
	if ((vp->v_iflag & VI_EXECMAP) == 0) {
		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag |= (VI_TEXT | VI_EXECMAP);
	mutex_exit(vp->v_interlock);
	return (0);
}

/*
 * Vnode close call.
 *
 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node.
 */
int
vn_close(struct vnode *vp, int flags, kauth_cred_t cred)
{
	int error;

	if (flags & FWRITE) {
		mutex_enter(vp->v_interlock);
		KASSERT(vp->v_writecount > 0);
		vp->v_writecount--;
		mutex_exit(vp->v_interlock);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_CLOSE(vp, flags, cred);
	vput(vp);
	return (error);
}

/*
 * Enforce the process's RLIMIT_FSIZE limit on a regular-file write:
 * post SIGXFSZ and return EFBIG if the write would exceed the limit.
 */
static int
enforce_rlimit_fsize(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct lwp *l = curlwp;
	off_t testoff;

	if (uio->uio_rw != UIO_WRITE || vp->v_type != VREG)
		return 0;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		testoff = vp->v_size;
	else
		testoff = uio->uio_offset;

	if (testoff + uio->uio_resid >
	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		mutex_enter(proc_lock);
		psignal(l->l_proc, SIGXFSZ);
		mutex_exit(proc_lock);
		return EFBIG;
	}

	return 0;
}

/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
    enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid,
    struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if (rw == UIO_READ) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
		} else /* UIO_WRITE */ {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		}
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = base;
	aiov.iov_len = len;
	auio.uio_resid = len;
	auio.uio_offset = offset;
	auio.uio_rw = rw;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}

	if ((error = enforce_rlimit_fsize(vp, &auio, ioflg)) != 0)
		goto out;

	if (rw == UIO_READ) {
		error = VOP_READ(vp, &auio, ioflg, cred);
	} else {
		error = VOP_WRITE(vp, &auio, ioflg, cred);
	}

	if (aresid)
		*aresid = auio.uio_resid;
	else
		if (auio.uio_resid && error == 0)
			error = EIO;

out:
	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}
	return (error);
}
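
/*
 * Illustrative sketch (not part of this file): reading the beginning of
 * an already opened vnode into a kernel buffer with vn_rdwr().  The
 * names "buf" and "resid" are hypothetical.
 *
 *	char buf[512];
 *	size_t resid;
 *	int error;
 *
 *	error = vn_rdwr(UIO_READ, vp, buf, sizeof(buf), 0, UIO_SYSSPACE,
 *	    0, curlwp->l_cred, &resid, curlwp);
 *	if (error == 0)
 *		// sizeof(buf) - resid bytes were actually read
 *
 * Passing IO_NODELOCKED in ioflg tells vn_rdwr() that the caller already
 * holds the vnode lock; otherwise it takes and releases the lock itself.
 */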

int
vn_readdir(file_t *fp, char *bf, int segflg, u_int count, int *done,
    struct lwp *l, off_t **cookies, int *ncookies)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	struct iovec aiov;
	struct uio auio;
	int error, eofflag;

	/* Limit the size on any kernel buffers used by VOP_READDIR */
	count = min(MAXBSIZE, count);

unionread:
	if (vp->v_type != VDIR)
		return (EINVAL);
	aiov.iov_base = bf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	if (segflg == UIO_SYSSPACE) {
		UIO_SETUP_SYSSPACE(&auio);
	} else {
		KASSERT(l == curlwp);
		auio.uio_vmspace = l->l_proc->p_vmspace;
	}
	auio.uio_resid = count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
	    ncookies);
	mutex_enter(&fp->f_lock);
	fp->f_offset = auio.uio_offset;
	mutex_exit(&fp->f_lock);
	VOP_UNLOCK(vp);
	if (error)
		return (error);

	if (count == auio.uio_resid && vn_union_readdir_hook) {
		struct vnode *ovp = vp;

		error = (*vn_union_readdir_hook)(&vp, fp, l);
		if (error)
			return (error);
		if (vp != ovp)
			goto unionread;
	}

	if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		vref(vp);
		mutex_enter(&fp->f_lock);
		fp->f_data = vp;
		fp->f_offset = 0;
		mutex_exit(&fp->f_lock);
		vrele(tvp);
		goto unionread;
	}
	*done = count - auio.uio_resid;
	return error;
}
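
/*
 * Illustrative sketch (not part of this file): how a getdents-style
 * caller might drain a directory already open as "fp" into a userspace
 * buffer.  The names "uap" and "retval" are hypothetical.
 *
 *	int done, error;
 *
 *	error = vn_readdir(fp, uap->buf, UIO_USERSPACE, uap->count,
 *	    &done, curlwp, NULL, NULL);
 *	if (error == 0)
 *		*retval = done;
 *
 * With a NULL "cookies"/"ncookies" pair no NFS-style directory cookies
 * are collected; "done" reports how many bytes of dirent records were
 * transferred.
 */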

/*
 * File table vnode read routine.
 */
static int
vn_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	int error, ioflag, fflag;
	size_t count;

	ioflag = IO_ADV_ENCODE(fp->f_advice);
	fflag = fp->f_flag;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fflag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC))
		ioflag |= IO_SYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	uio->uio_offset = *offset;
	count = uio->uio_resid;
	error = VOP_READ(vp, uio, ioflag, cred);
	if (flags & FOF_UPDATE_OFFSET)
		*offset += count - uio->uio_resid;
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode write routine.
 */
static int
vn_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	struct vnode *vp = (struct vnode *)fp->f_data;
	int error, ioflag, fflag;
	size_t count;

	ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT;
	fflag = fp->f_flag;
	if (vp->v_type == VREG && (fflag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fflag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if (fflag & FFSYNC ||
	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
		ioflag |= IO_SYNC;
	else if (fflag & FDSYNC)
		ioflag |= IO_DSYNC;
	if (fflag & FALTIO)
		ioflag |= IO_ALTSEMANTICS;
	if (fflag & FDIRECT)
		ioflag |= IO_DIRECT;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	uio->uio_offset = *offset;
	count = uio->uio_resid;

	if ((error = enforce_rlimit_fsize(vp, uio, ioflag)) != 0)
		goto out;

	error = VOP_WRITE(vp, uio, ioflag, cred);

	if (flags & FOF_UPDATE_OFFSET) {
		if (ioflag & IO_APPEND) {
			/*
			 * SUSv3 describes the behaviour for count = 0 as
			 * follows: "Before any action ... is taken, and if
			 * nbyte is zero and the file is a regular file, the
			 * write() function ... in the absence of errors ...
			 * shall return zero and have no other results."
			 */
			if (count)
				*offset = uio->uio_offset;
		} else
			*offset += count - uio->uio_resid;
	}

out:
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * File table vnode stat routine.
 */
static int
vn_statfile(file_t *fp, struct stat *sb)
{
	struct vnode *vp = fp->f_data;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = vn_stat(vp, sb);
	VOP_UNLOCK(vp);
	return error;
}

int
vn_stat(struct vnode *vp, struct stat *sb)
{
	struct vattr va;
	int error;
	mode_t mode;

	memset(&va, 0, sizeof(va));
	error = VOP_GETATTR(vp, &va, kauth_cred_get());
	if (error)
		return (error);
	/*
	 * Copy from vattr table
	 */
	memset(sb, 0, sizeof(*sb));
	sb->st_dev = va.va_fsid;
	sb->st_ino = va.va_fileid;
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		return (EBADF);
	}
	sb->st_mode = mode;
	sb->st_nlink = va.va_nlink;
	sb->st_uid = va.va_uid;
	sb->st_gid = va.va_gid;
	sb->st_rdev = va.va_rdev;
	sb->st_size = va.va_size;
	sb->st_atimespec = va.va_atime;
	sb->st_mtimespec = va.va_mtime;
	sb->st_ctimespec = va.va_ctime;
	sb->st_birthtimespec = va.va_birthtime;
	sb->st_blksize = va.va_blocksize;
	sb->st_flags = va.va_flags;
	sb->st_gen = 0;
	sb->st_blocks = va.va_bytes / S_BLKSIZE;
	return (0);
}

/*
 * File table vnode fcntl routine.
 */
static int
vn_fcntl(file_t *fp, u_int com, void *data)
{
	struct vnode *vp = fp->f_data;
	int error;

	error = VOP_FCNTL(vp, com, data, fp->f_flag, kauth_cred_get());
	return (error);
}

/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(file_t *fp, u_long com, void *data)
{
	struct vnode *vp = fp->f_data, *ovp;
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_GETATTR(vp, &vattr, kauth_cred_get());
			VOP_UNLOCK(vp);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if ((com == FIONWRITE) || (com == FIONSPACE)) {
			/*
			 * Files don't have send queues, so there never
			 * are any bytes in them, nor is there any
			 * open space in them.
			 */
			*(int *)data = 0;
			return (0);
		}
		if (com == FIOGETBMAP) {
			daddr_t *block;

			if (*(daddr_t *)data < 0)
				return (EINVAL);
			block = (daddr_t *)data;
			return (VOP_BMAP(vp, *block, NULL, block, NULL));
		}
		if (com == OFIOGETBMAP) {
			daddr_t ibn, obn;

			if (*(int32_t *)data < 0)
				return (EINVAL);
			ibn = (daddr_t)*(int32_t *)data;
			error = VOP_BMAP(vp, ibn, NULL, &obn, NULL);
			*(int32_t *)data = (int32_t)obn;
			return error;
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */
	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag,
		    kauth_cred_get());
		if (error == 0 && com == TIOCSCTTY) {
			vref(vp);
			mutex_enter(proc_lock);
			ovp = curproc->p_session->s_ttyvp;
			curproc->p_session->s_ttyvp = vp;
			mutex_exit(proc_lock);
			if (ovp != NULL)
				vrele(ovp);
		}
		return (error);

	default:
		return (EPASSTHROUGH);
	}
}

/*
 * File table vnode poll routine.
 */
static int
vn_poll(file_t *fp, int events)
{

	return (VOP_POLL(fp->f_data, events));
}

/*
 * File table vnode kqfilter routine.
 */
int
vn_kqfilter(file_t *fp, struct knote *kn)
{

	return (VOP_KQFILTER(fp->f_data, kn));
}

/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags)
{
	int error;

#if 0
	KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
#endif
	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
	KASSERT(!mutex_owned(vp->v_interlock));

#ifdef DIAGNOSTIC
	if (wapbl_vphaswapbl(vp))
		WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif

	do {
		/*
		 * XXX PR 37706 forced unmount of file systems is unsafe.
		 * The race between vclean() and this function is the
		 * remaining problem.
		 */
		mutex_enter(vp->v_interlock);
		if (vp->v_iflag & VI_XLOCK) {
			if (flags & LK_NOWAIT) {
				mutex_exit(vp->v_interlock);
				return EBUSY;
			}
			vwait(vp, VI_XLOCK);
			mutex_exit(vp->v_interlock);
			error = ENOENT;
		} else {
			mutex_exit(vp->v_interlock);
			error = VOP_LOCK(vp, (flags & ~LK_RETRY));
			if (error == 0 || error == EDEADLK || error == EBUSY)
				return (error);
		}
	} while (flags & LK_RETRY);
	return (error);
}
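
/*
 * Illustrative sketch (not part of this file): vn_lock() is the usual way
 * to take the vnode lock while holding a reference; the lock is dropped
 * again with VOP_UNLOCK(), or with vput() when the reference should be
 * released as well:
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	// ... operate on the locked vnode, e.g. VOP_GETATTR() ...
 *	VOP_UNLOCK(vp);
 *
 * With LK_RETRY callers typically do not check the return value, as seen
 * elsewhere in this file; without it (or with LK_NOWAIT) the return value
 * must be checked, since the vnode may be in the middle of being cleaned.
 */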

/*
 * File table vnode close routine.
 */
static int
vn_closefile(file_t *fp)
{

	return vn_close(fp->f_data, fp->f_flag, fp->f_cred);
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * Both calls pass in a NULL credential, authorizing a "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, size_t *buflen, void *bf, struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_len = *buflen;
	aiov.iov_base = bf;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;
	auio.uio_resid = *buflen;
	UIO_SETUP_SYSSPACE(&auio);

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL);

	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp);

	if (error == 0)
		*buflen = *buflen - auio.uio_resid;

	return (error);
}
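
/*
 * Illustrative sketch (not part of this file): fetching a system
 * extended attribute into a fixed-size kernel buffer.  The attribute
 * name "some.attribute" is hypothetical.
 *
 *	uint8_t buf[64];
 *	size_t buflen = sizeof(buf);
 *	int error;
 *
 *	error = vn_extattr_get(vp, IO_NODELOCKED,
 *	    EXTATTR_NAMESPACE_SYSTEM, "some.attribute", &buflen, buf,
 *	    curlwp);
 *	if (error == 0)
 *		// buflen now holds the number of bytes returned
 *
 * IO_NODELOCKED is only correct if the caller already holds the vnode
 * lock; pass 0 in ioflg to have the wrapper lock and unlock the vnode.
 */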

/*
 * XXX Failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, size_t buflen, const void *bf, struct lwp *l)
{
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_len = buflen;
	aiov.iov_base = __UNCONST(bf);		/* XXXUNCONST kills const */

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;
	UIO_SETUP_SYSSPACE(&auio);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL);

	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}

	return (error);
}

int
vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
    const char *attrname, struct lwp *l)
{
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL);
	if (error == EOPNOTSUPP)
		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL);

	if ((ioflg & IO_NODELOCKED) == 0) {
		VOP_UNLOCK(vp);
	}

	return (error);
}

/*
 * Lazily allocate a read-ahead context for a regular file vnode.
 * Called and returns with the vnode interlock held; the interlock is
 * dropped around the allocation itself.
 */
void
vn_ra_allocctx(struct vnode *vp)
{
	struct uvm_ractx *ra = NULL;

	KASSERT(mutex_owned(vp->v_interlock));

	if (vp->v_type != VREG) {
		return;
	}
	if (vp->v_ractx != NULL) {
		return;
	}
	if (vp->v_ractx == NULL) {
		mutex_exit(vp->v_interlock);
		ra = uvm_ra_allocctx();
		mutex_enter(vp->v_interlock);
		if (ra != NULL && vp->v_ractx == NULL) {
			vp->v_ractx = ra;
			ra = NULL;
		}
	}
	if (ra != NULL) {
		uvm_ra_freectx(ra);
	}
}

int
vn_fifo_bypass(void *v)
{
	struct vop_generic_args *ap = v;

	return VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, v);
}