tmpfs_vnops.c revision 1.133 1 /* $NetBSD: tmpfs_vnops.c,v 1.133 2017/05/26 14:21:01 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * tmpfs vnode interface.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.133 2017/05/26 14:21:01 riastradh Exp $");
39
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/event.h>
44 #include <sys/malloc.h>
45 #include <sys/namei.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/lockf.h>
51 #include <sys/kauth.h>
52 #include <sys/atomic.h>
53
54 #include <uvm/uvm.h>
55
56 #include <miscfs/fifofs/fifo.h>
57 #include <miscfs/genfs/genfs.h>
58 #include <fs/tmpfs/tmpfs_vnops.h>
59 #include <fs/tmpfs/tmpfs.h>
60
61 /*
62 * vnode operations vector used for files stored in a tmpfs file system.
63 */
64 int (**tmpfs_vnodeop_p)(void *);
65 const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
66 { &vop_default_desc, vn_default_error },
67 { &vop_lookup_desc, tmpfs_lookup },
68 { &vop_create_desc, tmpfs_create },
69 { &vop_mknod_desc, tmpfs_mknod },
70 { &vop_open_desc, tmpfs_open },
71 { &vop_close_desc, tmpfs_close },
72 { &vop_access_desc, tmpfs_access },
73 { &vop_getattr_desc, tmpfs_getattr },
74 { &vop_setattr_desc, tmpfs_setattr },
75 { &vop_read_desc, tmpfs_read },
76 { &vop_write_desc, tmpfs_write },
77 { &vop_fallocate_desc, genfs_eopnotsupp },
78 { &vop_fdiscard_desc, genfs_eopnotsupp },
79 { &vop_ioctl_desc, tmpfs_ioctl },
80 { &vop_fcntl_desc, tmpfs_fcntl },
81 { &vop_poll_desc, tmpfs_poll },
82 { &vop_kqfilter_desc, tmpfs_kqfilter },
83 { &vop_revoke_desc, tmpfs_revoke },
84 { &vop_mmap_desc, tmpfs_mmap },
85 { &vop_fsync_desc, tmpfs_fsync },
86 { &vop_seek_desc, tmpfs_seek },
87 { &vop_remove_desc, tmpfs_remove },
88 { &vop_link_desc, tmpfs_link },
89 { &vop_rename_desc, tmpfs_rename },
90 { &vop_mkdir_desc, tmpfs_mkdir },
91 { &vop_rmdir_desc, tmpfs_rmdir },
92 { &vop_symlink_desc, tmpfs_symlink },
93 { &vop_readdir_desc, tmpfs_readdir },
94 { &vop_readlink_desc, tmpfs_readlink },
95 { &vop_abortop_desc, tmpfs_abortop },
96 { &vop_inactive_desc, tmpfs_inactive },
97 { &vop_reclaim_desc, tmpfs_reclaim },
98 { &vop_lock_desc, tmpfs_lock },
99 { &vop_unlock_desc, tmpfs_unlock },
100 { &vop_bmap_desc, tmpfs_bmap },
101 { &vop_strategy_desc, tmpfs_strategy },
102 { &vop_print_desc, tmpfs_print },
103 { &vop_pathconf_desc, tmpfs_pathconf },
104 { &vop_islocked_desc, tmpfs_islocked },
105 { &vop_advlock_desc, tmpfs_advlock },
106 { &vop_bwrite_desc, tmpfs_bwrite },
107 { &vop_getpages_desc, tmpfs_getpages },
108 { &vop_putpages_desc, tmpfs_putpages },
109 { &vop_whiteout_desc, tmpfs_whiteout },
110 { NULL, NULL }
111 };
112
113 const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
114 &tmpfs_vnodeop_p, tmpfs_vnodeop_entries
115 };
116
117 /*
118 * tmpfs_lookup: path name traversal routine.
119 *
120 * Arguments: dvp (directory being searched), vpp (result),
121 * cnp (component name - path).
122 *
123 * => Caller holds a reference and lock on dvp.
124 * => We return looked-up vnode (vpp) locked, with a reference held.
125 */
126 int
127 tmpfs_lookup(void *v)
128 {
129 struct vop_lookup_v2_args /* {
130 struct vnode *a_dvp;
131 struct vnode **a_vpp;
132 struct componentname *a_cnp;
133 } */ *ap = v;
134 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
135 struct componentname *cnp = ap->a_cnp;
136 const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
137 tmpfs_node_t *dnode, *tnode;
138 tmpfs_dirent_t *de;
139 int cachefound, iswhiteout;
140 int error;
141
142 KASSERT(VOP_ISLOCKED(dvp));
143
144 dnode = VP_TO_TMPFS_DIR(dvp);
145 *vpp = NULL;
146
147 /* Check accessibility of directory. */
148 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
149 if (error) {
150 goto out;
151 }
152
153 /*
154 * If requesting the last path component on a read-only file system
155 * with a write operation, deny it.
156 */
157 if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
158 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
159 error = EROFS;
160 goto out;
161 }
162
163 /*
164 * Avoid doing a linear scan of the directory if the requested
165 * directory/name couple is already in the cache.
166 */
167 cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
168 cnp->cn_nameiop, cnp->cn_flags,
169 &iswhiteout, vpp);
170 if (iswhiteout) {
171 cnp->cn_flags |= ISWHITEOUT;
172 }
173 if (cachefound && *vpp == NULLVP) {
174 /* Negative cache hit. */
175 error = ENOENT;
176 goto out;
177 } else if (cachefound) {
178 error = 0;
179 goto out;
180 }
181
182 /*
183 * Treat an unlinked directory as empty (no "." or "..")
184 */
185 if (dnode->tn_links == 0) {
186 KASSERT(dnode->tn_size == 0);
187 error = ENOENT;
188 goto out;
189 }
190
191 if (cnp->cn_flags & ISDOTDOT) {
192 tmpfs_node_t *pnode;
193
194 /*
195 * Lookup of ".." case.
196 */
197 if (lastcn && cnp->cn_nameiop == RENAME) {
198 error = EINVAL;
199 goto out;
200 }
201 KASSERT(dnode->tn_type == VDIR);
202 pnode = dnode->tn_spec.tn_dir.tn_parent;
203 if (pnode == NULL) {
204 error = ENOENT;
205 goto out;
206 }
207
208 error = vcache_get(dvp->v_mount, &pnode, sizeof(pnode), vpp);
209 goto out;
210 } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
211 /*
212 * Lookup of "." case.
213 */
214 if (lastcn && cnp->cn_nameiop == RENAME) {
215 error = EISDIR;
216 goto out;
217 }
218 vref(dvp);
219 *vpp = dvp;
220 error = 0;
221 goto done;
222 }
223
224 /*
225 * Other lookup cases: perform directory scan.
226 */
227 de = tmpfs_dir_lookup(dnode, cnp);
228 if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
229 /*
230 * The entry was not found in the directory. This is valid
231 * if we are creating or renaming an entry and are working
232 * on the last component of the path name.
233 */
234 if (lastcn && (cnp->cn_nameiop == CREATE ||
235 cnp->cn_nameiop == RENAME)) {
236 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
237 if (error) {
238 goto out;
239 }
240 error = EJUSTRETURN;
241 } else {
242 error = ENOENT;
243 }
244 if (de) {
245 KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
246 cnp->cn_flags |= ISWHITEOUT;
247 }
248 goto done;
249 }
250
251 tnode = de->td_node;
252
253 /*
254 * If it is not the last path component and found a non-directory
255 * or non-link entry (which may itself be pointing to a directory),
256 * raise an error.
257 */
258 if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
259 error = ENOTDIR;
260 goto out;
261 }
262
263 /* Check the permissions. */
264 if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
265 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
266 if (error)
267 goto out;
268
269 if ((dnode->tn_mode & S_ISTXT) != 0) {
270 error = kauth_authorize_vnode(cnp->cn_cred,
271 KAUTH_VNODE_DELETE, tnode->tn_vnode,
272 dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
273 dnode->tn_uid, tnode->tn_uid));
274 if (error) {
275 error = EPERM;
276 goto out;
277 }
278 }
279 }
280
281 /* Get a vnode for the matching entry. */
282 error = vcache_get(dvp->v_mount, &tnode, sizeof(tnode), vpp);
283 done:
284 /*
285 * Cache the result, unless request was for creation (as it does
286 * not improve the performance).
287 */
288 if (cnp->cn_nameiop != CREATE) {
289 cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
290 cnp->cn_flags);
291 }
292 out:
293 KASSERT(VOP_ISLOCKED(dvp));
294
295 return error;
296 }
297
298 int
299 tmpfs_create(void *v)
300 {
301 struct vop_create_v3_args /* {
302 struct vnode *a_dvp;
303 struct vnode **a_vpp;
304 struct componentname *a_cnp;
305 struct vattr *a_vap;
306 } */ *ap = v;
307 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
308 struct componentname *cnp = ap->a_cnp;
309 struct vattr *vap = ap->a_vap;
310
311 KASSERT(VOP_ISLOCKED(dvp));
312 KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
313 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
314 }
315
316 int
317 tmpfs_mknod(void *v)
318 {
319 struct vop_mknod_v3_args /* {
320 struct vnode *a_dvp;
321 struct vnode **a_vpp;
322 struct componentname *a_cnp;
323 struct vattr *a_vap;
324 } */ *ap = v;
325 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
326 struct componentname *cnp = ap->a_cnp;
327 struct vattr *vap = ap->a_vap;
328 enum vtype vt = vap->va_type;
329
330 if (vt != VBLK && vt != VCHR && vt != VFIFO) {
331 *vpp = NULL;
332 return EINVAL;
333 }
334 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
335 }
336
337 int
338 tmpfs_open(void *v)
339 {
340 struct vop_open_args /* {
341 struct vnode *a_vp;
342 int a_mode;
343 kauth_cred_t a_cred;
344 } */ *ap = v;
345 vnode_t *vp = ap->a_vp;
346 mode_t mode = ap->a_mode;
347 tmpfs_node_t *node;
348
349 KASSERT(VOP_ISLOCKED(vp));
350
351 node = VP_TO_TMPFS_NODE(vp);
352
353 /* If the file is marked append-only, deny write requests. */
354 if ((node->tn_flags & APPEND) != 0 &&
355 (mode & (FWRITE | O_APPEND)) == FWRITE) {
356 return EPERM;
357 }
358 return 0;
359 }
360
361 int
362 tmpfs_close(void *v)
363 {
364 struct vop_close_args /* {
365 struct vnode *a_vp;
366 int a_fflag;
367 kauth_cred_t a_cred;
368 } */ *ap = v;
369 vnode_t *vp __diagused = ap->a_vp;
370
371 KASSERT(VOP_ISLOCKED(vp));
372 return 0;
373 }
374
375 int
376 tmpfs_access(void *v)
377 {
378 struct vop_access_args /* {
379 struct vnode *a_vp;
380 int a_mode;
381 kauth_cred_t a_cred;
382 } */ *ap = v;
383 vnode_t *vp = ap->a_vp;
384 mode_t mode = ap->a_mode;
385 kauth_cred_t cred = ap->a_cred;
386 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
387 const bool writing = (mode & VWRITE) != 0;
388
389 KASSERT(VOP_ISLOCKED(vp));
390
391 /* Possible? */
392 switch (vp->v_type) {
393 case VDIR:
394 case VLNK:
395 case VREG:
396 if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
397 return EROFS;
398 }
399 break;
400 case VBLK:
401 case VCHR:
402 case VSOCK:
403 case VFIFO:
404 break;
405 default:
406 return EINVAL;
407 }
408 if (writing && (node->tn_flags & IMMUTABLE) != 0) {
409 return EPERM;
410 }
411
412 return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
413 vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
414 node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
415 }
416
417 int
418 tmpfs_getattr(void *v)
419 {
420 struct vop_getattr_args /* {
421 struct vnode *a_vp;
422 struct vattr *a_vap;
423 kauth_cred_t a_cred;
424 } */ *ap = v;
425 vnode_t *vp = ap->a_vp;
426 struct vattr *vap = ap->a_vap;
427 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
428
429 vattr_null(vap);
430
431 vap->va_type = vp->v_type;
432 vap->va_mode = node->tn_mode;
433 vap->va_nlink = node->tn_links;
434 vap->va_uid = node->tn_uid;
435 vap->va_gid = node->tn_gid;
436 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
437 vap->va_fileid = node->tn_id;
438 vap->va_size = node->tn_size;
439 vap->va_blocksize = PAGE_SIZE;
440 vap->va_atime = node->tn_atime;
441 vap->va_mtime = node->tn_mtime;
442 vap->va_ctime = node->tn_ctime;
443 vap->va_birthtime = node->tn_birthtime;
444 vap->va_gen = TMPFS_NODE_GEN(node);
445 vap->va_flags = node->tn_flags;
446 vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
447 node->tn_spec.tn_dev.tn_rdev : VNOVAL;
448 vap->va_bytes = round_page(node->tn_size);
449 vap->va_filerev = VNOVAL;
450 vap->va_vaflags = 0;
451 vap->va_spare = VNOVAL; /* XXX */
452
453 return 0;
454 }
455
456 int
457 tmpfs_setattr(void *v)
458 {
459 struct vop_setattr_args /* {
460 struct vnode *a_vp;
461 struct vattr *a_vap;
462 kauth_cred_t a_cred;
463 } */ *ap = v;
464 vnode_t *vp = ap->a_vp;
465 struct vattr *vap = ap->a_vap;
466 kauth_cred_t cred = ap->a_cred;
467 lwp_t *l = curlwp;
468 int error = 0;
469
470 KASSERT(VOP_ISLOCKED(vp));
471
472 /* Abort if any unsettable attribute is given. */
473 if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
474 vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
475 vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
476 vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
477 vap->va_bytes != VNOVAL) {
478 return EINVAL;
479 }
480
481 if (error == 0 && vap->va_flags != VNOVAL)
482 error = tmpfs_chflags(vp, vap->va_flags, cred, l);
483
484 if (error == 0 && vap->va_size != VNOVAL)
485 error = tmpfs_chsize(vp, vap->va_size, cred, l);
486
487 if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
488 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
489
490 if (error == 0 && vap->va_mode != VNOVAL)
491 error = tmpfs_chmod(vp, vap->va_mode, cred, l);
492
493 const bool chsometime =
494 vap->va_atime.tv_sec != VNOVAL ||
495 vap->va_mtime.tv_sec != VNOVAL ||
496 vap->va_birthtime.tv_sec != VNOVAL;
497 if (error == 0 && chsometime) {
498 error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
499 &vap->va_birthtime, vap->va_vaflags, cred, l);
500 }
501 return error;
502 }
503
504 int
505 tmpfs_read(void *v)
506 {
507 struct vop_read_args /* {
508 struct vnode *a_vp;
509 struct uio *a_uio;
510 int a_ioflag;
511 kauth_cred_t a_cred;
512 } */ *ap = v;
513 vnode_t *vp = ap->a_vp;
514 struct uio *uio = ap->a_uio;
515 const int ioflag = ap->a_ioflag;
516 tmpfs_node_t *node;
517 struct uvm_object *uobj;
518 int error;
519
520 KASSERT(VOP_ISLOCKED(vp));
521
522 if (vp->v_type == VDIR) {
523 return EISDIR;
524 }
525 if (uio->uio_offset < 0 || vp->v_type != VREG) {
526 return EINVAL;
527 }
528
529 /* Note: reading zero bytes should not update atime. */
530 if (uio->uio_resid == 0) {
531 return 0;
532 }
533
534 node = VP_TO_TMPFS_NODE(vp);
535 uobj = node->tn_spec.tn_reg.tn_aobj;
536 error = 0;
537
538 while (error == 0 && uio->uio_resid > 0) {
539 vsize_t len;
540
541 if (node->tn_size <= uio->uio_offset) {
542 break;
543 }
544 len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
545 if (len == 0) {
546 break;
547 }
548 error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
549 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
550 }
551
552 tmpfs_update(vp, TMPFS_UPDATE_ATIME);
553 return error;
554 }
555
556 int
557 tmpfs_write(void *v)
558 {
559 struct vop_write_args /* {
560 struct vnode *a_vp;
561 struct uio *a_uio;
562 int a_ioflag;
563 kauth_cred_t a_cred;
564 } */ *ap = v;
565 vnode_t *vp = ap->a_vp;
566 struct uio *uio = ap->a_uio;
567 const int ioflag = ap->a_ioflag;
568 tmpfs_node_t *node;
569 struct uvm_object *uobj;
570 off_t oldsize;
571 int error;
572
573 KASSERT(VOP_ISLOCKED(vp));
574
575 node = VP_TO_TMPFS_NODE(vp);
576 oldsize = node->tn_size;
577
578 if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
579 error = EROFS;
580 goto out;
581 }
582
583 if (uio->uio_offset < 0 || vp->v_type != VREG) {
584 error = EINVAL;
585 goto out;
586 }
587 if (uio->uio_resid == 0) {
588 error = 0;
589 goto out;
590 }
591 if (ioflag & IO_APPEND) {
592 uio->uio_offset = node->tn_size;
593 }
594
595 if (uio->uio_offset + uio->uio_resid > node->tn_size) {
596 error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
597 if (error)
598 goto out;
599 }
600
601 uobj = node->tn_spec.tn_reg.tn_aobj;
602 error = 0;
603 while (error == 0 && uio->uio_resid > 0) {
604 vsize_t len;
605
606 len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
607 if (len == 0) {
608 break;
609 }
610 error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
611 UBC_WRITE | UBC_UNMAP_FLAG(vp));
612 }
613 if (error) {
614 (void)tmpfs_reg_resize(vp, oldsize);
615 }
616
617 tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
618 VN_KNOTE(vp, NOTE_WRITE);
619 out:
620 if (error) {
621 KASSERT(oldsize == node->tn_size);
622 } else {
623 KASSERT(uio->uio_resid == 0);
624 }
625 return error;
626 }
627
628 int
629 tmpfs_fsync(void *v)
630 {
631 struct vop_fsync_args /* {
632 struct vnode *a_vp;
633 kauth_cred_t a_cred;
634 int a_flags;
635 off_t a_offlo;
636 off_t a_offhi;
637 struct lwp *a_l;
638 } */ *ap = v;
639 vnode_t *vp __diagused = ap->a_vp;
640
641 /* Nothing to do. Should be up to date. */
642 KASSERT(VOP_ISLOCKED(vp));
643 return 0;
644 }
645
646 /*
647 * tmpfs_remove: unlink a file.
648 *
649 * => Both directory (dvp) and file (vp) are locked.
650 * => We unlock and drop the reference on both.
651 */
652 int
653 tmpfs_remove(void *v)
654 {
655 struct vop_remove_v2_args /* {
656 struct vnode *a_dvp;
657 struct vnode *a_vp;
658 struct componentname *a_cnp;
659 } */ *ap = v;
660 vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
661 tmpfs_node_t *dnode, *node;
662 tmpfs_dirent_t *de;
663 int error;
664
665 KASSERT(VOP_ISLOCKED(dvp));
666 KASSERT(VOP_ISLOCKED(vp));
667
668 if (vp->v_type == VDIR) {
669 error = EPERM;
670 goto out;
671 }
672 dnode = VP_TO_TMPFS_DIR(dvp);
673 node = VP_TO_TMPFS_NODE(vp);
674
675 /*
676 * Files marked as immutable or append-only cannot be deleted.
677 * Likewise, files residing on directories marked as append-only
678 * cannot be deleted.
679 */
680 if (node->tn_flags & (IMMUTABLE | APPEND)) {
681 error = EPERM;
682 goto out;
683 }
684 if (dnode->tn_flags & APPEND) {
685 error = EPERM;
686 goto out;
687 }
688
689 /* Lookup the directory entry (check the cached hint first). */
690 de = tmpfs_dir_cached(node);
691 if (de == NULL) {
692 struct componentname *cnp = ap->a_cnp;
693 de = tmpfs_dir_lookup(dnode, cnp);
694 }
695 KASSERT(de && de->td_node == node);
696
697 /*
698 * Remove the entry from the directory (drops the link count) and
699 * destroy it or replace with a whiteout.
700 *
701 * Note: the inode referred by it will not be destroyed until the
702 * vnode is reclaimed/recycled.
703 */
704
705 tmpfs_dir_detach(dnode, de);
706
707 if (ap->a_cnp->cn_flags & DOWHITEOUT)
708 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
709 else
710 tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
711
712 if (node->tn_links > 0) {
713 /* We removed a hard link. */
714 tmpfs_update(vp, TMPFS_UPDATE_CTIME);
715 }
716 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
717 error = 0;
718 out:
719 /* Drop the reference and unlock the node. */
720 if (dvp == vp) {
721 vrele(vp);
722 } else {
723 vput(vp);
724 }
725 return error;
726 }
727
728 /*
729 * tmpfs_link: create a hard link.
730 */
731 int
732 tmpfs_link(void *v)
733 {
734 struct vop_link_v2_args /* {
735 struct vnode *a_dvp;
736 struct vnode *a_vp;
737 struct componentname *a_cnp;
738 } */ *ap = v;
739 vnode_t *dvp = ap->a_dvp;
740 vnode_t *vp = ap->a_vp;
741 struct componentname *cnp = ap->a_cnp;
742 tmpfs_node_t *dnode, *node;
743 tmpfs_dirent_t *de;
744 int error;
745
746 KASSERT(dvp != vp);
747 KASSERT(VOP_ISLOCKED(dvp));
748 KASSERT(vp->v_type != VDIR);
749 KASSERT(dvp->v_mount == vp->v_mount);
750
751 dnode = VP_TO_TMPFS_DIR(dvp);
752 node = VP_TO_TMPFS_NODE(vp);
753
754 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
755
756 /* Check for maximum number of links limit. */
757 if (node->tn_links == LINK_MAX) {
758 error = EMLINK;
759 goto out;
760 }
761 KASSERT(node->tn_links < LINK_MAX);
762
763 /* We cannot create links of files marked immutable or append-only. */
764 if (node->tn_flags & (IMMUTABLE | APPEND)) {
765 error = EPERM;
766 goto out;
767 }
768
769 /* Allocate a new directory entry to represent the inode. */
770 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
771 cnp->cn_nameptr, cnp->cn_namelen, &de);
772 if (error) {
773 goto out;
774 }
775
776 /*
777 * Insert the entry into the directory.
778 * It will increase the inode link count.
779 */
780 tmpfs_dir_attach(dnode, de, node);
781 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
782
783 /* Update the timestamps and trigger the event. */
784 if (node->tn_vnode) {
785 VN_KNOTE(node->tn_vnode, NOTE_LINK);
786 }
787 tmpfs_update(vp, TMPFS_UPDATE_CTIME);
788 error = 0;
789 out:
790 VOP_UNLOCK(vp);
791 return error;
792 }
793
794 int
795 tmpfs_mkdir(void *v)
796 {
797 struct vop_mkdir_v3_args /* {
798 struct vnode *a_dvp;
799 struct vnode **a_vpp;
800 struct componentname *a_cnp;
801 struct vattr *a_vap;
802 } */ *ap = v;
803 vnode_t *dvp = ap->a_dvp;
804 vnode_t **vpp = ap->a_vpp;
805 struct componentname *cnp = ap->a_cnp;
806 struct vattr *vap = ap->a_vap;
807
808 KASSERT(vap->va_type == VDIR);
809 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
810 }
811
812 int
813 tmpfs_rmdir(void *v)
814 {
815 struct vop_rmdir_v2_args /* {
816 struct vnode *a_dvp;
817 struct vnode *a_vp;
818 struct componentname *a_cnp;
819 } */ *ap = v;
820 vnode_t *dvp = ap->a_dvp;
821 vnode_t *vp = ap->a_vp;
822 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
823 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
824 tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
825 tmpfs_dirent_t *de;
826 int error = 0;
827
828 KASSERT(VOP_ISLOCKED(dvp));
829 KASSERT(VOP_ISLOCKED(vp));
830
831 /*
832 * Directories with more than two entries ('.' and '..') cannot be
833 * removed. There may be whiteout entries, which we will destroy.
834 */
835 if (node->tn_size > 0) {
836 /*
837 * If never had whiteout entries, the directory is certainly
838 * not empty. Otherwise, scan for any non-whiteout entry.
839 */
840 if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
841 error = ENOTEMPTY;
842 goto out;
843 }
844 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
845 if (de->td_node != TMPFS_NODE_WHITEOUT) {
846 error = ENOTEMPTY;
847 goto out;
848 }
849 }
850 KASSERT(error == 0);
851 }
852
853 KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
854
855 /* Lookup the directory entry (check the cached hint first). */
856 de = tmpfs_dir_cached(node);
857 if (de == NULL) {
858 struct componentname *cnp = ap->a_cnp;
859 de = tmpfs_dir_lookup(dnode, cnp);
860 }
861 KASSERT(de && de->td_node == node);
862
863 /* Check flags to see if we are allowed to remove the directory. */
864 if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
865 error = EPERM;
866 goto out;
867 }
868
869 /* Decrement the link count for the virtual '.' entry. */
870 node->tn_links--;
871
872 /* Detach the directory entry from the directory. */
873 tmpfs_dir_detach(dnode, de);
874
875 /* Purge the cache for parent. */
876 cache_purge(dvp);
877
878 /*
879 * Destroy the directory entry or replace it with a whiteout.
880 *
881 * Note: the inode referred by it will not be destroyed until the
882 * vnode is reclaimed.
883 */
884 if (ap->a_cnp->cn_flags & DOWHITEOUT)
885 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
886 else
887 tmpfs_free_dirent(tmp, de);
888
889 /* Destroy the whiteout entries from the node. */
890 while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
891 KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
892 tmpfs_dir_detach(node, de);
893 tmpfs_free_dirent(tmp, de);
894 }
895 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
896
897 KASSERT(node->tn_size == 0);
898 KASSERT(node->tn_links == 0);
899 out:
900 /* Release the node. */
901 KASSERT(dvp != vp);
902 vput(vp);
903 return error;
904 }
905
906 int
907 tmpfs_symlink(void *v)
908 {
909 struct vop_symlink_v3_args /* {
910 struct vnode *a_dvp;
911 struct vnode **a_vpp;
912 struct componentname *a_cnp;
913 struct vattr *a_vap;
914 char *a_target;
915 } */ *ap = v;
916 vnode_t *dvp = ap->a_dvp;
917 vnode_t **vpp = ap->a_vpp;
918 struct componentname *cnp = ap->a_cnp;
919 struct vattr *vap = ap->a_vap;
920 char *target = ap->a_target;
921
922 KASSERT(vap->va_type == VLNK);
923 return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
924 }
925
926 int
927 tmpfs_readdir(void *v)
928 {
929 struct vop_readdir_args /* {
930 struct vnode *a_vp;
931 struct uio *a_uio;
932 kauth_cred_t a_cred;
933 int *a_eofflag;
934 off_t **a_cookies;
935 int *ncookies;
936 } */ *ap = v;
937 vnode_t *vp = ap->a_vp;
938 struct uio *uio = ap->a_uio;
939 int *eofflag = ap->a_eofflag;
940 off_t **cookies = ap->a_cookies;
941 int *ncookies = ap->a_ncookies;
942 off_t startoff, cnt;
943 tmpfs_node_t *node;
944 int error;
945
946 KASSERT(VOP_ISLOCKED(vp));
947
948 /* This operation only makes sense on directory nodes. */
949 if (vp->v_type != VDIR) {
950 return ENOTDIR;
951 }
952 node = VP_TO_TMPFS_DIR(vp);
953 startoff = uio->uio_offset;
954 cnt = 0;
955
956 /*
957 * Retrieve the directory entries, unless it is being destroyed.
958 */
959 if (node->tn_links) {
960 error = tmpfs_dir_getdents(node, uio, &cnt);
961 } else {
962 error = 0;
963 }
964
965 if (eofflag != NULL) {
966 *eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
967 }
968 if (error || cookies == NULL || ncookies == NULL) {
969 return error;
970 }
971
972 /* Update NFS-related variables, if any. */
973 tmpfs_dirent_t *de = NULL;
974 off_t i, off = startoff;
975
976 *cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
977 *ncookies = cnt;
978
979 for (i = 0; i < cnt; i++) {
980 KASSERT(off != TMPFS_DIRSEQ_EOF);
981 if (off != TMPFS_DIRSEQ_DOT) {
982 if (off == TMPFS_DIRSEQ_DOTDOT) {
983 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
984 } else if (de != NULL) {
985 de = TAILQ_NEXT(de, td_entries);
986 } else {
987 de = tmpfs_dir_lookupbyseq(node, off);
988 KASSERT(de != NULL);
989 de = TAILQ_NEXT(de, td_entries);
990 }
991 if (de == NULL) {
992 off = TMPFS_DIRSEQ_EOF;
993 } else {
994 off = tmpfs_dir_getseq(node, de);
995 }
996 } else {
997 off = TMPFS_DIRSEQ_DOTDOT;
998 }
999 (*cookies)[i] = off;
1000 }
1001 KASSERT(uio->uio_offset == off);
1002 return error;
1003 }
1004
1005 int
1006 tmpfs_readlink(void *v)
1007 {
1008 struct vop_readlink_args /* {
1009 struct vnode *a_vp;
1010 struct uio *a_uio;
1011 kauth_cred_t a_cred;
1012 } */ *ap = v;
1013 vnode_t *vp = ap->a_vp;
1014 struct uio *uio = ap->a_uio;
1015 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1016 int error;
1017
1018 KASSERT(VOP_ISLOCKED(vp));
1019 KASSERT(uio->uio_offset == 0);
1020 KASSERT(vp->v_type == VLNK);
1021
1022 /* Note: readlink(2) returns the path without NUL terminator. */
1023 if (node->tn_size > 0) {
1024 error = uiomove(node->tn_spec.tn_lnk.tn_link,
1025 MIN(node->tn_size, uio->uio_resid), uio);
1026 } else {
1027 error = 0;
1028 }
1029 tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1030
1031 return error;
1032 }
1033
1034 int
1035 tmpfs_inactive(void *v)
1036 {
1037 struct vop_inactive_v2_args /* {
1038 struct vnode *a_vp;
1039 bool *a_recycle;
1040 } */ *ap = v;
1041 vnode_t *vp = ap->a_vp;
1042 tmpfs_node_t *node;
1043
1044 KASSERT(VOP_ISLOCKED(vp));
1045
1046 node = VP_TO_TMPFS_NODE(vp);
1047 if (node->tn_links == 0) {
1048 /*
1049 * Mark node as dead by setting its generation to zero.
1050 */
1051 atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1052 *ap->a_recycle = true;
1053 } else {
1054 *ap->a_recycle = false;
1055 }
1056
1057 return 0;
1058 }
1059
1060 int
1061 tmpfs_reclaim(void *v)
1062 {
1063 struct vop_reclaim_v2_args /* {
1064 struct vnode *a_vp;
1065 } */ *ap = v;
1066 vnode_t *vp = ap->a_vp;
1067 tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1068 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1069
1070 /* Unlock vnode. We still have exclusive access to it. */
1071 VOP_UNLOCK(vp);
1072
1073 /* Disassociate inode from vnode. */
1074 node->tn_vnode = NULL;
1075 vp->v_data = NULL;
1076
1077 /* If inode is not referenced, i.e. no links, then destroy it. */
1078 if (node->tn_links == 0)
1079 tmpfs_free_node(tmp, node);
1080 return 0;
1081 }
1082
1083 int
1084 tmpfs_pathconf(void *v)
1085 {
1086 struct vop_pathconf_args /* {
1087 struct vnode *a_vp;
1088 int a_name;
1089 register_t *a_retval;
1090 } */ *ap = v;
1091 const int name = ap->a_name;
1092 register_t *retval = ap->a_retval;
1093 int error = 0;
1094
1095 switch (name) {
1096 case _PC_LINK_MAX:
1097 *retval = LINK_MAX;
1098 break;
1099 case _PC_NAME_MAX:
1100 *retval = TMPFS_MAXNAMLEN;
1101 break;
1102 case _PC_PATH_MAX:
1103 *retval = PATH_MAX;
1104 break;
1105 case _PC_PIPE_BUF:
1106 *retval = PIPE_BUF;
1107 break;
1108 case _PC_CHOWN_RESTRICTED:
1109 *retval = 1;
1110 break;
1111 case _PC_NO_TRUNC:
1112 *retval = 1;
1113 break;
1114 case _PC_SYNC_IO:
1115 *retval = 1;
1116 break;
1117 case _PC_FILESIZEBITS:
1118 *retval = sizeof(off_t) * CHAR_BIT;
1119 break;
1120 default:
1121 error = EINVAL;
1122 }
1123 return error;
1124 }
1125
1126 int
1127 tmpfs_advlock(void *v)
1128 {
1129 struct vop_advlock_args /* {
1130 struct vnode *a_vp;
1131 void * a_id;
1132 int a_op;
1133 struct flock *a_fl;
1134 int a_flags;
1135 } */ *ap = v;
1136 vnode_t *vp = ap->a_vp;
1137 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1138
1139 return lf_advlock(v, &node->tn_lockf, node->tn_size);
1140 }
1141
1142 int
1143 tmpfs_getpages(void *v)
1144 {
1145 struct vop_getpages_args /* {
1146 struct vnode *a_vp;
1147 voff_t a_offset;
1148 struct vm_page **a_m;
1149 int *a_count;
1150 int a_centeridx;
1151 vm_prot_t a_access_type;
1152 int a_advice;
1153 int a_flags;
1154 } */ * const ap = v;
1155 vnode_t *vp = ap->a_vp;
1156 const voff_t offset = ap->a_offset;
1157 struct vm_page **pgs = ap->a_m;
1158 const int centeridx = ap->a_centeridx;
1159 const vm_prot_t access_type = ap->a_access_type;
1160 const int advice = ap->a_advice;
1161 const int flags = ap->a_flags;
1162 int error, npages = *ap->a_count;
1163 tmpfs_node_t *node;
1164 struct uvm_object *uobj;
1165
1166 KASSERT(vp->v_type == VREG);
1167 KASSERT(mutex_owned(vp->v_interlock));
1168
1169 /*
1170 * Currently, PGO_PASTEOF is not supported.
1171 */
1172 if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1173 if ((flags & PGO_LOCKED) == 0)
1174 mutex_exit(vp->v_interlock);
1175 return EINVAL;
1176 }
1177
1178 if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1179 npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1180 }
1181
1182 if ((flags & PGO_LOCKED) != 0)
1183 return EBUSY;
1184
1185 if (vdead_check(vp, VDEAD_NOWAIT) != 0)
1186 return ENOENT;
1187
1188 node = VP_TO_TMPFS_NODE(vp);
1189 uobj = node->tn_spec.tn_reg.tn_aobj;
1190
1191 if ((flags & PGO_NOTIMESTAMP) == 0) {
1192 u_int tflags = 0;
1193
1194 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1195 tflags |= TMPFS_UPDATE_ATIME;
1196
1197 if ((access_type & VM_PROT_WRITE) != 0) {
1198 tflags |= TMPFS_UPDATE_MTIME;
1199 if (vp->v_mount->mnt_flag & MNT_RELATIME)
1200 tflags |= TMPFS_UPDATE_ATIME;
1201 }
1202 tmpfs_update(vp, tflags);
1203 }
1204
1205 /*
1206 * Invoke the pager.
1207 *
1208 * Clean the array of pages before. XXX: PR/32166
1209 * Note that vnode lock is shared with underlying UVM object.
1210 */
1211 if (pgs) {
1212 memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1213 }
1214 KASSERT(vp->v_interlock == uobj->vmobjlock);
1215
1216 error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1217 access_type, advice, flags | PGO_ALLPAGES);
1218
1219 #if defined(DEBUG)
1220 if (!error && pgs) {
1221 for (int i = 0; i < npages; i++) {
1222 KASSERT(pgs[i] != NULL);
1223 }
1224 }
1225 #endif
1226 return error;
1227 }
1228
1229 int
1230 tmpfs_putpages(void *v)
1231 {
1232 struct vop_putpages_args /* {
1233 struct vnode *a_vp;
1234 voff_t a_offlo;
1235 voff_t a_offhi;
1236 int a_flags;
1237 } */ * const ap = v;
1238 vnode_t *vp = ap->a_vp;
1239 const voff_t offlo = ap->a_offlo;
1240 const voff_t offhi = ap->a_offhi;
1241 const int flags = ap->a_flags;
1242 tmpfs_node_t *node;
1243 struct uvm_object *uobj;
1244 int error;
1245
1246 KASSERT(mutex_owned(vp->v_interlock));
1247
1248 if (vp->v_type != VREG) {
1249 mutex_exit(vp->v_interlock);
1250 return 0;
1251 }
1252
1253 node = VP_TO_TMPFS_NODE(vp);
1254 uobj = node->tn_spec.tn_reg.tn_aobj;
1255
1256 KASSERT(vp->v_interlock == uobj->vmobjlock);
1257 error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1258
1259 /* XXX mtime */
1260
1261 return error;
1262 }
1263
1264 int
1265 tmpfs_whiteout(void *v)
1266 {
1267 struct vop_whiteout_args /* {
1268 struct vnode *a_dvp;
1269 struct componentname *a_cnp;
1270 int a_flags;
1271 } */ *ap = v;
1272 vnode_t *dvp = ap->a_dvp;
1273 struct componentname *cnp = ap->a_cnp;
1274 const int flags = ap->a_flags;
1275 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1276 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1277 tmpfs_dirent_t *de;
1278 int error;
1279
1280 switch (flags) {
1281 case LOOKUP:
1282 break;
1283 case CREATE:
1284 error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1285 cnp->cn_namelen, &de);
1286 if (error)
1287 return error;
1288 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1289 break;
1290 case DELETE:
1291 cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1292 de = tmpfs_dir_lookup(dnode, cnp);
1293 if (de == NULL)
1294 return ENOENT;
1295 tmpfs_dir_detach(dnode, de);
1296 tmpfs_free_dirent(tmp, de);
1297 break;
1298 }
1299 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1300 return 0;
1301 }
1302
1303 int
1304 tmpfs_print(void *v)
1305 {
1306 struct vop_print_args /* {
1307 struct vnode *a_vp;
1308 } */ *ap = v;
1309 vnode_t *vp = ap->a_vp;
1310 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1311
1312 printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1313 "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1314 node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1315 node->tn_gid, (uintmax_t)node->tn_size);
1316 if (vp->v_type == VFIFO) {
1317 VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1318 }
1319 printf("\n");
1320 return 0;
1321 }
1322