tmpfs_vnops.c revision 1.138 1 /* $NetBSD: tmpfs_vnops.c,v 1.138 2020/05/16 18:31:49 christos Exp $ */
2
3 /*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * tmpfs vnode interface.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.138 2020/05/16 18:31:49 christos Exp $");
39
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/fcntl.h>
43 #include <sys/event.h>
44 #include <sys/malloc.h>
45 #include <sys/namei.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/lockf.h>
51 #include <sys/kauth.h>
52 #include <sys/atomic.h>
53
54 #include <uvm/uvm.h>
55
56 #include <miscfs/fifofs/fifo.h>
57 #include <miscfs/genfs/genfs.h>
58 #include <fs/tmpfs/tmpfs_vnops.h>
59 #include <fs/tmpfs/tmpfs.h>
60
61 /*
62 * vnode operations vector used for files stored in a tmpfs file system.
63 */
64 int (**tmpfs_vnodeop_p)(void *);
65 const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
66 { &vop_default_desc, vn_default_error },
67 { &vop_lookup_desc, tmpfs_lookup },
68 { &vop_create_desc, tmpfs_create },
69 { &vop_mknod_desc, tmpfs_mknod },
70 { &vop_open_desc, tmpfs_open },
71 { &vop_close_desc, tmpfs_close },
72 { &vop_access_desc, tmpfs_access },
73 { &vop_accessx_desc, genfs_accessx },
74 { &vop_getattr_desc, tmpfs_getattr },
75 { &vop_setattr_desc, tmpfs_setattr },
76 { &vop_read_desc, tmpfs_read },
77 { &vop_write_desc, tmpfs_write },
78 { &vop_fallocate_desc, genfs_eopnotsupp },
79 { &vop_fdiscard_desc, genfs_eopnotsupp },
80 { &vop_ioctl_desc, tmpfs_ioctl },
81 { &vop_fcntl_desc, tmpfs_fcntl },
82 { &vop_poll_desc, tmpfs_poll },
83 { &vop_kqfilter_desc, tmpfs_kqfilter },
84 { &vop_revoke_desc, tmpfs_revoke },
85 { &vop_mmap_desc, tmpfs_mmap },
86 { &vop_fsync_desc, tmpfs_fsync },
87 { &vop_seek_desc, tmpfs_seek },
88 { &vop_remove_desc, tmpfs_remove },
89 { &vop_link_desc, tmpfs_link },
90 { &vop_rename_desc, tmpfs_rename },
91 { &vop_mkdir_desc, tmpfs_mkdir },
92 { &vop_rmdir_desc, tmpfs_rmdir },
93 { &vop_symlink_desc, tmpfs_symlink },
94 { &vop_readdir_desc, tmpfs_readdir },
95 { &vop_readlink_desc, tmpfs_readlink },
96 { &vop_abortop_desc, tmpfs_abortop },
97 { &vop_inactive_desc, tmpfs_inactive },
98 { &vop_reclaim_desc, tmpfs_reclaim },
99 { &vop_lock_desc, tmpfs_lock },
100 { &vop_unlock_desc, tmpfs_unlock },
101 { &vop_bmap_desc, tmpfs_bmap },
102 { &vop_strategy_desc, tmpfs_strategy },
103 { &vop_print_desc, tmpfs_print },
104 { &vop_pathconf_desc, tmpfs_pathconf },
105 { &vop_islocked_desc, tmpfs_islocked },
106 { &vop_advlock_desc, tmpfs_advlock },
107 { &vop_bwrite_desc, tmpfs_bwrite },
108 { &vop_getpages_desc, tmpfs_getpages },
109 { &vop_putpages_desc, tmpfs_putpages },
110 { &vop_whiteout_desc, tmpfs_whiteout },
111 { NULL, NULL }
112 };
113
114 const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
115 &tmpfs_vnodeop_p, tmpfs_vnodeop_entries
116 };
117
118 /*
119 * tmpfs_lookup: path name traversal routine.
120 *
121 * Arguments: dvp (directory being searched), vpp (result),
122 * cnp (component name - path).
123 *
124 * => Caller holds a reference and lock on dvp.
125 * => We return looked-up vnode (vpp) locked, with a reference held.
126 */
127 int
128 tmpfs_lookup(void *v)
129 {
130 struct vop_lookup_v2_args /* {
131 struct vnode *a_dvp;
132 struct vnode **a_vpp;
133 struct componentname *a_cnp;
134 } */ *ap = v;
135 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
136 struct componentname *cnp = ap->a_cnp;
137 const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
138 tmpfs_node_t *dnode, *tnode;
139 tmpfs_dirent_t *de;
140 int cachefound, iswhiteout;
141 int error;
142
143 KASSERT(VOP_ISLOCKED(dvp));
144
145 dnode = VP_TO_TMPFS_DIR(dvp);
146 *vpp = NULL;
147
148 /* Check accessibility of directory. */
149 error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
150 if (error) {
151 goto out;
152 }
153
154 /*
155 * If requesting the last path component on a read-only file system
156 * with a write operation, deny it.
157 */
158 if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
159 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
160 error = EROFS;
161 goto out;
162 }
163
164 /*
165 * Avoid doing a linear scan of the directory if the requested
166 * directory/name couple is already in the cache.
167 */
168 cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
169 cnp->cn_nameiop, cnp->cn_flags,
170 &iswhiteout, vpp);
171 if (iswhiteout) {
172 cnp->cn_flags |= ISWHITEOUT;
173 }
174 if (cachefound && *vpp == NULLVP) {
175 /* Negative cache hit. */
176 error = ENOENT;
177 goto out;
178 } else if (cachefound) {
179 error = 0;
180 goto out;
181 }
182
183 /*
184 * Treat an unlinked directory as empty (no "." or "..")
185 */
186 if (dnode->tn_links == 0) {
187 KASSERT(dnode->tn_size == 0);
188 error = ENOENT;
189 goto out;
190 }
191
192 if (cnp->cn_flags & ISDOTDOT) {
193 tmpfs_node_t *pnode;
194
195 /*
196 * Lookup of ".." case.
197 */
198 if (lastcn && cnp->cn_nameiop == RENAME) {
199 error = EINVAL;
200 goto out;
201 }
202 KASSERT(dnode->tn_type == VDIR);
203 pnode = dnode->tn_spec.tn_dir.tn_parent;
204 if (pnode == NULL) {
205 error = ENOENT;
206 goto done;
207 }
208
209 error = vcache_get(dvp->v_mount, &pnode, sizeof(pnode), vpp);
210 goto done;
211 } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
212 /*
213 * Lookup of "." case.
214 */
215 if (lastcn && cnp->cn_nameiop == RENAME) {
216 error = EISDIR;
217 goto out;
218 }
219 vref(dvp);
220 *vpp = dvp;
221 error = 0;
222 goto done;
223 }
224
225 /*
226 * Other lookup cases: perform directory scan.
227 */
228 de = tmpfs_dir_lookup(dnode, cnp);
229 if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
230 /*
231 * The entry was not found in the directory. This is valid
232 * if we are creating or renaming an entry and are working
233 * on the last component of the path name.
234 */
235 if (lastcn && (cnp->cn_nameiop == CREATE ||
236 cnp->cn_nameiop == RENAME)) {
237 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
238 if (error) {
239 goto out;
240 }
241 error = EJUSTRETURN;
242 } else {
243 error = ENOENT;
244 }
245 if (de) {
246 KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
247 cnp->cn_flags |= ISWHITEOUT;
248 }
249 goto done;
250 }
251
252 tnode = de->td_node;
253
254 /*
255 * If it is not the last path component and found a non-directory
256 * or non-link entry (which may itself be pointing to a directory),
257 * raise an error.
258 */
259 if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
260 error = ENOTDIR;
261 goto out;
262 }
263
264 /* Check the permissions. */
265 if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
266 error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
267 if (error)
268 goto out;
269
270 if ((dnode->tn_mode & S_ISTXT) != 0) {
271 error = kauth_authorize_vnode(cnp->cn_cred,
272 KAUTH_VNODE_DELETE, tnode->tn_vnode,
273 dnode->tn_vnode, genfs_can_sticky(dvp, cnp->cn_cred,
274 dnode->tn_uid, tnode->tn_uid));
275 if (error) {
276 error = EPERM;
277 goto out;
278 }
279 }
280 }
281
282 /* Get a vnode for the matching entry. */
283 error = vcache_get(dvp->v_mount, &tnode, sizeof(tnode), vpp);
284 done:
285 /*
286 * Cache the result, unless request was for creation (as it does
287 * not improve the performance).
288 */
289 if (cnp->cn_nameiop != CREATE) {
290 cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
291 cnp->cn_flags);
292 }
293 out:
294 KASSERT(VOP_ISLOCKED(dvp));
295
296 return error;
297 }
298
299 int
300 tmpfs_create(void *v)
301 {
302 struct vop_create_v3_args /* {
303 struct vnode *a_dvp;
304 struct vnode **a_vpp;
305 struct componentname *a_cnp;
306 struct vattr *a_vap;
307 } */ *ap = v;
308 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
309 struct componentname *cnp = ap->a_cnp;
310 struct vattr *vap = ap->a_vap;
311
312 KASSERT(VOP_ISLOCKED(dvp));
313 KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
314 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
315 }
316
317 int
318 tmpfs_mknod(void *v)
319 {
320 struct vop_mknod_v3_args /* {
321 struct vnode *a_dvp;
322 struct vnode **a_vpp;
323 struct componentname *a_cnp;
324 struct vattr *a_vap;
325 } */ *ap = v;
326 vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
327 struct componentname *cnp = ap->a_cnp;
328 struct vattr *vap = ap->a_vap;
329 enum vtype vt = vap->va_type;
330
331 if (vt != VBLK && vt != VCHR && vt != VFIFO) {
332 *vpp = NULL;
333 return EINVAL;
334 }
335 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
336 }
337
338 int
339 tmpfs_open(void *v)
340 {
341 struct vop_open_args /* {
342 struct vnode *a_vp;
343 int a_mode;
344 kauth_cred_t a_cred;
345 } */ *ap = v;
346 vnode_t *vp = ap->a_vp;
347 mode_t mode = ap->a_mode;
348 tmpfs_node_t *node;
349
350 KASSERT(VOP_ISLOCKED(vp));
351
352 node = VP_TO_TMPFS_NODE(vp);
353
354 /* If the file is marked append-only, deny write requests. */
355 if ((node->tn_flags & APPEND) != 0 &&
356 (mode & (FWRITE | O_APPEND)) == FWRITE) {
357 return EPERM;
358 }
359 return 0;
360 }
361
362 int
363 tmpfs_close(void *v)
364 {
365 struct vop_close_args /* {
366 struct vnode *a_vp;
367 int a_fflag;
368 kauth_cred_t a_cred;
369 } */ *ap = v;
370 vnode_t *vp __diagused = ap->a_vp;
371
372 KASSERT(VOP_ISLOCKED(vp));
373 return 0;
374 }
375
376 int
377 tmpfs_access(void *v)
378 {
379 struct vop_access_args /* {
380 struct vnode *a_vp;
381 accmode_t a_accmode;
382 kauth_cred_t a_cred;
383 } */ *ap = v;
384 vnode_t *vp = ap->a_vp;
385 accmode_t accmode = ap->a_accmode;
386 kauth_cred_t cred = ap->a_cred;
387 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
388 const bool writing = (accmode & VWRITE) != 0;
389
390 KASSERT(VOP_ISLOCKED(vp));
391
392 /* Possible? */
393 switch (vp->v_type) {
394 case VDIR:
395 case VLNK:
396 case VREG:
397 if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
398 return EROFS;
399 }
400 break;
401 case VBLK:
402 case VCHR:
403 case VSOCK:
404 case VFIFO:
405 break;
406 default:
407 return EINVAL;
408 }
409 if (writing && (node->tn_flags & IMMUTABLE) != 0) {
410 return EPERM;
411 }
412
413 return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(accmode,
414 vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp, cred,
415 node->tn_uid, node->tn_gid, node->tn_mode, NULL, accmode));
416 }
417
418 int
419 tmpfs_getattr(void *v)
420 {
421 struct vop_getattr_args /* {
422 struct vnode *a_vp;
423 struct vattr *a_vap;
424 kauth_cred_t a_cred;
425 } */ *ap = v;
426 vnode_t *vp = ap->a_vp;
427 struct vattr *vap = ap->a_vap;
428 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
429
430 vattr_null(vap);
431
432 vap->va_type = vp->v_type;
433 vap->va_mode = node->tn_mode;
434 vap->va_nlink = node->tn_links;
435 vap->va_uid = node->tn_uid;
436 vap->va_gid = node->tn_gid;
437 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
438 vap->va_fileid = node->tn_id;
439 vap->va_size = node->tn_size;
440 vap->va_blocksize = PAGE_SIZE;
441 vap->va_atime = node->tn_atime;
442 vap->va_mtime = node->tn_mtime;
443 vap->va_ctime = node->tn_ctime;
444 vap->va_birthtime = node->tn_birthtime;
445 vap->va_gen = TMPFS_NODE_GEN(node);
446 vap->va_flags = node->tn_flags;
447 vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
448 node->tn_spec.tn_dev.tn_rdev : VNOVAL;
449 vap->va_bytes = round_page(node->tn_size);
450 vap->va_filerev = VNOVAL;
451 vap->va_vaflags = 0;
452 vap->va_spare = VNOVAL; /* XXX */
453
454 return 0;
455 }
456
457 int
458 tmpfs_setattr(void *v)
459 {
460 struct vop_setattr_args /* {
461 struct vnode *a_vp;
462 struct vattr *a_vap;
463 kauth_cred_t a_cred;
464 } */ *ap = v;
465 vnode_t *vp = ap->a_vp;
466 struct vattr *vap = ap->a_vap;
467 kauth_cred_t cred = ap->a_cred;
468 lwp_t *l = curlwp;
469 int error = 0;
470
471 KASSERT(VOP_ISLOCKED(vp));
472
473 /* Abort if any unsettable attribute is given. */
474 if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
475 vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
476 vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
477 vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
478 vap->va_bytes != VNOVAL) {
479 return EINVAL;
480 }
481
482 if (error == 0 && vap->va_flags != VNOVAL)
483 error = tmpfs_chflags(vp, vap->va_flags, cred, l);
484
485 if (error == 0 && vap->va_size != VNOVAL)
486 error = tmpfs_chsize(vp, vap->va_size, cred, l);
487
488 if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
489 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
490
491 if (error == 0 && vap->va_mode != VNOVAL)
492 error = tmpfs_chmod(vp, vap->va_mode, cred, l);
493
494 const bool chsometime =
495 vap->va_atime.tv_sec != VNOVAL ||
496 vap->va_mtime.tv_sec != VNOVAL ||
497 vap->va_birthtime.tv_sec != VNOVAL;
498 if (error == 0 && chsometime) {
499 error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
500 &vap->va_birthtime, vap->va_vaflags, cred, l);
501 }
502 return error;
503 }
504
505 int
506 tmpfs_read(void *v)
507 {
508 struct vop_read_args /* {
509 struct vnode *a_vp;
510 struct uio *a_uio;
511 int a_ioflag;
512 kauth_cred_t a_cred;
513 } */ *ap = v;
514 vnode_t *vp = ap->a_vp;
515 struct uio *uio = ap->a_uio;
516 const int ioflag = ap->a_ioflag;
517 tmpfs_node_t *node;
518 struct uvm_object *uobj;
519 int error;
520
521 KASSERT(VOP_ISLOCKED(vp));
522
523 if (vp->v_type == VDIR) {
524 return EISDIR;
525 }
526 if (uio->uio_offset < 0 || vp->v_type != VREG) {
527 return EINVAL;
528 }
529
530 /* Note: reading zero bytes should not update atime. */
531 if (uio->uio_resid == 0) {
532 return 0;
533 }
534
535 node = VP_TO_TMPFS_NODE(vp);
536 uobj = node->tn_spec.tn_reg.tn_aobj;
537 error = 0;
538
539 while (error == 0 && uio->uio_resid > 0) {
540 vsize_t len;
541
542 if (node->tn_size <= uio->uio_offset) {
543 break;
544 }
545 len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
546 if (len == 0) {
547 break;
548 }
549 error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
550 UBC_READ | UBC_PARTIALOK | UBC_VNODE_FLAGS(vp));
551 }
552
553 tmpfs_update(vp, TMPFS_UPDATE_ATIME);
554 return error;
555 }
556
557 int
558 tmpfs_write(void *v)
559 {
560 struct vop_write_args /* {
561 struct vnode *a_vp;
562 struct uio *a_uio;
563 int a_ioflag;
564 kauth_cred_t a_cred;
565 } */ *ap = v;
566 vnode_t *vp = ap->a_vp;
567 struct uio *uio = ap->a_uio;
568 const int ioflag = ap->a_ioflag;
569 tmpfs_node_t *node;
570 struct uvm_object *uobj;
571 off_t oldsize;
572 int error;
573
574 KASSERT(VOP_ISLOCKED(vp));
575
576 node = VP_TO_TMPFS_NODE(vp);
577 oldsize = node->tn_size;
578
579 if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
580 error = EROFS;
581 goto out;
582 }
583
584 if (uio->uio_offset < 0 || vp->v_type != VREG) {
585 error = EINVAL;
586 goto out;
587 }
588 if (uio->uio_resid == 0) {
589 error = 0;
590 goto out;
591 }
592 if (ioflag & IO_APPEND) {
593 uio->uio_offset = node->tn_size;
594 }
595
596 if (uio->uio_offset + uio->uio_resid > node->tn_size) {
597 error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
598 if (error)
599 goto out;
600 }
601
602 uobj = node->tn_spec.tn_reg.tn_aobj;
603 error = 0;
604 while (error == 0 && uio->uio_resid > 0) {
605 vsize_t len;
606
607 len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
608 if (len == 0) {
609 break;
610 }
611 error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
612 UBC_WRITE | UBC_VNODE_FLAGS(vp));
613 }
614 if (error) {
615 (void)tmpfs_reg_resize(vp, oldsize);
616 }
617
618 tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
619 VN_KNOTE(vp, NOTE_WRITE);
620 out:
621 if (error) {
622 KASSERT(oldsize == node->tn_size);
623 } else {
624 KASSERT(uio->uio_resid == 0);
625 }
626 return error;
627 }
628
629 int
630 tmpfs_fsync(void *v)
631 {
632 struct vop_fsync_args /* {
633 struct vnode *a_vp;
634 kauth_cred_t a_cred;
635 int a_flags;
636 off_t a_offlo;
637 off_t a_offhi;
638 struct lwp *a_l;
639 } */ *ap = v;
640 vnode_t *vp __diagused = ap->a_vp;
641
642 /* Nothing to do. Should be up to date. */
643 KASSERT(VOP_ISLOCKED(vp));
644 return 0;
645 }
646
647 /*
648 * tmpfs_remove: unlink a file.
649 *
650 * => Both directory (dvp) and file (vp) are locked.
651 * => We unlock and drop the reference on both.
652 */
653 int
654 tmpfs_remove(void *v)
655 {
656 struct vop_remove_v2_args /* {
657 struct vnode *a_dvp;
658 struct vnode *a_vp;
659 struct componentname *a_cnp;
660 } */ *ap = v;
661 vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
662 tmpfs_node_t *dnode, *node;
663 tmpfs_dirent_t *de;
664 int error;
665
666 KASSERT(VOP_ISLOCKED(dvp));
667 KASSERT(VOP_ISLOCKED(vp));
668
669 if (vp->v_type == VDIR) {
670 error = EPERM;
671 goto out;
672 }
673 dnode = VP_TO_TMPFS_DIR(dvp);
674 node = VP_TO_TMPFS_NODE(vp);
675
676 /*
677 * Files marked as immutable or append-only cannot be deleted.
678 * Likewise, files residing on directories marked as append-only
679 * cannot be deleted.
680 */
681 if (node->tn_flags & (IMMUTABLE | APPEND)) {
682 error = EPERM;
683 goto out;
684 }
685 if (dnode->tn_flags & APPEND) {
686 error = EPERM;
687 goto out;
688 }
689
690 /* Lookup the directory entry (check the cached hint first). */
691 de = tmpfs_dir_cached(node);
692 if (de == NULL) {
693 struct componentname *cnp = ap->a_cnp;
694 de = tmpfs_dir_lookup(dnode, cnp);
695 }
696 KASSERT(de && de->td_node == node);
697
698 /*
699 * Remove the entry from the directory (drops the link count) and
700 * destroy it or replace with a whiteout.
701 *
702 * Note: the inode referred by it will not be destroyed until the
703 * vnode is reclaimed/recycled.
704 */
705
706 tmpfs_dir_detach(dnode, de);
707
708 if (ap->a_cnp->cn_flags & DOWHITEOUT)
709 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
710 else
711 tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
712
713 if (node->tn_links > 0) {
714 /* We removed a hard link. */
715 tmpfs_update(vp, TMPFS_UPDATE_CTIME);
716 }
717 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
718 error = 0;
719 out:
720 /* Drop the reference and unlock the node. */
721 if (dvp == vp) {
722 vrele(vp);
723 } else {
724 vput(vp);
725 }
726 return error;
727 }
728
729 /*
730 * tmpfs_link: create a hard link.
731 */
732 int
733 tmpfs_link(void *v)
734 {
735 struct vop_link_v2_args /* {
736 struct vnode *a_dvp;
737 struct vnode *a_vp;
738 struct componentname *a_cnp;
739 } */ *ap = v;
740 vnode_t *dvp = ap->a_dvp;
741 vnode_t *vp = ap->a_vp;
742 struct componentname *cnp = ap->a_cnp;
743 tmpfs_node_t *dnode, *node;
744 tmpfs_dirent_t *de;
745 int error;
746
747 KASSERT(dvp != vp);
748 KASSERT(VOP_ISLOCKED(dvp));
749 KASSERT(vp->v_type != VDIR);
750 KASSERT(dvp->v_mount == vp->v_mount);
751
752 dnode = VP_TO_TMPFS_DIR(dvp);
753 node = VP_TO_TMPFS_NODE(vp);
754
755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
756
757 /* Check for maximum number of links limit. */
758 if (node->tn_links == LINK_MAX) {
759 error = EMLINK;
760 goto out;
761 }
762 KASSERT(node->tn_links < LINK_MAX);
763
764 /* We cannot create links of files marked immutable or append-only. */
765 if (node->tn_flags & (IMMUTABLE | APPEND)) {
766 error = EPERM;
767 goto out;
768 }
769
770 /* Allocate a new directory entry to represent the inode. */
771 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
772 cnp->cn_nameptr, cnp->cn_namelen, &de);
773 if (error) {
774 goto out;
775 }
776
777 /*
778 * Insert the entry into the directory.
779 * It will increase the inode link count.
780 */
781 tmpfs_dir_attach(dnode, de, node);
782 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
783
784 /* Update the timestamps and trigger the event. */
785 if (node->tn_vnode) {
786 VN_KNOTE(node->tn_vnode, NOTE_LINK);
787 }
788 tmpfs_update(vp, TMPFS_UPDATE_CTIME);
789 error = 0;
790 out:
791 VOP_UNLOCK(vp);
792 return error;
793 }
794
795 int
796 tmpfs_mkdir(void *v)
797 {
798 struct vop_mkdir_v3_args /* {
799 struct vnode *a_dvp;
800 struct vnode **a_vpp;
801 struct componentname *a_cnp;
802 struct vattr *a_vap;
803 } */ *ap = v;
804 vnode_t *dvp = ap->a_dvp;
805 vnode_t **vpp = ap->a_vpp;
806 struct componentname *cnp = ap->a_cnp;
807 struct vattr *vap = ap->a_vap;
808
809 KASSERT(vap->va_type == VDIR);
810 return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
811 }
812
813 int
814 tmpfs_rmdir(void *v)
815 {
816 struct vop_rmdir_v2_args /* {
817 struct vnode *a_dvp;
818 struct vnode *a_vp;
819 struct componentname *a_cnp;
820 } */ *ap = v;
821 vnode_t *dvp = ap->a_dvp;
822 vnode_t *vp = ap->a_vp;
823 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
824 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
825 tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
826 tmpfs_dirent_t *de;
827 int error = 0;
828
829 KASSERT(VOP_ISLOCKED(dvp));
830 KASSERT(VOP_ISLOCKED(vp));
831
832 /*
833 * Directories with more than two entries ('.' and '..') cannot be
834 * removed. There may be whiteout entries, which we will destroy.
835 */
836 if (node->tn_size > 0) {
837 /*
838 * If never had whiteout entries, the directory is certainly
839 * not empty. Otherwise, scan for any non-whiteout entry.
840 */
841 if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
842 error = ENOTEMPTY;
843 goto out;
844 }
845 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
846 if (de->td_node != TMPFS_NODE_WHITEOUT) {
847 error = ENOTEMPTY;
848 goto out;
849 }
850 }
851 KASSERT(error == 0);
852 }
853
854 KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
855
856 /* Lookup the directory entry (check the cached hint first). */
857 de = tmpfs_dir_cached(node);
858 if (de == NULL) {
859 struct componentname *cnp = ap->a_cnp;
860 de = tmpfs_dir_lookup(dnode, cnp);
861 }
862 KASSERT(de && de->td_node == node);
863
864 /* Check flags to see if we are allowed to remove the directory. */
865 if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
866 error = EPERM;
867 goto out;
868 }
869
870 /* Decrement the link count for the virtual '.' entry. */
871 node->tn_links--;
872
873 /* Detach the directory entry from the directory. */
874 tmpfs_dir_detach(dnode, de);
875
876 /* Purge the cache for parent. */
877 cache_purge(dvp);
878
879 /*
880 * Destroy the directory entry or replace it with a whiteout.
881 *
882 * Note: the inode referred by it will not be destroyed until the
883 * vnode is reclaimed.
884 */
885 if (ap->a_cnp->cn_flags & DOWHITEOUT)
886 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
887 else
888 tmpfs_free_dirent(tmp, de);
889
890 /* Destroy the whiteout entries from the node. */
891 while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
892 KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
893 tmpfs_dir_detach(node, de);
894 tmpfs_free_dirent(tmp, de);
895 }
896 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
897
898 KASSERT(node->tn_size == 0);
899 KASSERT(node->tn_links == 0);
900 out:
901 /* Release the node. */
902 KASSERT(dvp != vp);
903 vput(vp);
904 return error;
905 }
906
907 int
908 tmpfs_symlink(void *v)
909 {
910 struct vop_symlink_v3_args /* {
911 struct vnode *a_dvp;
912 struct vnode **a_vpp;
913 struct componentname *a_cnp;
914 struct vattr *a_vap;
915 char *a_target;
916 } */ *ap = v;
917 vnode_t *dvp = ap->a_dvp;
918 vnode_t **vpp = ap->a_vpp;
919 struct componentname *cnp = ap->a_cnp;
920 struct vattr *vap = ap->a_vap;
921 char *target = ap->a_target;
922
923 KASSERT(vap->va_type == VLNK);
924 return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
925 }
926
927 int
928 tmpfs_readdir(void *v)
929 {
930 struct vop_readdir_args /* {
931 struct vnode *a_vp;
932 struct uio *a_uio;
933 kauth_cred_t a_cred;
934 int *a_eofflag;
935 off_t **a_cookies;
936 int *ncookies;
937 } */ *ap = v;
938 vnode_t *vp = ap->a_vp;
939 struct uio *uio = ap->a_uio;
940 int *eofflag = ap->a_eofflag;
941 off_t **cookies = ap->a_cookies;
942 int *ncookies = ap->a_ncookies;
943 off_t startoff, cnt;
944 tmpfs_node_t *node;
945 int error;
946
947 KASSERT(VOP_ISLOCKED(vp));
948
949 /* This operation only makes sense on directory nodes. */
950 if (vp->v_type != VDIR) {
951 return ENOTDIR;
952 }
953 node = VP_TO_TMPFS_DIR(vp);
954 startoff = uio->uio_offset;
955 cnt = 0;
956
957 /*
958 * Retrieve the directory entries, unless it is being destroyed.
959 */
960 if (node->tn_links) {
961 error = tmpfs_dir_getdents(node, uio, &cnt);
962 } else {
963 error = 0;
964 }
965
966 if (eofflag != NULL) {
967 *eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
968 }
969 if (error || cookies == NULL || ncookies == NULL) {
970 return error;
971 }
972
973 /* Update NFS-related variables, if any. */
974 tmpfs_dirent_t *de = NULL;
975 off_t i, off = startoff;
976
977 *cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
978 *ncookies = cnt;
979
980 for (i = 0; i < cnt; i++) {
981 KASSERT(off != TMPFS_DIRSEQ_EOF);
982 if (off != TMPFS_DIRSEQ_DOT) {
983 if (off == TMPFS_DIRSEQ_DOTDOT) {
984 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
985 } else if (de != NULL) {
986 de = TAILQ_NEXT(de, td_entries);
987 } else {
988 de = tmpfs_dir_lookupbyseq(node, off);
989 KASSERT(de != NULL);
990 de = TAILQ_NEXT(de, td_entries);
991 }
992 if (de == NULL) {
993 off = TMPFS_DIRSEQ_EOF;
994 } else {
995 off = tmpfs_dir_getseq(node, de);
996 }
997 } else {
998 off = TMPFS_DIRSEQ_DOTDOT;
999 }
1000 (*cookies)[i] = off;
1001 }
1002 KASSERT(uio->uio_offset == off);
1003 return error;
1004 }
1005
1006 int
1007 tmpfs_readlink(void *v)
1008 {
1009 struct vop_readlink_args /* {
1010 struct vnode *a_vp;
1011 struct uio *a_uio;
1012 kauth_cred_t a_cred;
1013 } */ *ap = v;
1014 vnode_t *vp = ap->a_vp;
1015 struct uio *uio = ap->a_uio;
1016 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1017 int error;
1018
1019 KASSERT(VOP_ISLOCKED(vp));
1020 KASSERT(uio->uio_offset == 0);
1021 KASSERT(vp->v_type == VLNK);
1022
1023 /* Note: readlink(2) returns the path without NUL terminator. */
1024 if (node->tn_size > 0) {
1025 error = uiomove(node->tn_spec.tn_lnk.tn_link,
1026 MIN(node->tn_size, uio->uio_resid), uio);
1027 } else {
1028 error = 0;
1029 }
1030 tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1031
1032 return error;
1033 }
1034
1035 int
1036 tmpfs_inactive(void *v)
1037 {
1038 struct vop_inactive_v2_args /* {
1039 struct vnode *a_vp;
1040 bool *a_recycle;
1041 } */ *ap = v;
1042 vnode_t *vp = ap->a_vp;
1043 tmpfs_node_t *node;
1044 int error = 0;
1045
1046 KASSERT(VOP_ISLOCKED(vp));
1047
1048 node = VP_TO_TMPFS_NODE(vp);
1049 if (node->tn_links == 0) {
1050 /*
1051 * Mark node as dead by setting its generation to zero.
1052 */
1053 atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1054
1055 /*
1056 * If the file has been deleted, truncate it, otherwise VFS
1057 * will quite rightly try to write back dirty data, which in
1058 * the case of tmpfs/UAO means needless page deactivations.
1059 */
1060 if (vp->v_type == VREG) {
1061 error = tmpfs_reg_resize(vp, 0);
1062 }
1063 *ap->a_recycle = true;
1064 } else {
1065 *ap->a_recycle = false;
1066 }
1067
1068 return error;
1069 }
1070
1071 int
1072 tmpfs_reclaim(void *v)
1073 {
1074 struct vop_reclaim_v2_args /* {
1075 struct vnode *a_vp;
1076 } */ *ap = v;
1077 vnode_t *vp = ap->a_vp;
1078 tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1079 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1080
1081 /* Unlock vnode. We still have exclusive access to it. */
1082 VOP_UNLOCK(vp);
1083
1084 /* Disassociate inode from vnode. */
1085 node->tn_vnode = NULL;
1086 vp->v_data = NULL;
1087
1088 /* If inode is not referenced, i.e. no links, then destroy it. */
1089 if (node->tn_links == 0)
1090 tmpfs_free_node(tmp, node);
1091 return 0;
1092 }
1093
1094 int
1095 tmpfs_pathconf(void *v)
1096 {
1097 struct vop_pathconf_args /* {
1098 struct vnode *a_vp;
1099 int a_name;
1100 register_t *a_retval;
1101 } */ *ap = v;
1102 const int name = ap->a_name;
1103 register_t *retval = ap->a_retval;
1104 int error = 0;
1105
1106 switch (name) {
1107 case _PC_LINK_MAX:
1108 *retval = LINK_MAX;
1109 break;
1110 case _PC_NAME_MAX:
1111 *retval = TMPFS_MAXNAMLEN;
1112 break;
1113 case _PC_PATH_MAX:
1114 *retval = PATH_MAX;
1115 break;
1116 case _PC_PIPE_BUF:
1117 *retval = PIPE_BUF;
1118 break;
1119 case _PC_CHOWN_RESTRICTED:
1120 *retval = 1;
1121 break;
1122 case _PC_NO_TRUNC:
1123 *retval = 1;
1124 break;
1125 case _PC_SYNC_IO:
1126 *retval = 1;
1127 break;
1128 case _PC_FILESIZEBITS:
1129 *retval = sizeof(off_t) * CHAR_BIT;
1130 break;
1131 default:
1132 error = EINVAL;
1133 }
1134 return error;
1135 }
1136
1137 int
1138 tmpfs_advlock(void *v)
1139 {
1140 struct vop_advlock_args /* {
1141 struct vnode *a_vp;
1142 void * a_id;
1143 int a_op;
1144 struct flock *a_fl;
1145 int a_flags;
1146 } */ *ap = v;
1147 vnode_t *vp = ap->a_vp;
1148 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1149
1150 return lf_advlock(v, &node->tn_lockf, node->tn_size);
1151 }
1152
1153 int
1154 tmpfs_getpages(void *v)
1155 {
1156 struct vop_getpages_args /* {
1157 struct vnode *a_vp;
1158 voff_t a_offset;
1159 struct vm_page **a_m;
1160 int *a_count;
1161 int a_centeridx;
1162 vm_prot_t a_access_type;
1163 int a_advice;
1164 int a_flags;
1165 } */ * const ap = v;
1166 vnode_t *vp = ap->a_vp;
1167 const voff_t offset = ap->a_offset;
1168 struct vm_page **pgs = ap->a_m;
1169 const int centeridx = ap->a_centeridx;
1170 const vm_prot_t access_type = ap->a_access_type;
1171 const int advice = ap->a_advice;
1172 const int flags = ap->a_flags;
1173 int error, npages = *ap->a_count;
1174 tmpfs_node_t *node;
1175 struct uvm_object *uobj;
1176
1177 KASSERT(vp->v_type == VREG);
1178 KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1179
1180 /*
1181 * Currently, PGO_PASTEOF is not supported.
1182 */
1183 if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1184 if ((flags & PGO_LOCKED) == 0)
1185 rw_exit(vp->v_uobj.vmobjlock);
1186 return EINVAL;
1187 }
1188
1189 if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1190 npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1191 }
1192
1193 if ((flags & PGO_LOCKED) != 0)
1194 return EBUSY;
1195
1196 mutex_enter(vp->v_interlock);
1197 error = vdead_check(vp, VDEAD_NOWAIT);
1198 mutex_exit(vp->v_interlock);
1199 if (error != 0)
1200 return ENOENT;
1201
1202 node = VP_TO_TMPFS_NODE(vp);
1203 uobj = node->tn_spec.tn_reg.tn_aobj;
1204
1205 if ((flags & PGO_NOTIMESTAMP) == 0) {
1206 u_int tflags = 0;
1207
1208 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1209 tflags |= TMPFS_UPDATE_ATIME;
1210
1211 if ((access_type & VM_PROT_WRITE) != 0) {
1212 tflags |= TMPFS_UPDATE_MTIME;
1213 if (vp->v_mount->mnt_flag & MNT_RELATIME)
1214 tflags |= TMPFS_UPDATE_ATIME;
1215 }
1216 tmpfs_update(vp, tflags);
1217 }
1218
1219 /*
1220 * Invoke the pager.
1221 *
1222 * Clean the array of pages before. XXX: PR/32166
1223 * Note that vnode lock is shared with underlying UVM object.
1224 */
1225 if (pgs) {
1226 memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1227 }
1228 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock);
1229
1230 error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1231 access_type, advice, flags | PGO_ALLPAGES);
1232
1233 #if defined(DEBUG)
1234 if (!error && pgs) {
1235 for (int i = 0; i < npages; i++) {
1236 KASSERT(pgs[i] != NULL);
1237 }
1238 }
1239 #endif
1240 return error;
1241 }
1242
1243 int
1244 tmpfs_putpages(void *v)
1245 {
1246 struct vop_putpages_args /* {
1247 struct vnode *a_vp;
1248 voff_t a_offlo;
1249 voff_t a_offhi;
1250 int a_flags;
1251 } */ * const ap = v;
1252 vnode_t *vp = ap->a_vp;
1253 const voff_t offlo = ap->a_offlo;
1254 const voff_t offhi = ap->a_offhi;
1255 const int flags = ap->a_flags;
1256 tmpfs_node_t *node;
1257 struct uvm_object *uobj;
1258 int error;
1259
1260 KASSERT(rw_write_held(vp->v_uobj.vmobjlock));
1261
1262 if (vp->v_type != VREG) {
1263 rw_exit(vp->v_uobj.vmobjlock);
1264 return 0;
1265 }
1266
1267 node = VP_TO_TMPFS_NODE(vp);
1268 uobj = node->tn_spec.tn_reg.tn_aobj;
1269
1270 KASSERT(vp->v_uobj.vmobjlock == uobj->vmobjlock);
1271 error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1272
1273 /* XXX mtime */
1274
1275 return error;
1276 }
1277
1278 int
1279 tmpfs_whiteout(void *v)
1280 {
1281 struct vop_whiteout_args /* {
1282 struct vnode *a_dvp;
1283 struct componentname *a_cnp;
1284 int a_flags;
1285 } */ *ap = v;
1286 vnode_t *dvp = ap->a_dvp;
1287 struct componentname *cnp = ap->a_cnp;
1288 const int flags = ap->a_flags;
1289 tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1290 tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1291 tmpfs_dirent_t *de;
1292 int error;
1293
1294 switch (flags) {
1295 case LOOKUP:
1296 break;
1297 case CREATE:
1298 error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1299 cnp->cn_namelen, &de);
1300 if (error)
1301 return error;
1302 tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1303 break;
1304 case DELETE:
1305 cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1306 de = tmpfs_dir_lookup(dnode, cnp);
1307 if (de == NULL)
1308 return ENOENT;
1309 tmpfs_dir_detach(dnode, de);
1310 tmpfs_free_dirent(tmp, de);
1311 break;
1312 }
1313 tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1314 return 0;
1315 }
1316
1317 int
1318 tmpfs_print(void *v)
1319 {
1320 struct vop_print_args /* {
1321 struct vnode *a_vp;
1322 } */ *ap = v;
1323 vnode_t *vp = ap->a_vp;
1324 tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1325
1326 printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1327 "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1328 node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1329 node->tn_gid, (uintmax_t)node->tn_size);
1330 if (vp->v_type == VFIFO) {
1331 VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1332 }
1333 printf("\n");
1334 return 0;
1335 }
1336