Home | History | Annotate | Line # | Download | only in tmpfs
tmpfs_vnops.c revision 1.96
      1 /*	$NetBSD: tmpfs_vnops.c,v 1.96 2012/03/13 18:40:50 elad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
      9  * 2005 program.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * tmpfs vnode interface.
     35  */
     36 
     37 #include <sys/cdefs.h>
     38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.96 2012/03/13 18:40:50 elad Exp $");
     39 
     40 #include <sys/param.h>
     41 #include <sys/dirent.h>
     42 #include <sys/fcntl.h>
     43 #include <sys/event.h>
     44 #include <sys/malloc.h>
     45 #include <sys/namei.h>
     46 #include <sys/stat.h>
     47 #include <sys/uio.h>
     48 #include <sys/unistd.h>
     49 #include <sys/vnode.h>
     50 #include <sys/lockf.h>
     51 #include <sys/kauth.h>
     52 
     53 #include <uvm/uvm.h>
     54 
     55 #include <miscfs/fifofs/fifo.h>
     56 #include <miscfs/genfs/genfs.h>
     57 #include <fs/tmpfs/tmpfs_vnops.h>
     58 #include <fs/tmpfs/tmpfs.h>
     59 
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_p is the vector pointer and tmpfs_vnodeop_entries is the
 * table pairing each vop_*_desc operation descriptor with the tmpfs
 * routine registered for it.  The first entry pairs vop_default_desc
 * with vn_default_error (presumably the fallback for operations that are
 * not listed -- confirm against the vnodeopv initialisation code).  The
 * table is terminated by a { NULL, NULL } pair.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
	{ &vop_whiteout_desc,		tmpfs_whiteout },
	{ NULL, NULL }
};

/* Descriptor tying the vector pointer above to its entry table. */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
    113 
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp.
 * => We return looked-up vnode (vpp) locked, with a reference held.
 *
 * Returns 0 on success, or one of the following set in this routine:
 *   EROFS        DELETE/RENAME of the last component on a read-only mount
 *   EINVAL       RENAME of ".." as the last component
 *   EISDIR       RENAME of "." as the last component
 *   ENOENT       ".." has no parent node, or the name was not found
 *   EJUSTRETURN  name not found, but CREATE/RENAME of the last component
 *                is permitted (directory is writable)
 *   ENOTDIR      intermediate component is neither a directory nor a link
 *   EPERM        sticky-directory check refused the DELETE/RENAME
 * Errors from VOP_ACCESS(), cache_lookup() and tmpfs_vnode_get() are
 * passed through unchanged.
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	/* True when this is the final component of the path being resolved. */
	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	/* Start with no result so every error path leaves *vpp == NULL. */
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 * A negative return value means "not cached": fall through to the
	 * slow path below.
	 */
	error = cache_lookup(dvp, vpp, cnp);
	if (error >= 0) {
		/* Both cache-hit or an error case. */
		goto out;
	}

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EINVAL;
			goto out;
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Lock the parent tn_vlock before releasing the vnode lock,
		 * and thus prevents parent from disappearing.
		 */
		mutex_enter(&pnode->tn_vlock);
		VOP_UNLOCK(dvp);

		/*
		 * Get a vnode of the '..' entry and re-acquire the lock.
		 * Release the tn_vlock.
		 * (tn_vlock is not released in this function; presumably
		 * tmpfs_vnode_get() drops it -- confirm.)
		 */
		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		/* The ".." result bypasses the name cache: jump past "done". */
		goto out;

	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		/* The result is dvp itself; take an extra reference for it. */
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
			if (error) {
				goto out;
			}
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		/* A whiteout entry is reported as "not found" plus a flag. */
		if (de) {
			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
			cnp->cn_flags |= ISWHITEOUT;
		}
		/* *vpp is still NULL here, so "done" caches a NULL vnode. */
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (error)
			goto out;

		/*
		 * Sticky directory: additionally ask kauth whether the
		 * caller may delete this entry.  Any refusal is reported
		 * to the caller as EPERM.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0) {
			error = kauth_authorize_vnode(cnp->cn_cred,
			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
			    dnode->tn_uid, tnode->tn_uid));
			if (error) {
				error = EPERM;
				goto out;
			}
		}
	}

	/* Get a vnode for the matching entry. */
	mutex_enter(&tnode->tn_vlock);
	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 *
	 * NOTE(review): this point is also reached when tmpfs_vnode_get()
	 * above returned an error; cache_enter() is then called with
	 * whatever *vpp holds (NULL unless tmpfs_vnode_get() set it) for a
	 * name that does exist -- confirm this is intended.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp);
	}
out:
	/* Either we report an error, or we hand back a locked vnode. */
	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
	KASSERT(VOP_ISLOCKED(dvp));

	return error;
}
    289 
    290 int
    291 tmpfs_create(void *v)
    292 {
    293 	struct vop_create_args /* {
    294 		struct vnode		*a_dvp;
    295 		struct vnode		**a_vpp;
    296 		struct componentname	*a_cnp;
    297 		struct vattr		*a_vap;
    298 	} */ *ap = v;
    299 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
    300 	struct componentname *cnp = ap->a_cnp;
    301 	struct vattr *vap = ap->a_vap;
    302 
    303 	KASSERT(VOP_ISLOCKED(dvp));
    304 	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
    305 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
    306 }
    307 
    308 int
    309 tmpfs_mknod(void *v)
    310 {
    311 	struct vop_mknod_args /* {
    312 		struct vnode		*a_dvp;
    313 		struct vnode		**a_vpp;
    314 		struct componentname	*a_cnp;
    315 		struct vattr		*a_vap;
    316 	} */ *ap = v;
    317 	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
    318 	struct componentname *cnp = ap->a_cnp;
    319 	struct vattr *vap = ap->a_vap;
    320 	enum vtype vt = vap->va_type;
    321 
    322 	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
    323 		vput(dvp);
    324 		return EINVAL;
    325 	}
    326 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
    327 }
    328 
    329 int
    330 tmpfs_open(void *v)
    331 {
    332 	struct vop_open_args /* {
    333 		struct vnode	*a_vp;
    334 		int		a_mode;
    335 		kauth_cred_t	a_cred;
    336 	} */ *ap = v;
    337 	vnode_t *vp = ap->a_vp;
    338 	mode_t mode = ap->a_mode;
    339 	tmpfs_node_t *node;
    340 
    341 	KASSERT(VOP_ISLOCKED(vp));
    342 
    343 	node = VP_TO_TMPFS_NODE(vp);
    344 	if (node->tn_links < 1) {
    345 		/*
    346 		 * The file is still active, but all its names have been
    347 		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
    348 		 * any more, as it is about to be destroyed.
    349 		 */
    350 		return ENOENT;
    351 	}
    352 
    353 	/* If the file is marked append-only, deny write requests. */
    354 	if ((node->tn_flags & APPEND) != 0 &&
    355 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
    356 		return EPERM;
    357 	}
    358 	return 0;
    359 }
    360 
    361 int
    362 tmpfs_close(void *v)
    363 {
    364 	struct vop_close_args /* {
    365 		struct vnode	*a_vp;
    366 		int		a_fflag;
    367 		kauth_cred_t	a_cred;
    368 	} */ *ap = v;
    369 	vnode_t *vp = ap->a_vp;
    370 
    371 	KASSERT(VOP_ISLOCKED(vp));
    372 
    373 	tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
    374 	return 0;
    375 }
    376 
    377 int
    378 tmpfs_access(void *v)
    379 {
    380 	struct vop_access_args /* {
    381 		struct vnode	*a_vp;
    382 		int		a_mode;
    383 		kauth_cred_t	a_cred;
    384 	} */ *ap = v;
    385 	vnode_t *vp = ap->a_vp;
    386 	mode_t mode = ap->a_mode;
    387 	kauth_cred_t cred = ap->a_cred;
    388 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    389 	const bool writing = (mode & VWRITE) != 0;
    390 
    391 	KASSERT(VOP_ISLOCKED(vp));
    392 
    393 	/* Possible? */
    394 	switch (vp->v_type) {
    395 	case VDIR:
    396 	case VLNK:
    397 	case VREG:
    398 		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
    399 			return EROFS;
    400 		}
    401 		break;
    402 	case VBLK:
    403 	case VCHR:
    404 	case VSOCK:
    405 	case VFIFO:
    406 		break;
    407 	default:
    408 		return EINVAL;
    409 	}
    410 	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
    411 		return EPERM;
    412 	}
    413 
    414 	return kauth_authorize_vnode(cred, kauth_access_action(mode,
    415 	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
    416 	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
    417 }
    418 
    419 int
    420 tmpfs_getattr(void *v)
    421 {
    422 	struct vop_getattr_args /* {
    423 		struct vnode	*a_vp;
    424 		struct vattr	*a_vap;
    425 		kauth_cred_t	a_cred;
    426 	} */ *ap = v;
    427 	vnode_t *vp = ap->a_vp;
    428 	struct vattr *vap = ap->a_vap;
    429 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    430 
    431 	vattr_null(vap);
    432 
    433 	tmpfs_update(vp, NULL, NULL, NULL, 0);
    434 
    435 	vap->va_type = vp->v_type;
    436 	vap->va_mode = node->tn_mode;
    437 	vap->va_nlink = node->tn_links;
    438 	vap->va_uid = node->tn_uid;
    439 	vap->va_gid = node->tn_gid;
    440 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
    441 	vap->va_fileid = node->tn_id;
    442 	vap->va_size = node->tn_size;
    443 	vap->va_blocksize = PAGE_SIZE;
    444 	vap->va_atime = node->tn_atime;
    445 	vap->va_mtime = node->tn_mtime;
    446 	vap->va_ctime = node->tn_ctime;
    447 	vap->va_birthtime = node->tn_birthtime;
    448 	vap->va_gen = TMPFS_NODE_GEN(node);
    449 	vap->va_flags = node->tn_flags;
    450 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
    451 	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
    452 	vap->va_bytes = round_page(node->tn_size);
    453 	vap->va_filerev = VNOVAL;
    454 	vap->va_vaflags = 0;
    455 	vap->va_spare = VNOVAL; /* XXX */
    456 
    457 	return 0;
    458 }
    459 
    460 #define GOODTIME(tv)	((tv)->tv_sec != VNOVAL || (tv)->tv_nsec != VNOVAL)
    461 /* XXX Should this operation be atomic?  I think it should, but code in
    462  * XXX other places (e.g., ufs) doesn't seem to be... */
    463 int
    464 tmpfs_setattr(void *v)
    465 {
    466 	struct vop_setattr_args /* {
    467 		struct vnode	*a_vp;
    468 		struct vattr	*a_vap;
    469 		kauth_cred_t	a_cred;
    470 	} */ *ap = v;
    471 	vnode_t *vp = ap->a_vp;
    472 	struct vattr *vap = ap->a_vap;
    473 	kauth_cred_t cred = ap->a_cred;
    474 	lwp_t *l = curlwp;
    475 	int error = 0;
    476 
    477 	KASSERT(VOP_ISLOCKED(vp));
    478 
    479 	/* Abort if any unsettable attribute is given. */
    480 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
    481 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
    482 	    vap->va_blocksize != VNOVAL || GOODTIME(&vap->va_ctime) ||
    483 	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
    484 	    vap->va_bytes != VNOVAL) {
    485 		return EINVAL;
    486 	}
    487 	if (error == 0 && (vap->va_flags != VNOVAL))
    488 		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
    489 
    490 	if (error == 0 && (vap->va_size != VNOVAL))
    491 		error = tmpfs_chsize(vp, vap->va_size, cred, l);
    492 
    493 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
    494 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
    495 
    496 	if (error == 0 && (vap->va_mode != VNOVAL))
    497 		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
    498 
    499 	if (error == 0 && (GOODTIME(&vap->va_atime) || GOODTIME(&vap->va_mtime)
    500 	    || GOODTIME(&vap->va_birthtime))) {
    501 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
    502 		    &vap->va_birthtime, vap->va_vaflags, cred, l);
    503 		if (error == 0)
    504 			return 0;
    505 	}
    506 	tmpfs_update(vp, NULL, NULL, NULL, 0);
    507 	return error;
    508 }
    509 
    510 int
    511 tmpfs_read(void *v)
    512 {
    513 	struct vop_read_args /* {
    514 		struct vnode *a_vp;
    515 		struct uio *a_uio;
    516 		int a_ioflag;
    517 		kauth_cred_t a_cred;
    518 	} */ *ap = v;
    519 	vnode_t *vp = ap->a_vp;
    520 	struct uio *uio = ap->a_uio;
    521 	const int ioflag = ap->a_ioflag;
    522 	tmpfs_node_t *node;
    523 	struct uvm_object *uobj;
    524 	int error;
    525 
    526 	KASSERT(VOP_ISLOCKED(vp));
    527 
    528 	if (vp->v_type != VREG) {
    529 		return EISDIR;
    530 	}
    531 	if (uio->uio_offset < 0) {
    532 		return EINVAL;
    533 	}
    534 
    535 	node = VP_TO_TMPFS_NODE(vp);
    536 	node->tn_status |= TMPFS_NODE_ACCESSED;
    537 	uobj = node->tn_spec.tn_reg.tn_aobj;
    538 	error = 0;
    539 
    540 	while (error == 0 && uio->uio_resid > 0) {
    541 		vsize_t len;
    542 
    543 		if (node->tn_size <= uio->uio_offset) {
    544 			break;
    545 		}
    546 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
    547 		if (len == 0) {
    548 			break;
    549 		}
    550 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
    551 		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
    552 	}
    553 	return error;
    554 }
    555 
    556 int
    557 tmpfs_write(void *v)
    558 {
    559 	struct vop_write_args /* {
    560 		struct vnode	*a_vp;
    561 		struct uio	*a_uio;
    562 		int		a_ioflag;
    563 		kauth_cred_t	a_cred;
    564 	} */ *ap = v;
    565 	vnode_t *vp = ap->a_vp;
    566 	struct uio *uio = ap->a_uio;
    567 	const int ioflag = ap->a_ioflag;
    568 	tmpfs_node_t *node;
    569 	struct uvm_object *uobj;
    570 	off_t oldsize;
    571 	bool extended;
    572 	int error;
    573 
    574 	KASSERT(VOP_ISLOCKED(vp));
    575 
    576 	node = VP_TO_TMPFS_NODE(vp);
    577 	oldsize = node->tn_size;
    578 
    579 	if (uio->uio_offset < 0 || vp->v_type != VREG) {
    580 		error = EINVAL;
    581 		goto out;
    582 	}
    583 	if (uio->uio_resid == 0) {
    584 		error = 0;
    585 		goto out;
    586 	}
    587 	if (ioflag & IO_APPEND) {
    588 		uio->uio_offset = node->tn_size;
    589 	}
    590 
    591 	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
    592 	if (extended) {
    593 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
    594 		if (error)
    595 			goto out;
    596 	}
    597 
    598 	uobj = node->tn_spec.tn_reg.tn_aobj;
    599 	error = 0;
    600 	while (error == 0 && uio->uio_resid > 0) {
    601 		vsize_t len;
    602 
    603 		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
    604 		if (len == 0) {
    605 			break;
    606 		}
    607 		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
    608 		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
    609 	}
    610 	if (error) {
    611 		(void)tmpfs_reg_resize(vp, oldsize);
    612 	}
    613 
    614 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
    615 	    (extended ? TMPFS_NODE_CHANGED : 0);
    616 	VN_KNOTE(vp, NOTE_WRITE);
    617 out:
    618 	if (error) {
    619 		KASSERT(oldsize == node->tn_size);
    620 	} else {
    621 		KASSERT(uio->uio_resid == 0);
    622 	}
    623 	return error;
    624 }
    625 
    626 int
    627 tmpfs_fsync(void *v)
    628 {
    629 	struct vop_fsync_args /* {
    630 		struct vnode *a_vp;
    631 		kauth_cred_t a_cred;
    632 		int a_flags;
    633 		off_t a_offlo;
    634 		off_t a_offhi;
    635 		struct lwp *a_l;
    636 	} */ *ap = v;
    637 	vnode_t *vp = ap->a_vp;
    638 
    639 	/* Nothing to do.  Just update. */
    640 	KASSERT(VOP_ISLOCKED(vp));
    641 	tmpfs_update(vp, NULL, NULL, NULL, 0);
    642 	return 0;
    643 }
    644 
    645 /*
    646  * tmpfs_remove: unlink a file.
    647  *
    648  * => Both directory (dvp) and file (vp) are locked.
    649  * => We unlock and drop the reference on both.
    650  */
    651 int
    652 tmpfs_remove(void *v)
    653 {
    654 	struct vop_remove_args /* {
    655 		struct vnode *a_dvp;
    656 		struct vnode *a_vp;
    657 		struct componentname *a_cnp;
    658 	} */ *ap = v;
    659 	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
    660 	tmpfs_node_t *node;
    661 	tmpfs_dirent_t *de;
    662 	int error;
    663 
    664 	KASSERT(VOP_ISLOCKED(dvp));
    665 	KASSERT(VOP_ISLOCKED(vp));
    666 
    667 	if (vp->v_type == VDIR) {
    668 		error = EPERM;
    669 		goto out;
    670 	}
    671 	node = VP_TO_TMPFS_NODE(vp);
    672 
    673 	/* Files marked as immutable or append-only cannot be deleted. */
    674 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
    675 		error = EPERM;
    676 		goto out;
    677 	}
    678 
    679 	/* Lookup the directory entry (check the cached hint first). */
    680 	de = tmpfs_dir_cached(node);
    681 	if (de == NULL) {
    682 		tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
    683 		struct componentname *cnp = ap->a_cnp;
    684 		de = tmpfs_dir_lookup(dnode, cnp);
    685 	}
    686 	KASSERT(de && de->td_node == node);
    687 
    688 	/*
    689 	 * Remove the entry from the directory (drops the link count) and
    690 	 * destroy it or replace it with a whiteout.
    691 	 * Note: the inode referred by it will not be destroyed
    692 	 * until the vnode is reclaimed/recycled.
    693 	 */
    694 	tmpfs_dir_detach(dvp, de);
    695 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
    696 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
    697 	else
    698 		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
    699 	error = 0;
    700 out:
    701 	/* Drop the references and unlock the vnodes. */
    702 	vput(vp);
    703 	if (dvp == vp) {
    704 		vrele(dvp);
    705 	} else {
    706 		vput(dvp);
    707 	}
    708 	return error;
    709 }
    710 
    711 /*
    712  * tmpfs_link: create a hard link.
    713  */
    714 int
    715 tmpfs_link(void *v)
    716 {
    717 	struct vop_link_args /* {
    718 		struct vnode *a_dvp;
    719 		struct vnode *a_vp;
    720 		struct componentname *a_cnp;
    721 	} */ *ap = v;
    722 	vnode_t *dvp = ap->a_dvp;
    723 	vnode_t *vp = ap->a_vp;
    724 	struct componentname *cnp = ap->a_cnp;
    725 	tmpfs_node_t *dnode, *node;
    726 	tmpfs_dirent_t *de;
    727 	int error;
    728 
    729 	KASSERT(dvp != vp);
    730 	KASSERT(VOP_ISLOCKED(dvp));
    731 	KASSERT(vp->v_type != VDIR);
    732 	KASSERT(dvp->v_mount == vp->v_mount);
    733 
    734 	dnode = VP_TO_TMPFS_DIR(dvp);
    735 	node = VP_TO_TMPFS_NODE(vp);
    736 
    737 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    738 
    739 	/* Check for maximum number of links limit. */
    740 	if (node->tn_links == LINK_MAX) {
    741 		error = EMLINK;
    742 		goto out;
    743 	}
    744 	KASSERT(node->tn_links < LINK_MAX);
    745 
    746 	/* We cannot create links of files marked immutable or append-only. */
    747 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
    748 		error = EPERM;
    749 		goto out;
    750 	}
    751 
    752 	/* Allocate a new directory entry to represent the inode. */
    753 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
    754 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
    755 	if (error) {
    756 		goto out;
    757 	}
    758 
    759 	/*
    760 	 * Insert the entry into the directory.
    761 	 * It will increase the inode link count.
    762 	 */
    763 	tmpfs_dir_attach(dvp, de, node);
    764 
    765 	/* Update the timestamps and trigger the event. */
    766 	if (node->tn_vnode) {
    767 		VN_KNOTE(node->tn_vnode, NOTE_LINK);
    768 	}
    769 	node->tn_status |= TMPFS_NODE_CHANGED;
    770 	tmpfs_update(vp, NULL, NULL, NULL, 0);
    771 	error = 0;
    772 out:
    773 	VOP_UNLOCK(vp);
    774 	vput(dvp);
    775 	return error;
    776 }
    777 
    778 /*
    779  * tmpfs_rename: rename routine, the hairiest system call, with the
    780  * insane API.
    781  *
    782  * Arguments: fdvp (from-parent vnode), fvp (from-leaf), tdvp (to-parent)
    783  * and tvp (to-leaf), if exists (NULL if not).
    784  *
    785  * => Caller holds a reference on fdvp and fvp, they are unlocked.
    786  *    Note: fdvp and fvp can refer to the same object (i.e. when it is root).
    787  *
    788  * => Both tdvp and tvp are referenced and locked.  It is our responsibility
    789  *    to release the references and unlock them (or destroy).
    790  */
    791 
/*
 * First, some forward declarations of subroutines.
 *
 * All of these are file-local (static).  They are declared here so that
 * tmpfs_rename() below can call tmpfs_sane_rename() before its
 * definition.
 */

static int tmpfs_sane_rename(struct vnode *, struct componentname *,
    struct vnode *, struct componentname *, kauth_cred_t, bool);
static int tmpfs_rename_enter(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
static int tmpfs_rename_enter_common(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *,
    struct componentname *, struct tmpfs_dirent **, struct vnode **,
    struct componentname *, struct tmpfs_dirent **, struct vnode **);
static int tmpfs_rename_enter_separate(struct mount *, struct tmpfs_mount *,
    kauth_cred_t,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *,
    struct tmpfs_dirent **, struct vnode **);
static void tmpfs_rename_exit(struct tmpfs_mount *,
    struct vnode *, struct vnode *, struct vnode *, struct vnode *);
static int tmpfs_rename_lock_directory(struct vnode *, struct tmpfs_node *);
static int tmpfs_rename_genealogy(struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node **);
static int tmpfs_rename_lock(struct mount *, kauth_cred_t, int,
    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
    struct tmpfs_dirent **, struct vnode **,
    struct vnode *, struct tmpfs_node *, struct componentname *, bool,
    struct tmpfs_dirent **, struct vnode **);
static void tmpfs_rename_attachdetach(struct tmpfs_mount *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *,
    struct vnode *, struct tmpfs_dirent *, struct vnode *);
static int tmpfs_do_remove(struct tmpfs_mount *, struct vnode *,
    struct tmpfs_node *, struct tmpfs_dirent *, struct vnode *, kauth_cred_t);
static int tmpfs_rename_check_possible(struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_rename_check_permitted(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *,
    struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_remove_check_possible(struct tmpfs_node *,
    struct tmpfs_node *);
static int tmpfs_remove_check_permitted(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *);
static int tmpfs_check_sticky(kauth_cred_t,
    struct tmpfs_node *, struct tmpfs_node *);
    841 
    842 int
    843 tmpfs_rename(void *v)
    844 {
    845 	struct vop_rename_args  /* {
    846 		struct vnode		*a_fdvp;
    847 		struct vnode		*a_fvp;
    848 		struct componentname	*a_fcnp;
    849 		struct vnode		*a_tdvp;
    850 		struct vnode		*a_tvp;
    851 		struct componentname	*a_tcnp;
    852 	} */ *ap = v;
    853 	struct vnode *fdvp = ap->a_fdvp;
    854 	struct vnode *fvp = ap->a_fvp;
    855 	struct componentname *fcnp = ap->a_fcnp;
    856 	struct vnode *tdvp = ap->a_tdvp;
    857 	struct vnode *tvp = ap->a_tvp;
    858 	struct componentname *tcnp = ap->a_tcnp;
    859 	kauth_cred_t cred;
    860 	int error;
    861 
    862 	KASSERT(fdvp != NULL);
    863 	KASSERT(fvp != NULL);
    864 	KASSERT(fcnp != NULL);
    865 	KASSERT(fcnp->cn_nameptr != NULL);
    866 	KASSERT(tdvp != NULL);
    867 	KASSERT(tcnp != NULL);
    868 	KASSERT(fcnp->cn_nameptr != NULL);
    869 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
    870 	/* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
    871 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
    872 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
    873 	KASSERT(fdvp->v_type == VDIR);
    874 	KASSERT(tdvp->v_type == VDIR);
    875 
    876 	cred = fcnp->cn_cred;
    877 	KASSERT(tcnp->cn_cred == cred);
    878 
    879 	/*
    880 	 * Sanitize our world from the VFS insanity.  Unlock the target
    881 	 * directory and node, which are locked.  Release the children,
    882 	 * which are referenced.  Check for rename("x", "y/."), which
    883 	 * it is our responsibility to reject, not the caller's.  (But
    884 	 * the caller does reject rename("x/.", "y").  Go figure.)
    885 	 */
    886 
    887 	VOP_UNLOCK(tdvp);
    888 	if ((tvp != NULL) && (tvp != tdvp))
    889 		VOP_UNLOCK(tvp);
    890 
    891 	vrele(fvp);
    892 	if (tvp != NULL)
    893 		vrele(tvp);
    894 
    895 	if (tvp == tdvp) {
    896 		error = EINVAL;
    897 		goto out;
    898 	}
    899 
    900 	error = tmpfs_sane_rename(fdvp, fcnp, tdvp, tcnp, cred, false);
    901 
    902 out:	/*
    903 	 * All done, whether with success or failure.  Release the
    904 	 * directory nodes now, as the caller expects from the VFS
    905 	 * protocol.
    906 	 */
    907 	vrele(fdvp);
    908 	vrele(tdvp);
    909 
    910 	return error;
    911 }
    912 
    913 /*
    914  * tmpfs_sane_rename: rename routine, the hairiest system call, with
    915  * the sane API.
    916  *
    917  * Arguments:
    918  *
    919  * . fdvp (from directory vnode),
    920  * . fcnp (from component name),
    921  * . tdvp (to directory vnode), and
    922  * . tcnp (to component name).
    923  *
    924  * fdvp and tdvp must be referenced and unlocked.
    925  */
    926 static int
    927 tmpfs_sane_rename(struct vnode *fdvp, struct componentname *fcnp,
    928     struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t cred,
    929     bool posixly_correct)
    930 {
    931 	struct mount *mount;
    932 	struct tmpfs_mount *tmpfs;
    933 	struct tmpfs_node *fdnode, *tdnode;
    934 	struct tmpfs_dirent *fde, *tde;
    935 	struct vnode *fvp, *tvp;
    936 	char *newname;
    937 	int error;
    938 
    939 	KASSERT(fdvp != NULL);
    940 	KASSERT(fcnp != NULL);
    941 	KASSERT(tdvp != NULL);
    942 	KASSERT(tcnp != NULL);
    943 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
    944 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
    945 	KASSERT(fdvp->v_type == VDIR);
    946 	KASSERT(tdvp->v_type == VDIR);
    947 	KASSERT(fdvp->v_mount == tdvp->v_mount);
    948 	KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
    949 	KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
    950 	KASSERT((fcnp->cn_namelen != 1) || (fcnp->cn_nameptr[0] != '.'));
    951 	KASSERT((tcnp->cn_namelen != 1) || (tcnp->cn_nameptr[0] != '.'));
    952 	KASSERT((fcnp->cn_namelen != 2) || (fcnp->cn_nameptr[0] != '.') ||
    953 	    (fcnp->cn_nameptr[1] != '.'));
    954 	KASSERT((tcnp->cn_namelen != 2) || (tcnp->cn_nameptr[0] != '.') ||
    955 	    (tcnp->cn_nameptr[1] != '.'));
    956 
    957 	/*
    958 	 * Pull out the tmpfs data structures.
    959 	 */
    960 	fdnode = VP_TO_TMPFS_NODE(fdvp);
    961 	tdnode = VP_TO_TMPFS_NODE(tdvp);
    962 	KASSERT(fdnode != NULL);
    963 	KASSERT(tdnode != NULL);
    964 	KASSERT(fdnode->tn_vnode == fdvp);
    965 	KASSERT(tdnode->tn_vnode == tdvp);
    966 	KASSERT(fdnode->tn_type == VDIR);
    967 	KASSERT(tdnode->tn_type == VDIR);
    968 
    969 	mount = fdvp->v_mount;
    970 	KASSERT(mount != NULL);
    971 	KASSERT(mount == tdvp->v_mount);
    972 	/* XXX How can we be sure this stays true?  (Not that you're
    973 	 * likely to mount a tmpfs read-only...)  */
    974 	KASSERT((mount->mnt_flag & MNT_RDONLY) == 0);
    975 	tmpfs = VFS_TO_TMPFS(mount);
    976 	KASSERT(tmpfs != NULL);
    977 
    978 	/*
    979 	 * Decide whether we need a new name, and allocate memory for
    980 	 * it if so.  Do this before locking anything or taking
    981 	 * destructive actions so that we can back out safely and sleep
    982 	 * safely.  XXX Is sleeping an issue here?  Can this just be
    983 	 * moved into tmpfs_rename_attachdetach?
    984 	 */
    985 	if (tmpfs_strname_neqlen(fcnp, tcnp)) {
    986 		newname = tmpfs_strname_alloc(tmpfs, tcnp->cn_namelen);
    987 		if (newname == NULL) {
    988 			error = ENOSPC;
    989 			goto out_unlocked;
    990 		}
    991 	} else {
    992 		newname = NULL;
    993 	}
    994 
    995 	/*
    996 	 * Lock and look up everything.  GCC is not very clever.
    997 	 */
    998 	fde = tde = NULL;
    999 	fvp = tvp = NULL;
   1000 	error = tmpfs_rename_enter(mount, tmpfs, cred,
   1001 	    fdvp, fdnode, fcnp, &fde, &fvp,
   1002 	    tdvp, tdnode, tcnp, &tde, &tvp);
   1003 	if (error)
   1004 		goto out_unlocked;
   1005 
   1006 	/*
   1007 	 * Check that everything is locked and looks right.
   1008 	 */
   1009 	KASSERT(fde != NULL);
   1010 	KASSERT(fvp != NULL);
   1011 	KASSERT(fde->td_node != NULL);
   1012 	KASSERT(fde->td_node->tn_vnode == fvp);
   1013 	KASSERT(fde->td_node->tn_type == fvp->v_type);
   1014 	KASSERT((tde == NULL) == (tvp == NULL));
   1015 	KASSERT((tde == NULL) || (tde->td_node != NULL));
   1016 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
   1017 	KASSERT((tde == NULL) || (tde->td_node->tn_type == tvp->v_type));
   1018 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
   1019 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   1020 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
   1021 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
   1022 
   1023 	/*
   1024 	 * If the source and destination are the same object, we need
   1025 	 * only at most delete the source entry.
   1026 	 */
   1027 	if (fvp == tvp) {
   1028 		KASSERT(tvp != NULL);
   1029 		if (fde->td_node->tn_type == VDIR) {
   1030 			/* XXX How can this possibly happen?  */
   1031 			error = EINVAL;
   1032 			goto out_locked;
   1033 		}
   1034 		if (!posixly_correct && (fde != tde)) {
   1035 			/* XXX Doesn't work because of locking.
   1036 			 * error = VOP_REMOVE(fdvp, fvp);
   1037 			 */
   1038 			error = tmpfs_do_remove(tmpfs, fdvp, fdnode, fde, fvp,
   1039 			    cred);
   1040 			if (error)
   1041 				goto out_locked;
   1042 		}
   1043 		goto success;
   1044 	}
   1045 	KASSERT(fde != tde);
   1046 	KASSERT(fvp != tvp);
   1047 
   1048 	/*
   1049 	 * If the target exists, refuse to rename a directory over a
   1050 	 * non-directory or vice versa, or to clobber a non-empty
   1051 	 * directory.
   1052 	 */
   1053 	if (tvp != NULL) {
   1054 		KASSERT(tde != NULL);
   1055 		KASSERT(tde->td_node != NULL);
   1056 		if (fvp->v_type == VDIR && tvp->v_type == VDIR)
   1057 			error = ((tde->td_node->tn_size > 0)? ENOTEMPTY : 0);
   1058 		else if (fvp->v_type == VDIR && tvp->v_type != VDIR)
   1059 			error = ENOTDIR;
   1060 		else if (fvp->v_type != VDIR && tvp->v_type == VDIR)
   1061 			error = EISDIR;
   1062 		else
   1063 			error = 0;
   1064 		if (error)
   1065 			goto out_locked;
   1066 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
   1067 	}
   1068 
   1069 	/*
   1070 	 * Authorize the rename.
   1071 	 */
   1072 	error = tmpfs_rename_check_possible(fdnode, fde->td_node,
   1073 	    tdnode, (tde? tde->td_node : NULL));
   1074 	if (error)
   1075 		goto out_locked;
   1076 	error = tmpfs_rename_check_permitted(cred, fdnode, fde->td_node,
   1077 	    tdnode, (tde? tde->td_node : NULL));
   1078 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, fvp, fdvp,
   1079 	    error);
   1080 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_RENAME, tvp, tdvp,
   1081 	    error);
   1082 	if (error)
   1083 		goto out_locked;
   1084 
   1085 	/*
   1086 	 * Everything is hunky-dory.  Shuffle the directory entries.
   1087 	 */
   1088 	tmpfs_rename_attachdetach(tmpfs, fdvp, fde, fvp, tdvp, tde, tvp);
   1089 
   1090 	/*
   1091 	 * Update the directory entry's name necessary, and flag
   1092 	 * metadata updates.  A memory allocation failure here is not
   1093 	 * OK because we've already committed some changes that we
   1094 	 * can't back out at this point, and we have things locked so
   1095 	 * we can't sleep, hence the early allocation above.
   1096 	 */
   1097 	if (newname != NULL) {
   1098 		KASSERT(tcnp->cn_namelen <= TMPFS_MAXNAMLEN);
   1099 
   1100 		tmpfs_strname_free(tmpfs, fde->td_name, fde->td_namelen);
   1101 		fde->td_namelen = (uint16_t)tcnp->cn_namelen;
   1102 		(void)memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
   1103 		/* Commit newname and don't free it on the way out.  */
   1104 		fde->td_name = newname;
   1105 		newname = NULL;
   1106 
   1107 		fde->td_node->tn_status |= TMPFS_NODE_CHANGED;
   1108 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
   1109 	}
   1110 
   1111 success:
   1112 	VN_KNOTE(fvp, NOTE_RENAME);
   1113 	error = 0;
   1114 
   1115 out_locked:
   1116 	tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
   1117 
   1118 out_unlocked:
   1119 	/* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
   1120 	/* KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
   1121 	/* KASSERT((fvp == NULL) || (VOP_ISLOCKED(fvp) != LK_EXCLUSIVE)); */
   1122 	/* KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
   1123 
   1124 	if (newname != NULL)
   1125 		tmpfs_strname_free(tmpfs, newname, tcnp->cn_namelen);
   1126 
   1127 	return error;
   1128 }
   1129 
   1130 /*
   1131  * Look up fcnp in fdnode/fdvp and store its directory entry in fde_ret
   1132  * and the associated vnode in fvp_ret; fail if not found.  Look up
   1133  * tcnp in tdnode/tdvp and store its directory entry in tde_ret and the
   1134  * associated vnode in tvp_ret; store null instead if not found.  Fail
   1135  * if anything has been mounted on any of the nodes involved.
   1136  *
   1137  * fdvp and tdvp must be referenced.
   1138  *
   1139  * On entry, nothing is locked.
   1140  *
   1141  * On success, everything is locked, and *fvp_ret, and *tvp_ret if
   1142  * nonnull, are referenced.  The only pairs of vnodes that may be
   1143  * identical are {fdvp, tdvp} and {fvp, tvp}.
   1144  *
   1145  * On failure, everything remains as was.
   1146  *
   1147  * Locking everything including the source and target nodes is
   1148  * necessary to make sure that, e.g., link count updates are OK.  The
   1149  * locking order is, in general, ancestor-first, matching the order you
   1150  * need to use to look up a descendant anyway.
   1151  */
   1152 static int
   1153 tmpfs_rename_enter(struct mount *mount, struct tmpfs_mount *tmpfs,
   1154     kauth_cred_t cred,
   1155     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
   1156     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
   1157     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
   1158     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
   1159 {
   1160 	int error;
   1161 
   1162 	KASSERT(mount != NULL);
   1163 	KASSERT(tmpfs != NULL);
   1164 	KASSERT(fdvp != NULL);
   1165 	KASSERT(fdnode != NULL);
   1166 	KASSERT(fcnp != NULL);
   1167 	KASSERT(fde_ret != NULL);
   1168 	KASSERT(fvp_ret != NULL);
   1169 	KASSERT(tdvp != NULL);
   1170 	KASSERT(tdnode != NULL);
   1171 	KASSERT(tcnp != NULL);
   1172 	KASSERT(tde_ret != NULL);
   1173 	KASSERT(tvp_ret != NULL);
   1174 	KASSERT(fdnode->tn_vnode == fdvp);
   1175 	KASSERT(tdnode->tn_vnode == tdvp);
   1176 	KASSERT(fdnode->tn_type == VDIR);
   1177 	KASSERT(tdnode->tn_type == VDIR);
   1178 
   1179 	if (fdvp == tdvp) {
   1180 		KASSERT(fdnode == tdnode);
   1181 		error = tmpfs_rename_enter_common(mount, tmpfs, cred, fdvp,
   1182 		    fdnode, fcnp, fde_ret, fvp_ret, tcnp, tde_ret, tvp_ret);
   1183 	} else {
   1184 		KASSERT(fdnode != tdnode);
   1185 		error = tmpfs_rename_enter_separate(mount, tmpfs, cred,
   1186 		    fdvp, fdnode, fcnp, fde_ret, fvp_ret,
   1187 		    tdvp, tdnode, tcnp, tde_ret, tvp_ret);
   1188 	}
   1189 
   1190 	if (error)
   1191 		return error;
   1192 
   1193 	KASSERT(*fde_ret != NULL);
   1194 	KASSERT(*fvp_ret != NULL);
   1195 	KASSERT((*tde_ret == NULL) == (*tvp_ret == NULL));
   1196 	KASSERT((*tde_ret == NULL) || ((*tde_ret)->td_node != NULL));
   1197 	KASSERT((*tde_ret == NULL) ||
   1198 	    ((*tde_ret)->td_node->tn_vnode == *tvp_ret));
   1199 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
   1200 	KASSERT(VOP_ISLOCKED(*fvp_ret) == LK_EXCLUSIVE);
   1201 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   1202 	KASSERT((*tvp_ret == NULL) ||
   1203 	    (VOP_ISLOCKED(*tvp_ret) == LK_EXCLUSIVE));
   1204 	KASSERT(*fvp_ret != fdvp);
   1205 	KASSERT(*fvp_ret != tdvp);
   1206 	KASSERT(*tvp_ret != fdvp);
   1207 	KASSERT(*tvp_ret != tdvp);
   1208 	return 0;
   1209 }
   1210 
   1211 /*
   1212  * Lock and look up with a common source/target directory.
   1213  */
   1214 static int
   1215 tmpfs_rename_enter_common(struct mount *mount, struct tmpfs_mount *tmpfs,
   1216     kauth_cred_t cred,
   1217     struct vnode *dvp, struct tmpfs_node *dnode,
   1218     struct componentname *fcnp,
   1219     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
   1220     struct componentname *tcnp,
   1221     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
   1222 {
   1223 	struct tmpfs_dirent *fde, *tde;
   1224 	struct vnode *fvp, *tvp;
   1225 	int error;
   1226 
   1227 	error = tmpfs_rename_lock_directory(dvp, dnode);
   1228 	if (error)
   1229 		goto fail0;
   1230 
   1231 	/* Did we lose a race with mount?  */
   1232 	if (dvp->v_mountedhere != NULL) {
   1233 		error = EBUSY;
   1234 		goto fail1;
   1235 	}
   1236 
   1237 	/* Make sure the caller may read the directory.  */
   1238 	error = VOP_ACCESS(dvp, VEXEC, cred);
   1239 	if (error)
   1240 		goto fail1;
   1241 
   1242 	/*
   1243 	 * The order in which we lock the source and target nodes is
   1244 	 * irrelevant because there can only be one rename on this
   1245 	 * directory in flight at a time, and we have it locked.
   1246 	 */
   1247 
   1248 	fde = tmpfs_dir_lookup(dnode, fcnp);
   1249 	if (fde == NULL) {
   1250 		error = ENOENT;
   1251 		goto fail1;
   1252 	}
   1253 
   1254 	KASSERT(fde->td_node != NULL);
   1255 	/* We ruled out `.' earlier.  */
   1256 	KASSERT(fde->td_node != dnode);
   1257 	/* We ruled out `..' earlier.  */
   1258 	KASSERT(fde->td_node != dnode->tn_spec.tn_dir.tn_parent);
   1259 	mutex_enter(&fde->td_node->tn_vlock);
   1260 	error = tmpfs_vnode_get(mount, fde->td_node, &fvp);
   1261 	if (error)
   1262 		goto fail1;
   1263 	KASSERT(fvp != NULL);
   1264 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
   1265 	KASSERT(fvp != dvp);
   1266 	KASSERT(fvp->v_mount == mount);
   1267 
   1268 	/* Refuse to rename a mount point.  */
   1269 	if ((fvp->v_type == VDIR) && (fvp->v_mountedhere != NULL)) {
   1270 		error = EBUSY;
   1271 		goto fail2;
   1272 	}
   1273 
   1274 	tde = tmpfs_dir_lookup(dnode, tcnp);
   1275 	if (tde == NULL) {
   1276 		tvp = NULL;
   1277 	} else {
   1278 		KASSERT(tde->td_node != NULL);
   1279 		/* We ruled out `.' earlier.  */
   1280 		KASSERT(tde->td_node != dnode);
   1281 		/* We ruled out `..' earlier.  */
   1282 		KASSERT(tde->td_node != dnode->tn_spec.tn_dir.tn_parent);
   1283 		if (tde->td_node != fde->td_node) {
   1284 			mutex_enter(&tde->td_node->tn_vlock);
   1285 			error = tmpfs_vnode_get(mount, tde->td_node, &tvp);
   1286 			if (error)
   1287 				goto fail2;
   1288 			KASSERT(tvp->v_mount == mount);
   1289 			/* Refuse to rename over a mount point.  */
   1290 			if ((tvp->v_type == VDIR) &&
   1291 			    (tvp->v_mountedhere != NULL)) {
   1292 				error = EBUSY;
   1293 				goto fail3;
   1294 			}
   1295 		} else {
   1296 			tvp = fvp;
   1297 			vref(tvp);
   1298 		}
   1299 		KASSERT(tvp != NULL);
   1300 		KASSERT(VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
   1301 	}
   1302 	KASSERT(tvp != dvp);
   1303 
   1304 	*fde_ret = fde;
   1305 	*fvp_ret = fvp;
   1306 	*tde_ret = tde;
   1307 	*tvp_ret = tvp;
   1308 	return 0;
   1309 
   1310 fail3:	if (tvp != NULL) {
   1311 		if (tvp != fvp)
   1312 			vput(tvp);
   1313 		else
   1314 			vrele(tvp);
   1315 	}
   1316 
   1317 fail2:	vput(fvp);
   1318 fail1:	VOP_UNLOCK(dvp);
   1319 fail0:	return error;
   1320 }
   1321 
   1322 /*
   1323  * Lock and look up with separate source and target directories.
   1324  */
   1325 static int
   1326 tmpfs_rename_enter_separate(struct mount *mount, struct tmpfs_mount *tmpfs,
   1327     kauth_cred_t cred,
   1328     struct vnode *fdvp, struct tmpfs_node *fdnode, struct componentname *fcnp,
   1329     struct tmpfs_dirent **fde_ret, struct vnode **fvp_ret,
   1330     struct vnode *tdvp, struct tmpfs_node *tdnode, struct componentname *tcnp,
   1331     struct tmpfs_dirent **tde_ret, struct vnode **tvp_ret)
   1332 {
   1333 	struct tmpfs_node *intermediate_node;
   1334 	struct tmpfs_dirent *fde, *tde;
   1335 	struct vnode *fvp, *tvp;
   1336 	int error;
   1337 
   1338 	KASSERT(fdvp != tdvp);
   1339 	KASSERT(fdnode != tdnode);
   1340 
   1341 #if 0				/* XXX */
   1342 	mutex_enter(&tmpfs->tm_rename_lock);
   1343 #endif
   1344 
   1345 	error = tmpfs_rename_genealogy(fdnode, tdnode, &intermediate_node);
   1346 	if (error)
   1347 		goto fail;
   1348 
   1349 	/*
   1350 	 * intermediate_node == NULL means fdnode is not an ancestor of
   1351 	 * tdnode.
   1352 	 */
   1353 	if (intermediate_node == NULL)
   1354 		error = tmpfs_rename_lock(mount, cred, ENOTEMPTY,
   1355 		    tdvp, tdnode, tcnp, true, &tde, &tvp,
   1356 		    fdvp, fdnode, fcnp, false, &fde, &fvp);
   1357 	else
   1358 		error = tmpfs_rename_lock(mount, cred, EINVAL,
   1359 		    fdvp, fdnode, fcnp, false, &fde, &fvp,
   1360 		    tdvp, tdnode, tcnp, true, &tde, &tvp);
   1361 	if (error)
   1362 		goto fail;
   1363 
   1364 	KASSERT(fde != NULL);
   1365 	KASSERT(fde->td_node != NULL);
   1366 
   1367 	/*
   1368 	 * Reject rename("foo/bar", "foo/bar/baz/quux/zot").
   1369 	 */
   1370 	if (fde->td_node == intermediate_node) {
   1371 		tmpfs_rename_exit(tmpfs, fdvp, fvp, tdvp, tvp);
   1372 		return EINVAL;
   1373 	}
   1374 
   1375 	*fde_ret = fde;
   1376 	*fvp_ret = fvp;
   1377 	*tde_ret = tde;
   1378 	*tvp_ret = tvp;
   1379 	return 0;
   1380 
   1381 fail:
   1382 #if 0				/* XXX */
   1383 	mutex_exit(&tmpfs->tm_rename_lock);
   1384 #endif
   1385 	return error;
   1386 }
   1387 
   1388 /*
   1389  * Unlock everything we locked for rename.
   1390  *
   1391  * fdvp and tdvp must be referenced.
   1392  *
   1393  * On entry, everything is locked, and fvp and tvp referenced.
   1394  *
   1395  * On exit, everything is unlocked, and fvp and tvp are released.
   1396  */
   1397 static void
   1398 tmpfs_rename_exit(struct tmpfs_mount *tmpfs,
   1399     struct vnode *fdvp, struct vnode *fvp,
   1400     struct vnode *tdvp, struct vnode *tvp)
   1401 {
   1402 
   1403 	KASSERT(tmpfs != NULL);
   1404 	KASSERT(fdvp != NULL);
   1405 	KASSERT(fvp != NULL);
   1406 	KASSERT(fdvp != fvp);
   1407 	KASSERT(fdvp != tvp);
   1408 	KASSERT(tdvp != tvp);
   1409 	KASSERT(tdvp != fvp);
   1410 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
   1411 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   1412 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
   1413 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
   1414 
   1415 	if (tvp != NULL) {
   1416 		if (tvp != fvp)
   1417 			vput(tvp);
   1418 		else
   1419 			vrele(tvp);
   1420 	}
   1421 	VOP_UNLOCK(tdvp);
   1422 	vput(fvp);
   1423 	if (fdvp != tdvp)
   1424 		VOP_UNLOCK(fdvp);
   1425 
   1426 #if 0				/* XXX */
   1427 	if (fdvp != tdvp)
   1428 		mutex_exit(&tmpfs->tm_rename_lock);
   1429 #endif
   1430 }
   1431 
   1432 /*
   1433  * Lock a directory, but fail if it has been rmdir'd.
   1434  *
   1435  * vp must be referenced.
   1436  */
   1437 static int
   1438 tmpfs_rename_lock_directory(struct vnode *vp, struct tmpfs_node *node)
   1439 {
   1440 
   1441 	KASSERT(vp != NULL);
   1442 	KASSERT(node != NULL);
   1443 	KASSERT(node->tn_vnode == vp);
   1444 	KASSERT(node->tn_type == VDIR);
   1445 
   1446 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
   1447 	if (node->tn_spec.tn_dir.tn_parent == NULL) {
   1448 		VOP_UNLOCK(vp);
   1449 		return ENOENT;
   1450 	}
   1451 
   1452 	return 0;
   1453 }
   1454 
   1455 /*
   1456  * Analyze the genealogy of the source and target nodes.
   1457  *
   1458  * On success, stores in *intermediate_node_ret either the child of
   1459  * fdnode of which tdnode is a descendant, or null if tdnode is not a
   1460  * descendant of fdnode at all.
   1461  *
   1462  * fdnode and tdnode must be unlocked and referenced.  The file
   1463  * system's rename lock must also be held, to exclude concurrent
   1464  * changes to the file system's genealogy other than rmdir.
   1465  *
   1466  * XXX This causes an extra lock/unlock of tdnode in the case when
   1467  * we're just about to lock it again before locking anything else.
   1468  * However, changing that requires reorganizing the code to make it
   1469  * even more horrifically obscure.
   1470  */
   1471 static int
   1472 tmpfs_rename_genealogy(struct tmpfs_node *fdnode, struct tmpfs_node *tdnode,
   1473     struct tmpfs_node **intermediate_node_ret)
   1474 {
   1475 	struct tmpfs_node *node = tdnode, *parent;
   1476 	int error;
   1477 
   1478 	KASSERT(fdnode != NULL);
   1479 	KASSERT(tdnode != NULL);
   1480 	KASSERT(fdnode != tdnode);
   1481 	KASSERT(intermediate_node_ret != NULL);
   1482 
   1483 	KASSERT(fdnode->tn_vnode != NULL);
   1484 	KASSERT(tdnode->tn_vnode != NULL);
   1485 	KASSERT(fdnode->tn_type == VDIR);
   1486 	KASSERT(tdnode->tn_type == VDIR);
   1487 
   1488 	/*
   1489 	 * We need to provisionally lock tdnode->tn_vnode to keep rmdir
   1490 	 * from deleting it -- or any ancestor -- at an inopportune
   1491 	 * moment.
   1492 	 */
   1493 	error = tmpfs_rename_lock_directory(tdnode->tn_vnode, tdnode);
   1494 	if (error)
   1495 		return error;
   1496 
   1497 	for (;;) {
   1498 		parent = node->tn_spec.tn_dir.tn_parent;
   1499 		KASSERT(parent != NULL);
   1500 		KASSERT(parent->tn_type == VDIR);
   1501 
   1502 		/* Did we hit the root without finding fdnode?  */
   1503 		if (parent == node) {
   1504 			*intermediate_node_ret = NULL;
   1505 			break;
   1506 		}
   1507 
   1508 		/* Did we find that fdnode is an ancestor?  */
   1509 		if (parent == fdnode) {
   1510 			*intermediate_node_ret = node;
   1511 			break;
   1512 		}
   1513 
   1514 		/* Neither -- keep ascending the family tree.  */
   1515 		node = parent;
   1516 	}
   1517 
   1518 	VOP_UNLOCK(tdnode->tn_vnode);
   1519 	return 0;
   1520 }
   1521 
   1522 /*
   1523  * Lock directories a and b, which must be distinct, and look up and
   1524  * lock nodes a and b.  Do a first and then b.  Directory b may not be
   1525  * an ancestor of directory a, although directory a may be an ancestor
   1526  * of directory b.  Fail with overlap_error if node a is directory b.
   1527  * Neither componentname may be `.' or `..'.
   1528  *
   1529  * a_dvp and b_dvp must be referenced.
   1530  *
   1531  * On entry, a_dvp and b_dvp are unlocked.
   1532  *
   1533  * On success,
   1534  * . a_dvp and b_dvp are locked,
   1535  * . *a_dirent_ret is filled with a directory entry whose node is
   1536  *     locked and referenced,
   1537  * . *b_vp_ret is filled with the corresponding vnode,
   1538  * . *b_dirent_ret is filled either with null or with a directory entry
   1539  *     whose node is locked and referenced,
   1540  * . *b_vp is filled either with null or with the corresponding vnode,
   1541  *     and
   1542  * . the only pair of vnodes that may be identical is a_vp and b_vp.
   1543  *
   1544  * On failure, a_dvp and b_dvp are left unlocked, and *a_dirent_ret,
   1545  * *a_vp, *b_dirent_ret, and *b_vp are left alone.
   1546  */
   1547 static int
   1548 tmpfs_rename_lock(struct mount *mount, kauth_cred_t cred, int overlap_error,
   1549     struct vnode *a_dvp, struct tmpfs_node *a_dnode,
   1550     struct componentname *a_cnp, bool a_missing_ok,
   1551     struct tmpfs_dirent **a_dirent_ret, struct vnode **a_vp_ret,
   1552     struct vnode *b_dvp, struct tmpfs_node *b_dnode,
   1553     struct componentname *b_cnp, bool b_missing_ok,
   1554     struct tmpfs_dirent **b_dirent_ret, struct vnode **b_vp_ret)
   1555 {
   1556 	struct tmpfs_dirent *a_dirent, *b_dirent;
   1557 	struct vnode *a_vp, *b_vp;
   1558 	int error;
   1559 
   1560 	KASSERT(a_dvp != NULL);
   1561 	KASSERT(a_dnode != NULL);
   1562 	KASSERT(a_cnp != NULL);
   1563 	KASSERT(a_dirent_ret != NULL);
   1564 	KASSERT(a_vp_ret != NULL);
   1565 	KASSERT(b_dvp != NULL);
   1566 	KASSERT(b_dnode != NULL);
   1567 	KASSERT(b_cnp != NULL);
   1568 	KASSERT(b_dirent_ret != NULL);
   1569 	KASSERT(b_vp_ret != NULL);
   1570 	KASSERT(a_dvp != b_dvp);
   1571 	KASSERT(a_dnode != b_dnode);
   1572 	KASSERT(a_dnode->tn_vnode == a_dvp);
   1573 	KASSERT(b_dnode->tn_vnode == b_dvp);
   1574 	KASSERT(a_dnode->tn_type == VDIR);
   1575 	KASSERT(b_dnode->tn_type == VDIR);
   1576 	KASSERT(a_missing_ok != b_missing_ok);
   1577 
   1578 	error = tmpfs_rename_lock_directory(a_dvp, a_dnode);
   1579 	if (error)
   1580 		goto fail0;
   1581 
   1582 	/* Did we lose a race with mount?  */
   1583 	if (a_dvp->v_mountedhere != NULL) {
   1584 		error = EBUSY;
   1585 		goto fail1;
   1586 	}
   1587 
   1588 	/* Make sure the caller may read the directory.  */
   1589 	error = VOP_ACCESS(a_dvp, VEXEC, cred);
   1590 	if (error)
   1591 		goto fail1;
   1592 
   1593 	a_dirent = tmpfs_dir_lookup(a_dnode, a_cnp);
   1594 	if (a_dirent != NULL) {
   1595 		KASSERT(a_dirent->td_node != NULL);
   1596 		/* We ruled out `.' earlier.  */
   1597 		KASSERT(a_dirent->td_node != a_dnode);
   1598 		/* We ruled out `..' earlier.  */
   1599 		KASSERT(a_dirent->td_node !=
   1600 		    a_dnode->tn_spec.tn_dir.tn_parent);
   1601 		if (a_dirent->td_node == b_dnode) {
   1602 			error = overlap_error;
   1603 			goto fail1;
   1604 		}
   1605 		mutex_enter(&a_dirent->td_node->tn_vlock);
   1606 		error = tmpfs_vnode_get(mount, a_dirent->td_node, &a_vp);
   1607 		if (error)
   1608 			goto fail1;
   1609 		KASSERT(a_vp->v_mount == mount);
   1610 		/* Refuse to rename (over) a mount point.  */
   1611 		if ((a_vp->v_type == VDIR) && (a_vp->v_mountedhere != NULL)) {
   1612 			error = EBUSY;
   1613 			goto fail2;
   1614 		}
   1615 	} else if (!a_missing_ok) {
   1616 		error = ENOENT;
   1617 		goto fail1;
   1618 	} else {
   1619 		a_vp = NULL;
   1620 	}
   1621 	KASSERT(a_vp != a_dvp);
   1622 	KASSERT(a_vp != b_dvp);
   1623 
   1624 	error = tmpfs_rename_lock_directory(b_dvp, b_dnode);
   1625 	if (error)
   1626 		goto fail2;
   1627 
   1628 	/* Did we lose a race with mount?  */
   1629 	if (b_dvp->v_mountedhere != NULL) {
   1630 		error = EBUSY;
   1631 		goto fail3;
   1632 	}
   1633 
   1634 	/* Make sure the caller may read the directory.  */
   1635 	error = VOP_ACCESS(b_dvp, VEXEC, cred);
   1636 	if (error)
   1637 		goto fail3;
   1638 
   1639 	b_dirent = tmpfs_dir_lookup(b_dnode, b_cnp);
   1640 	if (b_dirent != NULL) {
   1641 		KASSERT(b_dirent->td_node != NULL);
   1642 		/* We ruled out `.' earlier.  */
   1643 		KASSERT(b_dirent->td_node != b_dnode);
   1644 		/* We ruled out `..' earlier.  */
   1645 		KASSERT(b_dirent->td_node !=
   1646 		    b_dnode->tn_spec.tn_dir.tn_parent);
   1647 		/* b is not an ancestor of a.  */
   1648 		KASSERT(b_dirent->td_node != a_dnode);
   1649 		/* But the source and target nodes might be the same.  */
   1650 		if ((a_dirent == NULL) ||
   1651 		    (a_dirent->td_node != b_dirent->td_node)) {
   1652 			mutex_enter(&b_dirent->td_node->tn_vlock);
   1653 			error = tmpfs_vnode_get(mount, b_dirent->td_node,
   1654 			    &b_vp);
   1655 			if (error)
   1656 				goto fail3;
   1657 			KASSERT(b_vp->v_mount == mount);
   1658 			KASSERT(a_vp != b_vp);
   1659 			/* Refuse to rename (over) a mount point.  */
   1660 			if ((b_vp->v_type == VDIR) &&
   1661 			    (b_vp->v_mountedhere != NULL)) {
   1662 				error = EBUSY;
   1663 				goto fail4;
   1664 			}
   1665 		} else {
   1666 			b_vp = a_vp;
   1667 			vref(b_vp);
   1668 		}
   1669 	} else if (!b_missing_ok) {
   1670 		error = ENOENT;
   1671 		goto fail3;
   1672 	} else {
   1673 		b_vp = NULL;
   1674 	}
   1675 	KASSERT(b_vp != a_dvp);
   1676 	KASSERT(b_vp != b_dvp);
   1677 
   1678 	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
   1679 	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
   1680 	KASSERT(a_missing_ok || (a_dirent != NULL));
   1681 	KASSERT(a_missing_ok || (a_dirent->td_node != NULL));
   1682 	KASSERT(b_missing_ok || (b_dirent != NULL));
   1683 	KASSERT(b_missing_ok || (b_dirent->td_node != NULL));
   1684 	KASSERT((a_dirent == NULL) || (a_dirent->td_node != NULL));
   1685 	KASSERT((a_dirent == NULL) || (a_dirent->td_node->tn_vnode == a_vp));
   1686 	KASSERT((b_dirent == NULL) || (b_dirent->td_node != NULL));
   1687 	KASSERT((b_dirent == NULL) || (b_dirent->td_node->tn_vnode == b_vp));
   1688 	KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE));
   1689 	KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE));
   1690 
   1691 	*a_dirent_ret = a_dirent;
   1692 	*b_dirent_ret = b_dirent;
   1693 	*a_vp_ret = a_vp;
   1694 	*b_vp_ret = b_vp;
   1695 	return 0;
   1696 
   1697 fail4:	if (b_vp != NULL) {
   1698 		KASSERT(VOP_ISLOCKED(b_vp) == LK_EXCLUSIVE);
   1699 		if (b_vp != a_vp)
   1700 			vput(b_vp);
   1701 		else
   1702 			vrele(a_vp);
   1703 	}
   1704 
   1705 fail3:	KASSERT(VOP_ISLOCKED(b_dvp) == LK_EXCLUSIVE);
   1706 	VOP_UNLOCK(b_dvp);
   1707 
   1708 fail2:	if (a_vp != NULL) {
   1709 		KASSERT(VOP_ISLOCKED(a_vp) == LK_EXCLUSIVE);
   1710 		vput(a_vp);
   1711 	}
   1712 
   1713 fail1:	KASSERT(VOP_ISLOCKED(a_dvp) == LK_EXCLUSIVE);
   1714 	VOP_UNLOCK(a_dvp);
   1715 
   1716 fail0:	/* KASSERT(VOP_ISLOCKED(a_dvp) != LK_EXCLUSIVE); */
   1717 	/* KASSERT(VOP_ISLOCKED(b_dvp) != LK_EXCLUSIVE); */
   1718 	/* KASSERT((a_vp == NULL) || (VOP_ISLOCKED(a_vp) != LK_EXCLUSIVE)); */
   1719 	/* KASSERT((b_vp == NULL) || (VOP_ISLOCKED(b_vp) != LK_EXCLUSIVE)); */
   1720 	return error;
   1721 }
   1722 
   1723 /*
   1724  * Shuffle the directory entries to move fvp from the directory fdvp
   1725  * into the directory tdvp.  fde is fvp's directory entry in fdvp.  If
   1726  * we are overwriting a target node, it is tvp, and tde is its
   1727  * directory entry in tdvp.
   1728  *
   1729  * fdvp, fvp, tdvp, and tvp must all be locked and referenced.
   1730  */
   1731 static void
   1732 tmpfs_rename_attachdetach(struct tmpfs_mount *tmpfs,
   1733     struct vnode *fdvp, struct tmpfs_dirent *fde, struct vnode *fvp,
   1734     struct vnode *tdvp, struct tmpfs_dirent *tde, struct vnode *tvp)
   1735 {
   1736 
   1737 	KASSERT(tmpfs != NULL);
   1738 	KASSERT(fdvp != NULL);
   1739 	KASSERT(fde != NULL);
   1740 	KASSERT(fvp != NULL);
   1741 	KASSERT(tdvp != NULL);
   1742 	KASSERT(fde->td_node != NULL);
   1743 	KASSERT(fde->td_node->tn_vnode == fvp);
   1744 	KASSERT((tde == NULL) == (tvp == NULL));
   1745 	KASSERT((tde == NULL) || (tde->td_node != NULL));
   1746 	KASSERT((tde == NULL) || (tde->td_node->tn_vnode == tvp));
   1747 	KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
   1748 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   1749 	KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE);
   1750 	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
   1751 
   1752 	/*
   1753 	 * If we are moving from one directory to another, detach the
   1754 	 * source entry and reattach it to the target directory.
   1755 	 */
   1756 	if (fdvp != tdvp) {
   1757 		/* tmpfs_dir_detach clobbers fde->td_node, so save it.  */
   1758 		struct tmpfs_node *fnode = fde->td_node;
   1759 		tmpfs_dir_detach(fdvp, fde);
   1760 		tmpfs_dir_attach(tdvp, fde, fnode);
   1761 	} else if (tvp == NULL) {
   1762 		/*
   1763 		 * We are changing the directory.  tmpfs_dir_attach and
   1764 		 * tmpfs_dir_detach note the events for us, but for
   1765 		 * this case we don't call them, so we must note the
   1766 		 * event explicitly.
   1767 		 */
   1768 		VN_KNOTE(fdvp, NOTE_WRITE);
   1769 	}
   1770 
   1771 	/*
   1772 	 * If we are replacing an existing target entry, delete it.
   1773 	 */
   1774 	if (tde != NULL) {
   1775 		KASSERT(tvp != NULL);
   1776 		KASSERT(tde->td_node != NULL);
   1777 		KASSERT((fvp->v_type == VDIR) == (tvp->v_type == VDIR));
   1778 		if (tde->td_node->tn_type == VDIR) {
   1779 			KASSERT(tde->td_node->tn_size == 0);
   1780 			KASSERT(tde->td_node->tn_links == 2);
   1781 			/* Decrement the extra link count for `.' so
   1782 			 * the vnode will be recycled when released.  */
   1783 			tde->td_node->tn_links--;
   1784 		}
   1785 		tmpfs_dir_detach(tdvp, tde);
   1786 		tmpfs_free_dirent(tmpfs, tde);
   1787 	}
   1788 }
   1789 
   1790 /*
   1791  * Remove the entry de for the non-directory vp from the directory dvp.
   1792  *
   1793  * Everything must be locked and referenced.
   1794  */
   1795 static int
   1796 tmpfs_do_remove(struct tmpfs_mount *tmpfs, struct vnode *dvp,
   1797     struct tmpfs_node *dnode, struct tmpfs_dirent *de, struct vnode *vp,
   1798     kauth_cred_t cred)
   1799 {
   1800 	int error;
   1801 
   1802 	KASSERT(tmpfs != NULL);
   1803 	KASSERT(dvp != NULL);
   1804 	KASSERT(dnode != NULL);
   1805 	KASSERT(de != NULL);
   1806 	KASSERT(vp != NULL);
   1807 	KASSERT(dnode->tn_vnode == dvp);
   1808 	KASSERT(de->td_node != NULL);
   1809 	KASSERT(de->td_node->tn_vnode == vp);
   1810 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   1811 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
   1812 
   1813 	error = tmpfs_remove_check_possible(dnode, de->td_node);
   1814 	if (error)
   1815 		return error;
   1816 
   1817 	error = tmpfs_remove_check_permitted(cred, dnode, de->td_node);
   1818 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, vp, dvp,
   1819 	    error);
   1820 	if (error)
   1821 		return error;
   1822 
   1823 	tmpfs_dir_detach(dvp, de);
   1824 	tmpfs_free_dirent(tmpfs, de);
   1825 
   1826 	return 0;
   1827 }
   1828 
   1829 /*
   1830  * Check whether a rename is possible independent of credentials.
   1831  *
   1832  * Everything must be locked and referenced.
   1833  */
   1834 static int
   1835 tmpfs_rename_check_possible(
   1836     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
   1837     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
   1838 {
   1839 
   1840 	KASSERT(fdnode != NULL);
   1841 	KASSERT(fnode != NULL);
   1842 	KASSERT(tdnode != NULL);
   1843 	KASSERT(fdnode != fnode);
   1844 	KASSERT(tdnode != tnode);
   1845 	KASSERT(fnode != tnode);
   1846 	KASSERT(fdnode->tn_vnode != NULL);
   1847 	KASSERT(fnode->tn_vnode != NULL);
   1848 	KASSERT(tdnode->tn_vnode != NULL);
   1849 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
   1850 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
   1851 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
   1852 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
   1853 	KASSERT((tnode == NULL) ||
   1854 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
   1855 
   1856 	/*
   1857 	 * If fdnode is immutable, we can't write to it.  If fdnode is
   1858 	 * append-only, the only change we can make is to add entries
   1859 	 * to it.  If fnode is immutable, we can't change the links to
   1860 	 * it.  If fnode is append-only...well, this is what UFS does.
   1861 	 */
   1862 	if ((fdnode->tn_flags | fnode->tn_flags) & (IMMUTABLE | APPEND))
   1863 		return EPERM;
   1864 
   1865 	/*
   1866 	 * If tdnode is immutable, we can't write to it.  If tdnode is
   1867 	 * append-only, we can add entries, but we can't change
   1868 	 * existing entries.
   1869 	 */
   1870 	if (tdnode->tn_flags & (IMMUTABLE | (tnode? APPEND : 0)))
   1871 		return EPERM;
   1872 
   1873 	/*
   1874 	 * If tnode is immutable, we can't replace links to it.  If
   1875 	 * tnode is append-only...well, this is what UFS does.
   1876 	 */
   1877 	if (tnode != NULL) {
   1878 		KASSERT(tnode != NULL);
   1879 		if ((tnode->tn_flags & (IMMUTABLE | APPEND)) != 0)
   1880 			return EPERM;
   1881 	}
   1882 
   1883 	return 0;
   1884 }
   1885 
   1886 /*
   1887  * Check whether a rename is permitted given our credentials.
   1888  *
   1889  * Everything must be locked and referenced.
   1890  */
   1891 static int
   1892 tmpfs_rename_check_permitted(kauth_cred_t cred,
   1893     struct tmpfs_node *fdnode, struct tmpfs_node *fnode,
   1894     struct tmpfs_node *tdnode, struct tmpfs_node *tnode)
   1895 {
   1896 	int error;
   1897 
   1898 	KASSERT(fdnode != NULL);
   1899 	KASSERT(fnode != NULL);
   1900 	KASSERT(tdnode != NULL);
   1901 	KASSERT(fdnode != fnode);
   1902 	KASSERT(tdnode != tnode);
   1903 	KASSERT(fnode != tnode);
   1904 	KASSERT(fdnode->tn_vnode != NULL);
   1905 	KASSERT(fnode->tn_vnode != NULL);
   1906 	KASSERT(tdnode->tn_vnode != NULL);
   1907 	KASSERT((tnode == NULL) || (tnode->tn_vnode != NULL));
   1908 	KASSERT(VOP_ISLOCKED(fdnode->tn_vnode) == LK_EXCLUSIVE);
   1909 	KASSERT(VOP_ISLOCKED(fnode->tn_vnode) == LK_EXCLUSIVE);
   1910 	KASSERT(VOP_ISLOCKED(tdnode->tn_vnode) == LK_EXCLUSIVE);
   1911 	KASSERT((tnode == NULL) ||
   1912 	    (VOP_ISLOCKED(tnode->tn_vnode) == LK_EXCLUSIVE));
   1913 
   1914 	/*
   1915 	 * We need to remove or change an entry in the source directory.
   1916 	 */
   1917 	error = VOP_ACCESS(fdnode->tn_vnode, VWRITE, cred);
   1918 	if (error)
   1919 		return error;
   1920 
   1921 	/*
   1922 	 * If we are changing directories, then we need to write to the
   1923 	 * target directory to add or change an entry.  Also, if fnode
   1924 	 * is a directory, we need to write to it to change its `..'
   1925 	 * entry.
   1926 	 */
   1927 	if (fdnode != tdnode) {
   1928 		error = VOP_ACCESS(tdnode->tn_vnode, VWRITE, cred);
   1929 		if (error)
   1930 			return error;
   1931 		if (fnode->tn_type == VDIR) {
   1932 			error = VOP_ACCESS(fnode->tn_vnode, VWRITE, cred);
   1933 			if (error)
   1934 				return error;
   1935 		}
   1936 	}
   1937 
   1938 	error = tmpfs_check_sticky(cred, fdnode, fnode);
   1939 	if (error)
   1940 		return error;
   1941 
   1942 	error = tmpfs_check_sticky(cred, tdnode, tnode);
   1943 	if (error)
   1944 		return error;
   1945 
   1946 	return 0;
   1947 }
   1948 
   1949 /*
   1950  * Check whether removing node's entry in dnode is possible independent
   1951  * of credentials.
   1952  *
   1953  * Everything must be locked and referenced.
   1954  */
   1955 static int
   1956 tmpfs_remove_check_possible(struct tmpfs_node *dnode, struct tmpfs_node *node)
   1957 {
   1958 
   1959 	KASSERT(dnode != NULL);
   1960 	KASSERT(dnode->tn_vnode != NULL);
   1961 	KASSERT(node != NULL);
   1962 	KASSERT(dnode != node);
   1963 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
   1964 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
   1965 
   1966 	/*
   1967 	 * We want to delete the entry.  If dnode is immutable, we
   1968 	 * can't write to it to delete the entry.  If dnode is
   1969 	 * append-only, the only change we can make is to add entries,
   1970 	 * so we can't delete entries.  If node is immutable, we can't
   1971 	 * change the links to it, so we can't delete the entry.  If
   1972 	 * node is append-only...well, this is what UFS does.
   1973 	 */
   1974 	if ((dnode->tn_flags | node->tn_flags) & (IMMUTABLE | APPEND))
   1975 		return EPERM;
   1976 
   1977 	return 0;
   1978 }
   1979 
   1980 /*
   1981  * Check whether removing node's entry in dnode is permitted given our
   1982  * credentials.
   1983  *
   1984  * Everything must be locked and referenced.
   1985  */
   1986 static int
   1987 tmpfs_remove_check_permitted(kauth_cred_t cred,
   1988     struct tmpfs_node *dnode, struct tmpfs_node *node)
   1989 {
   1990 	int error;
   1991 
   1992 	KASSERT(dnode != NULL);
   1993 	KASSERT(dnode->tn_vnode != NULL);
   1994 	KASSERT(node != NULL);
   1995 	KASSERT(dnode != node);
   1996 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
   1997 	KASSERT(VOP_ISLOCKED(node->tn_vnode) == LK_EXCLUSIVE);
   1998 
   1999 	/*
   2000 	 * Check whether we are permitted to write to the source
   2001 	 * directory in order to delete an entry from it.
   2002 	 */
   2003 	error = VOP_ACCESS(dnode->tn_vnode, VWRITE, cred);
   2004 	if (error)
   2005 		return error;
   2006 
   2007 	error = tmpfs_check_sticky(cred, dnode, node);
   2008 	if (error)
   2009 		return error;
   2010 
   2011 	return 0;
   2012 }
   2013 
   2014 /*
   2015  * Check whether we may change an entry in a sticky directory.  If the
   2016  * directory is sticky, the user must own either the directory or, if
   2017  * it exists, the node, in order to change the entry.
   2018  *
   2019  * Everything must be locked and referenced.
   2020  */
   2021 static int
   2022 tmpfs_check_sticky(kauth_cred_t cred,
   2023     struct tmpfs_node *dnode, struct tmpfs_node *node)
   2024 {
   2025 
   2026 	KASSERT(dnode != NULL);
   2027 	KASSERT(dnode->tn_vnode != NULL);
   2028 	KASSERT(VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE);
   2029 	KASSERT((node == NULL) || (node->tn_vnode != NULL));
   2030 	KASSERT((node == NULL) ||
   2031 	    (VOP_ISLOCKED(dnode->tn_vnode) == LK_EXCLUSIVE));
   2032 
   2033 	if (node == NULL)
   2034 		return 0;
   2035 
   2036 	if (dnode->tn_mode & S_ISTXT) {
   2037 		if (kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE,
   2038 		    node->tn_vnode, dnode->tn_vnode, genfs_can_sticky(cred,
   2039 		    dnode->tn_uid, node->tn_uid)) != 0)
   2040 			return EPERM;
   2041 	}
   2042 
   2043 	return 0;
   2044 }
   2045 
   2046 int
   2047 tmpfs_mkdir(void *v)
   2048 {
   2049 	struct vop_mkdir_args /* {
   2050 		struct vnode		*a_dvp;
   2051 		struct vnode		**a_vpp;
   2052 		struct componentname	*a_cnp;
   2053 		struct vattr		*a_vap;
   2054 	} */ *ap = v;
   2055 	vnode_t *dvp = ap->a_dvp;
   2056 	vnode_t **vpp = ap->a_vpp;
   2057 	struct componentname *cnp = ap->a_cnp;
   2058 	struct vattr *vap = ap->a_vap;
   2059 
   2060 	KASSERT(vap->va_type == VDIR);
   2061 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
   2062 }
   2063 
   2064 int
   2065 tmpfs_rmdir(void *v)
   2066 {
   2067 	struct vop_rmdir_args /* {
   2068 		struct vnode		*a_dvp;
   2069 		struct vnode		*a_vp;
   2070 		struct componentname	*a_cnp;
   2071 	} */ *ap = v;
   2072 	vnode_t *dvp = ap->a_dvp;
   2073 	vnode_t *vp = ap->a_vp;
   2074 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
   2075 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
   2076 	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
   2077 	tmpfs_dirent_t *de;
   2078 	int error = 0;
   2079 
   2080 	KASSERT(VOP_ISLOCKED(dvp));
   2081 	KASSERT(VOP_ISLOCKED(vp));
   2082 	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
   2083 
   2084 	/*
   2085 	 * Directories with more than two non-whiteout
   2086 	 * entries ('.' and '..') cannot be removed.
   2087 	 */
   2088 	if (node->tn_size > 0) {
   2089 		KASSERT(error == 0);
   2090 		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
   2091 			if (de->td_node != TMPFS_NODE_WHITEOUT) {
   2092 				error = ENOTEMPTY;
   2093 				break;
   2094 			}
   2095 		}
   2096 		if (error)
   2097 			goto out;
   2098 	}
   2099 
   2100 	/* Lookup the directory entry (check the cached hint first). */
   2101 	de = tmpfs_dir_cached(node);
   2102 	if (de == NULL) {
   2103 		struct componentname *cnp = ap->a_cnp;
   2104 		de = tmpfs_dir_lookup(dnode, cnp);
   2105 	}
   2106 	KASSERT(de && de->td_node == node);
   2107 
   2108 	/* Check flags to see if we are allowed to remove the directory. */
   2109 	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
   2110 		error = EPERM;
   2111 		goto out;
   2112 	}
   2113 
   2114 	/* Decrement the link count for the virtual '.' entry. */
   2115 	node->tn_links--;
   2116 	node->tn_status |= TMPFS_NODE_STATUSALL;
   2117 
   2118 	/* Detach the directory entry from the directory. */
   2119 	tmpfs_dir_detach(dvp, de);
   2120 
   2121 	/* Purge the cache for parent. */
   2122 	cache_purge(dvp);
   2123 
   2124 	/*
   2125 	 * Destroy the directory entry or replace it with a whiteout.
   2126 	 * Note: the inode referred by it will not be destroyed
   2127 	 * until the vnode is reclaimed.
   2128 	 */
   2129 	if (ap->a_cnp->cn_flags & DOWHITEOUT)
   2130 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
   2131 	else
   2132 		tmpfs_free_dirent(tmp, de);
   2133 
   2134 	/* Destroy the whiteout entries from the node. */
   2135 	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
   2136 		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
   2137 		tmpfs_dir_detach(vp, de);
   2138 		tmpfs_free_dirent(tmp, de);
   2139 	}
   2140 
   2141 	KASSERT(node->tn_links == 0);
   2142 out:
   2143 	/* Release the nodes. */
   2144 	vput(dvp);
   2145 	vput(vp);
   2146 	return error;
   2147 }
   2148 
   2149 int
   2150 tmpfs_symlink(void *v)
   2151 {
   2152 	struct vop_symlink_args /* {
   2153 		struct vnode		*a_dvp;
   2154 		struct vnode		**a_vpp;
   2155 		struct componentname	*a_cnp;
   2156 		struct vattr		*a_vap;
   2157 		char			*a_target;
   2158 	} */ *ap = v;
   2159 	vnode_t *dvp = ap->a_dvp;
   2160 	vnode_t **vpp = ap->a_vpp;
   2161 	struct componentname *cnp = ap->a_cnp;
   2162 	struct vattr *vap = ap->a_vap;
   2163 	char *target = ap->a_target;
   2164 
   2165 	KASSERT(vap->va_type == VLNK);
   2166 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
   2167 }
   2168 
   2169 int
   2170 tmpfs_readdir(void *v)
   2171 {
   2172 	struct vop_readdir_args /* {
   2173 		struct vnode	*a_vp;
   2174 		struct uio	*a_uio;
   2175 		kauth_cred_t	a_cred;
   2176 		int		*a_eofflag;
   2177 		off_t		**a_cookies;
   2178 		int		*ncookies;
   2179 	} */ *ap = v;
   2180 	vnode_t *vp = ap->a_vp;
   2181 	struct uio *uio = ap->a_uio;
   2182 	int *eofflag = ap->a_eofflag;
   2183 	off_t **cookies = ap->a_cookies;
   2184 	int *ncookies = ap->a_ncookies;
   2185 	off_t startoff, cnt;
   2186 	tmpfs_node_t *node;
   2187 	int error;
   2188 
   2189 	KASSERT(VOP_ISLOCKED(vp));
   2190 
   2191 	/* This operation only makes sense on directory nodes. */
   2192 	if (vp->v_type != VDIR) {
   2193 		return ENOTDIR;
   2194 	}
   2195 	node = VP_TO_TMPFS_DIR(vp);
   2196 	startoff = uio->uio_offset;
   2197 	cnt = 0;
   2198 	if (node->tn_links == 0) {
   2199 		error = 0;
   2200 		goto out;
   2201 	}
   2202 
   2203 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
   2204 		error = tmpfs_dir_getdotdent(node, uio);
   2205 		if (error != 0) {
   2206 			if (error == -1)
   2207 				error = 0;
   2208 			goto out;
   2209 		}
   2210 		cnt++;
   2211 	}
   2212 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
   2213 		error = tmpfs_dir_getdotdotdent(node, uio);
   2214 		if (error != 0) {
   2215 			if (error == -1)
   2216 				error = 0;
   2217 			goto out;
   2218 		}
   2219 		cnt++;
   2220 	}
   2221 	error = tmpfs_dir_getdents(node, uio, &cnt);
   2222 	if (error == -1) {
   2223 		error = 0;
   2224 	}
   2225 	KASSERT(error >= 0);
   2226 out:
   2227 	if (eofflag != NULL) {
   2228 		*eofflag = (!error && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
   2229 	}
   2230 	if (error || cookies == NULL || ncookies == NULL) {
   2231 		return error;
   2232 	}
   2233 
   2234 	/* Update NFS-related variables, if any. */
   2235 	off_t i, off = startoff;
   2236 	tmpfs_dirent_t *de = NULL;
   2237 
   2238 	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
   2239 	*ncookies = cnt;
   2240 
   2241 	for (i = 0; i < cnt; i++) {
   2242 		KASSERT(off != TMPFS_DIRCOOKIE_EOF);
   2243 		if (off != TMPFS_DIRCOOKIE_DOT) {
   2244 			if (off == TMPFS_DIRCOOKIE_DOTDOT) {
   2245 				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
   2246 			} else if (de != NULL) {
   2247 				de = TAILQ_NEXT(de, td_entries);
   2248 			} else {
   2249 				de = tmpfs_dir_lookupbycookie(node, off);
   2250 				KASSERT(de != NULL);
   2251 				de = TAILQ_NEXT(de, td_entries);
   2252 			}
   2253 			if (de == NULL) {
   2254 				off = TMPFS_DIRCOOKIE_EOF;
   2255 			} else {
   2256 				off = tmpfs_dircookie(de);
   2257 			}
   2258 		} else {
   2259 			off = TMPFS_DIRCOOKIE_DOTDOT;
   2260 		}
   2261 		(*cookies)[i] = off;
   2262 	}
   2263 	KASSERT(uio->uio_offset == off);
   2264 	return error;
   2265 }
   2266 
   2267 int
   2268 tmpfs_readlink(void *v)
   2269 {
   2270 	struct vop_readlink_args /* {
   2271 		struct vnode	*a_vp;
   2272 		struct uio	*a_uio;
   2273 		kauth_cred_t	a_cred;
   2274 	} */ *ap = v;
   2275 	vnode_t *vp = ap->a_vp;
   2276 	struct uio *uio = ap->a_uio;
   2277 	tmpfs_node_t *node;
   2278 	int error;
   2279 
   2280 	KASSERT(VOP_ISLOCKED(vp));
   2281 	KASSERT(uio->uio_offset == 0);
   2282 	KASSERT(vp->v_type == VLNK);
   2283 
   2284 	node = VP_TO_TMPFS_NODE(vp);
   2285 	error = uiomove(node->tn_spec.tn_lnk.tn_link,
   2286 	    MIN(node->tn_size, uio->uio_resid), uio);
   2287 	node->tn_status |= TMPFS_NODE_ACCESSED;
   2288 
   2289 	return error;
   2290 }
   2291 
   2292 int
   2293 tmpfs_inactive(void *v)
   2294 {
   2295 	struct vop_inactive_args /* {
   2296 		struct vnode *a_vp;
   2297 		bool *a_recycle;
   2298 	} */ *ap = v;
   2299 	vnode_t *vp = ap->a_vp;
   2300 	tmpfs_node_t *node;
   2301 
   2302 	KASSERT(VOP_ISLOCKED(vp));
   2303 
   2304 	node = VP_TO_TMPFS_NODE(vp);
   2305 	*ap->a_recycle = (node->tn_links == 0);
   2306 	VOP_UNLOCK(vp);
   2307 
   2308 	return 0;
   2309 }
   2310 
   2311 int
   2312 tmpfs_reclaim(void *v)
   2313 {
   2314 	struct vop_reclaim_args /* {
   2315 		struct vnode *a_vp;
   2316 	} */ *ap = v;
   2317 	vnode_t *vp = ap->a_vp;
   2318 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
   2319 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   2320 	bool racing;
   2321 
   2322 	/* Disassociate inode from vnode. */
   2323 	mutex_enter(&node->tn_vlock);
   2324 	node->tn_vnode = NULL;
   2325 	vp->v_data = NULL;
   2326 	/* Check if tmpfs_vnode_get() is racing with us. */
   2327 	racing = TMPFS_NODE_RECLAIMING(node);
   2328 	mutex_exit(&node->tn_vlock);
   2329 
   2330 	/*
   2331 	 * If inode is not referenced, i.e. no links, then destroy it.
   2332 	 * Note: if racing - inode is about to get a new vnode, leave it.
   2333 	 */
   2334 	if (node->tn_links == 0 && !racing) {
   2335 		tmpfs_free_node(tmp, node);
   2336 	}
   2337 	return 0;
   2338 }
   2339 
   2340 int
   2341 tmpfs_pathconf(void *v)
   2342 {
   2343 	struct vop_pathconf_args /* {
   2344 		struct vnode	*a_vp;
   2345 		int		a_name;
   2346 		register_t	*a_retval;
   2347 	} */ *ap = v;
   2348 	const int name = ap->a_name;
   2349 	register_t *retval = ap->a_retval;
   2350 	int error = 0;
   2351 
   2352 	switch (name) {
   2353 	case _PC_LINK_MAX:
   2354 		*retval = LINK_MAX;
   2355 		break;
   2356 	case _PC_NAME_MAX:
   2357 		*retval = TMPFS_MAXNAMLEN;
   2358 		break;
   2359 	case _PC_PATH_MAX:
   2360 		*retval = PATH_MAX;
   2361 		break;
   2362 	case _PC_PIPE_BUF:
   2363 		*retval = PIPE_BUF;
   2364 		break;
   2365 	case _PC_CHOWN_RESTRICTED:
   2366 		*retval = 1;
   2367 		break;
   2368 	case _PC_NO_TRUNC:
   2369 		*retval = 1;
   2370 		break;
   2371 	case _PC_SYNC_IO:
   2372 		*retval = 1;
   2373 		break;
   2374 	case _PC_FILESIZEBITS:
   2375 		*retval = sizeof(off_t) * CHAR_BIT;
   2376 		break;
   2377 	default:
   2378 		error = EINVAL;
   2379 	}
   2380 	return error;
   2381 }
   2382 
   2383 int
   2384 tmpfs_advlock(void *v)
   2385 {
   2386 	struct vop_advlock_args /* {
   2387 		struct vnode	*a_vp;
   2388 		void *		a_id;
   2389 		int		a_op;
   2390 		struct flock	*a_fl;
   2391 		int		a_flags;
   2392 	} */ *ap = v;
   2393 	vnode_t *vp = ap->a_vp;
   2394 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   2395 
   2396 	return lf_advlock(v, &node->tn_lockf, node->tn_size);
   2397 }
   2398 
   2399 int
   2400 tmpfs_getpages(void *v)
   2401 {
   2402 	struct vop_getpages_args /* {
   2403 		struct vnode *a_vp;
   2404 		voff_t a_offset;
   2405 		struct vm_page **a_m;
   2406 		int *a_count;
   2407 		int a_centeridx;
   2408 		vm_prot_t a_access_type;
   2409 		int a_advice;
   2410 		int a_flags;
   2411 	} */ * const ap = v;
   2412 	vnode_t *vp = ap->a_vp;
   2413 	const voff_t offset = ap->a_offset;
   2414 	struct vm_page **pgs = ap->a_m;
   2415 	const int centeridx = ap->a_centeridx;
   2416 	const vm_prot_t access_type = ap->a_access_type;
   2417 	const int advice = ap->a_advice;
   2418 	const int flags = ap->a_flags;
   2419 	int error, npages = *ap->a_count;
   2420 	tmpfs_node_t *node;
   2421 	struct uvm_object *uobj;
   2422 
   2423 	KASSERT(vp->v_type == VREG);
   2424 	KASSERT(mutex_owned(vp->v_interlock));
   2425 
   2426 	node = VP_TO_TMPFS_NODE(vp);
   2427 	uobj = node->tn_spec.tn_reg.tn_aobj;
   2428 
   2429 	/*
   2430 	 * Currently, PGO_PASTEOF is not supported.
   2431 	 */
   2432 	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
   2433 		if ((flags & PGO_LOCKED) == 0)
   2434 			mutex_exit(vp->v_interlock);
   2435 		return EINVAL;
   2436 	}
   2437 
   2438 	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
   2439 		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
   2440 	}
   2441 
   2442 	if ((flags & PGO_LOCKED) != 0)
   2443 		return EBUSY;
   2444 
   2445 	if ((flags & PGO_NOTIMESTAMP) == 0) {
   2446 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
   2447 			node->tn_status |= TMPFS_NODE_ACCESSED;
   2448 
   2449 		if ((access_type & VM_PROT_WRITE) != 0) {
   2450 			node->tn_status |= TMPFS_NODE_MODIFIED;
   2451 			if (vp->v_mount->mnt_flag & MNT_RELATIME)
   2452 				node->tn_status |= TMPFS_NODE_ACCESSED;
   2453 		}
   2454 	}
   2455 
   2456 	/*
   2457 	 * Invoke the pager.
   2458 	 *
   2459 	 * Clean the array of pages before.  XXX: PR/32166
   2460 	 * Note that vnode lock is shared with underlying UVM object.
   2461 	 */
   2462 	if (pgs) {
   2463 		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
   2464 	}
   2465 	KASSERT(vp->v_interlock == uobj->vmobjlock);
   2466 
   2467 	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
   2468 	    access_type, advice, flags | PGO_ALLPAGES);
   2469 
   2470 #if defined(DEBUG)
   2471 	if (!error && pgs) {
   2472 		for (int i = 0; i < npages; i++) {
   2473 			KASSERT(pgs[i] != NULL);
   2474 		}
   2475 	}
   2476 #endif
   2477 	return error;
   2478 }
   2479 
   2480 int
   2481 tmpfs_putpages(void *v)
   2482 {
   2483 	struct vop_putpages_args /* {
   2484 		struct vnode *a_vp;
   2485 		voff_t a_offlo;
   2486 		voff_t a_offhi;
   2487 		int a_flags;
   2488 	} */ * const ap = v;
   2489 	vnode_t *vp = ap->a_vp;
   2490 	const voff_t offlo = ap->a_offlo;
   2491 	const voff_t offhi = ap->a_offhi;
   2492 	const int flags = ap->a_flags;
   2493 	tmpfs_node_t *node;
   2494 	struct uvm_object *uobj;
   2495 	int error;
   2496 
   2497 	KASSERT(mutex_owned(vp->v_interlock));
   2498 
   2499 	if (vp->v_type != VREG) {
   2500 		mutex_exit(vp->v_interlock);
   2501 		return 0;
   2502 	}
   2503 
   2504 	node = VP_TO_TMPFS_NODE(vp);
   2505 	uobj = node->tn_spec.tn_reg.tn_aobj;
   2506 
   2507 	KASSERT(vp->v_interlock == uobj->vmobjlock);
   2508 	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
   2509 
   2510 	/* XXX mtime */
   2511 
   2512 	return error;
   2513 }
   2514 
   2515 int
   2516 tmpfs_whiteout(void *v)
   2517 {
   2518 	struct vop_whiteout_args /* {
   2519 		struct vnode		*a_dvp;
   2520 		struct componentname	*a_cnp;
   2521 		int			a_flags;
   2522 	} */ *ap = v;
   2523 	vnode_t *dvp = ap->a_dvp;
   2524 	struct componentname *cnp = ap->a_cnp;
   2525 	const int flags = ap->a_flags;
   2526 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
   2527 	tmpfs_dirent_t *de;
   2528 	int error;
   2529 
   2530 	switch (flags) {
   2531 	case LOOKUP:
   2532 		break;
   2533 	case CREATE:
   2534 		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
   2535 		    cnp->cn_namelen, &de);
   2536 		if (error)
   2537 			return error;
   2538 		tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
   2539 		break;
   2540 	case DELETE:
   2541 		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
   2542 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), cnp);
   2543 		if (de == NULL)
   2544 			return ENOENT;
   2545 		tmpfs_dir_detach(dvp, de);
   2546 		tmpfs_free_dirent(tmp, de);
   2547 		break;
   2548 	}
   2549 	return 0;
   2550 }
   2551 
   2552 int
   2553 tmpfs_print(void *v)
   2554 {
   2555 	struct vop_print_args /* {
   2556 		struct vnode	*a_vp;
   2557 	} */ *ap = v;
   2558 	vnode_t *vp = ap->a_vp;
   2559 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   2560 
   2561 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
   2562 	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX ", status 0x%x",
   2563 	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
   2564 	    node->tn_gid, (uintmax_t)node->tn_size, node->tn_status);
   2565 	if (vp->v_type == VFIFO) {
   2566 		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
   2567 	}
   2568 	printf("\n");
   2569 	return 0;
   2570 }
   2571