      1 /*	$NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
      9  * 2005 program, and by Mindaugas Rasiukevicius.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30  * POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 /*
     34  * Efficient memory file system: interfaces for inode and directory entry
     35  * construction, destruction and manipulation.
     36  *
     37  * Reference counting
     38  *
     39  *	The link count of an inode (tmpfs_node_t::tn_links) is used as a
     40  *	reference counter.  However, it has slightly different semantics.
     41  *
     42  *	For directories, the link count represents the directory entries
     43  *	which refer to the directory, i.e. the count of sub-directories.
     44  *	It also accounts for the virtual '.' entry (which has no real
     45  *	entry in the list).  For files, the link count represents the
     46  *	number of hard links.  Since only empty directories can be
     47  *	removed, the link count satisfies the reference counting
     48  *	requirements well enough.  Note: to check whether a directory is
     49  *	empty, the inode size (tmpfs_node_t::tn_size) can be used.
     50  *
     51  *	The inode itself, as an object, gains its first reference when a
     52  *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
     53  *	after a regular tmpfs_create(), a file has a link count of 1, while
     54  *	a directory after tmpfs_mkdir() has a link count of 2 (due to '.').
     55  *
     56  * Reclamation
     57  *
     58  *	It should be noted that tmpfs inodes rely on a combination of vnode
     59  *	reference counting and link counting.  That is, an inode can only be
     60  *	destroyed if its associated vnode is inactive.  The destruction is
     61  *	done on vnode reclamation, i.e. in tmpfs_reclaim().  Note that
     62  *	tmpfs_node_t::tn_links being 0 is a destruction criterion.
     63  *
     64  *	If an inode has references within the file system (tn_links > 0) and
     65  *	its inactive vnode gets reclaimed/recycled, the association is
     66  *	broken in tmpfs_reclaim().  In such a case, the inode will always pass
     67  *	tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
     68  *
     69  * Lock order
     70  *
     71  *	tmpfs_node_t::tn_vlock ->
     72  *		vnode_t::v_vlock ->
     73  *			vnode_t::v_interlock
     74  */
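/*
 * For illustration: since tmpfs_dir_attach() and tmpfs_dir_detach() grow
 * and shrink a directory's size by sizeof(tmpfs_dirent_t) per entry, the
 * "is the directory empty?" check mentioned above reduces to a size test.
 * A hypothetical helper (not defined in this file) might look like:
 *
 *	static bool
 *	tmpfs_dir_is_empty(const tmpfs_node_t *dnode)
 *	{
 *		KASSERT(dnode->tn_type == VDIR);
 *		return dnode->tn_size == 0;
 *	}
 */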
     75 
     76 #include <sys/cdefs.h>
     77 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $");
     78 
     79 #include <sys/param.h>
     80 #include <sys/dirent.h>
     81 #include <sys/event.h>
     82 #include <sys/kmem.h>
     83 #include <sys/mount.h>
     84 #include <sys/namei.h>
     85 #include <sys/time.h>
     86 #include <sys/stat.h>
     87 #include <sys/systm.h>
     88 #include <sys/vnode.h>
     89 #include <sys/kauth.h>
     90 #include <sys/atomic.h>
     91 
     92 #include <uvm/uvm.h>
     93 
     94 #include <miscfs/specfs/specdev.h>
     95 #include <miscfs/genfs/genfs.h>
     96 #include <fs/tmpfs/tmpfs.h>
     97 #include <fs/tmpfs/tmpfs_fifoops.h>
     98 #include <fs/tmpfs/tmpfs_specops.h>
     99 #include <fs/tmpfs/tmpfs_vnops.h>
    100 
    101 /*
    102  * tmpfs_alloc_node: allocate a new inode of a specified type and
     103  * insert it into the list of the specified mount point.
    104  */
    105 int
    106 tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
    107     mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
    108 {
    109 	tmpfs_node_t *nnode;
    110 
    111 	nnode = tmpfs_node_get(tmp);
    112 	if (nnode == NULL) {
    113 		return ENOSPC;
    114 	}
    115 
    116 	/* Initially, no references and no associations. */
    117 	nnode->tn_links = 0;
    118 	nnode->tn_vnode = NULL;
    119 	nnode->tn_dirent_hint = NULL;
    120 
    121 	/*
    122 	 * XXX Where the pool is backed by a map larger than (4GB *
    123 	 * sizeof(*nnode)), this may produce duplicate inode numbers
    124 	 * for applications that do not understand 64-bit ino_t.
    125 	 */
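	/*
	 * For illustration only (the real sizeof(*nnode) differs): if the
	 * node size were 256 bytes, the low 32 bits of tn_id would repeat
	 * only once node addresses span more than 2^32 * 256 bytes (1 TiB)
	 * of kernel address space, so such collisions are rare, but not
	 * impossible, on 64-bit machines.
	 */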
    126 	nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
    127 	nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();
    128 
    129 	/* Generic initialization. */
    130 	nnode->tn_type = type;
    131 	nnode->tn_size = 0;
    132 	nnode->tn_status = 0;
    133 	nnode->tn_flags = 0;
    134 	nnode->tn_lockf = NULL;
    135 
    136 	vfs_timestamp(&nnode->tn_atime);
    137 	nnode->tn_birthtime = nnode->tn_atime;
    138 	nnode->tn_ctime = nnode->tn_atime;
    139 	nnode->tn_mtime = nnode->tn_atime;
    140 
    141 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
    142 	nnode->tn_uid = uid;
    143 	nnode->tn_gid = gid;
    144 	nnode->tn_mode = mode;
    145 
    146 	/* Type-specific initialization. */
    147 	switch (nnode->tn_type) {
    148 	case VBLK:
    149 	case VCHR:
    150 		/* Character/block special device. */
    151 		KASSERT(rdev != VNOVAL);
    152 		nnode->tn_spec.tn_dev.tn_rdev = rdev;
    153 		break;
    154 	case VDIR:
    155 		/* Directory. */
    156 		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
    157 		nnode->tn_spec.tn_dir.tn_parent = NULL;
    158 		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
    159 		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
    160 
    161 		/* Extra link count for the virtual '.' entry. */
    162 		nnode->tn_links++;
    163 		break;
    164 	case VFIFO:
    165 	case VSOCK:
    166 		break;
    167 	case VLNK:
    168 		/* Symbolic link.  Target specifies the file name. */
    169 		KASSERT(target && strlen(target) < MAXPATHLEN);
    170 
    171 		nnode->tn_size = strlen(target);
    172 		if (nnode->tn_size == 0) {
    173 			nnode->tn_spec.tn_lnk.tn_link = NULL;
    174 			break;
    175 		}
    176 		nnode->tn_spec.tn_lnk.tn_link =
    177 		    tmpfs_strname_alloc(tmp, nnode->tn_size);
    178 		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
    179 			tmpfs_node_put(tmp, nnode);
    180 			return ENOSPC;
    181 		}
    182 		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
    183 		break;
    184 	case VREG:
    185 		/* Regular file.  Create an underlying UVM object. */
    186 		nnode->tn_spec.tn_reg.tn_aobj =
    187 		    uao_create(INT32_MAX - PAGE_SIZE, 0);
    188 		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
    189 		break;
    190 	default:
    191 		KASSERT(false);
    192 	}
    193 
    194 	mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);
    195 
    196 	mutex_enter(&tmp->tm_lock);
    197 	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
    198 	mutex_exit(&tmp->tm_lock);
    199 
    200 	*node = nnode;
    201 	return 0;
    202 }
    203 
    204 /*
     205  * tmpfs_free_node: remove the inode from the mount point's list and
     206  * destroy the inode structures.
    207  */
    208 void
    209 tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
    210 {
    211 	size_t objsz;
    212 
    213 	mutex_enter(&tmp->tm_lock);
    214 	LIST_REMOVE(node, tn_entries);
    215 	mutex_exit(&tmp->tm_lock);
    216 
    217 	switch (node->tn_type) {
    218 	case VLNK:
    219 		if (node->tn_size > 0) {
    220 			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
    221 			    node->tn_size);
    222 		}
    223 		break;
    224 	case VREG:
    225 		/*
    226 		 * Calculate the size of inode data, decrease the used-memory
    227 		 * counter, and destroy the underlying UVM object (if any).
    228 		 */
    229 		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
    230 		if (objsz != 0) {
    231 			tmpfs_mem_decr(tmp, objsz);
    232 		}
    233 		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
    234 			uao_detach(node->tn_spec.tn_reg.tn_aobj);
    235 		}
    236 		break;
    237 	case VDIR:
    238 		/*
    239 		 * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
    240 		 * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
    241 		 *     node == tmp->tm_root);
    242 		 */
    243 		break;
    244 	default:
    245 		break;
    246 	}
    247 
    248 	mutex_destroy(&node->tn_vlock);
    249 	tmpfs_node_put(tmp, node);
    250 }
    251 
    252 /*
    253  * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
    254  *
    255  * => Must be called with tmpfs_node_t::tn_vlock held.
    256  * => Returns vnode (*vpp) locked.
    257  */
    258 int
    259 tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
    260 {
    261 	vnode_t *vp;
    262 	int error;
    263 again:
    264 	/* If there is already a vnode, try to reclaim it. */
    265 	if ((vp = node->tn_vnode) != NULL) {
    266 		atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT);
    267 		mutex_enter(&vp->v_interlock);
    268 		mutex_exit(&node->tn_vlock);
    269 		error = vget(vp, LK_EXCLUSIVE);
    270 		if (error == ENOENT) {
    271 			mutex_enter(&node->tn_vlock);
    272 			goto again;
    273 		}
    274 		atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
    275 		*vpp = vp;
    276 		return error;
    277 	}
    278 	if (TMPFS_NODE_RECLAIMING(node)) {
    279 		atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
    280 	}
    281 
    282 	/* Get a new vnode and associate it with our node. */
    283 	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
    284 	if (error) {
    285 		mutex_exit(&node->tn_vlock);
    286 		return error;
    287 	}
    288 
    289 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    290 	vp->v_type = node->tn_type;
    291 
    292 	/* Type-specific initialization. */
    293 	switch (node->tn_type) {
    294 	case VBLK:
    295 	case VCHR:
    296 		vp->v_op = tmpfs_specop_p;
    297 		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
    298 		break;
    299 	case VDIR:
    300 		vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
    301 		    VV_ROOT : 0;
    302 		break;
    303 	case VFIFO:
    304 		vp->v_op = tmpfs_fifoop_p;
    305 		break;
    306 	case VLNK:
    307 	case VREG:
    308 	case VSOCK:
    309 		break;
    310 	default:
    311 		KASSERT(false);
    312 	}
    313 
    314 	uvm_vnp_setsize(vp, node->tn_size);
    315 	vp->v_data = node;
    316 	node->tn_vnode = vp;
    317 	mutex_exit(&node->tn_vlock);
    318 
    319 	KASSERT(VOP_ISLOCKED(vp));
    320 	*vpp = vp;
    321 	return 0;
    322 }
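/*
 * Sketch of the expected calling pattern (see tmpfs_alloc_file() below for
 * a real caller): tn_vlock is taken by the caller and is always released
 * by tmpfs_vnode_get() itself, on both success and failure:
 *
 *	mutex_enter(&node->tn_vlock);
 *	error = tmpfs_vnode_get(mp, node, &vp);
 *	(tn_vlock is no longer held here; vp is locked if error == 0)
 */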
    323 
    324 /*
    325  * tmpfs_alloc_file: allocate a new file of the specified type and
    326  * add it into the parent directory.
    327  *
    328  * => Credentials of the caller are used.
    329  */
    330 int
    331 tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
    332     struct componentname *cnp, char *target)
    333 {
    334 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
    335 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
    336 	tmpfs_dirent_t *de;
    337 	int error;
    338 
    339 	KASSERT(VOP_ISLOCKED(dvp));
    340 	*vpp = NULL;
    341 
    342 	/* Check for the maximum number of links limit. */
    343 	if (vap->va_type == VDIR) {
    344 		/* Check for maximum links limit. */
    345 		if (dnode->tn_links == LINK_MAX) {
    346 			error = EMLINK;
    347 			goto out;
    348 		}
    349 		KASSERT(dnode->tn_links < LINK_MAX);
    350 	}
    351 
    352 	/* Allocate a node that represents the new file. */
    353 	error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
    354 	    dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node);
    355 	if (error)
    356 		goto out;
    357 
    358 	/* Allocate a directory entry that points to the new file. */
    359 	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
    360 	if (error) {
    361 		tmpfs_free_node(tmp, node);
    362 		goto out;
    363 	}
    364 
    365 	/* Get a vnode for the new file. */
    366 	mutex_enter(&node->tn_vlock);
    367 	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
    368 	if (error) {
    369 		tmpfs_free_dirent(tmp, de);
    370 		tmpfs_free_node(tmp, node);
    371 		goto out;
    372 	}
    373 
    374 	/* Associate inode and attach the entry into the directory. */
    375 	tmpfs_dir_attach(dvp, de, node);
    376 out:
    377 	vput(dvp);
    378 	return error;
    379 }
    380 
    381 /*
    382  * tmpfs_alloc_dirent: allocates a new directory entry for the inode.
    383  * The directory entry contains a path name component.
    384  */
    385 int
    386 tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
    387     tmpfs_dirent_t **de)
    388 {
    389 	tmpfs_dirent_t *nde;
    390 
    391 	nde = tmpfs_dirent_get(tmp);
    392 	if (nde == NULL)
    393 		return ENOSPC;
    394 
    395 	nde->td_name = tmpfs_strname_alloc(tmp, len);
    396 	if (nde->td_name == NULL) {
    397 		tmpfs_dirent_put(tmp, nde);
    398 		return ENOSPC;
    399 	}
    400 	nde->td_namelen = len;
    401 	memcpy(nde->td_name, name, len);
    402 
    403 	*de = nde;
    404 	return 0;
    405 }
    406 
    407 /*
    408  * tmpfs_free_dirent: free a directory entry.
    409  */
    410 void
    411 tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
    412 {
    413 
    414 	/* KASSERT(de->td_node == NULL); */
    415 	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
    416 	tmpfs_dirent_put(tmp, de);
    417 }
    418 
    419 /*
    420  * tmpfs_dir_attach: associate a directory entry with the specified inode,
    421  * and attach the entry to the directory specified by the vnode.
    422  *
    423  * => Increases link count on the associated node.
    424  * => Increases link count on directory node, if our node is VDIR.
    425  *    It is the caller's responsibility to check for the LINK_MAX limit.
    426  * => Triggers kqueue events here.
    427  */
    428 void
    429 tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node)
    430 {
    431 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
    432 	int events = NOTE_WRITE;
    433 
    434 	KASSERT(VOP_ISLOCKED(dvp));
    435 
    436 	/* Associate directory entry and the inode. */
    437 	if (node != TMPFS_NODE_WHITEOUT) {
    438 		de->td_node = node;
    439 		KASSERT(node->tn_links < LINK_MAX);
    440 		node->tn_links++;
    441 
    442 		/* Save the hint (might overwrite). */
    443 		node->tn_dirent_hint = de;
    444 	}
    445 
    446 	/* Insert the entry to the directory (parent of inode). */
    447 	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
    448 	dnode->tn_size += sizeof(tmpfs_dirent_t);
    449 	dnode->tn_status |= TMPFS_NODE_STATUSALL;
    450 	uvm_vnp_setsize(dvp, dnode->tn_size);
    451 
    452 	if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) {
    453 		/* Set parent. */
    454 		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
    455 		node->tn_spec.tn_dir.tn_parent = dnode;
    456 
    457 		/* Increase the link count of parent. */
    458 		KASSERT(dnode->tn_links < LINK_MAX);
    459 		dnode->tn_links++;
    460 		events |= NOTE_LINK;
    461 
    462 		TMPFS_VALIDATE_DIR(node);
    463 	}
    464 	VN_KNOTE(dvp, events);
    465 }
    466 
    467 /*
    468  * tmpfs_dir_detach: disassociate a directory entry and its inode,
    469  * and detach the entry from the directory specified by the vnode.
    470  *
    471  * => Decreases link count on the associated node.
    472  * => Decreases the link count on directory node, if our node is VDIR.
    473  * => Triggers kqueue events here.
    474  */
    475 void
    476 tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de)
    477 {
    478 	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
    479 	tmpfs_node_t *node = de->td_node;
    480 	int events = NOTE_WRITE;
    481 
    482 	KASSERT(VOP_ISLOCKED(dvp));
    483 
    484 	if (node != TMPFS_NODE_WHITEOUT) {
    485 		vnode_t *vp = node->tn_vnode;
    486 
    487 		KASSERT(VOP_ISLOCKED(vp));
    488 
    489 		/* Deassociate the inode and entry. */
    490 		de->td_node = NULL;
    491 		node->tn_dirent_hint = NULL;
    492 
    493 		KASSERT(node->tn_links > 0);
    494 		node->tn_links--;
    495 		if (node->tn_vnode) {
    496 			VN_KNOTE(node->tn_vnode,
    497 			    node->tn_links ? NOTE_LINK : NOTE_DELETE);
    498 		}
    499 
    500 		/* If directory - decrease the link count of parent. */
    501 		if (node->tn_type == VDIR) {
    502 			KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
    503 			node->tn_spec.tn_dir.tn_parent = NULL;
    504 
    505 			KASSERT(dnode->tn_links > 0);
    506 			dnode->tn_links--;
    507 			events |= NOTE_LINK;
    508 		}
    509 	}
    510 
    511 	/* Remove the entry from the directory. */
    512 	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
    513 		dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
    514 		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
    515 	}
    516 	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
    517 
    518 	dnode->tn_size -= sizeof(tmpfs_dirent_t);
    519 	dnode->tn_status |= TMPFS_NODE_STATUSALL;
    520 	uvm_vnp_setsize(dvp, dnode->tn_size);
    521 	VN_KNOTE(dvp, events);
    522 }
    523 
    524 /*
    525  * tmpfs_dir_lookup: find a directory entry in the specified inode.
    526  *
    527  * Note that the . and .. components are not allowed as they do not
    528  * physically exist within directories.
    529  */
    530 tmpfs_dirent_t *
    531 tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
    532 {
    533 	const char *name = cnp->cn_nameptr;
    534 	const uint16_t nlen = cnp->cn_namelen;
    535 	tmpfs_dirent_t *de;
    536 
    537 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
    538 	KASSERT(nlen != 1 || !(name[0] == '.'));
    539 	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
    540 	TMPFS_VALIDATE_DIR(node);
    541 
    542 	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
    543 		if (de->td_namelen != nlen)
    544 			continue;
    545 		if (memcmp(de->td_name, name, nlen) != 0)
    546 			continue;
    547 		break;
    548 	}
    549 	node->tn_status |= TMPFS_NODE_ACCESSED;
    550 	return de;
    551 }
    552 
    553 /*
    554  * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
    555  * avoid an unnecessary tmpfs_dir_lookup().
    556  *
    557  * => The vnode must be locked.
    558  */
    559 tmpfs_dirent_t *
    560 tmpfs_dir_cached(tmpfs_node_t *node)
    561 {
    562 	tmpfs_dirent_t *de = node->tn_dirent_hint;
    563 
    564 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
    565 
    566 	if (de == NULL) {
    567 		return NULL;
    568 	}
    569 	KASSERT(de->td_node == node);
    570 
    571 	/*
    572 	 * Directories always have a valid hint.  For files, check if there
    573 	 * are any hard links.  If there are - hint might be invalid.
    574 	 */
    575 	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
    576 }
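/*
 * Sketch of the intended use of the hint (the real callers live in
 * tmpfs_vnops.c and may differ in detail): operations such as remove or
 * rmdir try the cached entry first and fall back to a full lookup only
 * when the hint is not usable:
 *
 *	de = tmpfs_dir_cached(node);
 *	if (de == NULL) {
 *		de = tmpfs_dir_lookup(dnode, cnp);
 *	}
 */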
    577 
    578 /*
    579  * tmpfs_dir_getdotdent: helper function for tmpfs_readdir.  Creates a
    580  * '.' entry for the given directory and returns it in the uio space.
    581  */
    582 int
    583 tmpfs_dir_getdotdent(tmpfs_node_t *node, struct uio *uio)
    584 {
    585 	struct dirent *dentp;
    586 	int error;
    587 
    588 	TMPFS_VALIDATE_DIR(node);
    589 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
    590 
    591 	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
    592 	dentp->d_fileno = node->tn_id;
    593 	dentp->d_type = DT_DIR;
    594 	dentp->d_namlen = 1;
    595 	dentp->d_name[0] = '.';
    596 	dentp->d_name[1] = '\0';
    597 	dentp->d_reclen = _DIRENT_SIZE(dentp);
    598 
    599 	if (dentp->d_reclen > uio->uio_resid)
    600 		error = -1;
    601 	else {
    602 		error = uiomove(dentp, dentp->d_reclen, uio);
    603 		if (error == 0)
    604 			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
    605 	}
    606 	node->tn_status |= TMPFS_NODE_ACCESSED;
    607 	kmem_free(dentp, sizeof(struct dirent));
    608 	return error;
    609 }
    610 
    611 /*
    612  * tmpfs_dir_getdotdotdent: helper function for tmpfs_readdir.  Creates a
    613  * '..' entry for the given directory and returns it in the uio space.
    614  */
    615 int
    616 tmpfs_dir_getdotdotdent(tmpfs_node_t *node, struct uio *uio)
    617 {
    618 	struct dirent *dentp;
    619 	int error;
    620 
    621 	TMPFS_VALIDATE_DIR(node);
    622 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
    623 
    624 	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
    625 	dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
    626 	dentp->d_type = DT_DIR;
    627 	dentp->d_namlen = 2;
    628 	dentp->d_name[0] = '.';
    629 	dentp->d_name[1] = '.';
    630 	dentp->d_name[2] = '\0';
    631 	dentp->d_reclen = _DIRENT_SIZE(dentp);
    632 
    633 	if (dentp->d_reclen > uio->uio_resid)
    634 		error = -1;
    635 	else {
    636 		error = uiomove(dentp, dentp->d_reclen, uio);
    637 		if (error == 0) {
    638 			tmpfs_dirent_t *de;
    639 
    640 			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
    641 			if (de == NULL)
    642 				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
    643 			else
    644 				uio->uio_offset = tmpfs_dircookie(de);
    645 		}
    646 	}
    647 	node->tn_status |= TMPFS_NODE_ACCESSED;
    648 	kmem_free(dentp, sizeof(struct dirent));
    649 	return error;
    650 }
    651 
    652 /*
    653  * tmpfs_dir_lookupbycookie: lookup a directory entry by associated cookie.
    654  */
    655 tmpfs_dirent_t *
    656 tmpfs_dir_lookupbycookie(tmpfs_node_t *node, off_t cookie)
    657 {
    658 	tmpfs_dirent_t *de;
    659 
    660 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
    661 
    662 	if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
    663 	    node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
    664 		return node->tn_spec.tn_dir.tn_readdir_lastp;
    665 	}
    666 	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
    667 		if (tmpfs_dircookie(de) == cookie) {
    668 			break;
    669 		}
    670 	}
    671 	return de;
    672 }
    673 
    674 /*
    675  * tmpfs_dir_getdents: helper function for tmpfs_readdir.
    676  *
    677  * => Returns as many directory entries as can fit in the uio space.
    678  * => The read starts at uio->uio_offset.
    679  */
    680 int
    681 tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio, off_t *cntp)
    682 {
    683 	tmpfs_dirent_t *de;
    684 	struct dirent *dentp;
    685 	off_t startcookie;
    686 	int error;
    687 
    688 	KASSERT(VOP_ISLOCKED(node->tn_vnode));
    689 	TMPFS_VALIDATE_DIR(node);
    690 
    691 	/*
    692 	 * Locate the first directory entry we have to return.  We have cached
    693 	 * the last readdir in the node, so use those values if appropriate.
    694 	 * Otherwise do a linear scan to find the requested entry.
    695 	 */
    696 	startcookie = uio->uio_offset;
    697 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
    698 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
    699 	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
    700 		return 0;
    701 	} else {
    702 		de = tmpfs_dir_lookupbycookie(node, startcookie);
    703 	}
    704 	if (de == NULL) {
    705 		return EINVAL;
    706 	}
    707 
    708 	/*
    709 	 * Read as many entries as possible; i.e., until we reach the end
    710 	 * of the directory or we exhaust uio space.
    711 	 */
    712 	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
    713 	do {
    714 		/*
    715 		 * Create a dirent structure representing the current
    716 		 * inode and fill it.
    717 		 */
    718 		if (de->td_node == TMPFS_NODE_WHITEOUT) {
    719 			dentp->d_fileno = 1;
    720 			dentp->d_type = DT_WHT;
    721 		} else {
    722 			dentp->d_fileno = de->td_node->tn_id;
    723 			switch (de->td_node->tn_type) {
    724 			case VBLK:
    725 				dentp->d_type = DT_BLK;
    726 				break;
    727 			case VCHR:
    728 				dentp->d_type = DT_CHR;
    729 				break;
    730 			case VDIR:
    731 				dentp->d_type = DT_DIR;
    732 				break;
    733 			case VFIFO:
    734 				dentp->d_type = DT_FIFO;
    735 				break;
    736 			case VLNK:
    737 				dentp->d_type = DT_LNK;
    738 				break;
    739 			case VREG:
    740 				dentp->d_type = DT_REG;
    741 				break;
    742 			case VSOCK:
    743 				dentp->d_type = DT_SOCK;
    744 				break;
    745 			default:
    746 				KASSERT(false);
    747 			}
    748 		}
    749 		dentp->d_namlen = de->td_namelen;
    750 		KASSERT(de->td_namelen < sizeof(dentp->d_name));
    751 		memcpy(dentp->d_name, de->td_name, de->td_namelen);
    752 		dentp->d_name[de->td_namelen] = '\0';
    753 		dentp->d_reclen = _DIRENT_SIZE(dentp);
    754 
    755 		/* Stop reading if the directory entry we are processing is
    756 		 * bigger than the amount of data that can be returned. */
    757 		if (dentp->d_reclen > uio->uio_resid) {
    758 			error = -1;
    759 			break;
    760 		}
    761 
    762 		/*
    763 		 * Copy the new dirent structure into the output buffer and
    764 		 * advance pointers.
    765 		 */
    766 		error = uiomove(dentp, dentp->d_reclen, uio);
    767 
    768 		(*cntp)++;
    769 		de = TAILQ_NEXT(de, td_entries);
    770 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
    771 
    772 	/* Update the offset and cache. */
    773 	if (de == NULL) {
    774 		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
    775 		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
    776 		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
    777 	} else {
    778 		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
    779 		    tmpfs_dircookie(de);
    780 		node->tn_spec.tn_dir.tn_readdir_lastp = de;
    781 	}
    782 	node->tn_status |= TMPFS_NODE_ACCESSED;
    783 	kmem_free(dentp, sizeof(struct dirent));
    784 	return error;
    785 }
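/*
 * Sketch of how a caller such as tmpfs_readdir() (in tmpfs_vnops.c) is
 * expected to drive the helpers above; a simplified illustration, not the
 * actual implementation.  Note that -1 means "out of uio space", not a
 * real error:
 *
 *	error = 0;
 *	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT)
 *		error = tmpfs_dir_getdotdent(node, uio);
 *	if (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT)
 *		error = tmpfs_dir_getdotdotdent(node, uio);
 *	if (error == 0 && uio->uio_offset != TMPFS_DIRCOOKIE_EOF)
 *		error = tmpfs_dir_getdents(node, uio, &cnt);
 *	if (error == -1)
 *		error = 0;
 */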
    786 
    787 /*
    788  * tmpfs_reg_resize: resize the underlying UVM object associated with the
    789  * specified regular file.
    790  */
    791 int
    792 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
    793 {
    794 	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
    795 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    796 	size_t newpages, oldpages;
    797 	off_t oldsize;
    798 
    799 	KASSERT(vp->v_type == VREG);
    800 	KASSERT(newsize >= 0);
    801 
    802 	oldsize = node->tn_size;
    803 	oldpages = round_page(oldsize) >> PAGE_SHIFT;
    804 	newpages = round_page(newsize) >> PAGE_SHIFT;
    805 	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
    806 
    807 	if (newpages > oldpages) {
    808 		/* Increase the used-memory counter if getting extra pages. */
    809 		if (!tmpfs_mem_incr(tmp, (newpages - oldpages) << PAGE_SHIFT)) {
    810 			return ENOSPC;
    811 		}
    812 	} else if (newsize < oldsize) {
    813 		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;
    814 
    815 		/* Zero out the truncated part of the last page. */
    816 		uvm_vnp_zerorange(vp, newsize, zerolen);
    817 	}
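	/*
	 * Worked example, assuming PAGE_SIZE is 4096: shrinking from 10000
	 * to 5000 bytes gives oldpages = 3 and newpages = 2; zerolen =
	 * MIN(8192, 10000) - 5000 = 3192, so bytes [5000, 8192) of the
	 * partially retained last page are zeroed above, and one full page
	 * is returned to the used-memory counter further below.
	 */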
    818 
    819 	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
    820 	node->tn_size = newsize;
    821 	uvm_vnp_setsize(vp, newsize);
    822 
    823 	/*
    824 	 * Free "backing store".
    825 	 */
    826 	if (newpages < oldpages) {
    827 		struct uvm_object *uobj;
    828 
    829 		uobj = node->tn_spec.tn_reg.tn_aobj;
    830 
    831 		mutex_enter(&uobj->vmobjlock);
    832 		uao_dropswap_range(uobj, newpages, oldpages);
    833 		mutex_exit(&uobj->vmobjlock);
    834 
    835 		/* Decrease the used-memory counter. */
    836 		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
    837 	}
    838 	if (newsize > oldsize) {
    839 		VN_KNOTE(vp, NOTE_EXTEND);
    840 	}
    841 	return 0;
    842 }
    843 
    844 /*
    845  * tmpfs_chflags: change flags of the given vnode.
    846  *
    847  * => Caller should perform tmpfs_update().
    848  */
    849 int
    850 tmpfs_chflags(vnode_t *vp, int flags, kauth_cred_t cred, lwp_t *l)
    851 {
    852 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    853 	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
    854 	int error, fs_decision = 0;
    855 
    856 	KASSERT(VOP_ISLOCKED(vp));
    857 
    858 	/* Disallow this operation if the file system is mounted read-only. */
    859 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    860 		return EROFS;
    861 
    862 	if (kauth_cred_geteuid(cred) != node->tn_uid) {
    863 		fs_decision = EACCES;
    864 	}
    865 
    866 	/*
    867 	 * If the new flags have non-user flags that are different than
    868 	 * those on the node, we need special permission to change them.
    869 	 */
    870 	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
    871 		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
    872 		if (!fs_decision) {
    873 			fs_decision = EPERM;
    874 		}
    875 	}
    876 
    877 	/*
    878 	 * Indicate that this node's flags have system attributes in them if
    879 	 * that's the case.
    880 	 */
    881 	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
    882 		action |= KAUTH_VNODE_HAS_SYSFLAGS;
    883 	}
    884 
    885 	error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision);
    886 	if (error)
    887 		return error;
    888 
    889 	/*
    890 	 * Set the flags. If we're not setting non-user flags, be careful not
    891 	 * to overwrite them.
    892 	 *
    893 	 * XXX: Can't we always assign here? if the system flags are different,
    894 	 *      the code above should catch attempts to change them without
    895 	 *      proper permissions, and if we're here it means it's okay to
    896 	 *      change them...
    897 	 */
    898 	if ((action & KAUTH_VNODE_WRITE_SYSFLAGS) == 0) {
    899 		/* Clear all user-settable flags and re-set them. */
    900 		node->tn_flags &= SF_SETTABLE;
    901 		node->tn_flags |= (flags & UF_SETTABLE);
    902 	} else {
    903 		node->tn_flags = flags;
    904 	}
    905 	node->tn_status |= TMPFS_NODE_CHANGED;
    906 	VN_KNOTE(vp, NOTE_ATTRIB);
    907 	return 0;
    908 }
    909 
    910 /*
    911  * tmpfs_chmod: change access mode on the given vnode.
    912  *
    913  * => Caller should perform tmpfs_update().
    914  */
    915 int
    916 tmpfs_chmod(vnode_t *vp, mode_t mode, kauth_cred_t cred, lwp_t *l)
    917 {
    918 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    919 	int error;
    920 
    921 	KASSERT(VOP_ISLOCKED(vp));
    922 
    923 	/* Disallow this operation if the file system is mounted read-only. */
    924 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    925 		return EROFS;
    926 
    927 	/* Immutable or append-only files cannot be modified, either. */
    928 	if (node->tn_flags & (IMMUTABLE | APPEND))
    929 		return EPERM;
    930 
    931 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
    932 	    NULL, genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid, mode));
    933 	if (error) {
    934 		return error;
    935 	}
    936 	node->tn_mode = (mode & ALLPERMS);
    937 	node->tn_status |= TMPFS_NODE_CHANGED;
    938 	VN_KNOTE(vp, NOTE_ATTRIB);
    939 	return 0;
    940 }
    941 
    942 /*
    943  * tmpfs_chown: change ownership of the given vnode.
    944  *
    945  * => At least one of uid or gid must be different than VNOVAL.
    946  * => Attribute is unchanged for VNOVAL case.
    947  * => Caller should perform tmpfs_update().
    948  */
    949 int
    950 tmpfs_chown(vnode_t *vp, uid_t uid, gid_t gid, kauth_cred_t cred, lwp_t *l)
    951 {
    952 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    953 	int error;
    954 
    955 	KASSERT(VOP_ISLOCKED(vp));
    956 
    957 	/* Assign default values if they are unknown. */
    958 	KASSERT(uid != VNOVAL || gid != VNOVAL);
    959 	if (uid == VNOVAL) {
    960 		uid = node->tn_uid;
    961 	}
    962 	if (gid == VNOVAL) {
    963 		gid = node->tn_gid;
    964 	}
    965 
    966 	/* Disallow this operation if the file system is mounted read-only. */
    967 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    968 		return EROFS;
    969 
    970 	/* Immutable or append-only files cannot be modified, either. */
    971 	if (node->tn_flags & (IMMUTABLE | APPEND))
    972 		return EPERM;
    973 
    974 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
    975 	    NULL, genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
    976 	    gid));
    977 	if (error) {
    978 		return error;
    979 	}
    980 	node->tn_uid = uid;
    981 	node->tn_gid = gid;
    982 	node->tn_status |= TMPFS_NODE_CHANGED;
    983 	VN_KNOTE(vp, NOTE_ATTRIB);
    984 	return 0;
    985 }
    986 
    987 /*
    988  * tmpfs_chsize: change size of the given vnode.
    989  */
    990 int
    991 tmpfs_chsize(vnode_t *vp, u_quad_t size, kauth_cred_t cred, lwp_t *l)
    992 {
    993 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
    994 
    995 	KASSERT(VOP_ISLOCKED(vp));
    996 
    997 	/* Decide whether this is a valid operation based on the file type. */
    998 	switch (vp->v_type) {
    999 	case VDIR:
   1000 		return EISDIR;
   1001 	case VREG:
   1002 		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
   1003 			return EROFS;
   1004 		}
   1005 		break;
   1006 	case VBLK:
   1007 	case VCHR:
   1008 	case VFIFO:
   1009 		/*
   1010 		 * Allow modifications of special files even if the file
   1011 		 * system is mounted read-only (we are not modifying the
   1012 		 * files themselves, but the objects they represent).
   1013 		 */
   1014 		return 0;
   1015 	default:
   1016 		return EOPNOTSUPP;
   1017 	}
   1018 
   1019 	/* Immutable or append-only files cannot be modified, either. */
   1020 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
   1021 		return EPERM;
   1022 	}
   1023 
   1024 	/* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
   1025 	return tmpfs_truncate(vp, size);
   1026 }
   1027 
   1028 /*
   1029  * tmpfs_chtimes: change access and modification times for vnode.
   1030  */
   1031 int
   1032 tmpfs_chtimes(vnode_t *vp, const struct timespec *atime,
   1033     const struct timespec *mtime, const struct timespec *btime,
   1034     int vaflags, kauth_cred_t cred, lwp_t *l)
   1035 {
   1036 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   1037 	int error;
   1038 
   1039 	KASSERT(VOP_ISLOCKED(vp));
   1040 
   1041 	/* Disallow this operation if the file system is mounted read-only. */
   1042 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1043 		return EROFS;
   1044 
   1045 	/* Immutable or append-only files cannot be modified, either. */
   1046 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1047 		return EPERM;
   1048 
   1049 	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
   1050 	    genfs_can_chtimes(vp, vaflags, node->tn_uid, cred));
   1051 	if (error)
   1052 		return error;
   1053 
   1054 	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
   1055 		node->tn_status |= TMPFS_NODE_ACCESSED;
   1056 
   1057 	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
   1058 		node->tn_status |= TMPFS_NODE_MODIFIED;
   1059 
   1060 	if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
   1061 		btime = NULL;
   1062 
   1063 	tmpfs_update(vp, atime, mtime, btime, 0);
   1064 	VN_KNOTE(vp, NOTE_ATTRIB);
   1065 	return 0;
   1066 }
   1067 
   1068 /*
   1069  * tmpfs_update: update timestamps, et al.
   1070  */
   1071 void
   1072 tmpfs_update(vnode_t *vp, const struct timespec *acc,
   1073     const struct timespec *mod, const struct timespec *birth, int flags)
   1074 {
   1075 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   1076 	struct timespec nowtm;
   1077 
   1078 	/* KASSERT(VOP_ISLOCKED(vp)); */
   1079 
   1080 	if (flags & UPDATE_CLOSE) {
   1081 		/* XXX Need to do anything special? */
   1082 	}
   1083 	if ((node->tn_status & TMPFS_NODE_STATUSALL) == 0) {
   1084 		return;
   1085 	}
   1086 	if (birth != NULL) {
   1087 		node->tn_birthtime = *birth;
   1088 	}
   1089 	vfs_timestamp(&nowtm);
   1090 
   1091 	if (node->tn_status & TMPFS_NODE_ACCESSED) {
   1092 		node->tn_atime = acc ? *acc : nowtm;
   1093 	}
   1094 	if (node->tn_status & TMPFS_NODE_MODIFIED) {
   1095 		node->tn_mtime = mod ? *mod : nowtm;
   1096 	}
   1097 	if (node->tn_status & TMPFS_NODE_CHANGED) {
   1098 		node->tn_ctime = nowtm;
   1099 	}
   1100 
   1101 	node->tn_status &= ~TMPFS_NODE_STATUSALL;
   1102 }
   1103 
   1104 int
   1105 tmpfs_truncate(vnode_t *vp, off_t length)
   1106 {
   1107 	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
   1108 	int error;
   1109 
   1110 	if (length < 0) {
   1111 		error = EINVAL;
   1112 		goto out;
   1113 	}
   1114 	if (node->tn_size == length) {
   1115 		error = 0;
   1116 		goto out;
   1117 	}
   1118 	error = tmpfs_reg_resize(vp, length);
   1119 	if (error == 0) {
   1120 		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
   1121 	}
   1122 out:
   1123 	tmpfs_update(vp, NULL, NULL, NULL, 0);
   1124 	return error;
   1125 }
   1126