Home | History | Annotate | Line # | Download | only in tmpfs
tmpfs_subr.c revision 1.9
      1 /*	$NetBSD: tmpfs_subr.c,v 1.9 2005/09/27 20:35:33 jmmv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
      9  * 2005 program.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *        This product includes software developed by the NetBSD
     22  *        Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Efficient memory file system supporting functions.
     42  */
     43 
     44 #include <sys/cdefs.h>
     45 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.9 2005/09/27 20:35:33 jmmv Exp $");
     46 
     47 #include <sys/param.h>
     48 #include <sys/dirent.h>
     49 #include <sys/event.h>
     50 #include <sys/malloc.h>
     51 #include <sys/mount.h>
     52 #include <sys/namei.h>
     53 #include <sys/time.h>
     54 #include <sys/stat.h>
     55 #include <sys/systm.h>
     56 #include <sys/swap.h>
     57 #include <sys/vnode.h>
     58 
     59 #include <uvm/uvm.h>
     60 
     61 #include <miscfs/specfs/specdev.h>
     62 #include <fs/tmpfs/tmpfs.h>
     63 #include <fs/tmpfs/tmpfs_fifoops.h>
     64 #include <fs/tmpfs/tmpfs_specops.h>
     65 #include <fs/tmpfs/tmpfs_vnops.h>
     66 
     67 /* --------------------------------------------------------------------- */
     68 
     69 /*
     70  * Allocates a new node of type 'type' inside the 'tmp' mount point, with
     71  * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
     72  * using the credentials of the process 'p'.
     73  *
     74  * If the node type is set to 'VDIR', then the parent parameter must point
     75  * to the parent directory of the node being created.  It may only be NULL
     76  * while allocating the root node.
     77  *
     78  * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
     79  * specifies the device the node represents.
     80  *
     81  * If the node type is set to 'VLNK', then the parameter target specifies
     82  * the file name of the target file for the symbolic link that is being
     83  * created.
     84  *
     85  * Note that new nodes are retrieved from the available list if it has
     86  * items or, if it is empty, from the node pool as long as there is enough
     87  * space to create them.
     88  *
     89  * Returns zero on success or an appropriate error code on failure.
     90  */
     91 int
     92 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
     93     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
     94     char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
     95 {
     96 	struct tmpfs_node *nnode;
     97 
     98 	/* If the root directory of the 'tmp' file system is not yet
     99 	 * allocated, this must be the request to do it. */
    100 	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
    101 
    102 	KASSERT(IFF(type == VLNK, target != NULL));
    103 	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
    104 
    105 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
    106 
    107 	nnode = NULL;
    108 	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
    109 		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
    110 		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
    111 			return ENOSPC;
    112 
    113 		nnode =
    114 		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
    115 		if (nnode == NULL)
    116 			return ENOSPC;
    117 		nnode->tn_id = tmp->tm_nodes_last++;
    118 		nnode->tn_gen = 0;
    119 	} else {
    120 		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
    121 		LIST_REMOVE(nnode, tn_entries);
    122 		nnode->tn_gen++;
    123 	}
    124 	KASSERT(nnode != NULL);
    125 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
    126 
    127 	/* Generic initialization. */
    128 	nnode->tn_type = type;
    129 	nnode->tn_size = 0;
    130 	nnode->tn_status = 0;
    131 	nnode->tn_flags = 0;
    132 	nnode->tn_links = 0;
    133 	(void)nanotime(&nnode->tn_atime);
    134 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
    135 	    nnode->tn_atime;
    136 	nnode->tn_uid = uid;
    137 	nnode->tn_gid = gid;
    138 	nnode->tn_mode = mode;
    139 	nnode->tn_vnode = NULL;
    140 
    141 	/* Type-specific initialization. */
    142 	switch (nnode->tn_type) {
    143 	case VBLK:
    144 	case VCHR:
    145 		nnode->tn_rdev = rdev;
    146 		break;
    147 
    148 	case VDIR:
    149 		TAILQ_INIT(&nnode->tn_dir);
    150 		nnode->tn_parent = (parent == NULL) ? nnode : parent;
    151 		nnode->tn_readdir_lastn = 0;
    152 		nnode->tn_readdir_lastp = NULL;
    153 		nnode->tn_links++;
    154 		nnode->tn_parent->tn_links++;
    155 		break;
    156 
    157 	case VFIFO:
    158 		/* FALLTHROUGH */
    159 	case VSOCK:
    160 		break;
    161 
    162 	case VLNK:
    163 		KASSERT(strlen(target) < MAXPATHLEN);
    164 		nnode->tn_size = strlen(target);
    165 		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
    166 		    nnode->tn_size, 0);
    167 		if (nnode->tn_link == NULL) {
    168 			nnode->tn_type = VNON;
    169 			tmpfs_free_node(tmp, nnode);
    170 			return ENOSPC;
    171 		}
    172 		memcpy(nnode->tn_link, target, nnode->tn_size);
    173 		break;
    174 
    175 	case VREG:
    176 		nnode->tn_aobj = uao_create(INT32_MAX - PAGE_SIZE, 0);
    177 		nnode->tn_aobj_pages = 0;
    178 		break;
    179 
    180 	default:
    181 		KASSERT(0);
    182 	}
    183 
    184 	*node = nnode;
    185 	return 0;
    186 }
    187 
    188 /* --------------------------------------------------------------------- */
    189 
    190 /*
    191  * Destroys the node pointed to by node from the file system 'tmp'.
    192  * If the node does not belong to the given mount point, the results are
    193  * unpredicted.
    194  *
    195  * If the node references a directory; no entries are allowed because
    196  * their removal could need a recursive algorithm, something forbidden in
    197  * kernel space.  Furthermore, there is not need to provide such
    198  * functionality (recursive removal) because the only primitives offered
    199  * to the user are the removal of empty directories and the deletion of
    200  * individual files.
    201  *
    202  * Note that nodes are not really deleted; in fact, when a node has been
    203  * allocated, it cannot be deleted during the whole life of the file
    204  * system.  Instead, they are moved to the available list and remain there
    205  * until reused.
    206  */
    207 void
    208 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
    209 {
    210 	ino_t id;
    211 	unsigned long gen;
    212 	size_t pages;
    213 
    214 	switch (node->tn_type) {
    215 	case VNON:
    216 		/* Do not do anything.  VNON is provided to let the
    217 		 * allocation routine clean itself easily by avoiding
    218 		 * duplicating code in it. */
    219 		/* FALLTHROUGH */
    220 	case VBLK:
    221 		/* FALLTHROUGH */
    222 	case VCHR:
    223 		/* FALLTHROUGH */
    224 	case VDIR:
    225 		/* FALLTHROUGH */
    226 	case VFIFO:
    227 		/* FALLTHROUGH */
    228 	case VSOCK:
    229 		pages = 0;
    230 		break;
    231 
    232 	case VLNK:
    233 		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
    234 		    node->tn_size);
    235 		pages = 0;
    236 		break;
    237 
    238 	case VREG:
    239 		if (node->tn_aobj != NULL)
    240 			uao_detach(node->tn_aobj);
    241 		pages = node->tn_aobj_pages;
    242 		break;
    243 
    244 	default:
    245 		KASSERT(0);
    246 		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
    247 		break;
    248 	}
    249 
    250 	tmp->tm_pages_used -= pages;
    251 
    252 	LIST_REMOVE(node, tn_entries);
    253 	id = node->tn_id;
    254 	gen = node->tn_gen;
    255 	memset(node, 0, sizeof(struct tmpfs_node));
    256 	node->tn_id = id;
    257 	node->tn_type = VNON;
    258 	node->tn_gen = gen;
    259 	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
    260 }
    261 
    262 /* --------------------------------------------------------------------- */
    263 
    264 /*
    265  * Allocates a new directory entry for the node node with a name of name.
    266  * The new directory entry is returned in *de.
    267  *
    268  * The link count of node is increased by one to reflect the new object
    269  * referencing it.
    270  *
    271  * Returns zero on success or an appropriate error code on failure.
    272  */
    273 int
    274 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    275     const char *name, uint16_t len, struct tmpfs_dirent **de)
    276 {
    277 	struct tmpfs_dirent *nde;
    278 
    279 	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
    280 	if (nde == NULL)
    281 		return ENOSPC;
    282 
    283 	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
    284 	if (nde->td_name == NULL) {
    285 		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
    286 		return ENOSPC;
    287 	}
    288 	nde->td_namelen = len;
    289 	memcpy(nde->td_name, name, len);
    290 	nde->td_node = node;
    291 
    292 	node->tn_links++;
    293 	*de = nde;
    294 
    295 	return 0;
    296 }
    297 
    298 /* --------------------------------------------------------------------- */
    299 
    300 /*
    301  * Frees a directory entry.  It is the caller's responsibility to destroy
    302  * the node referenced by it if needed.
    303  *
    304  * The link count of node is decreased by one to reflect the removal of an
    305  * object that referenced it.  This only happens if 'node_exists' is true;
    306  * otherwise the function will not access the node referred to by the
    307  * directory entry, as it may already have been released from the outside.
    308  */
    309 void
    310 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    311     boolean_t node_exists)
    312 {
    313 	if (node_exists) {
    314 		struct tmpfs_node *node;
    315 
    316 		node = de->td_node;
    317 
    318 		KASSERT(node->tn_links > 0);
    319 		node->tn_links--;
    320 	}
    321 
    322 	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
    323 	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
    324 }
    325 
    326 /* --------------------------------------------------------------------- */
    327 
/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * On every exit path both *vpp and node->tn_vnode are set to the same
 * value: the vnode on success, NULL on failure.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *nvp;
	struct vnode *vp;

	vp = NULL;

	/* Fast path: the node already has a vnode; grab a locked reference
	 * to it.  NOTE(review): the return value of vget is not checked
	 * here, so this path always reports success. */
	if (node->tn_vnode != NULL) {
		vp = node->tn_vnode;
		vget(vp, LK_EXCLUSIVE | LK_RETRY);
		error = 0;
		goto out;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0)
		goto out;
	KASSERT(vp != NULL);

	/* The vnode must be returned locked; if locking fails, hand the
	 * fresh vnode back and report the error with vp reset to NULL. */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		vp->v_data = NULL;
		ungetnewvnode(vp);
		vp = NULL;
		goto out;
	}

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* Device nodes use the special-file operations vector and
		 * may alias an already existing vnode for the same device. */
		vp->v_op = tmpfs_specop_p;
		nvp = checkalias(vp, node->tn_rdev, mp);
		if (nvp != NULL) {
			/* Discard unneeded vnode, but save its inode. */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;

			/* XXX spec_vnodeops has no locking, so we have to
			 * do it explicitly. */
			VOP_UNLOCK(vp, 0);
			vp->v_op = spec_vnodeop_p;
			vp->v_flag &= ~VLOCKSWORK;
			vrele(vp);
			vgone(vp);

			/* Reinitialize aliased node. */
			vp = nvp;
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			if (error != 0) {
				vp->v_data = NULL;
				vp = NULL;
				goto out;
			}
		}
		break;

	case VDIR:
		/* A directory that is its own parent is the root of the
		 * file system. */
		vp->v_flag = node->tn_parent == node ? VROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	default:
		KASSERT(0);
	}

	/* Let UVM know the current size of the file behind the vnode. */
	uvm_vnp_setsize(vp, node->tn_size);

	error = 0;

out:
	/* vp is NULL on the failure paths, so this also clears the
	 * node's vnode pointer when an error is being returned. */
	*vpp = node->tn_vnode = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}
    430 
    431 /* --------------------------------------------------------------------- */
    432 
    433 /*
    434  * Destroys the association between the vnode vp and the node it
    435  * references.
    436  */
    437 void
    438 tmpfs_free_vp(struct vnode *vp)
    439 {
    440 	struct tmpfs_node *node;
    441 
    442 	node = VP_TO_TMPFS_NODE(vp);
    443 
    444 	node->tn_vnode = NULL;
    445 	vp->v_data = NULL;
    446 }
    447 
    448 /* --------------------------------------------------------------------- */
    449 
    450 /*
    451  * Allocates a new file of type 'type' and adds it to the parent directory
    452  * 'dvp'; this addition is done using the component name given in 'cnp'.
    453  * The ownership of the new file is automatically assigned based on the
    454  * credentials of the caller (through 'cnp'), the group is set based on
    455  * the parent directory and the mode is determined from the 'vap' argument.
    456  * If successful, *vpp holds a vnode to the newly created file and zero
    457  * is returned.  Otherwise *vpp is NULL and the function returns an
    458  * appropriate error code.
    459  */
    460 int
    461 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    462     struct componentname *cnp, char *target)
    463 {
    464 	int error;
    465 	struct tmpfs_dirent *de;
    466 	struct tmpfs_mount *tmp;
    467 	struct tmpfs_node *dnode;
    468 	struct tmpfs_node *node;
    469 	struct tmpfs_node *parent;
    470 
    471 	KASSERT(VOP_ISLOCKED(dvp));
    472 	KASSERT(cnp->cn_flags & HASBUF);
    473 
    474 	tmp = VFS_TO_TMPFS(dvp->v_mount);
    475 	dnode = VP_TO_TMPFS_DIR(dvp);
    476 	*vpp = NULL;
    477 
    478 	/* If the entry we are creating is a directory, we cannot overflow
    479 	 * the number of links of its parent, because it will get a new
    480 	 * link. */
    481 	if (vap->va_type == VDIR) {
    482 		/* Ensure that we do not overflow the maximum number of links
    483 		 * imposed by the system. */
    484 		KASSERT(dnode->tn_links <= LINK_MAX);
    485 		if (dnode->tn_links == LINK_MAX) {
    486 			error = EMLINK;
    487 			goto out;
    488 		}
    489 
    490 		parent = dnode;
    491 	} else
    492 		parent = NULL;
    493 
    494 	/* Allocate a node that represents the new file. */
    495 	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
    496 	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
    497 	    cnp->cn_proc, &node);
    498 	if (error != 0)
    499 		goto out;
    500 
    501 	/* Allocate a directory entry that points to the new file. */
    502 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
    503 	    &de);
    504 	if (error != 0) {
    505 		tmpfs_free_node(tmp, node);
    506 		goto out;
    507 	}
    508 
    509 	/* Allocate a vnode for the new file. */
    510 	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
    511 	if (error != 0) {
    512 		tmpfs_free_dirent(tmp, de, TRUE);
    513 		tmpfs_free_node(tmp, node);
    514 		goto out;
    515 	}
    516 
    517 	/* Now that all required items are allocated, we can proceed to
    518 	 * insert the new node into the directory, an operation that
    519 	 * cannot fail. */
    520 	tmpfs_dir_attach(dvp, de);
    521 	VN_KNOTE(dvp, NOTE_WRITE);
    522 
    523 out:
    524 	if (error != 0 || !(cnp->cn_flags & SAVESTART))
    525 		PNBUF_PUT(cnp->cn_pnbuf);
    526 	vput(dvp);
    527 
    528 	KASSERT(!VOP_ISLOCKED(dvp));
    529 	KASSERT(IFF(error == 0, *vpp != NULL));
    530 
    531 	return error;
    532 }
    533 
    534 /* --------------------------------------------------------------------- */
    535 
    536 /*
    537  * Attaches the directory entry de to the directory represented by vp.
    538  * Note that this does not change the link count of the node pointed by
    539  * the directory entry, as this is done by tmpfs_alloc_dirent.
    540  */
    541 void
    542 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
    543 {
    544 	struct tmpfs_node *dnode;
    545 
    546 	dnode = VP_TO_TMPFS_DIR(vp);
    547 
    548 	TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
    549 	dnode->tn_size += sizeof(struct tmpfs_dirent);
    550 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
    551 	    TMPFS_NODE_MODIFIED;
    552 	uvm_vnp_setsize(vp, dnode->tn_size);
    553 }
    554 
    555 /* --------------------------------------------------------------------- */
    556 
    557 /*
    558  * Detaches the directory entry de from the directory represented by vp.
    559  * Note that this does not change the link count of the node pointed by
    560  * the directory entry, as this is done by tmpfs_free_dirent.
    561  */
    562 void
    563 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
    564 {
    565 	struct tmpfs_node *dnode;
    566 
    567 	KASSERT(VOP_ISLOCKED(vp));
    568 
    569 	dnode = VP_TO_TMPFS_DIR(vp);
    570 
    571 	if (dnode->tn_readdir_lastp == de) {
    572 		dnode->tn_readdir_lastn = 0;
    573 		dnode->tn_readdir_lastp = NULL;
    574 	}
    575 
    576 	TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
    577 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
    578 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
    579 	    TMPFS_NODE_MODIFIED;
    580 	uvm_vnp_setsize(vp, dnode->tn_size);
    581 }
    582 
    583 /* --------------------------------------------------------------------- */
    584 
    585 /*
    586  * Looks for a directory entry in the directory represented by node.
    587  * 'cnp' describes the name of the entry to look for.  Note that the .
    588  * and .. components are not allowed as they do not physically exist
    589  * within directories.
    590  *
    591  * Returns a pointer to the entry when found, otherwise NULL.
    592  */
    593 struct tmpfs_dirent *
    594 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
    595 {
    596 	boolean_t found;
    597 	struct tmpfs_dirent *de;
    598 
    599 	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
    600 	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
    601 	    cnp->cn_nameptr[1] == '.')));
    602 	TMPFS_VALIDATE_DIR(node);
    603 
    604 	node->tn_status |= TMPFS_NODE_ACCESSED;
    605 
    606 	found = 0;
    607 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
    608 		KASSERT(cnp->cn_namelen < 0xffff);
    609 		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
    610 		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
    611 			found = 1;
    612 			break;
    613 		}
    614 	}
    615 
    616 	return found ? de : NULL;
    617 }
    618 
    619 /* --------------------------------------------------------------------- */
    620 
    621 /*
    622  * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
    623  * directory and returns it in the uio space.  The function returns 0
    624  * on success, -1 if there was not enough space in the uio structure to
    625  * hold the directory entry or an appropriate error code if another
    626  * error happens.
    627  */
    628 int
    629 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
    630 {
    631 	int error;
    632 	struct dirent dent;
    633 
    634 	TMPFS_VALIDATE_DIR(node);
    635 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
    636 
    637 	dent.d_fileno = node->tn_id;
    638 	dent.d_type = DT_DIR;
    639 	dent.d_namlen = 1;
    640 	dent.d_name[0] = '.';
    641 	dent.d_name[1] = '\0';
    642 	dent.d_reclen = _DIRENT_SIZE(&dent);
    643 
    644 	if (dent.d_reclen > uio->uio_resid)
    645 		error = -1;
    646 	else {
    647 		error = uiomove(&dent, dent.d_reclen, uio);
    648 		if (error == 0)
    649 			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
    650 	}
    651 
    652 	node->tn_status |= TMPFS_NODE_ACCESSED;
    653 
    654 	return error;
    655 }
    656 
    657 /* --------------------------------------------------------------------- */
    658 
    659 /*
    660  * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
    661  * directory and returns it in the uio space.  The function returns 0
    662  * on success, -1 if there was not enough space in the uio structure to
    663  * hold the directory entry or an appropriate error code if another
    664  * error happens.
    665  */
    666 int
    667 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
    668 {
    669 	int error;
    670 	struct dirent dent;
    671 
    672 	TMPFS_VALIDATE_DIR(node);
    673 	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
    674 
    675 	dent.d_fileno = node->tn_parent->tn_id;
    676 	dent.d_type = DT_DIR;
    677 	dent.d_namlen = 2;
    678 	dent.d_name[0] = '.';
    679 	dent.d_name[1] = '.';
    680 	dent.d_name[2] = '\0';
    681 	dent.d_reclen = _DIRENT_SIZE(&dent);
    682 
    683 	if (dent.d_reclen > uio->uio_resid)
    684 		error = -1;
    685 	else {
    686 		error = uiomove(&dent, dent.d_reclen, uio);
    687 		if (error == 0) {
    688 			struct tmpfs_dirent *de;
    689 
    690 			de = TAILQ_FIRST(&node->tn_dir);
    691 			if (de == NULL)
    692 				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
    693 			else
    694 				uio->uio_offset = TMPFS_DIRCOOKIE(de);
    695 		}
    696 	}
    697 
    698 	node->tn_status |= TMPFS_NODE_ACCESSED;
    699 
    700 	return error;
    701 }
    702 
    703 /* --------------------------------------------------------------------- */
    704 
    705 /*
    706  * Lookup a directory entry by its associated cookie.
    707  */
    708 struct tmpfs_dirent *
    709 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
    710 {
    711 	struct tmpfs_dirent *de;
    712 
    713 	if (cookie == node->tn_readdir_lastn &&
    714 	    node->tn_readdir_lastp != NULL) {
    715 		return node->tn_readdir_lastp;
    716 	}
    717 
    718 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
    719 		if (TMPFS_DIRCOOKIE(de) == cookie) {
    720 			break;
    721 		}
    722 	}
    723 
    724 	return de;
    725 }
    726 
    727 /* --------------------------------------------------------------------- */
    728 
    729 /*
    730  * Helper function for tmpfs_readdir.  Returns as much directory entries
    731  * as can fit in the uio space.  The read starts at uio->uio_offset.
    732  * The function returns 0 on success, -1 if there was not enough space
    733  * in the uio structure to hold the directory entry or an appropriate
    734  * error code if another error happens.
    735  */
    736 int
    737 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
    738 {
    739 	int error;
    740 	off_t startcookie;
    741 	struct tmpfs_dirent *de;
    742 
    743 	TMPFS_VALIDATE_DIR(node);
    744 
    745 	/* Locate the first directory entry we have to return.  We have cached
    746 	 * the last readdir in the node, so use those values if appropriate.
    747 	 * Otherwise do a linear scan to find the requested entry. */
    748 	startcookie = uio->uio_offset;
    749 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
    750 	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
    751 	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
    752 		return 0;
    753 	} else {
    754 		de = tmpfs_dir_lookupbycookie(node, startcookie);
    755 	}
    756 	if (de == NULL) {
    757 		return EINVAL;
    758 	}
    759 
    760 	/* Read as much entries as possible; i.e., until we reach the end of
    761 	 * the directory or we exhaust uio space. */
    762 	do {
    763 		struct dirent d;
    764 
    765 		/* Create a dirent structure representing the current
    766 		 * tmpfs_node and fill it. */
    767 		d.d_fileno = de->td_node->tn_id;
    768 		switch (de->td_node->tn_type) {
    769 		case VBLK:
    770 			d.d_type = DT_BLK;
    771 			break;
    772 
    773 		case VCHR:
    774 			d.d_type = DT_CHR;
    775 			break;
    776 
    777 		case VDIR:
    778 			d.d_type = DT_DIR;
    779 			break;
    780 
    781 		case VFIFO:
    782 			d.d_type = DT_FIFO;
    783 			break;
    784 
    785 		case VLNK:
    786 			d.d_type = DT_LNK;
    787 			break;
    788 
    789 		case VREG:
    790 			d.d_type = DT_REG;
    791 			break;
    792 
    793 		case VSOCK:
    794 			d.d_type = DT_SOCK;
    795 			break;
    796 
    797 		default:
    798 			KASSERT(0);
    799 		}
    800 		d.d_namlen = de->td_namelen;
    801 		KASSERT(de->td_namelen < sizeof(d.d_name));
    802 		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
    803 		d.d_name[de->td_namelen] = '\0';
    804 		d.d_reclen = _DIRENT_SIZE(&d);
    805 
    806 		/* Stop reading if the directory entry we are treating is
    807 		 * bigger than the amount of data that can be returned. */
    808 		if (d.d_reclen > uio->uio_resid) {
    809 			error = -1;
    810 			break;
    811 		}
    812 
    813 		/* Copy the new dirent structure into the output buffer and
    814 		 * advance pointers. */
    815 		error = uiomove(&d, d.d_reclen, uio);
    816 
    817 		(*cntp)++;
    818 		de = TAILQ_NEXT(de, td_entries);
    819 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
    820 
    821 	/* Update the offset and cache. */
    822 	if (de == NULL) {
    823 		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
    824 		node->tn_readdir_lastn = 0;
    825 		node->tn_readdir_lastp = NULL;
    826 	} else {
    827 		node->tn_readdir_lastn = uio->uio_offset = TMPFS_DIRCOOKIE(de);
    828 		node->tn_readdir_lastp = de;
    829 	}
    830 
    831 	node->tn_status |= TMPFS_NODE_ACCESSED;
    832 
    833 	return error;
    834 }
    835 
    836 /* --------------------------------------------------------------------- */
    837 
    838 /*
    839  * Resizes the aobj associated to the regular file pointed to by vp to
    840  * the size newsize.  'vp' must point to a vnode that represents a regular
    841  * file.  'newsize' must be positive.
    842  *
    843  * Returns zero on success or an appropriate error code on failure.
    844  */
    845 int
    846 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
    847 {
    848 	int error;
    849 	size_t newpages, oldpages;
    850 	struct tmpfs_mount *tmp;
    851 	struct tmpfs_node *node;
    852 
    853 	KASSERT(vp->v_type == VREG);
    854 	KASSERT(newsize >= 0);
    855 
    856 	node = VP_TO_TMPFS_NODE(vp);
    857 	tmp = VFS_TO_TMPFS(vp->v_mount);
    858 
    859 	/* Convert the old and new sizes to the number of pages needed to
    860 	 * store them.  It may happen that we do not need to do anything
    861 	 * because the last allocated page can accommodate the change on
    862 	 * its own. */
    863 	oldpages = round_page(node->tn_size) / PAGE_SIZE;
    864 	KASSERT(oldpages == node->tn_aobj_pages);
    865 	newpages = round_page(newsize) / PAGE_SIZE;
    866 
    867 	if (newpages > oldpages &&
    868 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
    869 		error = ENOSPC;
    870 		goto out;
    871 	}
    872 
    873 	node->tn_aobj_pages = newpages;
    874 
    875 	tmp->tm_pages_used += (newpages - oldpages);
    876 	node->tn_size = newsize;
    877 	uvm_vnp_setsize(vp, newsize);
    878 
    879 	error = 0;
    880 
    881 out:
    882 	return error;
    883 }
    884 
    885 /* --------------------------------------------------------------------- */
    886 
    887 /*
    888  * Returns information about the number of available memory pages,
    889  * including physical and virtual ones.
    890  *
    891  * If 'total' is TRUE, the value returned is the total amount of memory
    892  * pages configured for the system (either in use or free).
    893  * If it is FALSE, the value returned is the amount of free memory pages.
    894  *
    895  * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
    896  * excessive memory usage.
    897  *
    898  * XXX: This function is used every time TMPFS_PAGES_MAX is called to gather
    899  * the amount of free memory, something that happens during _each_
    900  * object allocation.  The time it takes to run this function so many
    901  * times is not negligible, so this value should be stored as an
    902  * aggregate somewhere, possibly within UVM (we cannot do it ourselves
    903  * because we can't get notifications on memory usage changes).
    904  */
    905 size_t
    906 tmpfs_mem_info(boolean_t total)
    907 {
    908 	int i, sec;
    909 	register_t retval;
    910 	size_t size;
    911 	struct swapent *sep;
    912 
    913 	sec = uvmexp.nswapdev;
    914 	sep = (struct swapent *)malloc(sizeof(struct swapent) * sec, M_TEMP,
    915 	    M_WAITOK);
    916 	KASSERT(sep != NULL);
    917 	uvm_swap_stats(SWAP_STATS, sep, sec, &retval);
    918 	KASSERT(retval == sec);
    919 
    920 	size = 0;
    921 	if (total) {
    922 		for (i = 0; i < sec; i++)
    923 			size += dbtob(sep[i].se_nblks) / PAGE_SIZE;
    924 	} else {
    925 		for (i = 0; i < sec; i++)
    926 			size += dbtob(sep[i].se_nblks - sep[i].se_inuse) /
    927 			    PAGE_SIZE;
    928 	}
    929 	size += uvmexp.free;
    930 
    931 	free(sep, M_TEMP);
    932 
    933 	return size;
    934 }
    935 
    936 /* --------------------------------------------------------------------- */
    937 
    938 /*
    939  * Change flags of the given vnode.
    940  * Caller should execute VOP_UPDATE on vp after a successful execution.
    941  * The vnode must be locked on entry and remain locked on exit.
    942  */
    943 int
    944 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
    945 {
    946 	int error;
    947 	struct tmpfs_node *node;
    948 
    949 	KASSERT(VOP_ISLOCKED(vp));
    950 
    951 	node = VP_TO_TMPFS_NODE(vp);
    952 
    953 	/* Disallow this operation if the file system is mounted read-only. */
    954 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    955 		return EROFS;
    956 
    957 	/* XXX: The following comes from UFS code, and can be found in
    958 	 * several other file systems.  Shouldn't this be centralized
    959 	 * somewhere? */
    960 	if (cred->cr_uid != node->tn_uid &&
    961 	    (error = suser(cred, &p->p_acflag)))
    962 		return error;
    963 	if (cred->cr_uid == 0) {
    964 		/* The super-user is only allowed to change flags if the file
    965 		 * wasn't protected before and the securelevel is zero. */
    966 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
    967 		    securelevel > 0)
    968 			return EPERM;
    969 		node->tn_flags = flags;
    970 	} else {
    971 		/* Regular users can change flags provided they only want to
    972 		 * change user-specific ones, not those reserved for the
    973 		 * super-user. */
    974 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
    975 		    (flags & UF_SETTABLE) != flags)
    976 			return EPERM;
    977 		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
    978 			return EPERM;
    979 		node->tn_flags &= SF_SETTABLE;
    980 		node->tn_flags |= (flags & UF_SETTABLE);
    981 	}
    982 
    983 	node->tn_status |= TMPFS_NODE_CHANGED;
    984 	VN_KNOTE(vp, NOTE_ATTRIB);
    985 
    986 	KASSERT(VOP_ISLOCKED(vp));
    987 
    988 	return 0;
    989 }
    990 
    991 /* --------------------------------------------------------------------- */
    992 
    993 /*
    994  * Change access mode on the given vnode.
    995  * Caller should execute VOP_UPDATE on vp after a successful execution.
    996  * The vnode must be locked on entry and remain locked on exit.
    997  */
    998 int
    999 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
   1000 {
   1001 	int error;
   1002 	struct tmpfs_node *node;
   1003 
   1004 	KASSERT(VOP_ISLOCKED(vp));
   1005 
   1006 	node = VP_TO_TMPFS_NODE(vp);
   1007 
   1008 	/* Disallow this operation if the file system is mounted read-only. */
   1009 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1010 		return EROFS;
   1011 
   1012 	/* Immutable or append-only files cannot be modified, either. */
   1013 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1014 		return EPERM;
   1015 
   1016 	/* XXX: The following comes from UFS code, and can be found in
   1017 	 * several other file systems.  Shouldn't this be centralized
   1018 	 * somewhere? */
   1019 	if (cred->cr_uid != node->tn_uid &&
   1020 	    (error = suser(cred, &p->p_acflag)))
   1021 		return error;
   1022 	if (cred->cr_uid != 0) {
   1023 		if (vp->v_type != VDIR && (mode & S_ISTXT))
   1024 			return EFTYPE;
   1025 
   1026 		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
   1027 			return EPERM;
   1028 	}
   1029 
   1030 	node->tn_mode = (mode & ALLPERMS);
   1031 
   1032 	node->tn_status |= TMPFS_NODE_CHANGED;
   1033 	VN_KNOTE(vp, NOTE_ATTRIB);
   1034 
   1035 	KASSERT(VOP_ISLOCKED(vp));
   1036 
   1037 	return 0;
   1038 }
   1039 
   1040 /* --------------------------------------------------------------------- */
   1041 
   1042 /*
   1043  * Change ownership of the given vnode.  At least one of uid or gid must
   1044  * be different than VNOVAL.  If one is set to that value, the attribute
   1045  * is unchanged.
   1046  * Caller should execute VOP_UPDATE on vp after a successful execution.
   1047  * The vnode must be locked on entry and remain locked on exit.
   1048  */
   1049 int
   1050 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
   1051     struct proc *p)
   1052 {
   1053 	int error;
   1054 	struct tmpfs_node *node;
   1055 
   1056 	KASSERT(VOP_ISLOCKED(vp));
   1057 
   1058 	node = VP_TO_TMPFS_NODE(vp);
   1059 
   1060 	/* Assign default values if they are unknown. */
   1061 	KASSERT(uid != VNOVAL || gid != VNOVAL);
   1062 	if (uid == VNOVAL)
   1063 		uid = node->tn_uid;
   1064 	if (gid == VNOVAL)
   1065 		gid = node->tn_gid;
   1066 	KASSERT(uid != VNOVAL && gid != VNOVAL);
   1067 
   1068 	/* Disallow this operation if the file system is mounted read-only. */
   1069 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1070 		return EROFS;
   1071 
   1072 	/* Immutable or append-only files cannot be modified, either. */
   1073 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1074 		return EPERM;
   1075 
   1076 	/* XXX: The following comes from UFS code, and can be found in
   1077 	 * several other file systems.  Shouldn't this be centralized
   1078 	 * somewhere? */
   1079 	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
   1080 	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
   1081 	     groupmember(gid, cred)))) &&
   1082 	    ((error = suser(cred, &p->p_acflag)) != 0))
   1083 		return error;
   1084 
   1085 	node->tn_uid = uid;
   1086 	node->tn_gid = gid;
   1087 
   1088 	node->tn_status |= TMPFS_NODE_CHANGED;
   1089 	VN_KNOTE(vp, NOTE_ATTRIB);
   1090 
   1091 	KASSERT(VOP_ISLOCKED(vp));
   1092 
   1093 	return 0;
   1094 }
   1095 
   1096 /* --------------------------------------------------------------------- */
   1097 
   1098 /*
   1099  * Change size of the given vnode.
   1100  * Caller should execute VOP_UPDATE on vp after a successful execution.
   1101  * The vnode must be locked on entry and remain locked on exit.
   1102  */
   1103 int
   1104 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
   1105     struct proc *p)
   1106 {
   1107 	int error;
   1108 	struct tmpfs_node *node;
   1109 
   1110 	KASSERT(VOP_ISLOCKED(vp));
   1111 
   1112 	node = VP_TO_TMPFS_NODE(vp);
   1113 
   1114 	/* Decide whether this is a valid operation based on the file type. */
   1115 	error = 0;
   1116 	switch (vp->v_type) {
   1117 	case VDIR:
   1118 		return EISDIR;
   1119 
   1120 	case VLNK:
   1121 		/* FALLTHROUGH */
   1122 	case VREG:
   1123 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1124 			return EROFS;
   1125 		break;
   1126 
   1127 	case VBLK:
   1128 		/* FALLTHROUGH */
   1129 	case VCHR:
   1130 		/* FALLTHROUGH */
   1131 	case VSOCK:
   1132 		/* FALLTHROUGH */
   1133 	case VFIFO:
   1134 		/* Allow modifications of special files even if in the file
   1135 		 * system is mounted read-only (we are not modifying the
   1136 		 * files themselves, but the objects they represent). */
   1137 		break;
   1138 
   1139 	default:
   1140 		/* Anything else is unsupported. */
   1141 		return EINVAL;
   1142 	}
   1143 
   1144 	/* Immutable or append-only files cannot be modified, either. */
   1145 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1146 		return EPERM;
   1147 
   1148 	error = VOP_TRUNCATE(vp, size, 0, cred, p);
   1149 	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
   1150 	 * for us, as will update tn_status; no need to do that here. */
   1151 
   1152 	KASSERT(VOP_ISLOCKED(vp));
   1153 
   1154 	return error;
   1155 }
   1156 
   1157 /* --------------------------------------------------------------------- */
   1158 
   1159 /*
   1160  * Change access and modification times of the given vnode.
   1161  * Caller should execute VOP_UPDATE on vp after a successful execution.
   1162  * The vnode must be locked on entry and remain locked on exit.
   1163  */
   1164 int
   1165 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
   1166     int vaflags, struct ucred *cred, struct proc *p)
   1167 {
   1168 	int error;
   1169 	struct tmpfs_node *node;
   1170 
   1171 	KASSERT(VOP_ISLOCKED(vp));
   1172 
   1173 	node = VP_TO_TMPFS_NODE(vp);
   1174 
   1175 	/* Disallow this operation if the file system is mounted read-only. */
   1176 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1177 		return EROFS;
   1178 
   1179 	/* Immutable or append-only files cannot be modified, either. */
   1180 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1181 		return EPERM;
   1182 
   1183 	/* XXX: The following comes from UFS code, and can be found in
   1184 	 * several other file systems.  Shouldn't this be centralized
   1185 	 * somewhere? */
   1186 	if (cred->cr_uid != node->tn_uid &&
   1187 	    (error = suser(cred, &p->p_acflag)) &&
   1188 	    ((vaflags & VA_UTIMES_NULL) == 0 ||
   1189 	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
   1190 		return error;
   1191 
   1192 	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
   1193 		node->tn_status |= TMPFS_NODE_ACCESSED;
   1194 
   1195 	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
   1196 		node->tn_status |= TMPFS_NODE_MODIFIED;
   1197 
   1198 	error = VOP_UPDATE(vp, atime, mtime, 0);
   1199 
   1200 	KASSERT(VOP_ISLOCKED(vp));
   1201 
   1202 	return error;
   1203 }
   1204