      1 /*	$NetBSD: tmpfs_subr.c,v 1.2 2005/09/10 22:28:57 jmmv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Julio M. Merino Vidal.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Efficient memory file system supporting functions.
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.2 2005/09/10 22:28:57 jmmv Exp $");
     45 
     46 #include <sys/param.h>
     47 #include <sys/dirent.h>
     48 #include <sys/event.h>
     49 #include <sys/malloc.h>
     50 #include <sys/mount.h>
     51 #include <sys/namei.h>
     52 #include <sys/time.h>
     53 #include <sys/stat.h>
     54 #include <sys/systm.h>
     55 #include <sys/swap.h>
     56 #include <sys/vnode.h>
     57 
     58 #include <uvm/uvm.h>
     59 
     60 #include <miscfs/specfs/specdev.h>
     61 #include <fs/tmpfs/tmpfs.h>
     62 #include <fs/tmpfs/tmpfs_fifoops.h>
     63 #include <fs/tmpfs/tmpfs_specops.h>
     64 #include <fs/tmpfs/tmpfs_vnops.h>
     65 
     66 /* --------------------------------------------------------------------- */
     67 
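         /* Allocates a new node of type 'type' inside the 'tmp' mount
          * point, with its owner set to 'uid', its group to 'gid' and its
          * mode set to 'mode'.  For directories, 'parent' must point to the
          * parent directory, or be NULL only when allocating the file
          * system's root node; for symbolic links, 'target' holds the path
          * the link points to; for block and character devices, 'rdev'
          * holds the device number.  Nodes are reused from the list of
          * available nodes when possible and taken from the node pool
          * otherwise.  On success, the new node is returned in *node and
          * zero is returned; otherwise an appropriate error code is
          * returned. */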
     68 int
     69 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
     70     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
     71     char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
     72 {
     73 	struct timeval tv;
     74 	struct tmpfs_node *nnode;
     75 
     76 	/* If the root directory of the 'tmp' file system is not yet
     77 	 * allocated, this must be the request to do it. */
     78 	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
     79 
     80 	KASSERT(IFF(type == VLNK, target != NULL));
     81 	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
     82 
     83 	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
     84 
     85 	nnode = NULL;
     86 	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
     87 		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
     88 		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
     89 			return ENOSPC;
     90 
     91 		nnode =
     92 		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
     93 		if (nnode == NULL)
     94 			return ENOSPC;
     95 		nnode->tn_id = tmp->tm_nodes_last++;
     96 		nnode->tn_gen = 0;
     97 	} else {
     98 		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
     99 		LIST_REMOVE(nnode, tn_entries);
    100 		nnode->tn_gen++;
    101 	}
    102 	KASSERT(nnode != NULL);
    103 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
    104 
    105 	/* Generic initialization. */
    106 	nnode->tn_type = type;
    107 	nnode->tn_size = 0;
    108 	nnode->tn_status = 0;
    109 	nnode->tn_flags = 0;
    110 	nnode->tn_links = 0;
    111 	microtime(&tv);
    112 	TIMEVAL_TO_TIMESPEC(&tv, &nnode->tn_atime);
    113 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
    114 	    nnode->tn_atime;
    115 	nnode->tn_uid = uid;
    116 	nnode->tn_gid = gid;
    117 	nnode->tn_mode = mode;
    118 	nnode->tn_vnode = NULL;
    119 
    120 	/* Type-specific initialization. */
    121 	switch (nnode->tn_type) {
    122 	case VBLK:
    123 	case VCHR:
    124 		nnode->tn_rdev = rdev;
    125 		break;
    126 
    127 	case VDIR:
    128 		TAILQ_INIT(&nnode->tn_dir);
    129 		nnode->tn_parent = (parent == NULL) ? nnode : parent;
    130 		nnode->tn_readdir_lastn = 0;
    131 		nnode->tn_readdir_lastp = NULL;
    132 		nnode->tn_links++;
    133 		nnode->tn_parent->tn_links++;
    134 		break;
    135 
    136 	case VFIFO:
    137 		/* FALLTHROUGH */
    138 	case VSOCK:
    139 		break;
    140 
    141 	case VLNK:
    142 		KASSERT(strlen(target) < MAXPATHLEN);
    143 		nnode->tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool,
    144 		    strlen(target), 0);
    145 		if (nnode->tn_link == NULL) {
    146 			nnode->tn_type = VNON;
    147 			tmpfs_free_node(tmp, nnode);
    148 			return ENOSPC;
    149 		}
    150 		strcpy(nnode->tn_link, target);
    151 		nnode->tn_size = strlen(target);
    152 		break;
    153 
    154 	case VREG:
    155 		nnode->tn_aobj = NULL;
    156 		nnode->tn_aobj_pages = 0;
    157 		nnode->tn_va = 0;
    158 		break;
    159 
    160 	default:
    161 		KASSERT(0);
    162 	}
    163 
    164 	*node = nnode;
    165 	return 0;
    166 }
    167 
    168 /* --------------------------------------------------------------------- */
    169 
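         /* Destroys the node 'node' from the file system 'tmp'.  Any
          * type-specific resources (the target string of a symbolic link or
          * the anonymous memory object backing a regular file) are
          * released, the count of used pages is updated and the node is
          * placed back on the list of available nodes, keeping its
          * identifier so that it can be reused later. */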
    170 void
    171 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
    172 {
    173 	ino_t id;
    174 	unsigned long gen;
    175 	size_t pages;
    176 
    177 	switch (node->tn_type) {
    178 	case VNON:
     179 		/* Do not do anything.  VNON exists so that the allocation
     180 		 * routine can clean up after itself without duplicating
     181 		 * code. */
    182 		/* FALLTHROUGH */
    183 	case VBLK:
    184 		/* FALLTHROUGH */
    185 	case VCHR:
    186 		/* FALLTHROUGH */
    187 	case VDIR:
    188 		/* FALLTHROUGH */
    189 	case VFIFO:
    190 		/* FALLTHROUGH */
    191 	case VSOCK:
    192 		pages = 0;
    193 		break;
    194 
    195 	case VLNK:
    196 		tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_link,
    197 		    strlen(node->tn_link));
    198 		pages = 0;
    199 		break;
    200 
    201 	case VREG:
    202 		if (node->tn_aobj != NULL)
    203 			uao_detach(node->tn_aobj);
    204 		pages = node->tn_aobj_pages;
    205 		break;
    206 
    207 	default:
    208 		KASSERT(0);
    209 		pages = 0; /* Shut up gcc when !DIAGNOSTIC. */
    210 		break;
    211 	}
    212 
    213 	tmp->tm_pages_used -= pages;
    214 
    215 	LIST_REMOVE(node, tn_entries);
    216 	id = node->tn_id;
    217 	gen = node->tn_gen;
    218 	memset(node, 0, sizeof(struct tmpfs_node));
    219 	node->tn_id = id;
    220 	node->tn_type = VNON;
    221 	node->tn_gen = gen;
    222 	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
    223 }
    224 
    225 /* --------------------------------------------------------------------- */
    226 
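         /* Allocates a new directory entry for the node 'node' with the
          * name given by 'name' and 'len'.  The entry is not attached to
          * any directory here; that is the caller's responsibility.  The
          * link count of 'node' is incremented to account for the new
          * entry.  Returns zero and the new entry in *de on success, or
          * ENOSPC if the pools are exhausted. */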
    227 int
    228 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    229     const char *name, uint16_t len, struct tmpfs_dirent **de)
    230 {
    231 	struct tmpfs_dirent *nde;
    232 
    233 	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
    234 	if (nde == NULL)
    235 		return ENOSPC;
    236 
    237 	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
    238 	if (nde->td_name == NULL) {
    239 		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
    240 		return ENOSPC;
    241 	}
    242 	nde->td_namelen = len;
    243 	memcpy(nde->td_name, name, len);
    244 	nde->td_node = node;
    245 
    246 	node->tn_links++;
    247 	*de = nde;
    248 
    249 	return 0;
    250 }
    251 
    252 /* --------------------------------------------------------------------- */
    253 
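         /* Frees the directory entry 'de' from the file system 'tmp'.  If
          * 'node_exists' is true, the link count of the node pointed to by
          * the entry is decremented.  The entry must have been detached
          * from its directory before calling this function. */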
    254 void
    255 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    256     boolean_t node_exists)
    257 {
    258 	if (node_exists) {
    259 		struct tmpfs_node *node;
    260 
    261 		node = de->td_node;
    262 
    263 		KASSERT(node->tn_links > 0);
    264 		node->tn_links--;
    265 	}
    266 
    267 	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
    268 	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
    269 }
    270 
    271 /* --------------------------------------------------------------------- */
    272 
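         /* Allocates a vnode for the node 'node', which lives in the mount
          * point 'mp'.  If the node already has an associated vnode, it is
          * simply locked and returned.  Otherwise a new vnode is obtained
          * and initialized according to the node's type: special files are
          * aliased through checkalias, FIFOs get the FIFO operations vector
          * and the root directory is marked with VROOT.  On success, the
          * locked vnode is returned in *vpp and zero is returned. */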
    273 int
    274 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
    275 {
    276 	int error;
    277 	struct vnode *nvp;
    278 	struct vnode *vp;
    279 
    280 	vp = NULL;
    281 
    282 	if (node->tn_vnode != NULL) {
    283 		vp = node->tn_vnode;
    284 		vget(vp, LK_EXCLUSIVE | LK_RETRY);
    285 		error = 0;
    286 		goto out;
    287 	}
    288 
    289 	/* Get a new vnode and associate it with our node. */
    290 	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
    291 	if (error != 0)
    292 		goto out;
    293 	KASSERT(vp != NULL);
    294 
    295 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    296 	if (error != 0) {
    297 		vp->v_data = NULL;
    298 		ungetnewvnode(vp);
    299 		vp = NULL;
    300 		goto out;
    301 	}
    302 
    303 	vp->v_data = node;
    304 	vp->v_type = node->tn_type;
    305 
    306 	/* Type-specific initialization. */
    307 	switch (node->tn_type) {
    308 	case VBLK:
    309 		/* FALLTHROUGH */
    310 	case VCHR:
    311 		vp->v_op = tmpfs_specop_p;
    312 		nvp = checkalias(vp, node->tn_rdev, mp);
    313 		if (nvp != NULL) {
    314 			/* Discard unneeded vnode, but save its inode. */
    315 			nvp->v_data = vp->v_data;
    316 			vp->v_data = NULL;
    317 
    318 			/* XXX spec_vnodeops has no locking, so we have to
    319 			 * do it explicitly. */
    320 			VOP_UNLOCK(vp, 0);
    321 			vp->v_op = spec_vnodeop_p;
    322 			vp->v_flag &= ~VLOCKSWORK;
    323 			vrele(vp);
    324 			vgone(vp);
    325 
    326 			/* Reinitialize aliased node. */
    327 			vp = nvp;
    328 			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    329 			if (error != 0) {
    330 				vp->v_data = NULL;
    331 				vp = NULL;
    332 				goto out;
    333 			}
    334 		}
    335 		break;
    336 
    337 	case VDIR:
    338 		vp->v_flag = node->tn_parent == node ? VROOT : 0;
    339 		break;
    340 
    341 	case VFIFO:
    342 		vp->v_op = tmpfs_fifoop_p;
    343 		break;
    344 
    345 	case VLNK:
    346 		/* FALLTHROUGH */
    347 	case VREG:
    348 		/* FALLTHROUGH */
    349 	case VSOCK:
    350 		break;
    351 
    352 	default:
    353 		KASSERT(0);
    354 	}
    355 
    356 	uvm_vnp_setsize(vp, node->tn_size);
    357 
    358 	error = 0;
    359 
    360 out:
    361 	*vpp = node->tn_vnode = vp;
    362 
    363 	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
    364 	KASSERT(*vpp == node->tn_vnode);
    365 
    366 	return error;
    367 }
    368 
    369 /* --------------------------------------------------------------------- */
    370 
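         /* Destroys the association between the vnode 'vp' and the node it
          * references. */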
    371 void
    372 tmpfs_free_vp(struct vnode *vp)
    373 {
    374 	struct tmpfs_node *node;
    375 
    376 	node = VP_TO_TMPFS_NODE(vp);
    377 
    378 	node->tn_vnode = NULL;
    379 	vp->v_data = NULL;
    380 }
    381 
    382 /* --------------------------------------------------------------------- */
    383 
    384 /* Allocates a new file of type 'type' and adds it to the parent directory
    385  * 'dvp'; this addition is done using the component name given in 'cnp'.
    386  * The ownership of the new file is automatically assigned based on the
    387  * credentials of the caller (through 'cnp'), the group is set based on
    388  * the parent directory and the mode is determined from the 'vap' argument.
    389  * If successful, *vpp holds a vnode to the newly created file and zero
    390  * is returned.  Otherwise *vpp is NULL and the function returns an
     391  * appropriate error code. */
    392 int
    393 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    394     struct componentname *cnp, char *target)
    395 {
    396 	int error;
    397 	struct tmpfs_dirent *de;
    398 	struct tmpfs_mount *tmp;
    399 	struct tmpfs_node *dnode;
    400 	struct tmpfs_node *node;
    401 	struct tmpfs_node *parent;
    402 
    403 	KASSERT(VOP_ISLOCKED(dvp));
    404 	KASSERT(cnp->cn_flags & HASBUF);
    405 
    406 	tmp = VFS_TO_TMPFS(dvp->v_mount);
    407 	dnode = VP_TO_TMPFS_DIR(dvp);
    408 	*vpp = NULL;
    409 
     410 	/* If the entry we are creating is a directory, its parent gains
     411 	 * a new link, so make sure that the parent's link count does not
     412 	 * overflow. */
    413 	if (vap->va_type == VDIR) {
    414 		/* Ensure that we do not overflow the maximum number of links
    415 		 * imposed by the system. */
    416 		KASSERT(dnode->tn_links <= LINK_MAX);
    417 		if (dnode->tn_links == LINK_MAX) {
    418 			error = EMLINK;
    419 			goto out;
    420 		}
    421 
    422 		parent = dnode;
    423 	} else
    424 		parent = NULL;
    425 
    426 	/* Allocate a node that represents the new file. */
    427 	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
    428 	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev,
    429 	    cnp->cn_proc, &node);
    430 	if (error != 0)
    431 		goto out;
    432 
    433 	/* Allocate a directory entry that points to the new file. */
    434 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
    435 	    &de);
    436 	if (error != 0) {
    437 		tmpfs_free_node(tmp, node);
    438 		goto out;
    439 	}
    440 
    441 	/* Allocate a vnode for the new file. */
    442 	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
    443 	if (error != 0) {
    444 		tmpfs_free_dirent(tmp, de, TRUE);
    445 		tmpfs_free_node(tmp, node);
    446 		goto out;
    447 	}
    448 
    449 	/* Now that all required items are allocated, we can proceed to
    450 	 * insert the new node into the directory, an operation that
    451 	 * cannot fail. */
    452 	tmpfs_dir_attach(dvp, de);
    453 	VN_KNOTE(dvp, NOTE_WRITE);
    454 
    455 out:
    456 	if (error != 0 || !(cnp->cn_flags & SAVESTART))
    457 		PNBUF_PUT(cnp->cn_pnbuf);
    458 	vput(dvp);
    459 
    460 	KASSERT(!VOP_ISLOCKED(dvp));
    461 	KASSERT(IFF(error == 0, *vpp != NULL));
    462 
    463 	return error;
    464 }
    465 
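         /* A minimal sketch of how a create-style vnode operation might
          * call the function above (hypothetical caller, for illustration
          * only):
          *
          *	return tmpfs_alloc_file(ap->a_dvp, ap->a_vpp, ap->a_vap,
          *	    ap->a_cnp, NULL);
          */
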
    466 /* --------------------------------------------------------------------- */
    467 
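         /* Attaches the directory entry 'de' to the directory represented
          * by the vnode 'vp'.  The directory's size and status bits are
          * updated accordingly. */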
    468 void
    469 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
    470 {
    471 	struct tmpfs_node *dnode;
    472 
    473 	dnode = VP_TO_TMPFS_DIR(vp);
    474 
    475 	TAILQ_INSERT_TAIL(&dnode->tn_dir, de, td_entries);
    476 	dnode->tn_size += sizeof(struct tmpfs_dirent);
    477 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
    478 	    TMPFS_NODE_MODIFIED;
    479 	uvm_vnp_setsize(vp, dnode->tn_size);
    480 }
    481 
    482 /* --------------------------------------------------------------------- */
    483 
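         /* Detaches the directory entry 'de' from the directory represented
          * by the vnode 'vp'.  The directory's size and status bits are
          * updated accordingly; the entry itself is not freed (see
          * tmpfs_free_dirent). */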
    484 void
    485 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
    486 {
    487 	struct tmpfs_node *dnode;
    488 
    489 	dnode = VP_TO_TMPFS_DIR(vp);
    490 
    491 	TAILQ_REMOVE(&dnode->tn_dir, de, td_entries);
    492 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
    493 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
    494 	    TMPFS_NODE_MODIFIED;
    495 	uvm_vnp_setsize(vp, dnode->tn_size);
    496 }
    497 
    498 /* --------------------------------------------------------------------- */
    499 
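         /* Looks for a directory entry in the directory represented by
          * 'node' whose name matches the one given in 'cnp'.  The entries
          * '.' and '..' are never passed to this function; the caller must
          * handle them.  Returns the matching directory entry, or NULL if
          * none is found. */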
    500 struct tmpfs_dirent *
    501 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
    502 {
    503 	boolean_t found;
    504 	struct tmpfs_dirent *de;
    505 
    506 	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
    507 	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
    508 	    cnp->cn_nameptr[1] == '.')));
    509 	TMPFS_VALIDATE_DIR(node);
    510 
    511 	node->tn_status |= TMPFS_NODE_ACCESSED;
    512 
    513 	found = 0;
    514 	TAILQ_FOREACH(de, &node->tn_dir, td_entries) {
    515 		KASSERT(cnp->cn_namelen < 0xffff);
    516 		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
    517 		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
    518 			found = 1;
    519 			break;
    520 		}
    521 	}
    522 
    523 	return found ? de : NULL;
    524 }
    525 
    526 /* --------------------------------------------------------------------- */
    527 
    528 /* Helper function for tmpfs_readdir.  Creates a '.' entry for the given
    529  * directory and returns it in the uio space.  The function returns 0
    530  * on success, -1 if there was not enough space in the uio structure to
     531  * hold the directory entry, or an appropriate error code if another
     532  * error occurs. */
    533 int
    534 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
    535 {
    536 	int error;
    537 	struct dirent dent;
    538 
    539 	TMPFS_VALIDATE_DIR(node);
    540 	KASSERT(uio->uio_offset == 0);
    541 
    542 	dent.d_fileno = node->tn_id;
    543 	dent.d_type = DT_DIR;
    544 	dent.d_namlen = 1;
    545 	dent.d_name[0] = '.';
    546 	dent.d_name[1] = '\0';
    547 	dent.d_reclen = _DIRENT_SIZE(&dent);
    548 
    549 	if (dent.d_reclen > uio->uio_resid)
    550 		error = -1;
    551 	else {
    552 		error = uiomove(&dent, dent.d_reclen, uio);
    553 		if (error == 0)
    554 			uio->uio_offset += sizeof(struct tmpfs_dirent) - \
    555 			    dent.d_reclen;
    556 	}
    557 
    558 	node->tn_status |= TMPFS_NODE_ACCESSED;
    559 
    560 	return error;
    561 }
    562 
    563 /* --------------------------------------------------------------------- */
    564 
    565 /* Helper function for tmpfs_readdir.  Creates a '..' entry for the given
    566  * directory and returns it in the uio space.  The function returns 0
    567  * on success, -1 if there was not enough space in the uio structure to
     568  * hold the directory entry, or an appropriate error code if another
     569  * error occurs. */
    570 int
    571 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
    572 {
    573 	int error;
    574 	struct dirent dent;
    575 
    576 	TMPFS_VALIDATE_DIR(node);
    577 	KASSERT(uio->uio_offset == sizeof(struct tmpfs_dirent));
    578 
    579 	dent.d_fileno = node->tn_id;
    580 	dent.d_type = DT_DIR;
    581 	dent.d_namlen = 2;
    582 	dent.d_name[0] = '.';
    583 	dent.d_name[1] = '.';
    584 	dent.d_name[2] = '\0';
    585 	dent.d_reclen = _DIRENT_SIZE(&dent);
    586 
    587 	if (dent.d_reclen > uio->uio_resid)
    588 		error = -1;
    589 	else {
    590 		error = uiomove(&dent, dent.d_reclen, uio);
    591 		if (error == 0)
    592 			uio->uio_offset += sizeof(struct tmpfs_dirent) - \
    593 			    dent.d_reclen;
    594 	}
    595 
    596 	node->tn_status |= TMPFS_NODE_ACCESSED;
    597 
    598 	return error;
    599 }
    600 
    601 /* --------------------------------------------------------------------- */
    602 
     603 /* Helper function for tmpfs_readdir.  Returns as many directory entries
     604  * as can fit in the uio space.  The read starts at uio->uio_offset.
     605  * The function returns 0 on success, -1 if there was not enough space
     606  * in the uio structure to hold a directory entry, or an appropriate
     607  * error code if another error occurs. */
    608 int
    609 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio)
    610 {
    611 	int error;
    612 	long cnt, startcnt;
    613 	struct tmpfs_dirent *de;
    614 
    615 	TMPFS_VALIDATE_DIR(node);
    616 	KASSERT(uio->uio_offset % sizeof(struct tmpfs_dirent) == 0);
    617 	KASSERT(uio->uio_offset >= sizeof(struct tmpfs_dirent) * 2);
    618 	KASSERT(uio->uio_offset < node->tn_size +
    619 	    sizeof(struct tmpfs_dirent) * 2);
    620 
    621 	/* Locate the first directory entry we have to return.  We have cached
    622 	 * the last readdir in the node, so use those values if appropriate.
    623 	 * Otherwise do a linear scan to find the requested entry. */
    624 	de = NULL;
    625 	startcnt = uio->uio_offset / sizeof(struct tmpfs_dirent) - 2;
    626 	if (startcnt == node->tn_readdir_lastn && \
    627 	    node->tn_readdir_lastp != NULL) {
    628 		cnt = node->tn_readdir_lastn;
    629 		de = node->tn_readdir_lastp;
    630 	} else {
    631 		cnt = 0;
    632 		de = TAILQ_FIRST(&node->tn_dir);
    633 		while (cnt < startcnt) {
    634 			cnt++;
    635 			de = TAILQ_NEXT(de, td_entries);
    636 
    637 			/* Ensure that if we have not found the desired item,
    638 			 * there are more entries in the directory to continue
    639 			 * the search. */
    640 			KASSERT(IMPLIES(de == TAILQ_LAST(&node->tn_dir,
    641 			    tmpfs_dir), cnt == startcnt));
    642 		}
    643 	}
    644 	KASSERT(cnt == startcnt);
    645 	KASSERT(de != NULL);
    646 
     647 	/* Read as many entries as possible; i.e., until we reach the end of
    648 	 * the directory or we exhaust uio space. */
    649 	do {
    650 		struct dirent d;
    651 
    652 		/* Create a dirent structure representing the current
    653 		 * tmpfs_node and fill it. */
    654 		d.d_fileno = de->td_node->tn_id;
    655 		switch (de->td_node->tn_type) {
    656 		case VBLK:
    657 			d.d_type = DT_BLK;
    658 			break;
    659 
    660 		case VCHR:
    661 			d.d_type = DT_CHR;
    662 			break;
    663 
    664 		case VDIR:
    665 			d.d_type = DT_DIR;
    666 			break;
    667 
    668 		case VFIFO:
    669 			d.d_type = DT_FIFO;
    670 			break;
    671 
    672 		case VLNK:
    673 			d.d_type = DT_LNK;
    674 			break;
    675 
    676 		case VREG:
    677 			d.d_type = DT_REG;
    678 			break;
    679 
    680 		case VSOCK:
    681 			d.d_type = DT_SOCK;
    682 			break;
    683 
    684 		default:
    685 			KASSERT(0);
    686 		}
    687 		d.d_namlen = de->td_namelen;
    688 		KASSERT(de->td_namelen < sizeof(d.d_name));
    689 		(void)memcpy(d.d_name, de->td_name, de->td_namelen);
    690 		d.d_name[de->td_namelen] = '\0';
    691 		d.d_reclen = _DIRENT_SIZE(&d);
    692 
    693 		/* Stop reading if the directory entry we are treating is
    694 		 * bigger than the amount of data that can be returned. */
    695 		if (d.d_reclen > uio->uio_resid) {
    696 			error = -1;
    697 			break;
    698 		}
    699 
    700 		/* Copy the new dirent structure into the output buffer and
    701 		 * advance pointers. */
    702 		error = uiomove(&d, d.d_reclen, uio);
    703 
    704 		cnt++;
    705 		de = TAILQ_NEXT(de, td_entries);
    706 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
    707 
    708 	/* Update the offset in the uio structure to be correctly aligned
    709 	 * with tmpfs_dirent structures.  Otherwise, the offset is the
    710 	 * size of the returned dirent structures, which is useless for us. */
    711 	uio->uio_offset = (cnt + 2) * sizeof(struct tmpfs_dirent);
    712 
    713 	/* Cache the current status. */
    714 	if (de == NULL) {
    715 		KASSERT(cnt == node->tn_size / sizeof(struct tmpfs_dirent));
    716 		node->tn_readdir_lastn = 0;
    717 		node->tn_readdir_lastp = NULL;
    718 	} else {
    719 		node->tn_readdir_lastn = cnt;
    720 		node->tn_readdir_lastp = de;
    721 	}
    722 
    723 	node->tn_status |= TMPFS_NODE_ACCESSED;
    724 
    725 	return error;
    726 }
    727 
    728 /* --------------------------------------------------------------------- */
    729 
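         /* Resizes the regular file represented by the vnode 'vp' to the
          * new size 'newsize'.  The backing anonymous memory object is
          * grown or shrunk as needed (growing currently allocates a new
          * object and copies the old contents into it) and the count of
          * used pages is adjusted.  Returns zero on success or ENOSPC if
          * there is not enough memory to grow the file. */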
    730 int
    731 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
    732 {
    733 	int error;
    734 	size_t newpages, oldpages;
    735 	struct tmpfs_mount *tmp;
    736 	struct tmpfs_node *node;
    737 
    738 	KASSERT(vp->v_type == VREG);
    739 	KASSERT(newsize >= 0);
    740 	KASSERT(newsize != vp->v_size);
    741 
    742 	node = VP_TO_TMPFS_NODE(vp);
    743 	tmp = VFS_TO_TMPFS(vp->v_mount);
    744 
    745 	/* Convert the old and new sizes to the number of pages needed to
    746 	 * store them.  It may happen that we do not need to do anything
    747 	 * because the last allocated page can accommodate the change on
    748 	 * its own. */
    749 	oldpages = round_page(node->tn_size) / PAGE_SIZE;
    750 	KASSERT(oldpages == node->tn_aobj_pages);
    751 	newpages = round_page(newsize) / PAGE_SIZE;
    752 
    753 	if (newpages > oldpages &&
    754 	    newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) {
    755 		error = ENOSPC;
    756 		goto out;
    757 	}
    758 
    759 	if (newpages == 0) {
    760 		uao_detach(node->tn_aobj);
    761 		node->tn_aobj = NULL;
    762 		node->tn_aobj_pages = 0;
    763 		node->tn_va = 0;
    764 	} else if (newpages > oldpages) {
    765 		vaddr_t va;
    766 		struct uvm_object *aobj;
    767 
    768 		aobj = uao_create(newpages * PAGE_SIZE, 0);
    769 		va = vm_map_min(kernel_map);
    770 		error = uvm_map(kernel_map, &va, newpages * PAGE_SIZE,
    771 		    aobj, 0, 0,
    772 		    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
    773 		    UVM_ADV_RANDOM, 0));
    774 		if (error != 0) {
    775 			uao_detach(aobj);
    776 			error = ENOSPC;
    777 			goto out;
    778 		}
    779 
     780 		/* XXX This is really expensive.  Is it possible to pass the
     781 		 * existing map entries instead of copying the data? */
    782 		if (node->tn_size > 0) {
    783 			KASSERT(node->tn_aobj != NULL);
    784 			(void)memcpy((void *)va, (void *)node->tn_va,
    785 			    node->tn_size);
    786 			uao_detach(node->tn_aobj);
    787 		}
    788 
    789 		node->tn_aobj = aobj;
    790 		node->tn_aobj_pages = newpages;
    791 		node->tn_va = va;
    792 	} else if (newpages < oldpages) {
    793 		/* XXX Do we need to shrink the aobj or is the unmap enough? */
     794 		uvm_unmap(kernel_map, node->tn_va + newpages * PAGE_SIZE,
     795 		    node->tn_va + node->tn_aobj_pages * PAGE_SIZE);
    796 		node->tn_aobj_pages = newpages;
    797 	}
    798 
    799 	tmp->tm_pages_used += (newpages - oldpages);
    800 	node->tn_size = newsize;
    801 	uvm_vnp_setsize(vp, newsize);
    802 
    803 	error = 0;
    804 
    805 out:
    806 	return error;
    807 }
    808 
    809 /* --------------------------------------------------------------------- */
    810 
    811 /* Returns information about the number of available memory pages,
    812  * including physical and virtual ones.
    813  *
    814  * If 'total' is TRUE, the value returned is the total amount of memory
    815  * pages configured for the system (either in use or free).
    816  * If it is FALSE, the value returned is the amount of free memory pages.
    817  *
    818  * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
    819  * excessive memory usage.
    820  *
    821  * XXX: This function is used every time TMPFS_PAGES_MAX is called to gather
    822  * the amount of free memory, something that happens during _each_
    823  * object allocation.  The time it takes to run this function so many
    824  * times is not negligible, so this value should be stored as an
    825  * aggregate somewhere, possibly within UVM (we cannot do it ourselves
    826  * because we can't get notifications on memory usage changes). */
    827 size_t
    828 tmpfs_mem_info(boolean_t total)
    829 {
    830 	int i, sec;
    831 	register_t retval;
    832 	size_t size;
    833 	struct swapent *sep;
    834 
    835 	sec = uvmexp.nswapdev;
    836 	sep = (struct swapent *)malloc(sizeof(struct swapent) * sec, M_TEMP,
    837 	    M_WAITOK);
    838 	KASSERT(sep != NULL);
    839 	uvm_swap_stats(SWAP_STATS, sep, sec, &retval);
    840 	KASSERT(retval == sec);
    841 
    842 	size = 0;
    843 	if (total) {
    844 		for (i = 0; i < sec; i++)
    845 			size += dbtob(sep[i].se_nblks) / PAGE_SIZE;
    846 	} else {
    847 		for (i = 0; i < sec; i++)
    848 			size += dbtob(sep[i].se_nblks - sep[i].se_inuse) /
    849 			    PAGE_SIZE;
    850 	}
    851 	size += uvmexp.free;
    852 
    853 	free(sep, M_TEMP);
    854 
    855 	return size;
    856 }
    857 
    858 /* --------------------------------------------------------------------- */
    859 
    860 /* Change flags of the given vnode.
    861  * Caller should execute VOP_UPDATE on vp after a successful execution.
    862  * The vnode must be locked on entry and remain locked on exit. */
    863 int
    864 tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
    865 {
    866 	int error;
    867 	struct tmpfs_node *node;
    868 
    869 	KASSERT(VOP_ISLOCKED(vp));
    870 
    871 	node = VP_TO_TMPFS_NODE(vp);
    872 
    873 	/* Disallow this operation if the file system is mounted read-only. */
    874 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    875 		return EROFS;
    876 
    877 	/* XXX: The following comes from UFS code, and can be found in
    878 	 * several other file systems.  Shouldn't this be centralized
    879 	 * somewhere? */
    880 	if (cred->cr_uid != node->tn_uid &&
    881 	    (error = suser(cred, &p->p_acflag)))
    882 		return error;
    883 	if (cred->cr_uid == 0) {
     884 		/* The super-user is allowed to change the flags unless the
     885 		 * file is already protected and the securelevel is positive. */
    886 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) &&
    887 		    securelevel > 0)
    888 			return EPERM;
    889 		node->tn_flags = flags;
    890 	} else {
    891 		/* Regular users can change flags provided they only want to
    892 		 * change user-specific ones, not those reserved for the
    893 		 * super-user. */
    894 		if ((node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) ||
    895 		    (flags & UF_SETTABLE) != flags)
    896 			return EPERM;
    897 		if ((node->tn_flags & SF_SETTABLE) != (flags & SF_SETTABLE))
    898 			return EPERM;
    899 		node->tn_flags &= SF_SETTABLE;
    900 		node->tn_flags |= (flags & UF_SETTABLE);
    901 	}
    902 
    903 	node->tn_status |= TMPFS_NODE_CHANGED;
    904 	VN_KNOTE(vp, NOTE_ATTRIB);
    905 
    906 	KASSERT(VOP_ISLOCKED(vp));
    907 
    908 	return 0;
    909 }
    910 
    911 /* --------------------------------------------------------------------- */
    912 
    913 /* Change access mode on the given vnode.
    914  * Caller should execute VOP_UPDATE on vp after a successful execution.
    915  * The vnode must be locked on entry and remain locked on exit. */
    916 int
    917 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
    918 {
    919 	int error;
    920 	struct tmpfs_node *node;
    921 
    922 	KASSERT(VOP_ISLOCKED(vp));
    923 
    924 	node = VP_TO_TMPFS_NODE(vp);
    925 
    926 	/* Disallow this operation if the file system is mounted read-only. */
    927 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    928 		return EROFS;
    929 
    930 	/* Immutable or append-only files cannot be modified, either. */
    931 	if (node->tn_flags & (IMMUTABLE | APPEND))
    932 		return EPERM;
    933 
    934 	/* XXX: The following comes from UFS code, and can be found in
    935 	 * several other file systems.  Shouldn't this be centralized
    936 	 * somewhere? */
    937 	if (cred->cr_uid != node->tn_uid &&
    938 	    (error = suser(cred, &p->p_acflag)))
    939 		return error;
    940 	if (cred->cr_uid != 0) {
    941 		if (vp->v_type != VDIR && (mode & S_ISTXT))
    942 			return EFTYPE;
    943 
    944 		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
    945 			return EPERM;
    946 	}
    947 
    948 	node->tn_mode = (mode & ALLPERMS);
    949 
    950 	node->tn_status |= TMPFS_NODE_CHANGED;
    951 	VN_KNOTE(vp, NOTE_ATTRIB);
    952 
    953 	KASSERT(VOP_ISLOCKED(vp));
    954 
    955 	return 0;
    956 }
    957 
    958 /* --------------------------------------------------------------------- */
    959 
    960 /* Change ownership of the given vnode.  At least one of uid or gid must
    961  * be different than VNOVAL.  If one is set to that value, the attribute
    962  * is unchanged.
    963  * Caller should execute VOP_UPDATE on vp after a successful execution.
    964  * The vnode must be locked on entry and remain locked on exit. */
    965 int
    966 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    967     struct proc *p)
    968 {
    969 	int error;
    970 	struct tmpfs_node *node;
    971 
    972 	KASSERT(VOP_ISLOCKED(vp));
    973 
    974 	node = VP_TO_TMPFS_NODE(vp);
    975 
    976 	/* Assign default values if they are unknown. */
    977 	KASSERT(uid != VNOVAL || gid != VNOVAL);
    978 	if (uid == VNOVAL)
    979 		uid = node->tn_uid;
    980 	if (gid == VNOVAL)
    981 		gid = node->tn_gid;
    982 	KASSERT(uid != VNOVAL && gid != VNOVAL);
    983 
    984 	/* Disallow this operation if the file system is mounted read-only. */
    985 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
    986 		return EROFS;
    987 
    988 	/* Immutable or append-only files cannot be modified, either. */
    989 	if (node->tn_flags & (IMMUTABLE | APPEND))
    990 		return EPERM;
    991 
    992 	/* XXX: The following comes from UFS code, and can be found in
    993 	 * several other file systems.  Shouldn't this be centralized
    994 	 * somewhere? */
    995 	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
    996 	    (gid != node->tn_gid && !(cred->cr_gid == node->tn_gid ||
    997 	     groupmember(gid, cred)))) &&
    998 	    ((error = suser(cred, &p->p_acflag)) != 0))
    999 		return error;
   1000 
   1001 	node->tn_uid = uid;
   1002 	node->tn_gid = gid;
   1003 
   1004 	node->tn_status |= TMPFS_NODE_CHANGED;
   1005 	VN_KNOTE(vp, NOTE_ATTRIB);
   1006 
   1007 	KASSERT(VOP_ISLOCKED(vp));
   1008 
   1009 	return 0;
   1010 }
   1011 
   1012 /* --------------------------------------------------------------------- */
   1013 
   1014 /* Change size of the given vnode.
   1015  * Caller should execute VOP_UPDATE on vp after a successful execution.
   1016  * The vnode must be locked on entry and remain locked on exit. */
   1017 int
   1018 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
   1019     struct proc *p)
   1020 {
   1021 	int error;
   1022 	struct tmpfs_node *node;
   1023 
   1024 	KASSERT(VOP_ISLOCKED(vp));
   1025 
   1026 	node = VP_TO_TMPFS_NODE(vp);
   1027 
   1028 	/* Decide whether this is a valid operation based on the file type. */
   1029 	error = 0;
   1030 	switch (vp->v_type) {
   1031 	case VDIR:
   1032 		return EISDIR;
   1033 
   1034 	case VLNK:
   1035 		/* FALLTHROUGH */
   1036 	case VREG:
   1037 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1038 			return EROFS;
   1039 		break;
   1040 
   1041 	case VBLK:
   1042 		/* FALLTHROUGH */
   1043 	case VCHR:
   1044 		/* FALLTHROUGH */
   1045 	case VSOCK:
   1046 		/* FALLTHROUGH */
   1047 	case VFIFO:
    1048 		/* Allow modifications of special files even if the file
   1049 		 * system is mounted read-only (we are not modifying the
   1050 		 * files themselves, but the objects they represent). */
   1051 		break;
   1052 
   1053 	default:
   1054 		/* Anything else is unsupported. */
   1055 		return EINVAL;
   1056 	}
   1057 
   1058 	/* Immutable or append-only files cannot be modified, either. */
   1059 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1060 		return EPERM;
   1061 
   1062 	error = VOP_TRUNCATE(vp, size, 0, cred, p);
   1063 	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
    1064 	 * for us and will update tn_status; no need to do that here. */
   1065 
   1066 	KASSERT(VOP_ISLOCKED(vp));
   1067 
   1068 	return error;
   1069 }
   1070 
   1071 /* --------------------------------------------------------------------- */
   1072 
   1073 /* Change access and modification times of the given vnode.
   1074  * Caller should execute VOP_UPDATE on vp after a successful execution.
   1075  * The vnode must be locked on entry and remain locked on exit. */
   1076 int
   1077 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
   1078     int vaflags, struct ucred *cred, struct proc *p)
   1079 {
   1080 	int error;
   1081 	struct tmpfs_node *node;
   1082 
   1083 	KASSERT(VOP_ISLOCKED(vp));
   1084 
   1085 	node = VP_TO_TMPFS_NODE(vp);
   1086 
   1087 	/* Disallow this operation if the file system is mounted read-only. */
   1088 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1089 		return EROFS;
   1090 
   1091 	/* Immutable or append-only files cannot be modified, either. */
   1092 	if (node->tn_flags & (IMMUTABLE | APPEND))
   1093 		return EPERM;
   1094 
   1095 	/* XXX: The following comes from UFS code, and can be found in
   1096 	 * several other file systems.  Shouldn't this be centralized
   1097 	 * somewhere? */
   1098 	if (cred->cr_uid != node->tn_uid &&
   1099 	    (error = suser(cred, &p->p_acflag)) &&
   1100 	    ((vaflags & VA_UTIMES_NULL) == 0 ||
   1101 	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
   1102 		return error;
   1103 
   1104 	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
   1105 		node->tn_status |= TMPFS_NODE_ACCESSED;
   1106 
   1107 	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
   1108 		node->tn_status |= TMPFS_NODE_MODIFIED;
   1109 
   1110 	error = VOP_UPDATE(vp, atime, mtime, 0);
   1111 
   1112 	KASSERT(VOP_ISLOCKED(vp));
   1113 
   1114 	return error;
   1115 }
   1116