Home | History | Annotate | Line # | Download | only in lfs
      1 /*	$NetBSD: lfs_rfw.c,v 1.43 2025/12/10 03:20:59 perseant Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2025 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.43 2025/12/10 03:20:59 perseant Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_quota.h"
     37 #endif
     38 
     39 #include <sys/param.h>
     40 #include <sys/systm.h>
     41 #include <sys/namei.h>
     42 #include <sys/proc.h>
     43 #include <sys/kernel.h>
     44 #include <sys/vnode.h>
     45 #include <sys/mount.h>
     46 #include <sys/kthread.h>
     47 #include <sys/buf.h>
     48 #include <sys/device.h>
     49 #include <sys/file.h>
     50 #include <sys/disklabel.h>
     51 #include <sys/ioctl.h>
     52 #include <sys/errno.h>
     53 #include <sys/malloc.h>
     54 #include <sys/pool.h>
     55 #include <sys/socket.h>
     56 #include <sys/stat.h>
     57 #include <sys/syslog.h>
     58 #include <sys/sysctl.h>
     59 #include <sys/conf.h>
     60 #include <sys/kauth.h>
     61 
     62 #include <miscfs/specfs/specdev.h>
     63 
     64 #include <ufs/lfs/ulfs_quotacommon.h>
     65 #include <ufs/lfs/ulfs_inode.h>
     66 #include <ufs/lfs/ulfsmount.h>
     67 #include <ufs/lfs/ulfs_extern.h>
     68 
     69 #include <uvm/uvm_extern.h>
     70 
     71 #include <ufs/lfs/lfs.h>
     72 #include <ufs/lfs/lfs_accessors.h>
     73 #include <ufs/lfs/lfs_kernel.h>
     74 #include <ufs/lfs/lfs_extern.h>
     75 
     76 #include <miscfs/genfs/genfs.h>
     77 #include <miscfs/genfs/genfs_node.h>
     78 
     79 /*
     80  * Roll-forward code.
     81  */
/* Forward declarations for the file-local roll-forward helpers. */
     82 static bool all_selector(void *, struct vnode *);
     83 static void drop_vnode_pages(struct mount *, struct lwp *);
     84 static void update_inoblk_copy_dinode(struct lfs *, union lfs_dinode *,
     85 				      const union lfs_dinode *);
/* Per-inode-block callbacks handed to lfs_parse_pseg() as ino_func. */
     86 static int update_inogen(struct lfs_inofuncarg *);
     87 static int update_inoblk(struct lfs_inofuncarg *);
/* Per-FINFO callback handed to lfs_parse_pseg() as finfo_func. */
     88 static int finfo_func_rfw(struct lfs_finfofuncarg *);
     89 
     90 static int update_meta(struct lfs *, ino_t, int, daddr_t, daddr_t, size_t,
     91 		       struct lwp *l);
     92 #if 0
     93 static bool lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2);
     94 #endif
     95 
/* Defined elsewhere; lfs_roll_forward() returns immediately when it is zero. */
     96 extern int lfs_do_rfw;
/*
 * Count of data blocks handed to update_meta() by finfo_func_rfw();
 * reset to zero at the start of lfs_roll_forward().
 */
     97 int rblkcnt;
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably a limit on the number of partial segments rolled forward,
 * with 0 meaning no limit -- confirm against lfs_roll_forward().
 */
     98 int lfs_rfw_max_psegs = 0;
     99 
    100 /*
    101  * Allocate a particular inode with a particular version number, freeing
    102  * any previous versions of this inode that may have gone before.
    103  * Used by the roll-forward code.
    104  *
    105  * XXX this function does not have appropriate locking to be used on a live fs;
    106  * XXX but something similar could probably be used for an "undelete" call.
    107  *
    108  * Called with the Ifile inode locked.
    109  */
    110 int
    111 lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l,
    112 	      struct vnode **vpp, union lfs_dinode *dip)
    113 {
    114 	struct vattr va;
    115 	struct vnode *vp;
    116 	struct inode *ip;
    117 	int error;
    118 
    119 	KASSERT(ino > LFS_IFILE_INUM);
    120 	LFS_ASSERT_MAXINO(fs, ino);
    121 
    122 	ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */
    123 
    124 	/*
    125 	 * First, just try a vget. If the version number is the one we want,
    126 	 * we don't have to do anything else.  If the version number is wrong,
    127 	 * take appropriate action.
    128 	 */
    129 	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
    130 	if (error == 0) {
    131 		DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n",
    132 			(int)ino, vp));
    133 
    134 		*vpp = vp;
    135 		ip = VTOI(vp);
    136 		DLOG((DLOG_RF, "  ip->i_gen=%jd dip nlink %jd seeking"
    137 			" version %jd\n", (intmax_t)ip->i_gen,
    138 			(intmax_t)(dip == NULL ? -1
    139 				: lfs_dino_getnlink(fs, dip)), (intmax_t)vers));
    140 		if (ip->i_gen == vers) {
    141 			/*
    142 			 * We have what we wanted already.
    143 			 */
    144 			DLOG((DLOG_RF, "  pre-existing\n"));
    145 			return 0;
    146 		} else if (ip->i_gen < vers && dip != NULL
    147 			&& lfs_dino_getnlink(fs, dip) > 0) {
    148 			/*
    149 			 * We have found a newer version.  Truncate
    150 			 * the old vnode to zero and re-initialize
    151 			 * from the given dinode.
    152 			 */
    153 			DLOG((DLOG_RF, "  replace old version %jd\n",
    154 				(intmax_t)ip->i_gen));
    155 			lfs_truncate(vp, (off_t)0, 0, NOCRED);
    156 			ip->i_gen = vers;
    157 			vp->v_type = IFTOVT(lfs_dino_getmode(fs, dip));
    158 			update_inoblk_copy_dinode(fs, ip->i_din, dip);
    159 			LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
    160 			return 0;
    161 		} else {
    162 			/*
    163 			 * Not the right version and nothing to
    164 			 * initialize from.  Don't recover this data.
    165 			 */
    166 			DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n",
    167 				(int)ino, (int)vers,
    168 				(int)lfs_dino_getgen(fs, ip->i_din)));
    169 			vput(vp);
    170 			*vpp = NULLVP;
    171 			return EEXIST;
    172 		}
    173 	}
    174 
    175 	/*
    176 	 * No version of this inode was found in the cache.
    177 	 * Make a new one from the dinode.  We will add data blocks
    178 	 * as they come in, so scrub any block addresses off of the
    179 	 * inode and reset block counts to zero.
    180 	 */
    181 	if (dip == NULL)
    182 		return ENOENT;
    183 
    184 	vattr_null(&va);
    185 	va.va_type = IFTOVT(lfs_dino_getmode(fs, dip));
    186 	va.va_mode = lfs_dino_getmode(fs, dip) & ALLPERMS;
    187 	va.va_fileid = ino;
    188 	va.va_gen = vers;
    189 	error = vcache_new(fs->lfs_ivnode->v_mount, NULL, &va, NOCRED, NULL,
    190 	    &vp);
    191 	if (error)
    192 		return error;
    193 	error = vn_lock(vp, LK_EXCLUSIVE);
    194 	if (error)
    195 		goto err;
    196 
    197 	ip = VTOI(vp);
    198 	update_inoblk_copy_dinode(fs, ip->i_din, dip);
    199 
    200 	DLOG((DLOG_RF, "lfs_valloc[2] ino %d vp %p size=%lld effnblks=%d,"
    201 		" blocks=%d\n", (int)ino, vp, (long long)ip->i_size,
    202 		(int)ip->i_lfs_effnblks,
    203 		(int)lfs_dino_getblocks(fs, ip->i_din)));
    204 	*vpp = vp;
    205 	return 0;
    206 
    207 err:
    208 	vrele(vp);
    209 	*vpp = NULLVP;
    210 	return error;
    211 }
    212 
    213 /*
    214  * Load the appropriate indirect block, and change the appropriate pointer.
    215  * Mark the block dirty.  Do segment and avail accounting.
    216  */
    217 static int
    218 update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn,
    219 	    daddr_t ndaddr, size_t size, struct lwp *l)
    220 {
    221 	int error;
    222 	struct vnode *vp;
    223 	struct inode *ip;
    224 	daddr_t odaddr;
    225 	struct indir a[ULFS_NIADDR];
    226 	int num;
    227 	struct buf *bp;
    228 	SEGUSE *sup;
    229 	u_int64_t newsize, loff;
    230 
    231 	KASSERT(lbn >= 0);	/* no indirect blocks */
    232 	KASSERT(ino > LFS_IFILE_INUM);
    233 	LFS_ASSERT_MAXINO(fs, ino);
    234 
    235 	DLOG((DLOG_RF, "update_meta: ino %d lbn %d size %d at 0x%jx\n",
    236 	      (int)ino, (int)lbn, (int)size, (uintmax_t)ndaddr));
    237 
    238 	if ((error = lfs_rf_valloc(fs, ino, vers, l, &vp, NULL)) != 0)
    239 		return error;
    240 	ip = VTOI(vp);
    241 
    242 	/*
    243 	 * If block already exists, note its new location
    244 	 * but do not account it as new.
    245 	 */
    246 	ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
    247 	if (odaddr == UNASSIGNED) {
    248 		if ((error = lfs_balloc(vp, (lbn << lfs_sb_getbshift(fs)),
    249 					size, NOCRED, 0, &bp)) != 0) {
    250 			vput(vp);
    251 			return (error);
    252 		}
    253 		/* No need to write, the block is already on disk */
    254 		if (bp->b_oflags & BO_DELWRI) {
    255 			LFS_UNLOCK_BUF(bp);
    256 			/* Account recovery of the previous version */
    257 			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
    258 		}
    259 		brelse(bp, BC_INVAL);
    260 		DLOG((DLOG_RF, "balloc ip->i_lfs_effnblks = %d,"
    261 			" lfs_dino_getblocks(fs, ip->i_din) = %d\n",
    262 			(int)ip->i_lfs_effnblks,
    263 			(int)lfs_dino_getblocks(fs, ip->i_din)));
    264 	} else {
    265 		/* XXX fragextend? */
    266 		DLOG((DLOG_RF, "block exists, no balloc\n"));
    267 	}
    268 
    269 	/*
    270 	 * Extend the file, if it is not large enough already.
    271 	 * XXX This is not exactly right, we don't know how much of the
    272 	 * XXX last block is actually used.
    273 	 *
    274 	 * XXX We should be able to encode the actual data length of the
    275 	 * XXX last block in fi_lastlength, since we can infer the
    276 	 * XXX necessary block length from that using a variant of
    277 	 * XXX lfs_blksize().
    278 	 */
    279 	loff = lfs_lblktosize(fs, lbn);
    280 	if (loff >= (ULFS_NDADDR << lfs_sb_getbshift(fs))) {
    281 		/* No fragments */
    282 		newsize = loff + 1;
    283 	} else {
    284 		/* Subtract only a fragment to account for block size */
    285 		newsize = loff + size - lfs_fsbtob(fs, 1) + 1;
    286 	}
    287 
    288 	if (ip->i_size < newsize) {
    289 		DLOG((DLOG_RF, "ino %d size %d -> %d\n",
    290 		      (int)ino, (int)ip->i_size, (int)newsize));
    291 		lfs_dino_setsize(fs, ip->i_din, newsize);
    292 		ip->i_size = newsize;
    293 		/*
    294 		 * tell vm our new size for the case the inode won't
    295 		 * appear later.
    296 		 */
    297 		uvm_vnp_setsize(vp, newsize);
    298 	}
    299 
    300 	lfs_update_single(fs, NULL, vp, lbn, ndaddr, size);
    301 
    302 	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);
    303 	DLOG((DLOG_SU, "seg %jd += %jd for ino %jd"
    304 		" lbn %jd db 0x%jd (rfw)\n",
    305 		(intmax_t)lfs_dtosn(fs, ndaddr),
    306 		(intmax_t)size,
    307 		(intmax_t)ip->i_number,
    308 		(intmax_t)lbn,
    309 		(intmax_t)ndaddr));
    310 	sup->su_nbytes += size;
    311 	LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);
    312 
    313 	/* differences here should be due to UNWRITTEN indirect blocks. */
    314 	if (vp->v_type != VLNK) {
    315 		if (!(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din))
    316 #if 0
    317 		    || !(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
    318 			 ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din))
    319 #endif /* 0 */
    320 			) {
    321 			vprint("vnode", vp);
    322 			printf("effnblks=%jd dino_getblocks=%jd\n",
    323 			       (intmax_t)ip->i_lfs_effnblks,
    324 			       (intmax_t)lfs_dino_getblocks(fs, ip->i_din));
    325 		}
    326 		KASSERT(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din));
    327 #if 0
    328 		KASSERT(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
    329 			ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din));
    330 #endif /* 0 */
    331 	}
    332 
    333 #ifdef DEBUG
    334 	/* Now look again to make sure it worked */
    335 	ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
    336 	if (LFS_DBTOFSB(fs, odaddr) != ndaddr)
    337 		DLOG((DLOG_RF, "update_meta: failed setting ino %jd lbn %jd"
    338 		      " to %jd\n", (intmax_t)ino, (intmax_t)lbn, (intmax_t)ndaddr));
    339 #endif /* DEBUG */
    340 	vput(vp);
    341 	return 0;
    342 }
    343 
    344 /*
    345  * Copy some the fields of the dinode as needed by update_inoblk().
    346  */
    347 static void
    348 update_inoblk_copy_dinode(struct lfs *fs,
    349     union lfs_dinode *dstu, const union lfs_dinode *srcu)
    350 {
    351 	if (fs->lfs_is64) {
    352 		struct lfs64_dinode *dst = &dstu->u_64;
    353 		const struct lfs64_dinode *src = &srcu->u_64;
    354 		unsigned i;
    355 
    356 		/*
    357 		 * Copy everything but the block pointers and di_blocks.
    358 		 * XXX what about di_extb?
    359 		 */
    360 		dst->di_mode = src->di_mode;
    361 		dst->di_nlink = src->di_nlink;
    362 		dst->di_uid = src->di_uid;
    363 		dst->di_gid = src->di_gid;
    364 		dst->di_blksize = src->di_blksize;
    365 		dst->di_size = src->di_size;
    366 		dst->di_atime = src->di_atime;
    367 		dst->di_mtime = src->di_mtime;
    368 		dst->di_ctime = src->di_ctime;
    369 		dst->di_birthtime = src->di_birthtime;
    370 		dst->di_mtimensec = src->di_mtimensec;
    371 		dst->di_atimensec = src->di_atimensec;
    372 		dst->di_ctimensec = src->di_ctimensec;
    373 		dst->di_birthnsec = src->di_birthnsec;
    374 		dst->di_gen = src->di_gen;
    375 		dst->di_kernflags = src->di_kernflags;
    376 		dst->di_flags = src->di_flags;
    377 		dst->di_extsize = src->di_extsize;
    378 		dst->di_modrev = src->di_modrev;
    379 		dst->di_inumber = src->di_inumber;
    380 		for (i = 0; i < __arraycount(src->di_spare); i++) {
    381 			dst->di_spare[i] = src->di_spare[i];
    382 		}
    383 		/* Short symlinks store their data in di_db. */
    384 		if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
    385 		    && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
    386 			memcpy(dst->di_db, src->di_db, src->di_size);
    387 		}
    388 	} else {
    389 		struct lfs32_dinode *dst = &dstu->u_32;
    390 		const struct lfs32_dinode *src = &srcu->u_32;
    391 
    392 		/* Get mode, link count, size, and times */
    393 		memcpy(dst, src, offsetof(struct lfs32_dinode, di_db[0]));
    394 
    395 		/* Then the rest, except di_blocks */
    396 		dst->di_flags = src->di_flags;
    397 		dst->di_gen = src->di_gen;
    398 		dst->di_uid = src->di_uid;
    399 		dst->di_gid = src->di_gid;
    400 		dst->di_modrev = src->di_modrev;
    401 
    402 		/* Short symlinks store their data in di_db. */
    403 		if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
    404 		    && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
    405 			memcpy(dst->di_db, src->di_db, src->di_size);
    406 		}
    407 	}
    408 }
    409 
    410 static int
    411 update_inoblk(struct lfs_inofuncarg *lifa)
    412 {
    413 	struct lfs *fs;
    414 	daddr_t offset;
    415 	struct lwp *l;
    416 	struct vnode *devvp, *vp;
    417 	struct inode *ip;
    418 	union lfs_dinode *dip;
    419 	struct buf *dbp, *ibp;
    420 	int error;
    421 	IFILE *ifp;
    422 	unsigned i, num;
    423 	uint32_t gen;
    424 	char *buf;
    425 	ino_t ino;
    426 
    427 	fs = lifa->fs;
    428 	offset = lifa->offset;
    429 	l = lifa->l;
    430 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
    431 
    432 	/*
    433 	 * Get the inode, update times and perms.
    434 	 * DO NOT update disk blocks, we do that separately.
    435 	 */
    436 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
    437 	    0, &dbp);
    438 	if (error) {
    439 		DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error));
    440 		return error;
    441 	}
    442 	buf = malloc(dbp->b_bcount, M_SEGMENT, M_WAITOK);
    443 	memcpy(buf, dbp->b_data, dbp->b_bcount);
    444 	brelse(dbp, BC_AGE);
    445 	num = LFS_INOPB(fs);
    446 	for (i = num; i-- > 0; ) {
    447 		dip = DINO_IN_BLOCK(fs, buf, i);
    448 		ino = lfs_dino_getinumber(fs, dip);
    449 		if (ino <= LFS_IFILE_INUM)
    450 			continue;
    451 
    452 		LFS_ASSERT_MAXINO(fs, ino);
    453 
    454 		/* Check generation number */
    455 		LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
    456 		gen = lfs_if_getversion(fs, ifp);
    457 		brelse(ibp, 0);
    458 		if (lfs_dino_getgen(fs, dip) < gen) {
    459 			continue;
    460 		}
    461 
    462 		/*
    463 		 * This inode is the newest generation.  Load it.
    464 		 */
    465 		error = lfs_rf_valloc(fs, ino, lfs_dino_getgen(fs, dip),
    466 				      l, &vp, dip);
    467 		if (error) {
    468 			DLOG((DLOG_RF, "update_inoblk: lfs_rf_valloc"
    469 			      " returned %d\n", error));
    470 			continue;
    471 		}
    472 		ip = VTOI(vp);
    473 		if (lfs_dino_getsize(fs, dip) != ip->i_size
    474 		    && vp->v_type != VLNK) {
    475 			/* XXX What should we do with symlinks? */
    476 			DLOG((DLOG_RF, "  ino %jd size %jd -> %jd\n",
    477 				(intmax_t)ino,
    478 				(intmax_t)ip->i_size,
    479 				(intmax_t)lfs_dino_getsize(fs, dip)));
    480 			lfs_truncate(vp, lfs_dino_getsize(fs, dip), 0,
    481 				     NOCRED);
    482 		}
    483 		update_inoblk_copy_dinode(fs, ip->i_din, dip);
    484 
    485 		ip->i_flags = lfs_dino_getflags(fs, dip);
    486 		ip->i_gen = lfs_dino_getgen(fs, dip);
    487 		ip->i_uid = lfs_dino_getuid(fs, dip);
    488 		ip->i_gid = lfs_dino_getgid(fs, dip);
    489 
    490 		ip->i_mode = lfs_dino_getmode(fs, dip);
    491 		ip->i_nlink = lfs_dino_getnlink(fs, dip);
    492 		ip->i_size = lfs_dino_getsize(fs, dip);
    493 
    494 		LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
    495 
    496 		/* Re-initialize to get type right */
    497 		ulfs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p,
    498 			  &vp);
    499 
    500 		/* Record change in location and do segment accounting */
    501 		lfs_update_iaddr(fs, ip, offset);
    502 
    503 		vput(vp);
    504 	}
    505 	free(buf, M_SEGMENT);
    506 
    507 	return 0;
    508 }
    509 
    510 /*
    511  * Note the highest generation number of each inode in the Ifile.
    512  * This allows us to skip processing data for intermediate versions.
    513  */
    514 static int
    515 update_inogen(struct lfs_inofuncarg *lifa)
    516 {
    517 	struct lfs *fs;
    518 	daddr_t offset;
    519 	struct vnode *devvp;
    520 	union lfs_dinode *dip;
    521 	struct buf *dbp, *ibp;
    522 	int error;
    523 	IFILE *ifp;
    524 	unsigned i, num;
    525 
    526 	fs = lifa->fs;
    527 	offset = lifa->offset;
    528 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
    529 
    530 	/* Read inode block */
    531 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
    532 	    0, &dbp);
    533 	if (error) {
    534 		DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error));
    535 		return error;
    536 	}
    537 
    538 	/* Check each inode against ifile entry */
    539 	num = LFS_INOPB(fs);
    540 	for (i = num; i-- > 0; ) {
    541 		dip = DINO_IN_BLOCK(fs, dbp->b_data, i);
    542 		if (lfs_dino_getinumber(fs, dip) == LFS_IFILE_INUM)
    543 			continue;
    544 
    545 		/* Update generation number */
    546 		LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
    547 		if (lfs_if_getversion(fs, ifp) < lfs_dino_getgen(fs, dip))
    548 			lfs_if_setversion(fs, ifp, lfs_dino_getgen(fs, dip));
    549 		LFS_WRITEIENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
    550 		if (error)
    551 			break;
    552 	}
    553 	brelse(dbp, 0);
    554 
    555 	return error;
    556 }
    557 
    558 static int
    559 finfo_func_rfw(struct lfs_finfofuncarg *lffa)
    560 {
    561 	struct lfs *fs;
    562 	FINFO *fip;
    563 	daddr_t *offsetp;
    564 	struct lwp *l;
    565 	int j;
    566 	size_t size;
    567 	ino_t ino;
    568 
    569 	fs = lffa->fs;
    570 	fip = lffa->finfop;
    571 	offsetp = lffa->offsetp;
    572 	l = lffa->l;
    573 	size = lfs_sb_getbsize(fs);
    574 	ino = lfs_fi_getino(fs, fip);
    575 	LFS_ASSERT_MAXINO(fs, ino);
    576 	for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
    577 		if (j == lfs_fi_getnblocks(fs, fip) - 1)
    578 			size = lfs_fi_getlastlength(fs, fip);
    579 
    580 		/* Account for and update any direct blocks */
    581 		if (ino > LFS_IFILE_INUM &&
    582 		    lfs_fi_getblock(fs, fip, j) >= 0) {
    583 			update_meta(fs, ino,
    584 				    lfs_fi_getversion(fs, fip),
    585 				    lfs_fi_getblock(fs, fip, j),
    586 				    *offsetp, size, l);
    587 			++rblkcnt;
    588 		}
    589 		*offsetp += lfs_btofsb(fs, size);
    590 	}
    591 
    592 	return 0;
    593 }
    594 
    595 int
    596 lfs_skip_superblock(struct lfs *fs, daddr_t *offsetp)
    597 {
    598 	daddr_t offset;
    599 	int i;
    600 
    601 	/*
    602 	 * If this is segment 0, skip the label.
    603 	 * If the segment has a superblock and we're at the top
    604 	 * of the segment, skip the superblock.
    605 	 */
    606 	offset = *offsetp;
    607 	if (offset == lfs_sb_gets0addr(fs)) {
    608 		offset += lfs_btofsb(fs, LFS_LABELPAD);
    609 	}
    610 	for (i = 0; i < LFS_MAXNUMSB; i++) {
    611 		if (offset == lfs_sb_getsboff(fs, i)) {
    612 			offset += lfs_btofsb(fs, LFS_SBPAD);
    613 			break;
    614 		}
    615 	}
    616 	*offsetp = offset;
    617 	return 0;
    618 }
    619 
    620 /*
    621  * Read the partial sement at offset.
    622  *
    623  * If finfo_func and ino_func are both NULL, check the summary
    624  * and data checksums.  During roll forward, this must be done in its
    625  * entirety before processing any blocks.
    626  *
    627  * If finfo_func is given, use that to process every file block
    628  * in the segment summary.  If ino_func is given, use that to process
    629  * every inode block.
    630  */
    631 int
    632 lfs_parse_pseg(struct lfs *fs, daddr_t *offsetp, u_int64_t nextserial,
    633 	       kauth_cred_t cred, int *pseg_flags, struct lwp *l,
    634 	       int (*ino_func)(struct lfs_inofuncarg *),
    635 	       int (*finfo_func)(struct lfs_finfofuncarg *),
    636 	       int flags, void *arg)
    637 {
    638 	struct vnode *devvp;
    639 	struct buf *bp, *dbp;
    640 	int error, ninos, i, j;
    641 	SEGSUM *ssp;
    642 	daddr_t offset, prevoffset;
    643 	IINFO *iip;
    644 	FINFO *fip;
    645 	size_t size;
    646 	uint32_t datasum, foundsum;
    647 	char *buf;
    648 	struct lfs_inofuncarg lifa;
    649 	struct lfs_finfofuncarg lffa;
    650 
    651 	KASSERT(fs != NULL);
    652 	KASSERT(offsetp != NULL);
    653 
    654 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
    655 
    656 	/* Set up callback arguments */
    657 	lifa.fs = fs;
    658 	/* lifa.offset = offset; */
    659 	lifa.cred = cred;
    660 	lifa.l = l;
    661 	lifa.buf = malloc(lfs_sb_getbsize(fs), M_SEGMENT, M_WAITOK);
    662 
    663 	lifa.arg = arg;
    664 
    665 	lffa.fs = fs;
    666 	/* lffa.offsetp = offsetp; */
    667 	/* lffa.finfop = finfop; */
    668 	lffa.cred = cred;
    669 	lffa.l = l;
    670 	lffa.arg = arg;
    671 
    672 	prevoffset = *offsetp;
    673 	lfs_skip_superblock(fs, offsetp);
    674 	offset = *offsetp;
    675 
    676 	/* Read in the segment summary */
    677 	buf = malloc(lfs_sb_getsumsize(fs), M_SEGMENT, M_WAITOK);
    678 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getsumsize(fs),
    679 	    0, &bp);
    680 	if (error)
    681 		goto err;
    682 	memcpy(buf, bp->b_data, bp->b_bcount);
    683 	brelse(bp, BC_AGE);
    684 
    685 	ssp = (SEGSUM *)buf;
    686 
    687 	if (lfs_ss_getmagic(fs, ssp) != SS_MAGIC) {
    688 		DLOG((DLOG_RF, "Bad magic at 0x%" PRIx64 "\n",
    689 		      offset));
    690 		offset = -1;
    691 		goto err;
    692 	}
    693 
    694 	if (flags & CKSEG_CKSUM) {
    695 		size_t sumstart;
    696 
    697 		sumstart = lfs_ss_getsumstart(fs);
    698 		if (lfs_ss_getsumsum(fs, ssp) !=
    699 		    cksum((char *)ssp + sumstart,
    700 			  lfs_sb_getsumsize(fs) - sumstart)) {
    701 			DLOG((DLOG_RF, "Sumsum error at 0x%" PRIx64 "\n",
    702 				offset));
    703 			offset = -1;
    704 			goto err;
    705 		}
    706 	}
    707 
    708 #if 0
    709 	/*
    710 	 * Under normal conditions, we should never be producing
    711 	 * a partial segment with neither inode blocks nor data blocks.
    712 	 * However, these do sometimes appear and they need not
    713 	 * prevent us from continuing.
    714 	 */
    715 	if (lfs_ss_getnfinfo(fs, ssp) == 0 &&
    716 	    lfs_ss_getninos(fs, ssp) == 0) {
    717 		DLOG((DLOG_RF, "Empty pseg at 0x%" PRIx64 "\n",
    718 		      offset));
    719 		offset = -1;
    720 		goto err;
    721 	}
    722 #endif /* 0 */
    723 
    724 	if (lfs_sb_getversion(fs) == 1) {
    725 		if (lfs_ss_getcreate(fs, ssp) < lfs_sb_gettstamp(fs)) {
    726 			DLOG((DLOG_RF, "Old data at 0x%" PRIx64 "\n", offset));
    727 			offset = -1;
    728 			goto err;
    729 		}
    730 	} else {
    731 		if (nextserial > 0
    732 		    && lfs_ss_getserial(fs, ssp) != nextserial) {
    733 			DLOG((DLOG_RF, "Serial number at 0x%jx given as 0x%jx,"
    734 			      " expected 0x%jx\n", (intmax_t)offset,
    735 			      (intmax_t)lfs_ss_getserial(fs, ssp),
    736 			      (intmax_t)nextserial));
    737 			offset = -1;
    738 			goto err;
    739 		}
    740 		if (lfs_ss_getident(fs, ssp) != lfs_sb_getident(fs)) {
    741 			DLOG((DLOG_RF, "Incorrect fsid (0x%x vs 0x%x) at 0x%"
    742 			      PRIx64 "\n", lfs_ss_getident(fs, ssp),
    743 			      lfs_sb_getident(fs), offset));
    744 			offset = -1;
    745 			goto err;
    746 		}
    747 	}
    748 
    749 #ifdef DIAGNOSTIC
    750 	if (lfs_ss_getnfinfo(fs, ssp) > lfs_sb_getssize(fs) / lfs_sb_getfsize(fs)) {
    751 		printf("At offset 0x%jx, nfinfo %jd > max frags %jd\n",
    752 		       (intmax_t)offset,
    753 		       (intmax_t)lfs_ss_getnfinfo(fs, ssp),
    754 		       (intmax_t)lfs_sb_getssize(fs) / lfs_sb_getfsize(fs));
    755 	}
    756 #endif
    757 	KASSERT(lfs_ss_getnfinfo(fs, ssp) <= lfs_sb_getssize(fs) / lfs_sb_getfsize(fs));
    758 #ifdef DIAGNOSTIC
    759 	if (lfs_ss_getnfinfo(fs, ssp) > lfs_sb_getfsize(fs) / sizeof(FINFO32)) {
    760 		printf("At offset 0x%jx, nfinfo %jd > max entries %jd\n",
    761 		       (intmax_t)offset,
    762 		       (intmax_t)lfs_ss_getnfinfo(fs, ssp),
    763 		       (intmax_t)lfs_sb_getssize(fs) / lfs_sb_getfsize(fs));
    764 	}
    765 #endif
    766 	KASSERT(lfs_ss_getnfinfo(fs, ssp) <= lfs_sb_getfsize(fs) / sizeof(FINFO32));
    767 
    768 	if (pseg_flags)
    769 		*pseg_flags = lfs_ss_getflags(fs, ssp);
    770 	ninos = howmany(lfs_ss_getninos(fs, ssp), LFS_INOPB(fs));
    771 	iip = SEGSUM_IINFOSTART(fs, buf);
    772 	fip = SEGSUM_FINFOBASE(fs, (SEGSUM *)buf);
    773 
    774 	/* Handle individual blocks */
    775 	foundsum = 0;
    776 	offset += lfs_btofsb(fs, lfs_sb_getsumsize(fs));
    777 	for (i = 0; i < lfs_ss_getnfinfo(fs, ssp) || ninos; ++i) {
    778 		/* Inode block? */
    779 		if (ninos && lfs_ii_getblock(fs, iip) == offset) {
    780 			if (flags & CKSEG_CKSUM) {
    781 				/* Read in the head and add to the buffer */
    782 				error = bread(devvp, LFS_FSBTODB(fs, offset),
    783 					lfs_sb_getbsize(fs), 0, &dbp);
    784 				if (error) {
    785 					offset = -1;
    786 					goto err;
    787 				}
    788 				foundsum = lfs_cksum_part(dbp->b_data,
    789 					sizeof(uint32_t), foundsum);
    790 				brelse(dbp, BC_AGE);
    791 			} else if (ino_func != NULL) {
    792 				lifa.offset = offset;
    793 				error = (*ino_func)(&lifa);
    794 				if (error != 0) {
    795 					offset = -1;
    796 					goto err;
    797 				}
    798 			}
    799 
    800 			offset += lfs_btofsb(fs, lfs_sb_getibsize(fs));
    801 			iip = NEXTLOWER_IINFO(fs, iip);
    802 			--ninos;
    803 			--i; /* compensate for ++i in loop header */
    804 			continue;
    805 		}
    806 
    807 		/* File block */
    808 		size = lfs_sb_getbsize(fs);
    809 		if (flags & CKSEG_CKSUM) {
    810 			for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
    811 				if (j == lfs_fi_getnblocks(fs, fip) - 1)
    812 					size = lfs_fi_getlastlength(fs, fip);
    813 				error = bread(devvp, LFS_FSBTODB(fs, offset),
    814 					      size, 0, &dbp);
    815 				if (error) {
    816 					offset = -1;
    817 					goto err;
    818 				}
    819 				foundsum = lfs_cksum_part(dbp->b_data,
    820 							  sizeof(uint32_t), foundsum);
    821 				brelse(dbp, BC_AGE);
    822 				offset += lfs_btofsb(fs, size);
    823 			}
    824 		} else if (finfo_func != NULL) {
    825 			lffa.offsetp = &offset;
    826 			lffa.finfop = fip;
    827 			(*finfo_func)(&lffa);
    828 		} else {
    829 			int n = lfs_fi_getnblocks(fs, fip);
    830 			size = lfs_fi_getlastlength(fs, fip);
    831 			offset += lfs_btofsb(fs, lfs_sb_getbsize(fs) * (n - 1)
    832 					     + size);
    833 		}
    834 		fip = NEXT_FINFO(fs, fip);
    835 	}
    836 
    837 	/* Checksum the array, compare */
    838 	if (flags & CKSEG_CKSUM) {
    839 		datasum = lfs_ss_getdatasum(fs, ssp);
    840 		foundsum = lfs_cksum_fold(foundsum);
    841 		if (datasum != foundsum) {
    842 			DLOG((DLOG_RF, "Datasum error at 0x%" PRIx64
    843 			      " (wanted %x got %x)\n",
    844 			      offset, datasum, foundsum));
    845 			offset = -1;
    846 			goto err;
    847 		}
    848 	} else {
    849 		/* Don't clog the buffer queue */
    850 		mutex_enter(&lfs_lock);
    851 		if (locked_queue_count > LFS_MAX_BUFS ||
    852 		    locked_queue_bytes > LFS_MAX_BYTES) {
    853 			lfs_flush(fs, SEGM_CKP, 0);
    854 		}
    855 		mutex_exit(&lfs_lock);
    856 	}
    857 
    858 	/*
    859 	 * If we're at the end of the segment, move to the next.
    860 	 * A partial segment needs space for a segment header (1 fsb)
    861 	 * and a full block ("frag" fsb).  Thus, adding "frag" fsb should
    862 	 * still be within the current segment (whereas frag + 1 might
    863 	 * be at the start of the next segment).
    864 	 *
    865 	 * This needs to match the definition of LFS_PARTIAL_FITS
    866 	 * in lfs_segment.c.
    867 	 */
    868 	if (lfs_dtosn(fs, offset + lfs_sb_getfrag(fs))
    869 	    != lfs_dtosn(fs, offset)) {
    870 		if (lfs_dtosn(fs, offset) == lfs_dtosn(fs, lfs_ss_getnext(fs,
    871 									ssp))) {
    872 			offset = -1;
    873 			goto err;
    874 		}
    875 		offset = lfs_ss_getnext(fs, ssp);
    876 		DLOG((DLOG_RF, "LFS roll forward: moving to offset 0x%" PRIx64
    877 		       " -> segment %d\n", offset, lfs_dtosn(fs,offset)));
    878 	}
    879 	if (flags & CKSEG_AVAIL)
    880 		lfs_sb_subavail(fs, offset - prevoffset);
    881 
    882     err:
    883 	free(lifa.buf, M_SEGMENT);
    884 	free(buf, M_SEGMENT);
    885 
    886 	*offsetp = offset;
    887 	return 0;
    888 }
    889 
    890 /*
    891  * Roll forward.
    892  */
    893 void
    894 lfs_roll_forward(struct lfs *fs, struct mount *mp, struct lwp *l)
    895 {
    896 	int flags, dirty;
    897 	daddr_t startoffset, offset, nextoffset, endpseg;
    898 	u_int64_t nextserial, startserial, endserial;
    899 	int sn, curseg;
    900 	struct proc *p;
    901 	kauth_cred_t cred;
    902 	SEGUSE *sup;
    903 	struct buf *bp;
    904 
    905 	p = l ? l->l_proc : NULL;
    906 	cred = p ? p->p_cred : NOCRED;
    907 
    908 	/*
    909 	 * We don't roll forward for v1 filesystems, because
    910 	 * of the danger that the clock was turned back between the last
    911 	 * checkpoint and crash.  This would roll forward garbage.
    912 	 *
    913 	 * v2 filesystems don't have this problem because they use a
    914 	 * monotonically increasing serial number instead of a timestamp.
    915 	 */
    916 	rblkcnt = 0;
    917 	if ((lfs_sb_getpflags(fs) & LFS_PF_CLEAN) || !lfs_do_rfw
    918 	    || lfs_sb_getversion(fs) <= 1 || p == NULL)
    919 		return;
    920 
    921 	DLOG((DLOG_RF, "%s: begin roll forward at serial 0x%jx\n",
    922 		lfs_sb_getfsmnt(fs), (intmax_t)lfs_sb_getserial(fs)));
    923 	DEBUG_CHECK_FREELIST(fs);
    924 
    925 	/*
    926 	 * Phase I: Find the address of the last good partial
    927 	 * segment that was written after the checkpoint.  Mark
    928 	 * the segments in question dirty, so they won't be
    929 	 * reallocated.
    930 	 */
    931 	endpseg = startoffset = offset = lfs_sb_getoffset(fs);
    932 	flags = 0x0;
    933 	DLOG((DLOG_RF, "LFS roll forward phase 1: start at offset 0x%"
    934 	      PRIx64 "\n", offset));
    935 	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
    936 	if (!(sup->su_flags & SEGUSE_DIRTY))
    937 		lfs_sb_subnclean(fs, 1);
    938 	sup->su_flags |= SEGUSE_DIRTY;
    939 	LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
    940 
    941 	startserial = lfs_sb_getserial(fs);
    942 	endserial = nextserial = startserial + 1;
    943 	nextoffset = offset;
    944 	while (1) {
    945 		nextoffset = offset;
    946 		lfs_parse_pseg(fs, &nextoffset, nextserial,
    947 			     cred, &flags, l, NULL, NULL, CKSEG_CKSUM, NULL);
    948 		if (nextoffset == -1)
    949 			break;
    950 		if (lfs_sntod(fs, offset) != lfs_sntod(fs, nextoffset)) {
    951 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset),
    952 				     bp);
    953 			if (!(sup->su_flags & SEGUSE_DIRTY))
    954 				lfs_sb_subnclean(fs, 1);
    955 			sup->su_flags |= SEGUSE_DIRTY;
    956 			LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
    957 		}
    958 
    959 		DLOG((DLOG_RF, "LFS roll forward phase 1: offset=0x%jx"
    960 			" serial=0x%jx\n", (intmax_t)nextoffset,
    961 			(intmax_t)nextserial));
    962 		if (flags & SS_DIROP) {
    963 			DLOG((DLOG_RF, "lfs_mountfs: dirops at 0x%"
    964 			      PRIx64 "\n", offset));
    965 			if (!(flags & SS_CONT)) {
    966 			     DLOG((DLOG_RF, "lfs_mountfs: dirops end "
    967 				   "at 0x%" PRIx64 "\n", offset));
    968 			}
    969 		}
    970 		offset = nextoffset;
    971 		++nextserial;
    972 
    973 		if (!(flags & SS_CONT)) {
    974 			endpseg = nextoffset;
    975 			endserial = nextserial;
    976 		}
    977 		if (lfs_rfw_max_psegs > 0
    978 		    && nextserial > startserial + lfs_rfw_max_psegs)
    979 			break;
    980 	}
    981 	if (flags & SS_CONT) {
    982 		DLOG((DLOG_RF, "LFS roll forward: warning: incomplete "
    983 			"dirops discarded (0x%jx < 0x%jx)\n",
    984 			endpseg, nextoffset));
    985 	}
    986 	if (lfs_sb_getversion(fs) > 1)
    987 		lfs_sb_setserial(fs, endserial);
    988 	DLOG((DLOG_RF, "LFS roll forward phase 1: completed: "
    989 	      "endpseg=0x%" PRIx64 "\n", endpseg));
    990 	offset = startoffset;
    991 	if (offset != endpseg) {
    992 		/* Don't overwrite what we're trying to preserve */
    993 		lfs_sb_setoffset(fs, endpseg);
    994 		lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, endpseg)));
    995 		for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
    996 			sn = (sn + 1) % lfs_sb_getnseg(fs);
    997 			/* XXX could we just fail to roll forward? */
    998 			if (sn == curseg)
    999 				panic("lfs_mountfs: no clean segments");
   1000 			LFS_SEGENTRY(sup, fs, sn, bp);
   1001 			dirty = (sup->su_flags & SEGUSE_DIRTY);
   1002 			brelse(bp, 0);
   1003 			if (!dirty)
   1004 				break;
   1005 		}
   1006 		lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
   1007 		/* Explicitly set this segment dirty */
   1008 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
   1009 		sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
   1010 		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
   1011 
   1012 		/*
   1013 		 * Phase II: Identify the highest generation of each
   1014 		 * inode.  We will ignore inodes and data blocks
   1015 		 * belonging to old versions.
   1016 		 */
   1017 		offset = startoffset;
   1018 		nextserial = startserial + 1;
   1019 		DLOG((DLOG_RF, "LFS roll forward phase 2 beginning\n"));
   1020 		while (offset > 0 && offset != endpseg) {
   1021 			lfs_parse_pseg(fs, &offset, nextserial++, cred,
   1022 				     NULL, l, update_inogen, NULL,
   1023 				     CKSEG_NONE, NULL);
   1024 			DEBUG_CHECK_FREELIST(fs);
   1025 		}
   1026 
   1027 		/*
   1028 		 * Phase III: Update inodes.
   1029 		 */
   1030 		offset = startoffset;
   1031 		nextserial = startserial + 1;
   1032 		DLOG((DLOG_RF, "LFS roll forward phase 3 beginning\n"));
   1033 		while (offset > 0 && offset != endpseg) {
   1034 			lfs_parse_pseg(fs, &offset, nextserial++, cred,
   1035 				     NULL, l, update_inoblk, NULL,
   1036 				     CKSEG_NONE, NULL);
   1037 			DEBUG_CHECK_FREELIST(fs);
   1038 		}
   1039 
   1040 		/*
   1041 		 * Phase IV: Roll forward, updating data blocks.
   1042 		 */
   1043 		offset = startoffset;
   1044 		nextserial = startserial + 1;
   1045 		DLOG((DLOG_RF, "LFS roll forward phase 4 beginning\n"));
   1046 		while (offset > 0 && offset != endpseg) {
   1047 			lfs_parse_pseg(fs, &offset, nextserial++, cred,
   1048 				     NULL, l, NULL, finfo_func_rfw,
   1049 				     CKSEG_AVAIL, NULL);
   1050 			DEBUG_CHECK_FREELIST(fs);
   1051 		}
   1052 
   1053 		/*
   1054 		 * Finish: flush our changes to disk.
   1055 		 */
   1056 		lfs_sb_setserial(fs, endserial);
   1057 
   1058 		lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
   1059 		DLOG((DLOG_RF, "lfs_mountfs: roll forward "
   1060 		      "examined %jd blocks\n",
   1061 		      (intmax_t)(endpseg - startoffset)));
   1062 	}
   1063 
   1064 	/* Get rid of our vnodes, except the ifile */
   1065 	drop_vnode_pages(mp, l);
   1066 	DLOG((DLOG_RF, "LFS roll forward complete\n"));
   1067 	printf("%s: roll forward recovered %d data blocks\n",
   1068 		lfs_sb_getfsmnt(fs), rblkcnt);
   1069 
   1070 	/*
   1071 	 * At this point we have no more changes to write to disk.
   1072 	 * Reset the "avail" count to match the segments as they
   1073 	 * appear on disk, and the clean segment count.
   1074 	 */
   1075 	lfs_reset_avail(fs);
   1076 }
   1077 
/*
 * Selector callback that accepts every vnode it is offered.
 * Neither argument is examined.
 */
static bool
all_selector(void *cl, struct vnode *vp)
{
	(void)cl;
	(void)vp;

	return true;
}
   1083 
   1084 /*
   1085  * Dump any pages from vnodes that may have been put on
   1086  * during truncation.
   1087  */
   1088 static void
   1089 drop_vnode_pages(struct mount *mp, struct lwp *l)
   1090 {
   1091        struct vnode_iterator *marker;
   1092        struct lfs *fs;
   1093        struct vnode *vp;
   1094 
   1095        fs = VFSTOULFS(mp)->um_lfs;
   1096        vfs_vnode_iterator_init(mp, &marker);
   1097        while ((vp = vfs_vnode_iterator_next(marker,
   1098                all_selector, NULL)) != NULL) {
   1099                if (vp == fs->lfs_ivnode)
   1100                        continue;
   1101                VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
   1102                uvm_vnp_setsize(vp, 0);
   1103                uvm_vnp_setsize(vp, VTOI(vp)->i_size);
   1104                VOP_UNLOCK(vp);
   1105                vrele(vp);
   1106        }
   1107        vfs_vnode_iterator_destroy(marker);
   1108 }
   1109