Home | History | Annotate | Line # | Download | only in fsck_lfs
      1 /* $NetBSD: pass6.c,v 1.51 2020/04/03 19:36:33 joerg Exp $	 */
      2 
      3 /*-
      4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/types.h>
     33 #include <sys/param.h>
     34 #include <sys/time.h>
     35 #include <sys/buf.h>
     36 #include <sys/mount.h>
     37 
     38 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */
     39 #define vnode uvnode
     40 #include <ufs/lfs/lfs.h>
     41 #include <ufs/lfs/lfs_accessors.h>
     42 #include <ufs/lfs/lfs_inode.h>
     43 #undef vnode
     44 
     45 #include <assert.h>
     46 #include <err.h>
     47 #include <signal.h>
     48 #include <string.h>
     49 #include <stdio.h>
     50 #include <stdlib.h>
     51 #include <util.h>
     52 
     53 #include "bufcache.h"
     54 #include "lfs_user.h"
     55 #include "segwrite.h"
     56 
     57 #include "fsck.h"
     58 #include "extern.h"
     59 #include "fsutil.h"
     60 
/*
 * Running count of blocks that rfw_update_single() has given a new
 * address.  pass6() zeroes it before rolling forward and reports it
 * to the user afterwards.
 */
static int nnewblocks;
     62 
     63 /*
     64  * Our own copy of lfs_update_single so we can account in seg_table
     65  * as well as the Ifile; and so we can add the blocks to their new
     66  * segment.
     67  *
     68  * Change the given block's address to ndaddr, finding its previous
     69  * location using ulfs_bmaparray().
     70  *
     71  * Account for this change in the segment table.
     72  */
     73 static void
     74 rfw_update_single(struct uvnode *vp, daddr_t lbn, daddr_t ndaddr, size_t size)
     75 {
     76 	SEGUSE *sup;
     77 	struct ubuf *bp;
     78 	struct indir a[ULFS_NIADDR + 2], *ap;
     79 	struct inode *ip;
     80 	daddr_t daddr, ooff;
     81 	int num, error;
     82 	int i, osize = 0;
     83 	int frags, ofrags = 0;
     84 	u_int32_t oldsn, sn;
     85 
     86 	ip = VTOI(vp);
     87 	ip->i_state |= IN_MODIFIED;
     88 
     89 	error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num);
     90 	if (error)
     91 		errx(1, "lfs_updatemeta: ulfs_bmaparray returned %d"
     92 		     " looking up lbn %" PRId64 "\n", error, lbn);
     93 	if (daddr > 0)
     94 		daddr = LFS_DBTOFSB(fs, daddr);
     95 
     96 	frags = lfs_numfrags(fs, size);
     97 	switch (num) {
     98 	case 0:
     99 		ooff = lfs_dino_getdb(fs, ip->i_din, lbn);
    100 		if (ooff <= 0)
    101 			lfs_dino_setblocks(fs, ip->i_din,
    102 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
    103 		else {
    104 			/* possible fragment truncation or extension */
    105 			ofrags = lfs_numfrags(fs, ip->i_lfs_fragsize[lbn]);
    106 			lfs_dino_setblocks(fs, ip->i_din,
    107 			    lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags));
    108 		}
    109 		lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr);
    110 		break;
    111 	case 1:
    112 		ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off);
    113 		if (ooff <= 0)
    114 			lfs_dino_setblocks(fs, ip->i_din,
    115 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
    116 		lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr);
    117 		break;
    118 	default:
    119 		ap = &a[num - 1];
    120 		if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp))
    121 			errx(1, "lfs_updatemeta: bread bno %" PRId64,
    122 			    ap->in_lbn);
    123 
    124 		ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off);
    125 		if (ooff <= 0)
    126 			lfs_dino_setblocks(fs, ip->i_din,
    127 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
    128 		lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr);
    129 		(void) VOP_BWRITE(bp);
    130 	}
    131 
    132 	/*
    133 	 * Update segment usage information, based on old size
    134 	 * and location.
    135 	 */
    136 	if (daddr > 0) {
    137 		oldsn = lfs_dtosn(fs, daddr);
    138 		if (lbn >= 0 && lbn < ULFS_NDADDR)
    139 			osize = ip->i_lfs_fragsize[lbn];
    140 		else
    141 			osize = lfs_sb_getbsize(fs);
    142 		LFS_SEGENTRY(sup, fs, oldsn, bp);
    143 		seg_table[oldsn].su_nbytes -= osize;
    144 		sup->su_nbytes -= osize;
    145 		if (!(bp->b_flags & B_GATHERED))
    146 			fs->lfs_flags |= LFS_IFDIRTY;
    147 		LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
    148 		for (i = 0; i < lfs_btofsb(fs, osize); i++)
    149 			clrbmap(daddr + i);
    150 	}
    151 
    152 	/* If block is beyond EOF, update size */
    153 	if (lbn >= 0 && lfs_dino_getsize(fs, ip->i_din) <= (lbn << lfs_sb_getbshift(fs))) {
    154 		lfs_dino_setsize(fs, ip->i_din, (lbn << lfs_sb_getbshift(fs)) + 1);
    155 	}
    156 
    157 	/* If block frag size is too large for old EOF, update size */
    158 	if (lbn < ULFS_NDADDR) {
    159 		off_t minsize;
    160 
    161 		minsize = (lbn << lfs_sb_getbshift(fs));
    162 		minsize += (size - lfs_sb_getfsize(fs)) + 1;
    163 		if (lfs_dino_getsize(fs, ip->i_din) < minsize)
    164 			lfs_dino_setsize(fs, ip->i_din, minsize);
    165 	}
    166 
    167 	/* Count for the user */
    168 	++nnewblocks;
    169 
    170 	/* Add block to its new segment */
    171 	sn = lfs_dtosn(fs, ndaddr);
    172 	LFS_SEGENTRY(sup, fs, sn, bp);
    173 	seg_table[sn].su_nbytes += size;
    174 	sup->su_nbytes += size;
    175 	if (!(bp->b_flags & B_GATHERED))
    176 		fs->lfs_flags |= LFS_IFDIRTY;
    177 	LFS_WRITESEGENTRY(sup, fs, sn, bp);
    178 	for (i = 0; i < lfs_btofsb(fs, size); i++)
    179 #ifndef VERBOSE_BLOCKMAP
    180 		setbmap(daddr + i);
    181 #else
    182 		setbmap(daddr + i, ip->i_number);
    183 #endif
    184 
    185 	/* Check bfree accounting as well */
    186 	if (daddr <= 0) {
    187 		lfs_sb_subbfree(fs, lfs_btofsb(fs, size));
    188 	} else if (size != osize) {
    189 		lfs_sb_subbfree(fs, frags - ofrags);
    190 	}
    191 
    192 	/*
    193 	 * Now that this block has a new address, and its old
    194 	 * segment no longer owns it, we can forget about its
    195 	 * old size.
    196 	 */
    197 	if (lbn >= 0 && lbn < ULFS_NDADDR)
    198 		ip->i_lfs_fragsize[lbn] = size;
    199 }
    200 
    201 /*
    202  * Remove the vnode from the cache, including any blocks it
    203  * may hold.  Account the blocks.  Finally account the removal
    204  * of the inode from its segment.
    205  */
    206 static void
    207 remove_ino(struct uvnode *vp, ino_t ino)
    208 {
    209 	IFILE *ifp;
    210 	ino_t nextfree;
    211 	SEGUSE *sup;
    212 	CLEANERINFO *cip;
    213 	struct ubuf *bp, *sbp, *cbp;
    214 	struct inodesc idesc;
    215 	daddr_t daddr;
    216 
    217 	if (debug)
    218 		pwarn("remove ino %d\n", (int)ino);
    219 
    220 	LFS_IENTRY(ifp, fs, ino, bp);
    221 	daddr = lfs_if_getdaddr(fs, ifp);
    222 	if (daddr > 0) {
    223 		lfs_if_setdaddr(fs, ifp, 0);
    224 
    225 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
    226 		lfs_if_setnextfree(fs, ifp, nextfree);
    227 		VOP_BWRITE(bp);
    228 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
    229 		sbdirty();
    230 
    231 		if (vp == NULL)
    232 			vp = lfs_raw_vget(fs, ino, fs->lfs_ivnode->v_fd, daddr);
    233 
    234 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
    235 		sup->su_nbytes -= DINOSIZE(fs);
    236 		VOP_BWRITE(sbp);
    237 		seg_table[lfs_dtosn(fs, daddr)].su_nbytes -= DINOSIZE(fs);
    238 	} else
    239 		brelse(bp, 0);
    240 
    241 	/* Do on-disk accounting */
    242 	if (vp) {
    243 		idesc.id_number = ino;
    244 		idesc.id_func = pass4check; /* Delete dinode and blocks */
    245 		idesc.id_type = ADDR;
    246 		idesc.id_lblkno = 0;
    247 		clri(&idesc, "unknown", 2); /* XXX magic number 2 */
    248 		/* vp has been destroyed */
    249 	}
    250 }
    251 
    252 /*
    253  * Use FIP records to update blocks, if the generation number matches.
    254  */
    255 static void
    256 pass6harvest(daddr_t daddr, FINFO *fip)
    257 {
    258 	struct uvnode *vp;
    259 	int i;
    260 	size_t size;
    261 
    262 	vp = vget(fs, lfs_fi_getino(fs, fip));
    263 	if (vp && vp != fs->lfs_ivnode &&
    264 	    lfs_dino_getgen(fs, VTOI(vp)->i_din) == lfs_fi_getversion(fs, fip)) {
    265 		for (i = 0; i < lfs_fi_getnblocks(fs, fip); i++) {
    266 			size = (i == lfs_fi_getnblocks(fs, fip) - 1 ?
    267 				lfs_fi_getlastlength(fs, fip) : lfs_sb_getbsize(fs));
    268 			if (debug)
    269 				pwarn("ino %ju lbn %jd -> 0x%jx\n",
    270 					(uintmax_t)lfs_fi_getino(fs, fip),
    271 					(intmax_t)lfs_fi_getblock(fs, fip, i),
    272 					(intmax_t)daddr);
    273 			rfw_update_single(vp, lfs_fi_getblock(fs, fip, i), daddr, size);
    274 			daddr += lfs_btofsb(fs, size);
    275 		}
    276 	}
    277 }
    278 
    279 /*
    280  * Check validity of blocks on roll-forward inodes.
    281  */
    282 int
    283 pass6check(struct inodesc * idesc)
    284 {
    285 	int i, sn, anyout, anynew;
    286 
    287 	/* Brand new blocks are always OK */
    288 	if (idesc->id_blkno == UNWRITTEN)
    289 		return KEEPON;
    290 
    291 	/* Check that the blocks do not lie within clean segments. */
    292 	anyout = anynew = 0;
    293 	for (i = 0; i < idesc->id_numfrags; i++) {
    294 		sn = lfs_dtosn(fs, idesc->id_blkno + i);
    295 		if (sn < 0 || sn >= lfs_sb_getnseg(fs) ||
    296 		    (seg_table[sn].su_flags & SEGUSE_DIRTY) == 0) {
    297 			anyout = 1;
    298 			break;
    299 		}
    300 		if (seg_table[sn].su_flags & SEGUSE_ACTIVE) {
    301 			if (sn != lfs_dtosn(fs, lfs_sb_getoffset(fs)) ||
    302 			    idesc->id_blkno > lfs_sb_getoffset(fs)) {
    303 				++anynew;
    304 			}
    305 		}
    306 		if (!anynew) {
    307 			/* Clear so pass1check won't be surprised */
    308 			clrbmap(idesc->id_blkno + i);
    309 			seg_table[sn].su_nbytes -= lfs_fsbtob(fs, 1);
    310 		}
    311 	}
    312 	if (anyout) {
    313 		blkerror(idesc->id_number, "BAD", idesc->id_blkno);
    314 		if (badblkcount++ >= MAXBAD) {
    315 			pwarn("EXCESSIVE BAD BLKS I=%llu",
    316 			    (unsigned long long)idesc->id_number);
    317 			if (preen)
    318 				pwarn(" (SKIPPING)\n");
    319 			else if (reply("CONTINUE") == 0)
    320 				err(EEXIT, "%s", "");
    321 			return (STOP);
    322 		}
    323 	}
    324 
    325 	return pass1check(idesc);
    326 }
    327 
    328 static void
    329 account_indir(struct uvnode *vp, union lfs_dinode *dp, daddr_t ilbn, daddr_t daddr, int lvl)
    330 {
    331 	struct ubuf *bp;
    332 	int32_t *dap, *odap, *buf, *obuf;
    333 	daddr_t lbn;
    334 
    335 	if (lvl == 0)
    336 		lbn = -ilbn;
    337 	else
    338 		lbn = ilbn + 1;
    339 	bread(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getbsize(fs), 0, &bp);
    340 	buf = emalloc(lfs_sb_getbsize(fs));
    341 	memcpy(buf, bp->b_data, lfs_sb_getbsize(fs));
    342 	brelse(bp, 0);
    343 
    344 	obuf = emalloc(lfs_sb_getbsize(fs));
    345 	if (vp) {
    346 		bread(vp, ilbn, lfs_sb_getbsize(fs), 0, &bp);
    347 		memcpy(obuf, bp->b_data, lfs_sb_getbsize(fs));
    348 		brelse(bp, 0);
    349 	} else
    350 		memset(obuf, 0, lfs_sb_getbsize(fs));
    351 
    352 	for (dap = buf, odap = obuf;
    353 	     dap < (int32_t *)((char *)buf + lfs_sb_getbsize(fs));
    354 	     ++dap, ++odap) {
    355 		if (*dap > 0 && *dap != *odap) {
    356 			rfw_update_single(vp, lbn, *dap, lfs_dblksize(fs, dp, lbn));
    357 			if (lvl > 0)
    358 				account_indir(vp, dp, lbn, *dap, lvl - 1);
    359 		}
    360 		if (lvl == 0)
    361 			++lbn;
    362 		else if (lvl == 1)
    363 			lbn -= LFS_NINDIR(fs);
    364 		else if (lvl == 2)
    365 			lbn -= LFS_NINDIR(fs) * LFS_NINDIR(fs);
    366 	}
    367 
    368 	free(obuf);
    369 	free(buf);
    370 }
    371 
    372 /*
    373  * Account block changes between new found inode and existing inode.
    374  */
    375 static void
    376 account_block_changes(union lfs_dinode *dp)
    377 {
    378 	int i;
    379 	daddr_t lbn, off, odaddr;
    380 	struct uvnode *vp;
    381 	struct inode *ip;
    382 
    383 	vp = vget(fs, lfs_dino_getinumber(fs, dp));
    384 	ip = (vp ? VTOI(vp) : NULL);
    385 
    386 	/* Check direct block holdings between existing and new */
    387 	for (i = 0; i < ULFS_NDADDR; i++) {
    388 		odaddr = (ip ? lfs_dino_getdb(fs, ip->i_din, i) : 0x0);
    389 		if (lfs_dino_getdb(fs, dp, i) > 0 && lfs_dino_getdb(fs, dp, i) != odaddr)
    390 			rfw_update_single(vp, i, lfs_dino_getdb(fs, dp, i),
    391 					  lfs_dblksize(fs, dp, i));
    392 	}
    393 
    394 	/* Check indirect block holdings between existing and new */
    395 	off = 0;
    396 	for (i = 0; i < ULFS_NIADDR; i++) {
    397 		odaddr = (ip ? lfs_dino_getib(fs, ip->i_din, i) : 0x0);
    398 		if (lfs_dino_getib(fs, dp, i) > 0 && lfs_dino_getib(fs, dp, i) != odaddr) {
    399 			lbn = -(ULFS_NDADDR + off + i);
    400 			rfw_update_single(vp, i, lfs_dino_getib(fs, dp, i), lfs_sb_getbsize(fs));
    401 			account_indir(vp, dp, lbn, lfs_dino_getib(fs, dp, i), i);
    402 		}
    403 		if (off == 0)
    404 			off = LFS_NINDIR(fs);
    405 		else
    406 			off *= LFS_NINDIR(fs);
    407 	}
    408 }
    409 
    410 /*
    411  * Give a previously allocated inode a new address; do segment
    412  * accounting if necessary.
    413  *
    414  * Caller has ensured that this inode is not on the free list, so no
    415  * free list accounting is done.
    416  */
    417 static void
    418 readdress_inode(union lfs_dinode *dp, daddr_t daddr)
    419 {
    420 	IFILE *ifp;
    421 	SEGUSE *sup;
    422 	struct ubuf *bp;
    423 	int sn;
    424 	daddr_t odaddr;
    425 	ino_t thisino = lfs_dino_getinumber(fs, dp);
    426 	struct uvnode *vp;
    427 
    428 	/* Recursively check all block holdings, account changes */
    429 	account_block_changes(dp);
    430 
    431 	/* Move ifile pointer to this location */
    432 	LFS_IENTRY(ifp, fs, thisino, bp);
    433 	odaddr = lfs_if_getdaddr(fs, ifp);
    434 	assert(odaddr != 0);
    435 	lfs_if_setdaddr(fs, ifp, daddr);
    436 	VOP_BWRITE(bp);
    437 
    438 	if (debug)
    439 		pwarn("readdress ino %ju from 0x%jx to 0x%jx mode %o nlink %d\n",
    440 			(uintmax_t)lfs_dino_getinumber(fs, dp),
    441 			(uintmax_t)odaddr,
    442 			(intmax_t)daddr,
    443 			(int)lfs_dino_getmode(fs, dp),
    444 			(int)lfs_dino_getnlink(fs, dp));
    445 
    446 	/* Copy over preexisting in-core inode, if any */
    447 	vp = vget(fs, thisino);
    448 	lfs_copy_dinode(fs, VTOI(vp)->i_din, dp);
    449 
    450 	/* Finally account the inode itself */
    451 	sn = lfs_dtosn(fs, odaddr);
    452 	LFS_SEGENTRY(sup, fs, sn, bp);
    453 	sup->su_nbytes -= DINOSIZE(fs);
    454 	VOP_BWRITE(bp);
    455 	seg_table[sn].su_nbytes -= DINOSIZE(fs);
    456 
    457 	sn = lfs_dtosn(fs, daddr);
    458 	LFS_SEGENTRY(sup, fs, sn, bp);
    459 	sup->su_nbytes += DINOSIZE(fs);
    460 	VOP_BWRITE(bp);
    461 	seg_table[sn].su_nbytes += DINOSIZE(fs);
    462 }
    463 
    464 /*
    465  * Allocate the given inode from the free list.
    466  */
    467 static void
    468 alloc_inode(ino_t thisino, daddr_t daddr)
    469 {
    470 	ino_t ino, nextfree, oldhead;
    471 	IFILE *ifp;
    472 	SEGUSE *sup;
    473 	struct ubuf *bp, *cbp;
    474 	CLEANERINFO *cip;
    475 
    476 	if (debug)
    477 		pwarn("allocating ino %ju at 0x%jx\n", (uintmax_t)thisino,
    478 			(intmax_t)daddr);
    479 	while (thisino >= maxino) {
    480 		extend_ifile(fs);
    481 	}
    482 
    483 	LFS_IENTRY(ifp, fs, thisino, bp);
    484 	if (lfs_if_getdaddr(fs, ifp) != 0) {
    485 		pwarn("allocated inode %lld already allocated\n",
    486 			(long long)thisino);
    487 	}
    488 	nextfree = lfs_if_getnextfree(fs, ifp);
    489 	lfs_if_setnextfree(fs, ifp, 0);
    490 	lfs_if_setdaddr(fs, ifp, daddr);
    491 	VOP_BWRITE(bp);
    492 
    493 	LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
    494 	if (oldhead == thisino) {
    495 		LFS_PUT_HEADFREE(fs, cip, cbp, nextfree);
    496 		sbdirty();
    497 		if (nextfree == 0) {
    498 			extend_ifile(fs);
    499 		}
    500 	} else {
    501 		/* Search the free list for this inode */
    502 		ino = oldhead;
    503 		while (ino) {
    504 			LFS_IENTRY(ifp, fs, ino, bp);
    505 			assert(lfs_if_getnextfree(fs, ifp) != ino);
    506 			if (lfs_if_getnextfree(fs, ifp) == thisino) {
    507 				lfs_if_setnextfree(fs, ifp, nextfree);
    508 				VOP_BWRITE(bp);
    509 				if (nextfree == 0)
    510 					LFS_PUT_TAILFREE(fs, cip, cbp, ino);
    511 				break;
    512 			} else
    513 				ino = lfs_if_getnextfree(fs, ifp);
    514 			brelse(bp, 0);
    515 		}
    516 	}
    517 
    518 	/* Account for new location */
    519 	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), bp);
    520 	sup->su_nbytes += DINOSIZE(fs);
    521 	VOP_BWRITE(bp);
    522 	seg_table[lfs_dtosn(fs, daddr)].su_nbytes += DINOSIZE(fs);
    523 }
    524 
    525 /*
    526  * Roll forward from the last verified checkpoint.
    527  *
    528  * Basic strategy:
    529  *
    530  * Run through the summaries finding the last valid partial segment.
    531  * Note segment numbers as we go.  For each inode that we find, compare
    532  * its generation number; if newer than old inode's (or if old inode is
    533  * USTATE), change to that inode.  Recursively look at inode blocks that
    534  * do not have their old disk addresses.  These addresses must lie in
    535  * segments we have seen already in our roll forward.
    536  *
    537  * A second pass through the past-checkpoint area verifies the validity
    538  * of these new blocks, as well as updating other blocks that do not
    539  * have corresponding new inodes (but their generation number must match
    540  * the old generation number).
    541  */
    542 void
    543 pass6(void)
    544 {
    545 	daddr_t daddr, ibdaddr, odaddr, lastgood;
    546 	IINFO *iip;
    547 	struct uvnode *vp, *devvp;
    548 	CLEANERINFO *cip;
    549 	SEGUSE *sup;
    550 	SEGSUM *sp;
    551 	struct ubuf *bp, *ibp, *sbp, *cbp;
    552 	union lfs_dinode *dp;
    553 	struct inodesc idesc;
    554 	int i, j, bc, hassuper;
    555 	unsigned k;
    556 	int nnewfiles, ndelfiles, nmvfiles;
    557 	int sn, curseg;
    558 	char *ibbuf;
    559 	long lastserial;
    560 
    561 	devvp = fs->lfs_devvp;
    562 
    563 	/* If we can't roll forward because of created files, don't try */
    564 	if (no_roll_forward) {
    565 		if (debug)
    566 			pwarn("not rolling forward due to possible allocation conflict\n");
    567 		return;
    568 	}
    569 
    570 	/* Find last valid partial segment */
    571 	lastgood = try_verify(fs, devvp, 0, debug);
    572 	if (lastgood == lfs_sb_getoffset(fs)) {
    573 		if (debug)
    574 			pwarn("not rolling forward, nothing to recover\n");
    575 		return;
    576 	}
    577 
    578 	if (debug)
    579 		pwarn("could roll forward from 0x%jx to 0x%jx\n",
    580 			(uintmax_t)lfs_sb_getoffset(fs), (uintmax_t)lastgood);
    581 
    582 	if (!preen && reply("ROLL FORWARD") == 0)
    583 		return;
    584 	/*
    585 	 * Pass 1: find inode blocks.  We ignore the Ifile inode but accept
    586 	 * changes to any other inode.
    587 	 */
    588 
    589 	ibbuf = emalloc(lfs_sb_getibsize(fs));
    590 	nnewfiles = ndelfiles = nmvfiles = nnewblocks = 0;
    591 	daddr = lfs_sb_getoffset(fs);
    592 	hassuper = 0;
    593 	lastserial = 0;
    594 	while (daddr != lastgood) {
    595 		seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
    596 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
    597 		sup->su_flags |= SEGUSE_DIRTY;
    598 		VOP_BWRITE(sbp);
    599 
    600 		/* Could be a superblock */
    601 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
    602 			if (daddr == lfs_sb_gets0addr(fs)) {
    603 				++hassuper;
    604 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
    605 			}
    606 			for (i = 0; i < LFS_MAXNUMSB; i++) {
    607 				if (daddr == lfs_sb_getsboff(fs, i)) {
    608 					++hassuper;
    609 					daddr += lfs_btofsb(fs, LFS_SBPAD);
    610 				}
    611 				if (daddr < lfs_sb_getsboff(fs, i))
    612 					break;
    613 			}
    614 		}
    615 		KASSERT(hassuper == 0 || hassuper == 1);
    616 
    617 		/* Read in summary block */
    618 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
    619 		sp = (SEGSUM *)bp->b_data;
    620 		if (debug)
    621 			pwarn("sum at 0x%jx: ninos=%d nfinfo=%d\n",
    622 				(intmax_t)daddr, (int)lfs_ss_getninos(fs, sp),
    623 				(int)lfs_ss_getnfinfo(fs, sp));
    624 
    625 		/* We have verified that this is a good summary. */
    626 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
    627 		++sup->su_nsums;
    628 		VOP_BWRITE(sbp);
    629 		lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
    630 		lfs_sb_adddmeta(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
    631 		sbdirty();
    632 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr +
    633 		    hassuper * lfs_btofsb(fs, LFS_SBPAD) &&
    634 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, lfs_sb_getoffset(fs))) {
    635 			lfs_sb_subnclean(fs, 1);
    636 			sbdirty();
    637 		}
    638 
    639 		/* Find inodes, look at generation number. */
    640 		if (lfs_ss_getninos(fs, sp)) {
    641 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
    642 			sup->su_ninos += howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs));
    643 			VOP_BWRITE(sbp);
    644 			lfs_sb_adddmeta(fs, lfs_btofsb(fs, howmany(lfs_ss_getninos(fs, sp),
    645 							    LFS_INOPB(fs)) *
    646 						lfs_sb_getibsize(fs)));
    647 		}
    648 		iip = SEGSUM_IINFOSTART(fs, bp->b_data);
    649 		for (i = 0; i < howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs)); i++) {
    650 			ino_t *inums;
    651 
    652 			inums = ecalloc(LFS_INOPB(fs) + 1, sizeof(*inums));
    653 			ibdaddr = lfs_ii_getblock(fs, iip);
    654 			iip = NEXTLOWER_IINFO(fs, iip);
    655 			lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
    656 			sbdirty();
    657 			bread(devvp, LFS_FSBTODB(fs, ibdaddr),
    658 			      lfs_sb_getibsize(fs), 0, &ibp);
    659 			memcpy(ibbuf, ibp->b_data, lfs_sb_getibsize(fs));
    660 			brelse(ibp, 0);
    661 
    662 			j = 0;
    663 			for (k = 0; k < LFS_INOPB(fs); k++) {
    664 				dp = DINO_IN_BLOCK(fs, ibbuf, k);
    665 				if (lfs_dino_getinumber(fs, dp) == 0 ||
    666 				    lfs_dino_getinumber(fs, dp) == LFS_IFILE_INUM)
    667 					continue;
    668 				/* Basic sanity checks */
    669 				if (lfs_dino_getnlink(fs, dp) < 0
    670 #if 0
    671 				    || lfs_dino_getinumber(fs, dp) < 0
    672 				    || lfs_dino_getsize(fs, dp) < 0
    673 #endif
    674 				) {
    675 					pwarn("BAD INODE AT 0x%jx\n",
    676 						(intmax_t)ibdaddr);
    677 					brelse(bp, 0);
    678 					free(inums);
    679 					goto out;
    680 				}
    681 
    682 				vp = vget(fs, lfs_dino_getinumber(fs, dp));
    683 
    684 				/*
    685 				 * Four cases:
    686 				 * (1) Invalid inode (nlink == 0).
    687 				 *     If currently allocated, remove.
    688 				 */
    689 				if (lfs_dino_getnlink(fs, dp) == 0) {
    690 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
    691 					++ndelfiles;
    692 					continue;
    693 				}
    694 				/*
    695 				 * (2) New valid inode, previously free.
    696 				 *     Nothing to do except account
    697 				 *     the inode itself, done after the
    698 				 *     loop.
    699 				 */
    700 				if (vp == NULL) {
    701 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
    702 						pfatal("NEW FILE IN NON-DIROP PARTIAL SEGMENT");
    703 					else {
    704 						inums[j++] = lfs_dino_getinumber(fs, dp);
    705 						nnewfiles++;
    706 					}
    707 					continue;
    708 				}
    709 				/*
    710 				 * (3) Valid new version of previously
    711 				 *     allocated inode.  Delete old file
    712 				 *     and proceed as in (2).
    713 				 */
    714 				if (vp &&
    715 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
    716 				    < lfs_dino_getgen(fs, dp)) {
    717 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
    718 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
    719 						pfatal("NEW FILE VERSION IN NON-DIROP PARTIAL SEGMENT");
    720 					else {
    721 						inums[j++] = lfs_dino_getinumber(fs, dp);
    722 						ndelfiles++;
    723 						nnewfiles++;
    724 					}
    725 					continue;
    726 				}
    727 				/*
    728 				 * (4) Same version of previously
    729 				 *     allocated inode.  Move inode to
    730 				 *     this location, account inode change
    731 				 *     only.  We'll pick up any new
    732 				 *     blocks when we do the block pass.
    733 				 */
    734 				if (vp &&
    735 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
    736 				    == lfs_dino_getgen(fs, dp)) {
    737 					nmvfiles++;
    738 					readdress_inode(dp, ibdaddr);
    739 
    740 					/* Update with new info */
    741 					lfs_dino_setmode(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
    742 					lfs_dino_setnlink(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
    743 					/* XXX size is important */
    744 					lfs_dino_setsize(fs, VTOD(vp), lfs_dino_getsize(fs, dp));
    745 					lfs_dino_setatime(fs, VTOD(vp), lfs_dino_getatime(fs, dp));
    746 					lfs_dino_setatimensec(fs, VTOD(vp), lfs_dino_getatimensec(fs, dp));
    747 					lfs_dino_setmtime(fs, VTOD(vp), lfs_dino_getmtime(fs, dp));
    748 					lfs_dino_setmtimensec(fs, VTOD(vp), lfs_dino_getmtimensec(fs, dp));
    749 					lfs_dino_setctime(fs, VTOD(vp), lfs_dino_getctime(fs, dp));
    750 					lfs_dino_setctimensec(fs, VTOD(vp), lfs_dino_getctimensec(fs, dp));
    751 					lfs_dino_setflags(fs, VTOD(vp), lfs_dino_getflags(fs, dp));
    752 					lfs_dino_setuid(fs, VTOD(vp), lfs_dino_getuid(fs, dp));
    753 					lfs_dino_setgid(fs, VTOD(vp), lfs_dino_getgid(fs, dp));
    754 					inodirty(VTOI(vp));
    755 				}
    756 			}
    757 			for (j = 0; inums[j]; j++) {
    758 				alloc_inode(inums[j], ibdaddr);
    759 				vp = lfs_raw_vget(fs, inums[j],
    760 					      devvp->v_fd, ibdaddr);
    761 				/* We'll get the blocks later */
    762 				if (debug)
    763 					pwarn("alloc ino %d nlink %d\n",
    764 						(int)inums[j], lfs_dino_getnlink(fs, VTOD(vp)));
    765 
    766 				for (k=0; k<ULFS_NDADDR; k++) {
    767 					lfs_dino_setdb(fs, VTOD(vp), k, 0);
    768 				}
    769 				for (k=0; k<ULFS_NIADDR; k++) {
    770 					lfs_dino_setib(fs, VTOD(vp), k, 0);
    771 				}
    772 				lfs_dino_setblocks(fs, VTOD(vp), 0);
    773 
    774 				vp->v_uflag |= VU_DIROP;
    775 				inodirty(VTOI(vp));
    776 			}
    777 			free(inums);
    778 		}
    779 
    780 		bc = check_summary(fs, sp, daddr, debug, devvp, NULL);
    781 		if (bc == 0) {
    782 			pwarn("unexpected bad seg ptr at 0x%jx with serial=%ju\n",
    783 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
    784 			brelse(bp, 0);
    785 			break;
    786 		} else {
    787 			if (debug)
    788 				pwarn("good seg ptr at 0x%jx with serial=%ju\n",
    789 					(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
    790 			lastserial = lfs_ss_getserial(fs, sp);
    791 		}
    792 		odaddr = daddr;
    793 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
    794 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
    795 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
    796 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
    797 			daddr = lfs_ss_getnext(fs, sp);
    798 		}
    799 		brelse(bp, 0);
    800 	}
    801 
    802     out:
    803 	free(ibbuf);
    804 
    805 	/* Set serial here, just to be sure (XXX should be right already) */
    806 	lfs_sb_setserial(fs, lastserial + 1);
    807 
    808 	/*
    809 	 * Check our new vnodes.  Any blocks must lie in segments that
    810 	 * we've seen before (SEGUSE_DIRTY or SEGUSE_RFW); and the rest
    811 	 * of the pass 1 checks as well.
    812 	 */
    813 	memset(&idesc, 0, sizeof(struct inodesc));
    814 	idesc.id_type = ADDR;
    815 	idesc.id_func = pass6check;
    816 	idesc.id_lblkno = 0;
    817 	LIST_FOREACH(vp, &vnodelist, v_mntvnodes) {
    818 		if ((vp->v_uflag & VU_DIROP) == 0)
    819 			--n_files; /* Don't double count */
    820 		checkinode(VTOI(vp)->i_number, &idesc);
    821 	}
    822 
    823 	/*
    824 	 * Second pass.  Run through FINFO entries looking for blocks
    825 	 * with the same generation number as files we've seen before.
    826 	 * If they have it, pretend like we just wrote them.  We don't
    827 	 * do the pretend-write, though, if we've already seen them
    828 	 * (the accounting would have been done for us already).
    829 	 */
    830 	daddr = lfs_sb_getoffset(fs);
    831 	while (daddr != lastgood) {
    832 		if (!(seg_table[lfs_dtosn(fs, daddr)].su_flags & SEGUSE_DIRTY)) {
    833 			seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY;
    834 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
    835 			sup->su_flags |= SEGUSE_DIRTY;
    836 			VOP_BWRITE(sbp);
    837 		}
    838 
    839 		/* Could be a superblock */
    840 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
    841 			if (daddr == lfs_sb_gets0addr(fs))
    842 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
    843 			for (i = 0; i < LFS_MAXNUMSB; i++) {
    844 				if (daddr == lfs_sb_getsboff(fs, i)) {
    845 					daddr += lfs_btofsb(fs, LFS_SBPAD);
    846 				}
    847 				if (daddr < lfs_sb_getsboff(fs, i))
    848 					break;
    849 			}
    850 		}
    851 
    852 		/* Read in summary block */
    853 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
    854 		sp = (SEGSUM *)bp->b_data;
    855 		bc = check_summary(fs, sp, daddr, debug, devvp, pass6harvest);
    856 		if (bc == 0) {
    857 			pwarn("unexpected bad seg ptr [2] at 0x%jx with serial=%ju\n",
    858 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
    859 			brelse(bp, 0);
    860 			break;
    861 		}
    862 		odaddr = daddr;
    863 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
    864 		lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc));
    865 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
    866 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
    867 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
    868 			lfs_sb_subavail(fs, lfs_sntod(fs, lfs_dtosn(fs, daddr) + 1) - daddr);
    869 			daddr = lfs_ss_getnext(fs, sp);
    870 		}
    871 		LFS_CLEANERINFO(cip, fs, cbp);
    872 		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);
    873 		bp->b_flags |= B_AGE;
    874 		brelse(bp, 0);
    875 	}
    876 
    877 	/* Final address could also be a superblock */
    878 	if (lfs_sntod(fs, lfs_dtosn(fs, lastgood)) == lastgood) {
    879 		if (lastgood == lfs_sb_gets0addr(fs))
    880 			lastgood += lfs_btofsb(fs, LFS_LABELPAD);
    881 		for (i = 0; i < LFS_MAXNUMSB; i++) {
    882 			if (lastgood == lfs_sb_getsboff(fs, i))
    883 				lastgood += lfs_btofsb(fs, LFS_SBPAD);
    884 			if (lastgood < lfs_sb_getsboff(fs, i))
    885 				break;
    886 		}
    887 	}
    888 
    889 	/* Update offset to point at correct location */
    890 	lfs_sb_setoffset(fs, lastgood);
    891 	lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, lastgood)));
    892 	for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
    893 		sn = (sn + 1) % lfs_sb_getnseg(fs);
    894 		if (sn == curseg)
    895 			errx(1, "no clean segments");
    896 		LFS_SEGENTRY(sup, fs, sn, bp);
    897 		if ((sup->su_flags & SEGUSE_DIRTY) == 0) {
    898 			sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
    899 			VOP_BWRITE(bp);
    900 			break;
    901 		}
    902 		brelse(bp, 0);
    903 	}
    904 	lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
    905 
    906 	if (preen) {
    907 		if (ndelfiles)
    908 			pwarn("roll forward deleted %d file%s\n", ndelfiles,
    909 				(ndelfiles > 1 ? "s" : ""));
    910 		if (nnewfiles)
    911 			pwarn("roll forward added %d file%s\n", nnewfiles,
    912 				(nnewfiles > 1 ? "s" : ""));
    913 		if (nmvfiles)
    914 			pwarn("roll forward relocated %d inode%s\n", nmvfiles,
    915 				(nmvfiles > 1 ? "s" : ""));
    916 		if (nnewblocks)
    917 			pwarn("roll forward verified %d data block%s\n", nnewblocks,
    918 				(nnewblocks > 1 ? "s" : ""));
    919 		if (ndelfiles == 0 && nnewfiles == 0 && nmvfiles == 0 &&
    920 		    nnewblocks == 0)
    921 			pwarn("roll forward produced nothing new\n");
    922 	}
    923 
    924 	if (!preen) {
    925 		/* Run pass 5 again (it's quick anyway). */
    926 		pwarn("** Phase 6b - Recheck Segment Block Accounting\n");
    927 		pass5();
    928 	}
    929 
    930 	/* Likewise for pass 0 */
    931 	if (!preen)
    932 		pwarn("** Phase 6c - Recheck Inode Free List\n");
    933 	pass0();
    934 }
    935