1 1.37 perseant /* $NetBSD: lfs_rfw.c,v 1.37 2025/09/17 04:37:47 perseant Exp $ */ 2 1.1 perseant 3 1.1 perseant /*- 4 1.1 perseant * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 1.1 perseant * All rights reserved. 6 1.1 perseant * 7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation 8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 1.1 perseant * 10 1.1 perseant * Redistribution and use in source and binary forms, with or without 11 1.1 perseant * modification, are permitted provided that the following conditions 12 1.1 perseant * are met: 13 1.1 perseant * 1. Redistributions of source code must retain the above copyright 14 1.1 perseant * notice, this list of conditions and the following disclaimer. 15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 perseant * notice, this list of conditions and the following disclaimer in the 17 1.1 perseant * documentation and/or other materials provided with the distribution. 18 1.1 perseant * 19 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 perseant * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 perseant * POSSIBILITY OF SUCH DAMAGE. 30 1.1 perseant */ 31 1.1 perseant 32 1.2 perseant #include <sys/cdefs.h> 33 1.37 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.37 2025/09/17 04:37:47 perseant Exp $"); 34 1.2 perseant 35 1.2 perseant #if defined(_KERNEL_OPT) 36 1.2 perseant #include "opt_quota.h" 37 1.2 perseant #endif 38 1.2 perseant 39 1.2 perseant #include <sys/param.h> 40 1.2 perseant #include <sys/systm.h> 41 1.2 perseant #include <sys/namei.h> 42 1.2 perseant #include <sys/proc.h> 43 1.2 perseant #include <sys/kernel.h> 44 1.2 perseant #include <sys/vnode.h> 45 1.2 perseant #include <sys/mount.h> 46 1.2 perseant #include <sys/kthread.h> 47 1.2 perseant #include <sys/buf.h> 48 1.2 perseant #include <sys/device.h> 49 1.2 perseant #include <sys/file.h> 50 1.2 perseant #include <sys/disklabel.h> 51 1.2 perseant #include <sys/ioctl.h> 52 1.2 perseant #include <sys/errno.h> 53 1.2 perseant #include <sys/malloc.h> 54 1.2 perseant #include <sys/pool.h> 55 1.2 perseant #include <sys/socket.h> 56 1.37 perseant #include <sys/stat.h> 57 1.2 perseant #include <sys/syslog.h> 58 1.2 perseant #include <sys/sysctl.h> 59 1.2 perseant #include <sys/conf.h> 60 1.2 perseant #include <sys/kauth.h> 61 1.2 perseant 62 1.2 perseant #include <miscfs/specfs/specdev.h> 63 1.2 perseant 64 1.14 dholland #include <ufs/lfs/ulfs_quotacommon.h> 65 1.14 dholland #include <ufs/lfs/ulfs_inode.h> 66 
1.14 dholland #include <ufs/lfs/ulfsmount.h> 67 1.14 dholland #include <ufs/lfs/ulfs_extern.h> 68 1.2 perseant 69 1.36 riastrad #include <uvm/uvm_extern.h> 70 1.2 perseant 71 1.2 perseant #include <ufs/lfs/lfs.h> 72 1.24 dholland #include <ufs/lfs/lfs_accessors.h> 73 1.18 dholland #include <ufs/lfs/lfs_kernel.h> 74 1.2 perseant #include <ufs/lfs/lfs_extern.h> 75 1.2 perseant 76 1.2 perseant #include <miscfs/genfs/genfs.h> 77 1.2 perseant #include <miscfs/genfs/genfs_node.h> 78 1.2 perseant 79 1.1 perseant /* 80 1.1 perseant * Roll-forward code. 81 1.1 perseant */ 82 1.1 perseant static daddr_t check_segsum(struct lfs *, daddr_t, u_int64_t, 83 1.1 perseant kauth_cred_t, int, int *, struct lwp *); 84 1.1 perseant 85 1.37 perseant static bool all_selector(void *, struct vnode *); 86 1.37 perseant static void drop_vnode_pages(struct mount *, struct lwp *); 87 1.37 perseant static int update_inogen(struct lfs *, daddr_t); 88 1.37 perseant static void update_inoblk_copy_dinode(struct lfs *, union lfs_dinode *, const union lfs_dinode *); 89 1.37 perseant 90 1.3 perseant extern int lfs_do_rfw; 91 1.37 perseant int rblkcnt; 92 1.37 perseant int lfs_rfw_max_psegs = 0; 93 1.3 perseant 94 1.1 perseant /* 95 1.1 perseant * Allocate a particular inode with a particular version number, freeing 96 1.1 perseant * any previous versions of this inode that may have gone before. 97 1.1 perseant * Used by the roll-forward code. 98 1.1 perseant * 99 1.1 perseant * XXX this function does not have appropriate locking to be used on a live fs; 100 1.1 perseant * XXX but something similar could probably be used for an "undelete" call. 101 1.1 perseant * 102 1.1 perseant * Called with the Ifile inode locked. 
103 1.1 perseant */ 104 1.1 perseant int 105 1.1 perseant lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l, 106 1.37 perseant struct vnode **vpp, union lfs_dinode *dip) 107 1.1 perseant { 108 1.20 hannken struct vattr va; 109 1.1 perseant struct vnode *vp; 110 1.1 perseant struct inode *ip; 111 1.1 perseant int error; 112 1.1 perseant 113 1.37 perseant KASSERT(ino > LFS_IFILE_INUM); 114 1.1 perseant ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */ 115 1.1 perseant 116 1.1 perseant /* 117 1.1 perseant * First, just try a vget. If the version number is the one we want, 118 1.1 perseant * we don't have to do anything else. If the version number is wrong, 119 1.1 perseant * take appropriate action. 120 1.1 perseant */ 121 1.35 ad error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp); 122 1.1 perseant if (error == 0) { 123 1.37 perseant DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n", 124 1.37 perseant (int)ino, vp)); 125 1.1 perseant 126 1.1 perseant *vpp = vp; 127 1.1 perseant ip = VTOI(vp); 128 1.37 perseant DLOG((DLOG_RF, " ip->i_gen=%jd dip nlink %jd seeking" 129 1.37 perseant " version %jd\n", (intmax_t)ip->i_gen, 130 1.37 perseant (intmax_t)(dip == NULL ? -1 131 1.37 perseant : lfs_dino_getnlink(fs, dip)), (intmax_t)vers)); 132 1.37 perseant if (ip->i_gen == vers) { 133 1.37 perseant /* 134 1.37 perseant * We have what we wanted already. 135 1.37 perseant */ 136 1.37 perseant DLOG((DLOG_RF, " pre-existing\n")); 137 1.1 perseant return 0; 138 1.37 perseant } else if (ip->i_gen < vers && dip != NULL 139 1.37 perseant && lfs_dino_getnlink(fs, dip) > 0) { 140 1.37 perseant /* 141 1.37 perseant * We have found a newer version. Truncate 142 1.37 perseant * the old vnode to zero and re-initialize 143 1.37 perseant * from the given dinode. 
144 1.37 perseant */ 145 1.37 perseant DLOG((DLOG_RF, " replace old version %jd\n", 146 1.37 perseant (intmax_t)ip->i_gen)); 147 1.8 he lfs_truncate(vp, (off_t)0, 0, NOCRED); 148 1.31 dholland ip->i_gen = vers; 149 1.37 perseant vp->v_type = IFTOVT(lfs_dino_getmode(fs, dip)); 150 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip); 151 1.1 perseant LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE); 152 1.1 perseant return 0; 153 1.1 perseant } else { 154 1.37 perseant /* 155 1.37 perseant * Not the right version and nothing to 156 1.37 perseant * initialize from. Don't recover this data. 157 1.37 perseant */ 158 1.1 perseant DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n", 159 1.37 perseant (int)ino, (int)vers, 160 1.37 perseant (int)lfs_dino_getgen(fs, ip->i_din))); 161 1.1 perseant vput(vp); 162 1.1 perseant *vpp = NULLVP; 163 1.1 perseant return EEXIST; 164 1.1 perseant } 165 1.1 perseant } 166 1.1 perseant 167 1.37 perseant /* 168 1.37 perseant * No version of this inode was found in the cache. 169 1.37 perseant * Make a new one from the dinode. We will add data blocks 170 1.37 perseant * as they come in, so scrub any block addresses off of the 171 1.37 perseant * inode and reset block counts to zero. 
172 1.37 perseant */ 173 1.37 perseant if (dip == NULL) 174 1.37 perseant return ENOENT; 175 1.37 perseant 176 1.20 hannken vattr_null(&va); 177 1.37 perseant va.va_type = IFTOVT(lfs_dino_getmode(fs, dip)); 178 1.37 perseant va.va_mode = lfs_dino_getmode(fs, dip) & ALLPERMS; 179 1.20 hannken va.va_fileid = ino; 180 1.20 hannken va.va_gen = vers; 181 1.34 hannken error = vcache_new(fs->lfs_ivnode->v_mount, NULL, &va, NOCRED, NULL, 182 1.34 hannken &vp); 183 1.20 hannken if (error) 184 1.20 hannken return error; 185 1.20 hannken error = vn_lock(vp, LK_EXCLUSIVE); 186 1.37 perseant if (error) 187 1.37 perseant goto err; 188 1.37 perseant 189 1.20 hannken ip = VTOI(vp); 190 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip); 191 1.37 perseant 192 1.37 perseant DLOG((DLOG_RF, "lfs_valloc[2] ino %d vp %p size=%lld effnblks=%d," 193 1.37 perseant " blocks=%d\n", (int)ino, vp, (long long)ip->i_size, 194 1.37 perseant (int)ip->i_lfs_effnblks, 195 1.37 perseant (int)lfs_dino_getblocks(fs, ip->i_din))); 196 1.1 perseant *vpp = vp; 197 1.20 hannken return 0; 198 1.37 perseant 199 1.37 perseant err: 200 1.37 perseant vrele(vp); 201 1.37 perseant *vpp = NULLVP; 202 1.37 perseant return error; 203 1.1 perseant } 204 1.1 perseant 205 1.1 perseant /* 206 1.1 perseant * Load the appropriate indirect block, and change the appropriate pointer. 207 1.1 perseant * Mark the block dirty. Do segment and avail accounting. 
208 1.1 perseant */ 209 1.1 perseant static int 210 1.1 perseant update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn, 211 1.1 perseant daddr_t ndaddr, size_t size, struct lwp *l) 212 1.1 perseant { 213 1.1 perseant int error; 214 1.1 perseant struct vnode *vp; 215 1.1 perseant struct inode *ip; 216 1.1 perseant daddr_t odaddr; 217 1.15 dholland struct indir a[ULFS_NIADDR]; 218 1.1 perseant int num; 219 1.1 perseant struct buf *bp; 220 1.1 perseant SEGUSE *sup; 221 1.37 perseant u_int64_t newsize, loff; 222 1.1 perseant 223 1.1 perseant KASSERT(lbn >= 0); /* no indirect blocks */ 224 1.37 perseant KASSERT(ino > LFS_IFILE_INUM); 225 1.37 perseant 226 1.37 perseant DLOG((DLOG_RF, "update_meta: ino %d lbn %d size %d at 0x%jx\n", 227 1.37 perseant (int)ino, (int)lbn, (int)size, (uintmax_t)ndaddr)); 228 1.1 perseant 229 1.37 perseant if ((error = lfs_rf_valloc(fs, ino, vers, l, &vp, NULL)) != 0) 230 1.1 perseant return error; 231 1.37 perseant ip = VTOI(vp); 232 1.1 perseant 233 1.37 perseant /* 234 1.37 perseant * If block already exists, note its new location 235 1.37 perseant * but do not account it as new. 
236 1.37 perseant */ 237 1.37 perseant ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL); 238 1.37 perseant if (odaddr == UNASSIGNED) { 239 1.37 perseant if ((error = lfs_balloc(vp, (lbn << lfs_sb_getbshift(fs)), 240 1.37 perseant size, NOCRED, 0, &bp)) != 0) { 241 1.37 perseant vput(vp); 242 1.37 perseant return (error); 243 1.37 perseant } 244 1.37 perseant /* No need to write, the block is already on disk */ 245 1.37 perseant if (bp->b_oflags & BO_DELWRI) { 246 1.37 perseant LFS_UNLOCK_BUF(bp); 247 1.37 perseant /* Account recovery of the previous version */ 248 1.37 perseant lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); 249 1.37 perseant } 250 1.37 perseant brelse(bp, BC_INVAL); 251 1.37 perseant DLOG((DLOG_RF, "balloc ip->i_lfs_effnblks = %d," 252 1.37 perseant " lfs_dino_getblocks(fs, ip->i_din) = %d\n", 253 1.37 perseant (int)ip->i_lfs_effnblks, 254 1.37 perseant (int)lfs_dino_getblocks(fs, ip->i_din))); 255 1.37 perseant } else { 256 1.37 perseant /* XXX fragextend? */ 257 1.37 perseant DLOG((DLOG_RF, "block exists, no balloc\n")); 258 1.1 perseant } 259 1.1 perseant 260 1.1 perseant /* 261 1.1 perseant * Extend the file, if it is not large enough already. 262 1.1 perseant * XXX this is not exactly right, we don't know how much of the 263 1.37 perseant * XXX last block is actually used. 
264 1.1 perseant */ 265 1.37 perseant loff = lfs_lblktosize(fs, lbn); 266 1.37 perseant if (loff >= (ULFS_NDADDR << lfs_sb_getbshift(fs))) { 267 1.37 perseant /* No fragments */ 268 1.37 perseant newsize = loff + 1; 269 1.37 perseant } else { 270 1.37 perseant /* Subtract only a fragment to account for block size */ 271 1.37 perseant newsize = loff + size - lfs_fsbtob(fs, 1) + 1; 272 1.37 perseant } 273 1.37 perseant 274 1.37 perseant if (ip->i_size < newsize) { 275 1.37 perseant DLOG((DLOG_RF, "ino %d size %d -> %d\n", 276 1.37 perseant (int)ino, (int)ip->i_size, (int)newsize)); 277 1.31 dholland lfs_dino_setsize(fs, ip->i_din, newsize); 278 1.37 perseant ip->i_size = newsize; 279 1.37 perseant /* 280 1.37 perseant * tell vm our new size for the case the inode won't 281 1.37 perseant * appear later. 282 1.37 perseant */ 283 1.37 perseant uvm_vnp_setsize(vp, newsize); 284 1.1 perseant } 285 1.1 perseant 286 1.1 perseant lfs_update_single(fs, NULL, vp, lbn, ndaddr, size); 287 1.1 perseant 288 1.17 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp); 289 1.1 perseant sup->su_nbytes += size; 290 1.17 christos LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp); 291 1.1 perseant 292 1.1 perseant /* differences here should be due to UNWRITTEN indirect blocks. 
*/ 293 1.37 perseant if (vp->v_type != VLNK) { 294 1.37 perseant if (!(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din)) 295 1.37 perseant #if 0 296 1.37 perseant || !(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR || 297 1.37 perseant ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din)) 298 1.37 perseant #endif /* 0 */ 299 1.37 perseant ) { 300 1.37 perseant vprint("vnode", vp); 301 1.37 perseant printf("effnblks=%jd dino_getblocks=%jd\n", 302 1.37 perseant (intmax_t)ip->i_lfs_effnblks, 303 1.37 perseant (intmax_t)lfs_dino_getblocks(fs, ip->i_din)); 304 1.37 perseant } 305 1.37 perseant KASSERT(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din)); 306 1.37 perseant #if 0 307 1.37 perseant KASSERT(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR || 308 1.37 perseant ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din)); 309 1.37 perseant #endif /* 0 */ 310 1.37 perseant } 311 1.1 perseant 312 1.1 perseant #ifdef DEBUG 313 1.1 perseant /* Now look again to make sure it worked */ 314 1.15 dholland ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL); 315 1.17 christos if (LFS_DBTOFSB(fs, odaddr) != ndaddr) 316 1.37 perseant DLOG((DLOG_RF, "update_meta: failed setting ino %jd lbn %jd" 317 1.37 perseant " to %jd\n", (intmax_t)ino, (intmax_t)lbn, (intmax_t)ndaddr)); 318 1.1 perseant #endif /* DEBUG */ 319 1.1 perseant vput(vp); 320 1.1 perseant return 0; 321 1.1 perseant } 322 1.1 perseant 323 1.30 dholland /* 324 1.30 dholland * Copy some the fields of the dinode as needed by update_inoblk(). 
325 1.30 dholland */ 326 1.30 dholland static void 327 1.30 dholland update_inoblk_copy_dinode(struct lfs *fs, 328 1.30 dholland union lfs_dinode *dstu, const union lfs_dinode *srcu) 329 1.30 dholland { 330 1.30 dholland if (fs->lfs_is64) { 331 1.30 dholland struct lfs64_dinode *dst = &dstu->u_64; 332 1.30 dholland const struct lfs64_dinode *src = &srcu->u_64; 333 1.30 dholland unsigned i; 334 1.30 dholland 335 1.30 dholland /* 336 1.30 dholland * Copy everything but the block pointers and di_blocks. 337 1.30 dholland * XXX what about di_extb? 338 1.30 dholland */ 339 1.30 dholland dst->di_mode = src->di_mode; 340 1.30 dholland dst->di_nlink = src->di_nlink; 341 1.30 dholland dst->di_uid = src->di_uid; 342 1.30 dholland dst->di_gid = src->di_gid; 343 1.30 dholland dst->di_blksize = src->di_blksize; 344 1.30 dholland dst->di_size = src->di_size; 345 1.30 dholland dst->di_atime = src->di_atime; 346 1.30 dholland dst->di_mtime = src->di_mtime; 347 1.30 dholland dst->di_ctime = src->di_ctime; 348 1.30 dholland dst->di_birthtime = src->di_birthtime; 349 1.30 dholland dst->di_mtimensec = src->di_mtimensec; 350 1.30 dholland dst->di_atimensec = src->di_atimensec; 351 1.30 dholland dst->di_ctimensec = src->di_ctimensec; 352 1.30 dholland dst->di_birthnsec = src->di_birthnsec; 353 1.30 dholland dst->di_gen = src->di_gen; 354 1.30 dholland dst->di_kernflags = src->di_kernflags; 355 1.30 dholland dst->di_flags = src->di_flags; 356 1.30 dholland dst->di_extsize = src->di_extsize; 357 1.30 dholland dst->di_modrev = src->di_modrev; 358 1.30 dholland dst->di_inumber = src->di_inumber; 359 1.30 dholland for (i = 0; i < __arraycount(src->di_spare); i++) { 360 1.30 dholland dst->di_spare[i] = src->di_spare[i]; 361 1.30 dholland } 362 1.37 perseant /* Short symlinks store their data in di_db. 
*/ 363 1.37 perseant if ((src->di_mode & LFS_IFMT) == LFS_IFLNK 364 1.37 perseant && src->di_size < lfs_sb_getmaxsymlinklen(fs)) { 365 1.37 perseant memcpy(dst->di_db, src->di_db, src->di_size); 366 1.37 perseant } 367 1.30 dholland } else { 368 1.30 dholland struct lfs32_dinode *dst = &dstu->u_32; 369 1.30 dholland const struct lfs32_dinode *src = &srcu->u_32; 370 1.30 dholland 371 1.30 dholland /* Get mode, link count, size, and times */ 372 1.30 dholland memcpy(dst, src, offsetof(struct lfs32_dinode, di_db[0])); 373 1.30 dholland 374 1.30 dholland /* Then the rest, except di_blocks */ 375 1.30 dholland dst->di_flags = src->di_flags; 376 1.30 dholland dst->di_gen = src->di_gen; 377 1.30 dholland dst->di_uid = src->di_uid; 378 1.30 dholland dst->di_gid = src->di_gid; 379 1.30 dholland dst->di_modrev = src->di_modrev; 380 1.37 perseant 381 1.37 perseant /* Short symlinks store their data in di_db. */ 382 1.37 perseant if ((src->di_mode & LFS_IFMT) == LFS_IFLNK 383 1.37 perseant && src->di_size < lfs_sb_getmaxsymlinklen(fs)) { 384 1.37 perseant memcpy(dst->di_db, src->di_db, src->di_size); 385 1.37 perseant } 386 1.30 dholland } 387 1.30 dholland } 388 1.30 dholland 389 1.1 perseant static int 390 1.1 perseant update_inoblk(struct lfs *fs, daddr_t offset, kauth_cred_t cred, 391 1.1 perseant struct lwp *l) 392 1.1 perseant { 393 1.1 perseant struct vnode *devvp, *vp; 394 1.1 perseant struct inode *ip; 395 1.29 dholland union lfs_dinode *dip; 396 1.1 perseant struct buf *dbp, *ibp; 397 1.1 perseant int error; 398 1.1 perseant daddr_t daddr; 399 1.1 perseant IFILE *ifp; 400 1.1 perseant SEGUSE *sup; 401 1.29 dholland unsigned i, num; 402 1.37 perseant uint32_t gen; 403 1.37 perseant char *buf; 404 1.1 perseant 405 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 406 1.1 perseant 407 1.1 perseant /* 408 1.1 perseant * Get the inode, update times and perms. 409 1.1 perseant * DO NOT update disk blocks, we do that separately. 
410 1.1 perseant */ 411 1.23 dholland error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 412 1.19 maxv 0, &dbp); 413 1.1 perseant if (error) { 414 1.1 perseant DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error)); 415 1.1 perseant return error; 416 1.1 perseant } 417 1.37 perseant buf = malloc(dbp->b_bcount, M_SEGMENT, M_WAITOK); 418 1.37 perseant memcpy(buf, dbp->b_data, dbp->b_bcount); 419 1.37 perseant brelse(dbp, BC_AGE); 420 1.29 dholland num = LFS_INOPB(fs); 421 1.29 dholland for (i = num; i-- > 0; ) { 422 1.37 perseant dip = DINO_IN_BLOCK(fs, buf, i); 423 1.37 perseant if (lfs_dino_getinumber(fs, dip) <= LFS_IFILE_INUM) 424 1.37 perseant continue; 425 1.1 perseant 426 1.37 perseant /* Check generation number */ 427 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); 428 1.37 perseant gen = lfs_if_getversion(fs, ifp); 429 1.37 perseant brelse(ibp, 0); 430 1.37 perseant if (lfs_dino_getgen(fs, dip) < gen) { 431 1.37 perseant continue; 432 1.37 perseant } 433 1.1 perseant 434 1.37 perseant /* 435 1.37 perseant * This inode is the newest generation. Load it. 436 1.37 perseant */ 437 1.37 perseant error = lfs_rf_valloc(fs, lfs_dino_getinumber(fs, dip), 438 1.37 perseant lfs_dino_getgen(fs, dip), 439 1.37 perseant l, &vp, dip); 440 1.37 perseant if (error) { 441 1.37 perseant DLOG((DLOG_RF, "update_inoblk: lfs_rf_valloc" 442 1.37 perseant " returned %d\n", error)); 443 1.37 perseant continue; 444 1.37 perseant } 445 1.37 perseant ip = VTOI(vp); 446 1.37 perseant if (lfs_dino_getsize(fs, dip) != ip->i_size 447 1.37 perseant && vp->v_type != VLNK) { 448 1.37 perseant /* XXX What should we do sith symlinks? 
*/ 449 1.37 perseant DLOG((DLOG_RF, " ino %jd size %jd -> %jd\n", 450 1.37 perseant (intmax_t)lfs_dino_getinumber(fs, dip), 451 1.37 perseant (intmax_t)ip->i_size, 452 1.37 perseant (intmax_t)lfs_dino_getsize(fs, dip))); 453 1.37 perseant lfs_truncate(vp, lfs_dino_getsize(fs, dip), 0, 454 1.37 perseant NOCRED); 455 1.37 perseant } 456 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip); 457 1.37 perseant 458 1.37 perseant ip->i_flags = lfs_dino_getflags(fs, dip); 459 1.37 perseant ip->i_gen = lfs_dino_getgen(fs, dip); 460 1.37 perseant ip->i_uid = lfs_dino_getuid(fs, dip); 461 1.37 perseant ip->i_gid = lfs_dino_getgid(fs, dip); 462 1.37 perseant 463 1.37 perseant ip->i_mode = lfs_dino_getmode(fs, dip); 464 1.37 perseant ip->i_nlink = lfs_dino_getnlink(fs, dip); 465 1.37 perseant ip->i_size = lfs_dino_getsize(fs, dip); 466 1.37 perseant 467 1.37 perseant LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE); 468 1.37 perseant 469 1.37 perseant /* Re-initialize to get type right */ 470 1.37 perseant ulfs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, 471 1.37 perseant &vp); 472 1.37 perseant 473 1.37 perseant /* Record change in location */ 474 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); 475 1.37 perseant daddr = lfs_if_getdaddr(fs, ifp); 476 1.37 perseant lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, dbp->b_blkno)); 477 1.37 perseant error = LFS_BWRITE_LOG(ibp); /* Ifile */ 478 1.37 perseant /* And do segment accounting */ 479 1.37 perseant if (lfs_dtosn(fs, daddr) 480 1.37 perseant != lfs_dtosn(fs, LFS_DBTOFSB(fs, dbp->b_blkno))) { 481 1.37 perseant if (!DADDR_IS_BAD(daddr)) { 482 1.37 perseant LFS_SEGENTRY(sup, fs, 483 1.37 perseant lfs_dtosn(fs, daddr), ibp); 484 1.37 perseant sup->su_nbytes -= DINOSIZE(fs); 485 1.1 perseant LFS_WRITESEGENTRY(sup, fs, 486 1.37 perseant lfs_dtosn(fs, daddr), 487 1.1 perseant ibp); 488 1.1 perseant } 489 1.37 perseant LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, 490 1.37 perseant LFS_DBTOFSB(fs, dbp->b_blkno)), 491 1.37 
perseant ibp); 492 1.37 perseant sup->su_nbytes += DINOSIZE(fs); 493 1.37 perseant LFS_WRITESEGENTRY(sup, fs, 494 1.37 perseant lfs_dtosn(fs, LFS_DBTOFSB(fs, 495 1.37 perseant dbp->b_blkno)), 496 1.37 perseant ibp); 497 1.1 perseant } 498 1.37 perseant vput(vp); 499 1.37 perseant } 500 1.37 perseant free(buf, M_SEGMENT); 501 1.37 perseant 502 1.37 perseant return 0; 503 1.37 perseant } 504 1.37 perseant 505 1.37 perseant /* 506 1.37 perseant * Note the highest generation number of each inode in the Ifile. 507 1.37 perseant * This allows us to skip processing data for intermediate versions. 508 1.37 perseant */ 509 1.37 perseant static int 510 1.37 perseant update_inogen(struct lfs *fs, daddr_t offset) 511 1.37 perseant { 512 1.37 perseant struct vnode *devvp; 513 1.37 perseant union lfs_dinode *dip; 514 1.37 perseant struct buf *dbp, *ibp; 515 1.37 perseant int error; 516 1.37 perseant IFILE *ifp; 517 1.37 perseant unsigned i, num; 518 1.37 perseant 519 1.37 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 520 1.37 perseant 521 1.37 perseant /* Read inode block */ 522 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 523 1.37 perseant 0, &dbp); 524 1.37 perseant if (error) { 525 1.37 perseant DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error)); 526 1.37 perseant return error; 527 1.37 perseant } 528 1.37 perseant 529 1.37 perseant /* Check each inode against ifile entry */ 530 1.37 perseant num = LFS_INOPB(fs); 531 1.37 perseant for (i = num; i-- > 0; ) { 532 1.37 perseant dip = DINO_IN_BLOCK(fs, dbp->b_data, i); 533 1.37 perseant if (lfs_dino_getinumber(fs, dip) == LFS_IFILE_INUM) 534 1.37 perseant continue; 535 1.37 perseant 536 1.37 perseant /* Update generation number */ 537 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp); 538 1.37 perseant if (lfs_if_getversion(fs, ifp) < lfs_dino_getgen(fs, dip)) 539 1.37 perseant lfs_if_setversion(fs, ifp, lfs_dino_getgen(fs, dip)); 540 1.37 perseant error = 
LFS_BWRITE_LOG(ibp); /* Ifile */ 541 1.37 perseant if (error) 542 1.37 perseant break; 543 1.1 perseant } 544 1.4 ad brelse(dbp, BC_AGE); 545 1.1 perseant 546 1.37 perseant return error; 547 1.1 perseant } 548 1.1 perseant 549 1.37 perseant #define CHECK_CKSUM 1 /* Check the checksum to make sure it's valid */ 550 1.37 perseant #define CHECK_GEN 2 /* Update highest generation number */ 551 1.37 perseant #define CHECK_INODES 3 /* Read and process inodes */ 552 1.37 perseant #define CHECK_DATA 4 /* Identify and process data blocks */ 553 1.1 perseant 554 1.1 perseant static daddr_t 555 1.1 perseant check_segsum(struct lfs *fs, daddr_t offset, u_int64_t nextserial, 556 1.37 perseant kauth_cred_t cred, int phase, int *pseg_flags, struct lwp *l) 557 1.1 perseant { 558 1.1 perseant struct vnode *devvp; 559 1.1 perseant struct buf *bp, *dbp; 560 1.37 perseant int error, ninos, i, j; 561 1.1 perseant SEGSUM *ssp; 562 1.37 perseant daddr_t prevoffset; 563 1.32 dholland IINFO *iip; 564 1.1 perseant FINFO *fip; 565 1.1 perseant SEGUSE *sup; 566 1.1 perseant size_t size; 567 1.27 dholland uint32_t datasum, foundsum; 568 1.37 perseant char *buf; 569 1.1 perseant 570 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 571 1.37 perseant 572 1.1 perseant /* 573 1.37 perseant * If this is segment 0, skip the label. 574 1.1 perseant * If the segment has a superblock and we're at the top 575 1.1 perseant * of the segment, skip the superblock. 
576 1.1 perseant */ 577 1.37 perseant if (offset == lfs_sb_gets0addr(fs)) 578 1.37 perseant offset += lfs_btofsb(fs, LFS_LABELPAD); 579 1.17 christos if (lfs_sntod(fs, lfs_dtosn(fs, offset)) == offset) { 580 1.17 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp); 581 1.1 perseant if (sup->su_flags & SEGUSE_SUPERBLOCK) 582 1.17 christos offset += lfs_btofsb(fs, LFS_SBPAD); 583 1.4 ad brelse(bp, 0); 584 1.1 perseant } 585 1.1 perseant 586 1.1 perseant /* Read in the segment summary */ 587 1.23 dholland error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getsumsize(fs), 588 1.19 maxv 0, &bp); 589 1.1 perseant if (error) 590 1.1 perseant return -1; 591 1.37 perseant buf = malloc(bp->b_bcount, M_SEGMENT, M_WAITOK); 592 1.37 perseant memcpy(buf, bp->b_data, bp->b_bcount); 593 1.37 perseant brelse(bp, BC_AGE); 594 1.37 perseant 595 1.37 perseant ssp = (SEGSUM *)buf; 596 1.1 perseant 597 1.37 perseant /* 598 1.37 perseant * Phase I: Check summary checksum. 599 1.37 perseant */ 600 1.37 perseant if (phase == CHECK_CKSUM) { 601 1.27 dholland size_t sumstart; 602 1.27 dholland 603 1.27 dholland sumstart = lfs_ss_getsumstart(fs); 604 1.27 dholland if (lfs_ss_getsumsum(fs, ssp) != 605 1.27 dholland cksum((char *)ssp + sumstart, 606 1.27 dholland lfs_sb_getsumsize(fs) - sumstart)) { 607 1.37 perseant DLOG((DLOG_RF, "Sumsum error at 0x%" PRIx64 "\n", 608 1.37 perseant offset)); 609 1.1 perseant offset = -1; 610 1.37 perseant goto err; 611 1.1 perseant } 612 1.27 dholland if (lfs_ss_getnfinfo(fs, ssp) == 0 && 613 1.27 dholland lfs_ss_getninos(fs, ssp) == 0) { 614 1.37 perseant DLOG((DLOG_RF, "Empty pseg at 0x%" PRIx64 "\n", 615 1.37 perseant offset)); 616 1.1 perseant offset = -1; 617 1.37 perseant goto err; 618 1.1 perseant } 619 1.37 perseant if (lfs_sb_getversion(fs) == 1) { 620 1.37 perseant if (lfs_ss_getcreate(fs, ssp) < lfs_sb_gettstamp(fs)) { 621 1.37 perseant DLOG((DLOG_RF, "Old data at 0x%" PRIx64 "\n", offset)); 622 1.37 perseant offset = -1; 623 1.37 perseant 
goto err; 624 1.37 perseant } 625 1.37 perseant } else { 626 1.37 perseant if (lfs_ss_getserial(fs, ssp) != nextserial) { 627 1.37 perseant DLOG((DLOG_RF, "Serial number at 0x%jx given as 0x%jx," 628 1.37 perseant " expected 0x%jx\n", (intmax_t)offset, 629 1.37 perseant (intmax_t)lfs_ss_getserial(fs, ssp), 630 1.37 perseant (intmax_t)nextserial)); 631 1.37 perseant offset = -1; 632 1.37 perseant goto err; 633 1.37 perseant } 634 1.37 perseant if (lfs_ss_getident(fs, ssp) != lfs_sb_getident(fs)) { 635 1.37 perseant DLOG((DLOG_RF, "Incorrect fsid (0x%x vs 0x%x) at 0x%" 636 1.37 perseant PRIx64 "\n", lfs_ss_getident(fs, ssp), 637 1.37 perseant lfs_sb_getident(fs), offset)); 638 1.37 perseant offset = -1; 639 1.37 perseant goto err; 640 1.37 perseant } 641 1.1 perseant } 642 1.1 perseant } 643 1.1 perseant if (pseg_flags) 644 1.27 dholland *pseg_flags = lfs_ss_getflags(fs, ssp); 645 1.37 perseant prevoffset = offset; 646 1.23 dholland offset += lfs_btofsb(fs, lfs_sb_getsumsize(fs)); 647 1.1 perseant 648 1.37 perseant /* Handle individual blocks */ 649 1.37 perseant foundsum = 0; 650 1.27 dholland ninos = howmany(lfs_ss_getninos(fs, ssp), LFS_INOPB(fs)); 651 1.37 perseant iip = SEGSUM_IINFOSTART(fs, buf); 652 1.37 perseant fip = SEGSUM_FINFOBASE(fs, (SEGSUM *)buf); 653 1.27 dholland for (i = 0; i < lfs_ss_getnfinfo(fs, ssp) || ninos; ++i) { 654 1.1 perseant /* Inode block? 
*/ 655 1.32 dholland if (ninos && lfs_ii_getblock(fs, iip) == offset) { 656 1.37 perseant if (phase == CHECK_CKSUM) { 657 1.1 perseant /* Read in the head and add to the buffer */ 658 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), 659 1.37 perseant lfs_sb_getbsize(fs), 0, &dbp); 660 1.1 perseant if (error) { 661 1.1 perseant offset = -1; 662 1.37 perseant goto err; 663 1.1 perseant } 664 1.37 perseant foundsum = lfs_cksum_part(dbp->b_data, 665 1.37 perseant sizeof(uint32_t), foundsum); 666 1.4 ad brelse(dbp, BC_AGE); 667 1.1 perseant } 668 1.37 perseant if (phase == CHECK_GEN) { 669 1.37 perseant if ((error = update_inogen(fs, offset)) 670 1.37 perseant != 0) { 671 1.37 perseant offset = -1; 672 1.37 perseant goto err; 673 1.37 perseant } 674 1.37 perseant } 675 1.37 perseant if (phase == CHECK_INODES) { 676 1.1 perseant if ((error = update_inoblk(fs, offset, cred, l)) 677 1.1 perseant != 0) { 678 1.1 perseant offset = -1; 679 1.37 perseant goto err; 680 1.1 perseant } 681 1.1 perseant } 682 1.23 dholland offset += lfs_btofsb(fs, lfs_sb_getibsize(fs)); 683 1.32 dholland iip = NEXTLOWER_IINFO(fs, iip); 684 1.1 perseant --ninos; 685 1.32 dholland --i; /* compensate for ++i in loop header */ 686 1.1 perseant continue; 687 1.1 perseant } 688 1.37 perseant 689 1.37 perseant /* File block */ 690 1.22 dholland size = lfs_sb_getbsize(fs); 691 1.28 dholland for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { 692 1.28 dholland if (j == lfs_fi_getnblocks(fs, fip) - 1) 693 1.28 dholland size = lfs_fi_getlastlength(fs, fip); 694 1.37 perseant if (phase == CHECK_CKSUM) { 695 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), 696 1.37 perseant size, 0, &dbp); 697 1.1 perseant if (error) { 698 1.1 perseant offset = -1; 699 1.37 perseant goto err; 700 1.1 perseant } 701 1.37 perseant foundsum = lfs_cksum_part(dbp->b_data, 702 1.37 perseant sizeof(uint32_t), foundsum); 703 1.4 ad brelse(dbp, BC_AGE); 704 1.1 perseant } 705 1.1 perseant /* Account for and update 
any direct blocks */ 706 1.37 perseant if (phase == CHECK_DATA && 707 1.28 dholland lfs_fi_getino(fs, fip) > LFS_IFILE_INUM && 708 1.28 dholland lfs_fi_getblock(fs, fip, j) >= 0) { 709 1.28 dholland update_meta(fs, lfs_fi_getino(fs, fip), 710 1.28 dholland lfs_fi_getversion(fs, fip), 711 1.28 dholland lfs_fi_getblock(fs, fip, j), 712 1.28 dholland offset, size, l); 713 1.37 perseant ++rblkcnt; 714 1.1 perseant } 715 1.17 christos offset += lfs_btofsb(fs, size); 716 1.1 perseant } 717 1.37 perseant 718 1.27 dholland fip = NEXT_FINFO(fs, fip); 719 1.1 perseant } 720 1.37 perseant 721 1.1 perseant /* Checksum the array, compare */ 722 1.37 perseant if (phase == CHECK_CKSUM) { 723 1.37 perseant datasum = lfs_ss_getdatasum(fs, ssp); 724 1.37 perseant foundsum = lfs_cksum_fold(foundsum); 725 1.37 perseant if (datasum != foundsum) { 726 1.37 perseant DLOG((DLOG_RF, "Datasum error at 0x%" PRIx64 727 1.37 perseant " (wanted %x got %x)\n", 728 1.37 perseant offset, datasum, foundsum)); 729 1.1 perseant offset = -1; 730 1.37 perseant goto err; 731 1.1 perseant } 732 1.1 perseant } 733 1.1 perseant 734 1.37 perseant if (phase == CHECK_CKSUM) 735 1.37 perseant lfs_sb_subavail(fs, offset - prevoffset); 736 1.37 perseant else { 737 1.1 perseant /* Don't clog the buffer queue */ 738 1.9 ad mutex_enter(&lfs_lock); 739 1.1 perseant if (locked_queue_count > LFS_MAX_BUFS || 740 1.1 perseant locked_queue_bytes > LFS_MAX_BYTES) { 741 1.1 perseant lfs_flush(fs, SEGM_CKP, 0); 742 1.1 perseant } 743 1.9 ad mutex_exit(&lfs_lock); 744 1.1 perseant } 745 1.1 perseant 746 1.37 perseant /* 747 1.37 perseant * If we're at the end of the segment, move to the next. 748 1.37 perseant * A partial segment needs space for a segment header (1 fsb) 749 1.37 perseant * and a full block ("frag" fsb). Thus, adding "frag" fsb should 750 1.37 perseant * still be within the current segment (whereas frag + 1 might 751 1.37 perseant * be at the start of the next segment). 
752 1.37 perseant * 753 1.37 perseant * This needs to match the definition of LFS_PARTIAL_FITS 754 1.37 perseant * in lfs_segment.c. 755 1.37 perseant */ 756 1.37 perseant if (lfs_dtosn(fs, offset + lfs_sb_getfrag(fs)) 757 1.37 perseant != lfs_dtosn(fs, offset)) { 758 1.37 perseant if (lfs_dtosn(fs, offset) == lfs_dtosn(fs, lfs_ss_getnext(fs, 759 1.37 perseant ssp))) { 760 1.37 perseant printf("WHOA! at 0x%jx/seg %jd moving to 0x%jx/seg %jd\n", 761 1.37 perseant (intmax_t)offset, 762 1.37 perseant (intmax_t)lfs_dtosn(fs, offset), 763 1.37 perseant (intmax_t)lfs_ss_getnext(fs, ssp), 764 1.37 perseant (intmax_t)lfs_dtosn(fs, lfs_ss_getnext(fs, ssp))); 765 1.37 perseant offset = -1; 766 1.37 perseant goto err; 767 1.37 perseant } 768 1.37 perseant offset = lfs_ss_getnext(fs, ssp); 769 1.37 perseant DLOG((DLOG_RF, "LFS roll forward: moving to offset 0x%" PRIx64 770 1.37 perseant " -> segment %d\n", offset, lfs_dtosn(fs,offset))); 771 1.37 perseant } 772 1.1 perseant 773 1.37 perseant err: 774 1.37 perseant free(buf, M_SEGMENT); 775 1.37 perseant 776 1.1 perseant return offset; 777 1.1 perseant } 778 1.1 perseant

/*
 * Roll the filesystem forward from its last checkpoint.
 *
 * fs	the LFS instance to recover
 * mp	its mount point; handed to lfs_segwrite() and drop_vnode_pages()
 * l	the calling lwp; its process supplies the credentials used while
 *	replaying.  If l (or its process) is NULL no roll forward is done.
 *
 * The global block counter rblkcnt is reset to zero on every call, even
 * when the function returns early.  Nothing else is done if the
 * superblock carries LFS_PF_CLEAN, if lfs_do_rfw is zero, if the
 * filesystem version is 1 or lower, or if there is no process.
 *
 * Otherwise the log is walked with check_segsum() in several passes:
 * a checksum pass (CHECK_CKSUM) that determines how far the log can be
 * trusted, followed by one pass for each phase value from CHECK_GEN
 * through CHECK_DATA that replays it.  Afterwards pages cached on the
 * vnodes are dropped and lfs_reset_avail() is called.
 */
void
lfs_roll_forward(struct lfs *fs, struct mount *mp, struct lwp *l)
{
	int flags, dirty, phase;
	daddr_t startoffset, offset, nextoffset, endpseg;
	u_int64_t nextserial, startserial, endserial;
	int sn, curseg;
	struct proc *p;
	kauth_cred_t cred;
	SEGUSE *sup;
	struct buf *bp;

	p = l ? l->l_proc : NULL;
	cred = p ? p->p_cred : NOCRED;

	/*
	 * Roll forward.
	 *
	 * We don't roll forward for v1 filesystems, because
	 * of the danger that the clock was turned back between the last
	 * checkpoint and crash.  This would roll forward garbage.
	 *
	 * v2 filesystems don't have this problem because they use a
	 * monotonically increasing serial number instead of a timestamp.
	 */
	rblkcnt = 0;
	if ((lfs_sb_getpflags(fs) & LFS_PF_CLEAN) || !lfs_do_rfw
	    || lfs_sb_getversion(fs) <= 1 || p == NULL)
		return;

	DLOG((DLOG_RF, "%s: begin roll forward at serial 0x%jx\n",
	      lfs_sb_getfsmnt(fs), (intmax_t)lfs_sb_getserial(fs)));
	DEBUG_CHECK_FREELIST(fs);

	/*
	 * Phase I: Find the address of the last good partial
	 * segment that was written after the checkpoint.  Mark
	 * the segments in question dirty, so they won't be
	 * reallocated.
	 */
	endpseg = startoffset = offset = lfs_sb_getoffset(fs);
	flags = 0x0;
	DLOG((DLOG_RF, "LFS roll forward phase 1: start at offset 0x%"
	      PRIx64 "\n", offset));

	/* The segment holding the checkpoint offset is in use: mark it. */
	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
	if (!(sup->su_flags & SEGUSE_DIRTY))
		lfs_sb_subnclean(fs, 1);
	sup->su_flags |= SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);

	startserial = lfs_sb_getserial(fs);
	endserial = nextserial = startserial + 1;
	while ((nextoffset = check_segsum(fs, offset, nextserial,
	    cred, CHECK_CKSUM, &flags, l)) > 0) {
		/*
		 * NOTE(review): lfs_sntod() is handed disk addresses
		 * here, whereas everywhere else in this function it is
		 * handed segment numbers.  lfs_dtosn() may have been
		 * intended; confirm before changing, since this test
		 * decides when a segment gets marked dirty.
		 */
		if (lfs_sntod(fs, offset) != lfs_sntod(fs, nextoffset)) {
			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset),
			    bp);
			if (!(sup->su_flags & SEGUSE_DIRTY))
				lfs_sb_subnclean(fs, 1);
			sup->su_flags |= SEGUSE_DIRTY;
			LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
		}

		DLOG((DLOG_RF, "LFS roll forward phase 1: offset=0x%jx"
		      " serial=0x%jx\n", (intmax_t)nextoffset,
		      (intmax_t)nextserial));
		if (flags & SS_DIROP) {
			DLOG((DLOG_RF, "lfs_mountfs: dirops at 0x%"
			      PRIx64 "\n", offset));
			if (!(flags & SS_CONT)) {
				DLOG((DLOG_RF, "lfs_mountfs: dirops end "
				      "at 0x%" PRIx64 "\n", offset));
			}
		}
		offset = nextoffset;
		++nextserial;

		/*
		 * Only advance the end-of-log marker past partial
		 * segments that do not have SS_CONT set; a trailing run
		 * that still has it set is discarded below.
		 */
		if (!(flags & SS_CONT)) {
			endpseg = nextoffset;
			endserial = nextserial;
		}

		/* Optional cap on the number of partial segments examined. */
		if (lfs_rfw_max_psegs > 0
		    && nextserial > startserial + lfs_rfw_max_psegs)
			break;
	}
	if (flags & SS_CONT) {
		/* Cast for %jx, as every other %jx use in this file does. */
		DLOG((DLOG_RF, "LFS roll forward: warning: incomplete "
		      "dirops discarded (0x%jx < 0x%jx)\n",
		      (intmax_t)endpseg, (intmax_t)nextoffset));
	}
	if (lfs_sb_getversion(fs) > 1)
		lfs_sb_setserial(fs, endserial);
	DLOG((DLOG_RF, "LFS roll forward phase 1: completed: "
	      "endpseg=0x%" PRIx64 "\n", endpseg));
	offset = startoffset;
	if (offset != endpseg) {
		/* Don't overwrite what we're trying to preserve */
		lfs_sb_setoffset(fs, endpseg);
		lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, endpseg)));

		/*
		 * Scan forward (wrapping at the segment count) from the
		 * current segment for the first one not marked dirty;
		 * that becomes the next segment to write.
		 */
		for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
			sn = (sn + 1) % lfs_sb_getnseg(fs);
			/* XXX could we just fail to roll forward? */
			if (sn == curseg)
				panic("lfs_mountfs: no clean segments");
			LFS_SEGENTRY(sup, fs, sn, bp);
			dirty = (sup->su_flags & SEGUSE_DIRTY);
			brelse(bp, 0);
			if (!dirty)
				break;
		}
		lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));

		/* Explicitly set this segment dirty */
		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
		sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
		LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);

		/*
		 * Phase II: Identify the highest generation of each
		 * inode.
		 *
		 * Phase III: Update inodes.  We end up with the
		 * last version of each inode present, and can ignore
		 * data blocks belonging to previous versions.
		 *
		 * Phase IV: Roll forward, updating data blocks.
		 *
		 * Each phase rescans the same range, from the
		 * checkpoint offset up to endpseg, restarting the
		 * expected serial number every time.
		 */
		for (phase = CHECK_GEN; phase <= CHECK_DATA; ++phase) {
			offset = startoffset;
			nextserial = startserial + 1;
			printf("LFS roll forward phase %d beginning\n", phase);
			/* check_segsum() returning <= 0 ends the scan early. */
			while (offset > 0 && offset != endpseg) {
				if (phase == CHECK_DATA) {
					DLOG((DLOG_RF, "LFS roll forward"
					      " phase %d: offset=0x%jx"
					      " serial=0x%jx\n",
					      phase, (intmax_t)offset,
					      (intmax_t)nextserial));
				}
				offset = check_segsum(fs, offset,
				    nextserial, cred,
				    phase, NULL, l);
				++nextserial;
				DEBUG_CHECK_FREELIST(fs);
			}
		}

		/*
		 * Finish: flush our changes to disk.
		 */
		lfs_sb_setserial(fs, endserial);

		lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
		DLOG((DLOG_RF, "lfs_mountfs: roll forward "
		      "examined %jd blocks\n",
		      (intmax_t)(endpseg - startoffset)));
	}

	/* Get rid of our vnodes, except the ifile */
	drop_vnode_pages(mp, l);
	DLOG((DLOG_RF, "LFS roll forward complete\n"));
	printf("%s: roll forward recovered %d data blocks\n",
	       lfs_sb_getfsmnt(fs), rblkcnt);

	/*
	 * At this point we have no more changes to write to disk.
	 * Reset the "avail" count to match the segments as they
	 * appear on disk, and the clean segment count.
	 */
	lfs_reset_avail(fs);
}

/*
 * Vnode iterator selector that accepts every vnode; both arguments
 * are ignored.
 */
static bool
all_selector(void *cl, struct vnode *vp)
{
	return true;
}


/*
 * Dump any pages from vnodes that may have been put on
 * during truncation.
 *
 * Every vnode on the mount except the ifile vnode is locked
 * exclusively and has its UVM size set to zero and then back to the
 * inode's i_size.  The lwp argument is not used.
 */
static void
drop_vnode_pages(struct mount *mp, struct lwp *l)
{
	struct vnode_iterator *marker;
	struct lfs *fs;
	struct vnode *vp;

	fs = VFSTOULFS(mp)->um_lfs;
	vfs_vnode_iterator_init(mp, &marker);
	while ((vp = vfs_vnode_iterator_next(marker,
	    all_selector, NULL)) != NULL) {
		if (vp == fs->lfs_ivnode) {
			/*
			 * Skip the ifile, but still give back the
			 * reference the iterator handed us, exactly as
			 * is done for every other vnode below.
			 */
			vrele(vp);
			continue;
		}
		VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		uvm_vnp_setsize(vp, 0);
		uvm_vnp_setsize(vp, VTOI(vp)->i_size);
		VOP_UNLOCK(vp);
		vrele(vp);
	}
	vfs_vnode_iterator_destroy(marker);
}