lfs_rfw.c revision 1.37 1 1.37 perseant /* $NetBSD: lfs_rfw.c,v 1.37 2025/09/17 04:37:47 perseant Exp $ */
2 1.1 perseant
3 1.1 perseant /*-
4 1.1 perseant * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5 1.1 perseant * All rights reserved.
6 1.1 perseant *
7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.1 perseant *
10 1.1 perseant * Redistribution and use in source and binary forms, with or without
11 1.1 perseant * modification, are permitted provided that the following conditions
12 1.1 perseant * are met:
13 1.1 perseant * 1. Redistributions of source code must retain the above copyright
14 1.1 perseant * notice, this list of conditions and the following disclaimer.
15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 perseant * notice, this list of conditions and the following disclaimer in the
17 1.1 perseant * documentation and/or other materials provided with the distribution.
18 1.1 perseant *
19 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 perseant * POSSIBILITY OF SUCH DAMAGE.
30 1.1 perseant */
31 1.1 perseant
32 1.2 perseant #include <sys/cdefs.h>
33 1.37 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.37 2025/09/17 04:37:47 perseant Exp $");
34 1.2 perseant
35 1.2 perseant #if defined(_KERNEL_OPT)
36 1.2 perseant #include "opt_quota.h"
37 1.2 perseant #endif
38 1.2 perseant
39 1.2 perseant #include <sys/param.h>
40 1.2 perseant #include <sys/systm.h>
41 1.2 perseant #include <sys/namei.h>
42 1.2 perseant #include <sys/proc.h>
43 1.2 perseant #include <sys/kernel.h>
44 1.2 perseant #include <sys/vnode.h>
45 1.2 perseant #include <sys/mount.h>
46 1.2 perseant #include <sys/kthread.h>
47 1.2 perseant #include <sys/buf.h>
48 1.2 perseant #include <sys/device.h>
49 1.2 perseant #include <sys/file.h>
50 1.2 perseant #include <sys/disklabel.h>
51 1.2 perseant #include <sys/ioctl.h>
52 1.2 perseant #include <sys/errno.h>
53 1.2 perseant #include <sys/malloc.h>
54 1.2 perseant #include <sys/pool.h>
55 1.2 perseant #include <sys/socket.h>
56 1.37 perseant #include <sys/stat.h>
57 1.2 perseant #include <sys/syslog.h>
58 1.2 perseant #include <sys/sysctl.h>
59 1.2 perseant #include <sys/conf.h>
60 1.2 perseant #include <sys/kauth.h>
61 1.2 perseant
62 1.2 perseant #include <miscfs/specfs/specdev.h>
63 1.2 perseant
64 1.14 dholland #include <ufs/lfs/ulfs_quotacommon.h>
65 1.14 dholland #include <ufs/lfs/ulfs_inode.h>
66 1.14 dholland #include <ufs/lfs/ulfsmount.h>
67 1.14 dholland #include <ufs/lfs/ulfs_extern.h>
68 1.2 perseant
69 1.36 riastrad #include <uvm/uvm_extern.h>
70 1.2 perseant
71 1.2 perseant #include <ufs/lfs/lfs.h>
72 1.24 dholland #include <ufs/lfs/lfs_accessors.h>
73 1.18 dholland #include <ufs/lfs/lfs_kernel.h>
74 1.2 perseant #include <ufs/lfs/lfs_extern.h>
75 1.2 perseant
76 1.2 perseant #include <miscfs/genfs/genfs.h>
77 1.2 perseant #include <miscfs/genfs/genfs_node.h>
78 1.2 perseant
79 1.1 perseant /*
80 1.1 perseant * Roll-forward code.
81 1.1 perseant */
/*
 * Prototypes for file-local helpers.  check_segsum(), update_inogen() and
 * update_inoblk_copy_dinode() are needed ahead of their definitions.
 */
82 1.1 perseant static daddr_t check_segsum(struct lfs *, daddr_t, u_int64_t,
83 1.1 perseant kauth_cred_t, int, int *, struct lwp *);
84 1.1 perseant
85 1.37 perseant static bool all_selector(void *, struct vnode *);
86 1.37 perseant static void drop_vnode_pages(struct mount *, struct lwp *);
87 1.37 perseant static int update_inogen(struct lfs *, daddr_t);
88 1.37 perseant static void update_inoblk_copy_dinode(struct lfs *, union lfs_dinode *, const union lfs_dinode *);
89 1.37 perseant
/* Defined in another file. */
90 1.3 perseant extern int lfs_do_rfw;
/* Incremented by check_segsum() each time a data block is passed to update_meta(). */
91 1.37 perseant int rblkcnt;
/* NOTE(review): presumably a cap on partial segments processed, 0 meaning no cap -- confirm against its users. */
92 1.37 perseant int lfs_rfw_max_psegs = 0;
93 1.3 perseant
94 1.1 perseant /*
95 1.1 perseant * Allocate a particular inode with a particular version number, freeing
96 1.1 perseant * any previous versions of this inode that may have gone before.
97 1.1 perseant * Used by the roll-forward code.
98 1.1 perseant *
99 1.1 perseant * XXX this function does not have appropriate locking to be used on a live fs;
100 1.1 perseant * XXX but something similar could probably be used for an "undelete" call.
101 1.1 perseant *
102 1.1 perseant * Called with the Ifile inode locked.
103 1.1 perseant */
104 1.1 perseant int
105 1.1 perseant lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l,
106 1.37 perseant struct vnode **vpp, union lfs_dinode *dip)
107 1.1 perseant {
108 1.20 hannken struct vattr va;
109 1.1 perseant struct vnode *vp;
110 1.1 perseant struct inode *ip;
111 1.1 perseant int error;
112 1.1 perseant
113 1.37 perseant KASSERT(ino > LFS_IFILE_INUM);
114 1.1 perseant ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */
115 1.1 perseant
116 1.1 perseant /*
117 1.1 perseant * First, just try a vget. If the version number is the one we want,
118 1.1 perseant * we don't have to do anything else. If the version number is wrong,
119 1.1 perseant * take appropriate action.
120 1.1 perseant */
121 1.35 ad error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
122 1.1 perseant if (error == 0) {
123 1.37 perseant DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n",
124 1.37 perseant (int)ino, vp));
125 1.1 perseant
126 1.1 perseant *vpp = vp;
127 1.1 perseant ip = VTOI(vp);
128 1.37 perseant DLOG((DLOG_RF, " ip->i_gen=%jd dip nlink %jd seeking"
129 1.37 perseant " version %jd\n", (intmax_t)ip->i_gen,
130 1.37 perseant (intmax_t)(dip == NULL ? -1
131 1.37 perseant : lfs_dino_getnlink(fs, dip)), (intmax_t)vers));
132 1.37 perseant if (ip->i_gen == vers) {
133 1.37 perseant /*
134 1.37 perseant * We have what we wanted already.
135 1.37 perseant */
136 1.37 perseant DLOG((DLOG_RF, " pre-existing\n"));
137 1.1 perseant return 0;
138 1.37 perseant } else if (ip->i_gen < vers && dip != NULL
139 1.37 perseant && lfs_dino_getnlink(fs, dip) > 0) {
140 1.37 perseant /*
141 1.37 perseant * We have found a newer version. Truncate
142 1.37 perseant * the old vnode to zero and re-initialize
143 1.37 perseant * from the given dinode.
144 1.37 perseant */
145 1.37 perseant DLOG((DLOG_RF, " replace old version %jd\n",
146 1.37 perseant (intmax_t)ip->i_gen));
147 1.8 he lfs_truncate(vp, (off_t)0, 0, NOCRED);
148 1.31 dholland ip->i_gen = vers;
149 1.37 perseant vp->v_type = IFTOVT(lfs_dino_getmode(fs, dip));
150 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip);
151 1.1 perseant LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
152 1.1 perseant return 0;
153 1.1 perseant } else {
154 1.37 perseant /*
155 1.37 perseant * Not the right version and nothing to
156 1.37 perseant * initialize from. Don't recover this data.
157 1.37 perseant */
158 1.1 perseant DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n",
159 1.37 perseant (int)ino, (int)vers,
160 1.37 perseant (int)lfs_dino_getgen(fs, ip->i_din)));
161 1.1 perseant vput(vp);
162 1.1 perseant *vpp = NULLVP;
163 1.1 perseant return EEXIST;
164 1.1 perseant }
165 1.1 perseant }
166 1.1 perseant
167 1.37 perseant /*
168 1.37 perseant * No version of this inode was found in the cache.
169 1.37 perseant * Make a new one from the dinode. We will add data blocks
170 1.37 perseant * as they come in, so scrub any block addresses off of the
171 1.37 perseant * inode and reset block counts to zero.
172 1.37 perseant */
173 1.37 perseant if (dip == NULL)
174 1.37 perseant return ENOENT;
175 1.37 perseant
176 1.20 hannken vattr_null(&va);
177 1.37 perseant va.va_type = IFTOVT(lfs_dino_getmode(fs, dip));
178 1.37 perseant va.va_mode = lfs_dino_getmode(fs, dip) & ALLPERMS;
179 1.20 hannken va.va_fileid = ino;
180 1.20 hannken va.va_gen = vers;
181 1.34 hannken error = vcache_new(fs->lfs_ivnode->v_mount, NULL, &va, NOCRED, NULL,
182 1.34 hannken &vp);
183 1.20 hannken if (error)
184 1.20 hannken return error;
185 1.20 hannken error = vn_lock(vp, LK_EXCLUSIVE);
186 1.37 perseant if (error)
187 1.37 perseant goto err;
188 1.37 perseant
189 1.20 hannken ip = VTOI(vp);
190 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip);
191 1.37 perseant
192 1.37 perseant DLOG((DLOG_RF, "lfs_valloc[2] ino %d vp %p size=%lld effnblks=%d,"
193 1.37 perseant " blocks=%d\n", (int)ino, vp, (long long)ip->i_size,
194 1.37 perseant (int)ip->i_lfs_effnblks,
195 1.37 perseant (int)lfs_dino_getblocks(fs, ip->i_din)));
196 1.1 perseant *vpp = vp;
197 1.20 hannken return 0;
198 1.37 perseant
199 1.37 perseant err:
200 1.37 perseant vrele(vp);
201 1.37 perseant *vpp = NULLVP;
202 1.37 perseant return error;
203 1.1 perseant }
204 1.1 perseant
205 1.1 perseant /*
206 1.1 perseant * Load the appropriate indirect block, and change the appropriate pointer.
207 1.1 perseant * Mark the block dirty. Do segment and avail accounting.
208 1.1 perseant */
209 1.1 perseant static int
210 1.1 perseant update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn,
211 1.1 perseant daddr_t ndaddr, size_t size, struct lwp *l)
212 1.1 perseant {
213 1.1 perseant int error;
214 1.1 perseant struct vnode *vp;
215 1.1 perseant struct inode *ip;
216 1.1 perseant daddr_t odaddr;
217 1.15 dholland struct indir a[ULFS_NIADDR];
218 1.1 perseant int num;
219 1.1 perseant struct buf *bp;
220 1.1 perseant SEGUSE *sup;
221 1.37 perseant u_int64_t newsize, loff;
222 1.1 perseant
223 1.1 perseant KASSERT(lbn >= 0); /* no indirect blocks */
224 1.37 perseant KASSERT(ino > LFS_IFILE_INUM);
225 1.37 perseant
226 1.37 perseant DLOG((DLOG_RF, "update_meta: ino %d lbn %d size %d at 0x%jx\n",
227 1.37 perseant (int)ino, (int)lbn, (int)size, (uintmax_t)ndaddr));
228 1.1 perseant
229 1.37 perseant if ((error = lfs_rf_valloc(fs, ino, vers, l, &vp, NULL)) != 0)
230 1.1 perseant return error;
231 1.37 perseant ip = VTOI(vp);
232 1.1 perseant
233 1.37 perseant /*
234 1.37 perseant * If block already exists, note its new location
235 1.37 perseant * but do not account it as new.
236 1.37 perseant */
237 1.37 perseant ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
238 1.37 perseant if (odaddr == UNASSIGNED) {
239 1.37 perseant if ((error = lfs_balloc(vp, (lbn << lfs_sb_getbshift(fs)),
240 1.37 perseant size, NOCRED, 0, &bp)) != 0) {
241 1.37 perseant vput(vp);
242 1.37 perseant return (error);
243 1.37 perseant }
244 1.37 perseant /* No need to write, the block is already on disk */
245 1.37 perseant if (bp->b_oflags & BO_DELWRI) {
246 1.37 perseant LFS_UNLOCK_BUF(bp);
247 1.37 perseant /* Account recovery of the previous version */
248 1.37 perseant lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
249 1.37 perseant }
250 1.37 perseant brelse(bp, BC_INVAL);
251 1.37 perseant DLOG((DLOG_RF, "balloc ip->i_lfs_effnblks = %d,"
252 1.37 perseant " lfs_dino_getblocks(fs, ip->i_din) = %d\n",
253 1.37 perseant (int)ip->i_lfs_effnblks,
254 1.37 perseant (int)lfs_dino_getblocks(fs, ip->i_din)));
255 1.37 perseant } else {
256 1.37 perseant /* XXX fragextend? */
257 1.37 perseant DLOG((DLOG_RF, "block exists, no balloc\n"));
258 1.1 perseant }
259 1.1 perseant
260 1.1 perseant /*
261 1.1 perseant * Extend the file, if it is not large enough already.
262 1.1 perseant * XXX this is not exactly right, we don't know how much of the
263 1.37 perseant * XXX last block is actually used.
264 1.1 perseant */
265 1.37 perseant loff = lfs_lblktosize(fs, lbn);
266 1.37 perseant if (loff >= (ULFS_NDADDR << lfs_sb_getbshift(fs))) {
267 1.37 perseant /* No fragments */
268 1.37 perseant newsize = loff + 1;
269 1.37 perseant } else {
270 1.37 perseant /* Subtract only a fragment to account for block size */
271 1.37 perseant newsize = loff + size - lfs_fsbtob(fs, 1) + 1;
272 1.37 perseant }
273 1.37 perseant
274 1.37 perseant if (ip->i_size < newsize) {
275 1.37 perseant DLOG((DLOG_RF, "ino %d size %d -> %d\n",
276 1.37 perseant (int)ino, (int)ip->i_size, (int)newsize));
277 1.31 dholland lfs_dino_setsize(fs, ip->i_din, newsize);
278 1.37 perseant ip->i_size = newsize;
279 1.37 perseant /*
280 1.37 perseant * tell vm our new size for the case the inode won't
281 1.37 perseant * appear later.
282 1.37 perseant */
283 1.37 perseant uvm_vnp_setsize(vp, newsize);
284 1.1 perseant }
285 1.1 perseant
286 1.1 perseant lfs_update_single(fs, NULL, vp, lbn, ndaddr, size);
287 1.1 perseant
288 1.17 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);
289 1.1 perseant sup->su_nbytes += size;
290 1.17 christos LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, ndaddr), bp);
291 1.1 perseant
292 1.1 perseant /* differences here should be due to UNWRITTEN indirect blocks. */
293 1.37 perseant if (vp->v_type != VLNK) {
294 1.37 perseant if (!(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din))
295 1.37 perseant #if 0
296 1.37 perseant || !(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
297 1.37 perseant ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din))
298 1.37 perseant #endif /* 0 */
299 1.37 perseant ) {
300 1.37 perseant vprint("vnode", vp);
301 1.37 perseant printf("effnblks=%jd dino_getblocks=%jd\n",
302 1.37 perseant (intmax_t)ip->i_lfs_effnblks,
303 1.37 perseant (intmax_t)lfs_dino_getblocks(fs, ip->i_din));
304 1.37 perseant }
305 1.37 perseant KASSERT(ip->i_lfs_effnblks >= lfs_dino_getblocks(fs, ip->i_din));
306 1.37 perseant #if 0
307 1.37 perseant KASSERT(lfs_lblkno(fs, ip->i_size) > ULFS_NDADDR ||
308 1.37 perseant ip->i_lfs_effnblks == lfs_dino_getblocks(fs, ip->i_din));
309 1.37 perseant #endif /* 0 */
310 1.37 perseant }
311 1.1 perseant
312 1.1 perseant #ifdef DEBUG
313 1.1 perseant /* Now look again to make sure it worked */
314 1.15 dholland ulfs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
315 1.17 christos if (LFS_DBTOFSB(fs, odaddr) != ndaddr)
316 1.37 perseant DLOG((DLOG_RF, "update_meta: failed setting ino %jd lbn %jd"
317 1.37 perseant " to %jd\n", (intmax_t)ino, (intmax_t)lbn, (intmax_t)ndaddr));
318 1.1 perseant #endif /* DEBUG */
319 1.1 perseant vput(vp);
320 1.1 perseant return 0;
321 1.1 perseant }
322 1.1 perseant
323 1.30 dholland /*
324 1.30 dholland * Copy some the fields of the dinode as needed by update_inoblk().
325 1.30 dholland */
326 1.30 dholland static void
327 1.30 dholland update_inoblk_copy_dinode(struct lfs *fs,
328 1.30 dholland union lfs_dinode *dstu, const union lfs_dinode *srcu)
329 1.30 dholland {
330 1.30 dholland if (fs->lfs_is64) {
331 1.30 dholland struct lfs64_dinode *dst = &dstu->u_64;
332 1.30 dholland const struct lfs64_dinode *src = &srcu->u_64;
333 1.30 dholland unsigned i;
334 1.30 dholland
335 1.30 dholland /*
336 1.30 dholland * Copy everything but the block pointers and di_blocks.
337 1.30 dholland * XXX what about di_extb?
338 1.30 dholland */
339 1.30 dholland dst->di_mode = src->di_mode;
340 1.30 dholland dst->di_nlink = src->di_nlink;
341 1.30 dholland dst->di_uid = src->di_uid;
342 1.30 dholland dst->di_gid = src->di_gid;
343 1.30 dholland dst->di_blksize = src->di_blksize;
344 1.30 dholland dst->di_size = src->di_size;
345 1.30 dholland dst->di_atime = src->di_atime;
346 1.30 dholland dst->di_mtime = src->di_mtime;
347 1.30 dholland dst->di_ctime = src->di_ctime;
348 1.30 dholland dst->di_birthtime = src->di_birthtime;
349 1.30 dholland dst->di_mtimensec = src->di_mtimensec;
350 1.30 dholland dst->di_atimensec = src->di_atimensec;
351 1.30 dholland dst->di_ctimensec = src->di_ctimensec;
352 1.30 dholland dst->di_birthnsec = src->di_birthnsec;
353 1.30 dholland dst->di_gen = src->di_gen;
354 1.30 dholland dst->di_kernflags = src->di_kernflags;
355 1.30 dholland dst->di_flags = src->di_flags;
356 1.30 dholland dst->di_extsize = src->di_extsize;
357 1.30 dholland dst->di_modrev = src->di_modrev;
358 1.30 dholland dst->di_inumber = src->di_inumber;
359 1.30 dholland for (i = 0; i < __arraycount(src->di_spare); i++) {
360 1.30 dholland dst->di_spare[i] = src->di_spare[i];
361 1.30 dholland }
362 1.37 perseant /* Short symlinks store their data in di_db. */
363 1.37 perseant if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
364 1.37 perseant && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
365 1.37 perseant memcpy(dst->di_db, src->di_db, src->di_size);
366 1.37 perseant }
367 1.30 dholland } else {
368 1.30 dholland struct lfs32_dinode *dst = &dstu->u_32;
369 1.30 dholland const struct lfs32_dinode *src = &srcu->u_32;
370 1.30 dholland
371 1.30 dholland /* Get mode, link count, size, and times */
372 1.30 dholland memcpy(dst, src, offsetof(struct lfs32_dinode, di_db[0]));
373 1.30 dholland
374 1.30 dholland /* Then the rest, except di_blocks */
375 1.30 dholland dst->di_flags = src->di_flags;
376 1.30 dholland dst->di_gen = src->di_gen;
377 1.30 dholland dst->di_uid = src->di_uid;
378 1.30 dholland dst->di_gid = src->di_gid;
379 1.30 dholland dst->di_modrev = src->di_modrev;
380 1.37 perseant
381 1.37 perseant /* Short symlinks store their data in di_db. */
382 1.37 perseant if ((src->di_mode & LFS_IFMT) == LFS_IFLNK
383 1.37 perseant && src->di_size < lfs_sb_getmaxsymlinklen(fs)) {
384 1.37 perseant memcpy(dst->di_db, src->di_db, src->di_size);
385 1.37 perseant }
386 1.30 dholland }
387 1.30 dholland }
388 1.30 dholland
389 1.1 perseant static int
390 1.1 perseant update_inoblk(struct lfs *fs, daddr_t offset, kauth_cred_t cred,
391 1.1 perseant struct lwp *l)
392 1.1 perseant {
393 1.1 perseant struct vnode *devvp, *vp;
394 1.1 perseant struct inode *ip;
395 1.29 dholland union lfs_dinode *dip;
396 1.1 perseant struct buf *dbp, *ibp;
397 1.1 perseant int error;
398 1.1 perseant daddr_t daddr;
399 1.1 perseant IFILE *ifp;
400 1.1 perseant SEGUSE *sup;
401 1.29 dholland unsigned i, num;
402 1.37 perseant uint32_t gen;
403 1.37 perseant char *buf;
404 1.1 perseant
405 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
406 1.1 perseant
407 1.1 perseant /*
408 1.1 perseant * Get the inode, update times and perms.
409 1.1 perseant * DO NOT update disk blocks, we do that separately.
410 1.1 perseant */
411 1.23 dholland error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
412 1.19 maxv 0, &dbp);
413 1.1 perseant if (error) {
414 1.1 perseant DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error));
415 1.1 perseant return error;
416 1.1 perseant }
417 1.37 perseant buf = malloc(dbp->b_bcount, M_SEGMENT, M_WAITOK);
418 1.37 perseant memcpy(buf, dbp->b_data, dbp->b_bcount);
419 1.37 perseant brelse(dbp, BC_AGE);
420 1.29 dholland num = LFS_INOPB(fs);
421 1.29 dholland for (i = num; i-- > 0; ) {
422 1.37 perseant dip = DINO_IN_BLOCK(fs, buf, i);
423 1.37 perseant if (lfs_dino_getinumber(fs, dip) <= LFS_IFILE_INUM)
424 1.37 perseant continue;
425 1.1 perseant
426 1.37 perseant /* Check generation number */
427 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
428 1.37 perseant gen = lfs_if_getversion(fs, ifp);
429 1.37 perseant brelse(ibp, 0);
430 1.37 perseant if (lfs_dino_getgen(fs, dip) < gen) {
431 1.37 perseant continue;
432 1.37 perseant }
433 1.1 perseant
434 1.37 perseant /*
435 1.37 perseant * This inode is the newest generation. Load it.
436 1.37 perseant */
437 1.37 perseant error = lfs_rf_valloc(fs, lfs_dino_getinumber(fs, dip),
438 1.37 perseant lfs_dino_getgen(fs, dip),
439 1.37 perseant l, &vp, dip);
440 1.37 perseant if (error) {
441 1.37 perseant DLOG((DLOG_RF, "update_inoblk: lfs_rf_valloc"
442 1.37 perseant " returned %d\n", error));
443 1.37 perseant continue;
444 1.37 perseant }
445 1.37 perseant ip = VTOI(vp);
446 1.37 perseant if (lfs_dino_getsize(fs, dip) != ip->i_size
447 1.37 perseant && vp->v_type != VLNK) {
448 1.37 perseant /* XXX What should we do sith symlinks? */
449 1.37 perseant DLOG((DLOG_RF, " ino %jd size %jd -> %jd\n",
450 1.37 perseant (intmax_t)lfs_dino_getinumber(fs, dip),
451 1.37 perseant (intmax_t)ip->i_size,
452 1.37 perseant (intmax_t)lfs_dino_getsize(fs, dip)));
453 1.37 perseant lfs_truncate(vp, lfs_dino_getsize(fs, dip), 0,
454 1.37 perseant NOCRED);
455 1.37 perseant }
456 1.37 perseant update_inoblk_copy_dinode(fs, ip->i_din, dip);
457 1.37 perseant
458 1.37 perseant ip->i_flags = lfs_dino_getflags(fs, dip);
459 1.37 perseant ip->i_gen = lfs_dino_getgen(fs, dip);
460 1.37 perseant ip->i_uid = lfs_dino_getuid(fs, dip);
461 1.37 perseant ip->i_gid = lfs_dino_getgid(fs, dip);
462 1.37 perseant
463 1.37 perseant ip->i_mode = lfs_dino_getmode(fs, dip);
464 1.37 perseant ip->i_nlink = lfs_dino_getnlink(fs, dip);
465 1.37 perseant ip->i_size = lfs_dino_getsize(fs, dip);
466 1.37 perseant
467 1.37 perseant LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
468 1.37 perseant
469 1.37 perseant /* Re-initialize to get type right */
470 1.37 perseant ulfs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p,
471 1.37 perseant &vp);
472 1.37 perseant
473 1.37 perseant /* Record change in location */
474 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
475 1.37 perseant daddr = lfs_if_getdaddr(fs, ifp);
476 1.37 perseant lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, dbp->b_blkno));
477 1.37 perseant error = LFS_BWRITE_LOG(ibp); /* Ifile */
478 1.37 perseant /* And do segment accounting */
479 1.37 perseant if (lfs_dtosn(fs, daddr)
480 1.37 perseant != lfs_dtosn(fs, LFS_DBTOFSB(fs, dbp->b_blkno))) {
481 1.37 perseant if (!DADDR_IS_BAD(daddr)) {
482 1.37 perseant LFS_SEGENTRY(sup, fs,
483 1.37 perseant lfs_dtosn(fs, daddr), ibp);
484 1.37 perseant sup->su_nbytes -= DINOSIZE(fs);
485 1.1 perseant LFS_WRITESEGENTRY(sup, fs,
486 1.37 perseant lfs_dtosn(fs, daddr),
487 1.1 perseant ibp);
488 1.1 perseant }
489 1.37 perseant LFS_SEGENTRY(sup, fs, lfs_dtosn(fs,
490 1.37 perseant LFS_DBTOFSB(fs, dbp->b_blkno)),
491 1.37 perseant ibp);
492 1.37 perseant sup->su_nbytes += DINOSIZE(fs);
493 1.37 perseant LFS_WRITESEGENTRY(sup, fs,
494 1.37 perseant lfs_dtosn(fs, LFS_DBTOFSB(fs,
495 1.37 perseant dbp->b_blkno)),
496 1.37 perseant ibp);
497 1.1 perseant }
498 1.37 perseant vput(vp);
499 1.37 perseant }
500 1.37 perseant free(buf, M_SEGMENT);
501 1.37 perseant
502 1.37 perseant return 0;
503 1.37 perseant }
504 1.37 perseant
505 1.37 perseant /*
506 1.37 perseant * Note the highest generation number of each inode in the Ifile.
507 1.37 perseant * This allows us to skip processing data for intermediate versions.
508 1.37 perseant */
509 1.37 perseant static int
510 1.37 perseant update_inogen(struct lfs *fs, daddr_t offset)
511 1.37 perseant {
512 1.37 perseant struct vnode *devvp;
513 1.37 perseant union lfs_dinode *dip;
514 1.37 perseant struct buf *dbp, *ibp;
515 1.37 perseant int error;
516 1.37 perseant IFILE *ifp;
517 1.37 perseant unsigned i, num;
518 1.37 perseant
519 1.37 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
520 1.37 perseant
521 1.37 perseant /* Read inode block */
522 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
523 1.37 perseant 0, &dbp);
524 1.37 perseant if (error) {
525 1.37 perseant DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error));
526 1.37 perseant return error;
527 1.37 perseant }
528 1.37 perseant
529 1.37 perseant /* Check each inode against ifile entry */
530 1.37 perseant num = LFS_INOPB(fs);
531 1.37 perseant for (i = num; i-- > 0; ) {
532 1.37 perseant dip = DINO_IN_BLOCK(fs, dbp->b_data, i);
533 1.37 perseant if (lfs_dino_getinumber(fs, dip) == LFS_IFILE_INUM)
534 1.37 perseant continue;
535 1.37 perseant
536 1.37 perseant /* Update generation number */
537 1.37 perseant LFS_IENTRY(ifp, fs, lfs_dino_getinumber(fs, dip), ibp);
538 1.37 perseant if (lfs_if_getversion(fs, ifp) < lfs_dino_getgen(fs, dip))
539 1.37 perseant lfs_if_setversion(fs, ifp, lfs_dino_getgen(fs, dip));
540 1.37 perseant error = LFS_BWRITE_LOG(ibp); /* Ifile */
541 1.37 perseant if (error)
542 1.37 perseant break;
543 1.1 perseant }
544 1.4 ad brelse(dbp, BC_AGE);
545 1.1 perseant
546 1.37 perseant return error;
547 1.1 perseant }
548 1.1 perseant
/*
 * Values for the "phase" argument of check_segsum(); each selects the
 * processing applied to a partial segment.
 */
549 1.37 perseant #define CHECK_CKSUM 1 /* Check the checksum to make sure it's valid */
550 1.37 perseant #define CHECK_GEN 2 /* Update highest generation number */
551 1.37 perseant #define CHECK_INODES 3 /* Read and process inodes */
552 1.37 perseant #define CHECK_DATA 4 /* Identify and process data blocks */
553 1.1 perseant
554 1.1 perseant static daddr_t
555 1.1 perseant check_segsum(struct lfs *fs, daddr_t offset, u_int64_t nextserial,
556 1.37 perseant kauth_cred_t cred, int phase, int *pseg_flags, struct lwp *l)
557 1.1 perseant {
558 1.1 perseant struct vnode *devvp;
559 1.1 perseant struct buf *bp, *dbp;
560 1.37 perseant int error, ninos, i, j;
561 1.1 perseant SEGSUM *ssp;
562 1.37 perseant daddr_t prevoffset;
563 1.32 dholland IINFO *iip;
564 1.1 perseant FINFO *fip;
565 1.1 perseant SEGUSE *sup;
566 1.1 perseant size_t size;
567 1.27 dholland uint32_t datasum, foundsum;
568 1.37 perseant char *buf;
569 1.1 perseant
570 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
571 1.37 perseant
572 1.1 perseant /*
573 1.37 perseant * If this is segment 0, skip the label.
574 1.1 perseant * If the segment has a superblock and we're at the top
575 1.1 perseant * of the segment, skip the superblock.
576 1.1 perseant */
577 1.37 perseant if (offset == lfs_sb_gets0addr(fs))
578 1.37 perseant offset += lfs_btofsb(fs, LFS_LABELPAD);
579 1.17 christos if (lfs_sntod(fs, lfs_dtosn(fs, offset)) == offset) {
580 1.17 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
581 1.1 perseant if (sup->su_flags & SEGUSE_SUPERBLOCK)
582 1.17 christos offset += lfs_btofsb(fs, LFS_SBPAD);
583 1.4 ad brelse(bp, 0);
584 1.1 perseant }
585 1.1 perseant
586 1.1 perseant /* Read in the segment summary */
587 1.23 dholland error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getsumsize(fs),
588 1.19 maxv 0, &bp);
589 1.1 perseant if (error)
590 1.1 perseant return -1;
591 1.37 perseant buf = malloc(bp->b_bcount, M_SEGMENT, M_WAITOK);
592 1.37 perseant memcpy(buf, bp->b_data, bp->b_bcount);
593 1.37 perseant brelse(bp, BC_AGE);
594 1.37 perseant
595 1.37 perseant ssp = (SEGSUM *)buf;
596 1.1 perseant
597 1.37 perseant /*
598 1.37 perseant * Phase I: Check summary checksum.
599 1.37 perseant */
600 1.37 perseant if (phase == CHECK_CKSUM) {
601 1.27 dholland size_t sumstart;
602 1.27 dholland
603 1.27 dholland sumstart = lfs_ss_getsumstart(fs);
604 1.27 dholland if (lfs_ss_getsumsum(fs, ssp) !=
605 1.27 dholland cksum((char *)ssp + sumstart,
606 1.27 dholland lfs_sb_getsumsize(fs) - sumstart)) {
607 1.37 perseant DLOG((DLOG_RF, "Sumsum error at 0x%" PRIx64 "\n",
608 1.37 perseant offset));
609 1.1 perseant offset = -1;
610 1.37 perseant goto err;
611 1.1 perseant }
612 1.27 dholland if (lfs_ss_getnfinfo(fs, ssp) == 0 &&
613 1.27 dholland lfs_ss_getninos(fs, ssp) == 0) {
614 1.37 perseant DLOG((DLOG_RF, "Empty pseg at 0x%" PRIx64 "\n",
615 1.37 perseant offset));
616 1.1 perseant offset = -1;
617 1.37 perseant goto err;
618 1.1 perseant }
619 1.37 perseant if (lfs_sb_getversion(fs) == 1) {
620 1.37 perseant if (lfs_ss_getcreate(fs, ssp) < lfs_sb_gettstamp(fs)) {
621 1.37 perseant DLOG((DLOG_RF, "Old data at 0x%" PRIx64 "\n", offset));
622 1.37 perseant offset = -1;
623 1.37 perseant goto err;
624 1.37 perseant }
625 1.37 perseant } else {
626 1.37 perseant if (lfs_ss_getserial(fs, ssp) != nextserial) {
627 1.37 perseant DLOG((DLOG_RF, "Serial number at 0x%jx given as 0x%jx,"
628 1.37 perseant " expected 0x%jx\n", (intmax_t)offset,
629 1.37 perseant (intmax_t)lfs_ss_getserial(fs, ssp),
630 1.37 perseant (intmax_t)nextserial));
631 1.37 perseant offset = -1;
632 1.37 perseant goto err;
633 1.37 perseant }
634 1.37 perseant if (lfs_ss_getident(fs, ssp) != lfs_sb_getident(fs)) {
635 1.37 perseant DLOG((DLOG_RF, "Incorrect fsid (0x%x vs 0x%x) at 0x%"
636 1.37 perseant PRIx64 "\n", lfs_ss_getident(fs, ssp),
637 1.37 perseant lfs_sb_getident(fs), offset));
638 1.37 perseant offset = -1;
639 1.37 perseant goto err;
640 1.37 perseant }
641 1.1 perseant }
642 1.1 perseant }
643 1.1 perseant if (pseg_flags)
644 1.27 dholland *pseg_flags = lfs_ss_getflags(fs, ssp);
645 1.37 perseant prevoffset = offset;
646 1.23 dholland offset += lfs_btofsb(fs, lfs_sb_getsumsize(fs));
647 1.1 perseant
648 1.37 perseant /* Handle individual blocks */
649 1.37 perseant foundsum = 0;
650 1.27 dholland ninos = howmany(lfs_ss_getninos(fs, ssp), LFS_INOPB(fs));
651 1.37 perseant iip = SEGSUM_IINFOSTART(fs, buf);
652 1.37 perseant fip = SEGSUM_FINFOBASE(fs, (SEGSUM *)buf);
653 1.27 dholland for (i = 0; i < lfs_ss_getnfinfo(fs, ssp) || ninos; ++i) {
654 1.1 perseant /* Inode block? */
655 1.32 dholland if (ninos && lfs_ii_getblock(fs, iip) == offset) {
656 1.37 perseant if (phase == CHECK_CKSUM) {
657 1.1 perseant /* Read in the head and add to the buffer */
658 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset),
659 1.37 perseant lfs_sb_getbsize(fs), 0, &dbp);
660 1.1 perseant if (error) {
661 1.1 perseant offset = -1;
662 1.37 perseant goto err;
663 1.1 perseant }
664 1.37 perseant foundsum = lfs_cksum_part(dbp->b_data,
665 1.37 perseant sizeof(uint32_t), foundsum);
666 1.4 ad brelse(dbp, BC_AGE);
667 1.1 perseant }
668 1.37 perseant if (phase == CHECK_GEN) {
669 1.37 perseant if ((error = update_inogen(fs, offset))
670 1.37 perseant != 0) {
671 1.37 perseant offset = -1;
672 1.37 perseant goto err;
673 1.37 perseant }
674 1.37 perseant }
675 1.37 perseant if (phase == CHECK_INODES) {
676 1.1 perseant if ((error = update_inoblk(fs, offset, cred, l))
677 1.1 perseant != 0) {
678 1.1 perseant offset = -1;
679 1.37 perseant goto err;
680 1.1 perseant }
681 1.1 perseant }
682 1.23 dholland offset += lfs_btofsb(fs, lfs_sb_getibsize(fs));
683 1.32 dholland iip = NEXTLOWER_IINFO(fs, iip);
684 1.1 perseant --ninos;
685 1.32 dholland --i; /* compensate for ++i in loop header */
686 1.1 perseant continue;
687 1.1 perseant }
688 1.37 perseant
689 1.37 perseant /* File block */
690 1.22 dholland size = lfs_sb_getbsize(fs);
691 1.28 dholland for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
692 1.28 dholland if (j == lfs_fi_getnblocks(fs, fip) - 1)
693 1.28 dholland size = lfs_fi_getlastlength(fs, fip);
694 1.37 perseant if (phase == CHECK_CKSUM) {
695 1.37 perseant error = bread(devvp, LFS_FSBTODB(fs, offset),
696 1.37 perseant size, 0, &dbp);
697 1.1 perseant if (error) {
698 1.1 perseant offset = -1;
699 1.37 perseant goto err;
700 1.1 perseant }
701 1.37 perseant foundsum = lfs_cksum_part(dbp->b_data,
702 1.37 perseant sizeof(uint32_t), foundsum);
703 1.4 ad brelse(dbp, BC_AGE);
704 1.1 perseant }
705 1.1 perseant /* Account for and update any direct blocks */
706 1.37 perseant if (phase == CHECK_DATA &&
707 1.28 dholland lfs_fi_getino(fs, fip) > LFS_IFILE_INUM &&
708 1.28 dholland lfs_fi_getblock(fs, fip, j) >= 0) {
709 1.28 dholland update_meta(fs, lfs_fi_getino(fs, fip),
710 1.28 dholland lfs_fi_getversion(fs, fip),
711 1.28 dholland lfs_fi_getblock(fs, fip, j),
712 1.28 dholland offset, size, l);
713 1.37 perseant ++rblkcnt;
714 1.1 perseant }
715 1.17 christos offset += lfs_btofsb(fs, size);
716 1.1 perseant }
717 1.37 perseant
718 1.27 dholland fip = NEXT_FINFO(fs, fip);
719 1.1 perseant }
720 1.37 perseant
721 1.1 perseant /* Checksum the array, compare */
722 1.37 perseant if (phase == CHECK_CKSUM) {
723 1.37 perseant datasum = lfs_ss_getdatasum(fs, ssp);
724 1.37 perseant foundsum = lfs_cksum_fold(foundsum);
725 1.37 perseant if (datasum != foundsum) {
726 1.37 perseant DLOG((DLOG_RF, "Datasum error at 0x%" PRIx64
727 1.37 perseant " (wanted %x got %x)\n",
728 1.37 perseant offset, datasum, foundsum));
729 1.1 perseant offset = -1;
730 1.37 perseant goto err;
731 1.1 perseant }
732 1.1 perseant }
733 1.1 perseant
734 1.37 perseant if (phase == CHECK_CKSUM)
735 1.37 perseant lfs_sb_subavail(fs, offset - prevoffset);
736 1.37 perseant else {
737 1.1 perseant /* Don't clog the buffer queue */
738 1.9 ad mutex_enter(&lfs_lock);
739 1.1 perseant if (locked_queue_count > LFS_MAX_BUFS ||
740 1.1 perseant locked_queue_bytes > LFS_MAX_BYTES) {
741 1.1 perseant lfs_flush(fs, SEGM_CKP, 0);
742 1.1 perseant }
743 1.9 ad mutex_exit(&lfs_lock);
744 1.1 perseant }
745 1.1 perseant
746 1.37 perseant /*
747 1.37 perseant * If we're at the end of the segment, move to the next.
748 1.37 perseant * A partial segment needs space for a segment header (1 fsb)
749 1.37 perseant * and a full block ("frag" fsb). Thus, adding "frag" fsb should
750 1.37 perseant * still be within the current segment (whereas frag + 1 might
751 1.37 perseant * be at the start of the next segment).
752 1.37 perseant *
753 1.37 perseant * This needs to match the definition of LFS_PARTIAL_FITS
754 1.37 perseant * in lfs_segment.c.
755 1.37 perseant */
756 1.37 perseant if (lfs_dtosn(fs, offset + lfs_sb_getfrag(fs))
757 1.37 perseant != lfs_dtosn(fs, offset)) {
758 1.37 perseant if (lfs_dtosn(fs, offset) == lfs_dtosn(fs, lfs_ss_getnext(fs,
759 1.37 perseant ssp))) {
760 1.37 perseant printf("WHOA! at 0x%jx/seg %jd moving to 0x%jx/seg %jd\n",
761 1.37 perseant (intmax_t)offset,
762 1.37 perseant (intmax_t)lfs_dtosn(fs, offset),
763 1.37 perseant (intmax_t)lfs_ss_getnext(fs, ssp),
764 1.37 perseant (intmax_t)lfs_dtosn(fs, lfs_ss_getnext(fs, ssp)));
765 1.37 perseant offset = -1;
766 1.37 perseant goto err;
767 1.37 perseant }
768 1.37 perseant offset = lfs_ss_getnext(fs, ssp);
769 1.37 perseant DLOG((DLOG_RF, "LFS roll forward: moving to offset 0x%" PRIx64
770 1.37 perseant " -> segment %d\n", offset, lfs_dtosn(fs,offset)));
771 1.37 perseant }
772 1.1 perseant
773 1.37 perseant err:
774 1.37 perseant free(buf, M_SEGMENT);
775 1.37 perseant
776 1.1 perseant return offset;
777 1.1 perseant }
778 1.1 perseant
779 1.1 perseant void
780 1.2 perseant lfs_roll_forward(struct lfs *fs, struct mount *mp, struct lwp *l)
781 1.1 perseant {
782 1.37 perseant int flags, dirty, phase;
783 1.37 perseant daddr_t startoffset, offset, nextoffset, endpseg;
784 1.37 perseant u_int64_t nextserial, startserial, endserial;
785 1.37 perseant int sn, curseg;
786 1.3 perseant struct proc *p;
787 1.3 perseant kauth_cred_t cred;
788 1.3 perseant SEGUSE *sup;
789 1.3 perseant struct buf *bp;
790 1.3 perseant
791 1.3 perseant p = l ? l->l_proc : NULL;
792 1.3 perseant cred = p ? p->p_cred : NOCRED;
793 1.1 perseant
794 1.1 perseant /*
795 1.1 perseant * Roll forward.
796 1.1 perseant *
797 1.1 perseant * We don't roll forward for v1 filesystems, because
798 1.1 perseant * of the danger that the clock was turned back between the last
799 1.1 perseant * checkpoint and crash. This would roll forward garbage.
800 1.1 perseant *
801 1.1 perseant * v2 filesystems don't have this problem because they use a
802 1.1 perseant * monotonically increasing serial number instead of a timestamp.
803 1.1 perseant */
804 1.37 perseant rblkcnt = 0;
805 1.37 perseant if ((lfs_sb_getpflags(fs) & LFS_PF_CLEAN) || !lfs_do_rfw
806 1.37 perseant || lfs_sb_getversion(fs) <= 1 || p == NULL)
807 1.37 perseant return;
808 1.37 perseant
809 1.37 perseant DLOG((DLOG_RF, "%s: begin roll forward at serial 0x%jx\n",
810 1.37 perseant lfs_sb_getfsmnt(fs), (intmax_t)lfs_sb_getserial(fs)));
811 1.37 perseant DEBUG_CHECK_FREELIST(fs);
812 1.37 perseant
813 1.37 perseant /*
814 1.37 perseant * Phase I: Find the address of the last good partial
815 1.37 perseant * segment that was written after the checkpoint. Mark
816 1.37 perseant * the segments in question dirty, so they won't be
817 1.37 perseant * reallocated.
818 1.37 perseant */
819 1.37 perseant endpseg = startoffset = offset = lfs_sb_getoffset(fs);
820 1.37 perseant flags = 0x0;
821 1.37 perseant DLOG((DLOG_RF, "LFS roll forward phase 1: start at offset 0x%"
822 1.37 perseant PRIx64 "\n", offset));
823 1.37 perseant LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
824 1.37 perseant if (!(sup->su_flags & SEGUSE_DIRTY))
825 1.37 perseant lfs_sb_subnclean(fs, 1);
826 1.37 perseant sup->su_flags |= SEGUSE_DIRTY;
827 1.37 perseant LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
828 1.37 perseant
829 1.37 perseant startserial = lfs_sb_getserial(fs);
830 1.37 perseant endserial = nextserial = startserial + 1;
831 1.37 perseant while ((nextoffset = check_segsum(fs, offset, nextserial,
832 1.37 perseant cred, CHECK_CKSUM, &flags, l)) > 0) {
833 1.37 perseant if (lfs_sntod(fs, offset) != lfs_sntod(fs, nextoffset)) {
834 1.37 perseant LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, offset),
835 1.37 perseant bp);
836 1.37 perseant if (!(sup->su_flags & SEGUSE_DIRTY))
837 1.37 perseant lfs_sb_subnclean(fs, 1);
838 1.37 perseant sup->su_flags |= SEGUSE_DIRTY;
839 1.37 perseant LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, offset), bp);
840 1.37 perseant }
841 1.37 perseant
842 1.37 perseant DLOG((DLOG_RF, "LFS roll forward phase 1: offset=0x%jx"
843 1.37 perseant " serial=0x%jx\n", (intmax_t)nextoffset,
844 1.37 perseant (intmax_t)nextserial));
845 1.37 perseant if (flags & SS_DIROP) {
846 1.37 perseant DLOG((DLOG_RF, "lfs_mountfs: dirops at 0x%"
847 1.37 perseant PRIx64 "\n", offset));
848 1.37 perseant if (!(flags & SS_CONT)) {
849 1.37 perseant DLOG((DLOG_RF, "lfs_mountfs: dirops end "
850 1.37 perseant "at 0x%" PRIx64 "\n", offset));
851 1.37 perseant }
852 1.37 perseant }
853 1.37 perseant offset = nextoffset;
854 1.37 perseant ++nextserial;
855 1.37 perseant
856 1.37 perseant if (!(flags & SS_CONT)) {
857 1.37 perseant endpseg = nextoffset;
858 1.37 perseant endserial = nextserial;
859 1.37 perseant }
860 1.37 perseant if (lfs_rfw_max_psegs > 0
861 1.37 perseant && nextserial > startserial + lfs_rfw_max_psegs)
862 1.37 perseant break;
863 1.37 perseant }
864 1.37 perseant if (flags & SS_CONT) {
865 1.37 perseant DLOG((DLOG_RF, "LFS roll forward: warning: incomplete "
866 1.37 perseant "dirops discarded (0x%jx < 0x%jx)\n",
867 1.37 perseant endpseg, nextoffset));
868 1.37 perseant }
869 1.37 perseant if (lfs_sb_getversion(fs) > 1)
870 1.37 perseant lfs_sb_setserial(fs, endserial);
871 1.37 perseant DLOG((DLOG_RF, "LFS roll forward phase 1: completed: "
872 1.37 perseant "endpseg=0x%" PRIx64 "\n", endpseg));
873 1.37 perseant offset = startoffset;
874 1.37 perseant if (offset != endpseg) {
875 1.37 perseant /* Don't overwrite what we're trying to preserve */
876 1.37 perseant lfs_sb_setoffset(fs, endpseg);
877 1.37 perseant lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, endpseg)));
878 1.37 perseant for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
879 1.37 perseant sn = (sn + 1) % lfs_sb_getnseg(fs);
880 1.37 perseant /* XXX could we just fail to roll forward? */
881 1.37 perseant if (sn == curseg)
882 1.37 perseant panic("lfs_mountfs: no clean segments");
883 1.37 perseant LFS_SEGENTRY(sup, fs, sn, bp);
884 1.37 perseant dirty = (sup->su_flags & SEGUSE_DIRTY);
885 1.37 perseant brelse(bp, 0);
886 1.37 perseant if (!dirty)
887 1.37 perseant break;
888 1.37 perseant }
889 1.37 perseant lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
890 1.37 perseant /* Explicitly set this segment dirty */
891 1.37 perseant LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
892 1.37 perseant sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
893 1.37 perseant LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, endpseg), bp);
894 1.37 perseant
895 1.37 perseant
896 1.1 perseant /*
897 1.37 perseant * Phase II: Identify the highest generation of each
898 1.37 perseant * inode.
899 1.37 perseant *
900 1.37 perseant * Phase III: Update inodes. We end up with the
901 1.37 perseant * last version of each inode present, and can ignore
902 1.37 perseant * data blocks belonging to previous versions.
903 1.37 perseant *
904 1.37 perseant * Phase IV: Roll forward, updating data blocks.
905 1.1 perseant */
906 1.37 perseant for (phase = CHECK_GEN; phase <= CHECK_DATA; ++phase) {
907 1.37 perseant offset = startoffset;
908 1.37 perseant nextserial = startserial + 1;
909 1.37 perseant printf("LFS roll forward phase %d beginning\n", phase);
910 1.37 perseant while (offset > 0 && offset != endpseg) {
911 1.37 perseant if (phase == CHECK_DATA) {
912 1.37 perseant DLOG((DLOG_RF, "LFS roll forward"
913 1.37 perseant " phase %d: offset=0x%jx"
914 1.37 perseant " serial=0x%jx\n",
915 1.37 perseant phase, (intmax_t)offset,
916 1.37 perseant (intmax_t)nextserial));
917 1.8 he }
918 1.37 perseant offset = check_segsum(fs, offset,
919 1.37 perseant nextserial, cred,
920 1.37 perseant phase, NULL, l);
921 1.37 perseant ++nextserial;
922 1.37 perseant DEBUG_CHECK_FREELIST(fs);
923 1.1 perseant }
924 1.37 perseant }
925 1.1 perseant
926 1.37 perseant /*
927 1.37 perseant * Finish: flush our changes to disk.
928 1.37 perseant */
929 1.37 perseant lfs_sb_setserial(fs, endserial);
930 1.1 perseant
931 1.37 perseant lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
932 1.37 perseant DLOG((DLOG_RF, "lfs_mountfs: roll forward "
933 1.37 perseant "examined %jd blocks\n",
934 1.37 perseant (intmax_t)(endpseg - startoffset)));
935 1.1 perseant }
936 1.37 perseant
937 1.37 perseant /* Get rid of our vnodes, except the ifile */
938 1.37 perseant drop_vnode_pages(mp, l);
939 1.37 perseant DLOG((DLOG_RF, "LFS roll forward complete\n"));
940 1.37 perseant printf("%s: roll forward recovered %d data blocks\n",
941 1.37 perseant lfs_sb_getfsmnt(fs), rblkcnt);
942 1.37 perseant
943 1.37 perseant /*
944 1.37 perseant * At this point we have no more changes to write to disk.
945 1.37 perseant * Reset the "avail" count to match the segments as they
946 1.37 perseant * appear on disk, and the clean segment count.
947 1.37 perseant */
948 1.37 perseant lfs_reset_avail(fs);
949 1.1 perseant }
950 1.37 perseant
/*
 * Vnode selector that accepts every vnode it is offered.
 * Neither the closure argument nor the vnode is examined.
 */
static bool
all_selector(void *cl, struct vnode *vp)
{
	(void)cl;
	(void)vp;

	return true;
}
956 1.37 perseant
957 1.37 perseant
958 1.37 perseant /*
959 1.37 perseant * Dump any pages from vnodes that may have been put on
960 1.37 perseant * during truncation.
961 1.37 perseant */
962 1.37 perseant static void
963 1.37 perseant drop_vnode_pages(struct mount *mp, struct lwp *l)
964 1.37 perseant {
965 1.37 perseant struct vnode_iterator *marker;
966 1.37 perseant struct lfs *fs;
967 1.37 perseant struct vnode *vp;
968 1.37 perseant
969 1.37 perseant fs = VFSTOULFS(mp)->um_lfs;
970 1.37 perseant vfs_vnode_iterator_init(mp, &marker);
971 1.37 perseant while ((vp = vfs_vnode_iterator_next(marker,
972 1.37 perseant all_selector, NULL)) != NULL) {
973 1.37 perseant if (vp == fs->lfs_ivnode)
974 1.37 perseant continue;
975 1.37 perseant VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
976 1.37 perseant uvm_vnp_setsize(vp, 0);
977 1.37 perseant uvm_vnp_setsize(vp, VTOI(vp)->i_size);
978 1.37 perseant VOP_UNLOCK(vp);
979 1.37 perseant vrele(vp);
980 1.37 perseant }
981 1.37 perseant vfs_vnode_iterator_destroy(marker);
982 1.37 perseant }
983 1.37 perseant
984