lfs_kclean.c revision 1.1 1 1.1 perseant /* $NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $ */
2 1.1 perseant
3 1.1 perseant /*-
4 1.1 perseant * Copyright (c) 2025 The NetBSD Foundation, Inc.
5 1.1 perseant * All rights reserved.
6 1.1 perseant *
7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.1 perseant *
10 1.1 perseant * Redistribution and use in source and binary forms, with or without
11 1.1 perseant * modification, are permitted provided that the following conditions
12 1.1 perseant * are met:
13 1.1 perseant * 1. Redistributions of source code must retain the above copyright
14 1.1 perseant * notice, this list of conditions and the following disclaimer.
15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 perseant * notice, this list of conditions and the following disclaimer in the
17 1.1 perseant * documentation and/or other materials provided with the distribution.
18 1.1 perseant *
19 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 perseant * POSSIBILITY OF SUCH DAMAGE.
30 1.1 perseant */
31 1.1 perseant
32 1.1 perseant #include <sys/cdefs.h>
33 1.1 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $");
34 1.1 perseant
35 1.1 perseant #include <sys/param.h>
36 1.1 perseant #include <sys/systm.h>
37 1.1 perseant #include <sys/namei.h>
38 1.1 perseant #include <sys/proc.h>
39 1.1 perseant #include <sys/kernel.h>
40 1.1 perseant #include <sys/vnode.h>
41 1.1 perseant #include <sys/conf.h>
42 1.1 perseant #include <sys/kauth.h>
43 1.1 perseant #include <sys/buf.h>
44 1.1 perseant #include <sys/kthread.h>
45 1.1 perseant
46 1.1 perseant #include <ufs/lfs/ulfs_inode.h>
47 1.1 perseant #include <ufs/lfs/ulfsmount.h>
48 1.1 perseant #include <ufs/lfs/ulfs_extern.h>
49 1.1 perseant
50 1.1 perseant #include <ufs/lfs/lfs.h>
51 1.1 perseant #include <ufs/lfs/lfs_accessors.h>
52 1.1 perseant #include <ufs/lfs/lfs_kernel.h>
53 1.1 perseant #include <ufs/lfs/lfs_extern.h>
54 1.1 perseant
55 1.1 perseant static int ino_func_setclean(struct lfs_inofuncarg *);
56 1.1 perseant static int finfo_func_rewrite(struct lfs_finfofuncarg *);
57 1.1 perseant static int finfo_func_setclean(struct lfs_finfofuncarg *);
58 1.1 perseant static int rewrite_block(struct lfs *, struct vnode *, daddr_t, daddr_t,
59 1.1 perseant size_t, int *);
60 1.1 perseant
61 1.1 perseant static int clean(struct lfs *);
62 1.1 perseant static long segselect_cb_rosenblum(struct lfs *, int, SEGUSE *, long);
63 1.1 perseant static long segselect_greedy(struct lfs *, int, SEGUSE *);
64 1.1 perseant static long segselect_cb_time(struct lfs *, int, SEGUSE *);
65 1.1 perseant #if 0
66 1.1 perseant static long segselect_cb_serial(struct lfs *, int, SEGUSE *);
67 1.1 perseant #endif
68 1.1 perseant
69 1.1 perseant struct lwp * lfs_cleaner_daemon = NULL;
70 1.1 perseant extern kcondvar_t lfs_allclean_wakeup;
71 1.1 perseant static int lfs_ncleaners = 0;
72 1.1 perseant
73 1.1 perseant static int
74 1.1 perseant ino_func_setclean(struct lfs_inofuncarg *lifa)
75 1.1 perseant {
76 1.1 perseant struct lfs *fs;
77 1.1 perseant daddr_t offset;
78 1.1 perseant struct vnode *devvp, *vp;
79 1.1 perseant union lfs_dinode *dip;
80 1.1 perseant struct buf *dbp, *ibp;
81 1.1 perseant int error;
82 1.1 perseant IFILE *ifp;
83 1.1 perseant unsigned i, num;
84 1.1 perseant daddr_t true_addr;
85 1.1 perseant ino_t ino;
86 1.1 perseant
87 1.1 perseant fs = lifa->fs;
88 1.1 perseant offset = lifa->offset;
89 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
90 1.1 perseant
91 1.1 perseant /* Read inode block */
92 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
93 1.1 perseant 0, &dbp);
94 1.1 perseant if (error) {
95 1.1 perseant DLOG((DLOG_RF, "ino_func_setclean: bread returned %d\n",
96 1.1 perseant error));
97 1.1 perseant return error;
98 1.1 perseant }
99 1.1 perseant memcpy(lifa->buf, dbp->b_data, dbp->b_bcount);
100 1.1 perseant brelse(dbp, BC_AGE);
101 1.1 perseant
102 1.1 perseant /* Check each inode against ifile entry */
103 1.1 perseant num = LFS_INOPB(fs);
104 1.1 perseant for (i = num; i-- > 0; ) {
105 1.1 perseant dip = DINO_IN_BLOCK(fs, lifa->buf, i);
106 1.1 perseant ino = lfs_dino_getinumber(fs, dip);
107 1.1 perseant if (ino == LFS_IFILE_INUM) {
108 1.1 perseant /* Check address against superblock */
109 1.1 perseant true_addr = lfs_sb_getidaddr(fs);
110 1.1 perseant } else {
111 1.1 perseant /* Not ifile. Check address against ifile. */
112 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp);
113 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp);
114 1.1 perseant brelse(ibp, 0);
115 1.1 perseant }
116 1.1 perseant if (offset != true_addr)
117 1.1 perseant continue;
118 1.1 perseant
119 1.1 perseant LFS_ASSERT_MAXINO(fs, ino);
120 1.1 perseant
121 1.1 perseant /* XXX We can use fastvget here! */
122 1.1 perseant
123 1.1 perseant /*
124 1.1 perseant * An inode we need to relocate.
125 1.1 perseant * Get it if we can.
126 1.1 perseant */
127 1.1 perseant if (ino == LFS_IFILE_INUM)
128 1.1 perseant vp = fs->lfs_ivnode;
129 1.1 perseant else
130 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
131 1.1 perseant LK_EXCLUSIVE | LK_NOWAIT, &vp);
132 1.1 perseant if (error)
133 1.1 perseant continue;
134 1.1 perseant
135 1.1 perseant KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip));
136 1.1 perseant lfs_setclean(fs, vp);
137 1.1 perseant if (vp != fs->lfs_ivnode) {
138 1.1 perseant VOP_UNLOCK(vp);
139 1.1 perseant vrele(vp);
140 1.1 perseant }
141 1.1 perseant }
142 1.1 perseant
143 1.1 perseant return error;
144 1.1 perseant }
145 1.1 perseant
146 1.1 perseant static int
147 1.1 perseant ino_func_rewrite(struct lfs_inofuncarg *lifa)
148 1.1 perseant {
149 1.1 perseant struct lfs *fs;
150 1.1 perseant daddr_t offset;
151 1.1 perseant struct vnode *devvp, *vp;
152 1.1 perseant union lfs_dinode *dip;
153 1.1 perseant struct buf *dbp, *ibp;
154 1.1 perseant int error;
155 1.1 perseant IFILE *ifp;
156 1.1 perseant unsigned i, num;
157 1.1 perseant daddr_t true_addr;
158 1.1 perseant ino_t ino;
159 1.1 perseant
160 1.1 perseant fs = lifa->fs;
161 1.1 perseant offset = lifa->offset;
162 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
163 1.1 perseant
164 1.1 perseant /* Read inode block */
165 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
166 1.1 perseant 0, &dbp);
167 1.1 perseant if (error) {
168 1.1 perseant DLOG((DLOG_RF, "ino_func_rewrite: bread returned %d\n",
169 1.1 perseant error));
170 1.1 perseant return error;
171 1.1 perseant }
172 1.1 perseant memcpy(lifa->buf, dbp->b_data, dbp->b_bcount);
173 1.1 perseant brelse(dbp, BC_AGE);
174 1.1 perseant
175 1.1 perseant /* Check each inode against ifile entry */
176 1.1 perseant num = LFS_INOPB(fs);
177 1.1 perseant for (i = num; i-- > 0; ) {
178 1.1 perseant dip = DINO_IN_BLOCK(fs, lifa->buf, i);
179 1.1 perseant ino = lfs_dino_getinumber(fs, dip);
180 1.1 perseant if (ino == LFS_IFILE_INUM) {
181 1.1 perseant /* Check address against superblock */
182 1.1 perseant true_addr = lfs_sb_getidaddr(fs);
183 1.1 perseant } else {
184 1.1 perseant /* Not ifile. Check address against ifile. */
185 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp);
186 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp);
187 1.1 perseant brelse(ibp, 0);
188 1.1 perseant }
189 1.1 perseant if (offset != true_addr)
190 1.1 perseant continue;
191 1.1 perseant
192 1.1 perseant if (ino == LFS_IFILE_INUM)
193 1.1 perseant continue;
194 1.1 perseant
195 1.1 perseant LFS_ASSERT_MAXINO(fs, ino);
196 1.1 perseant
197 1.1 perseant /* XXX We can use fastvget here! */
198 1.1 perseant
199 1.1 perseant /*
200 1.1 perseant * An inode we need to relocate.
201 1.1 perseant * Get it if we can.
202 1.1 perseant */
203 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
204 1.1 perseant LK_EXCLUSIVE | LK_NOWAIT, &vp);
205 1.1 perseant if (error)
206 1.1 perseant continue;
207 1.1 perseant
208 1.1 perseant KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip));
209 1.1 perseant
210 1.1 perseant if (!(VTOI(vp)->i_state & IN_CLEANING)) {
211 1.1 perseant lfs_setclean(fs, vp);
212 1.1 perseant lfs_writeinode(fs, fs->lfs_sp, VTOI(vp));
213 1.1 perseant }
214 1.1 perseant
215 1.1 perseant VOP_UNLOCK(vp);
216 1.1 perseant vrele(vp);
217 1.1 perseant
218 1.1 perseant }
219 1.1 perseant
220 1.1 perseant return error;
221 1.1 perseant }
222 1.1 perseant
223 1.1 perseant static int
224 1.1 perseant rewrite_block(struct lfs *fs, struct vnode *vp, daddr_t lbn, daddr_t offset, size_t size, int *have_finfop)
225 1.1 perseant {
226 1.1 perseant daddr_t daddr;
227 1.1 perseant int error;
228 1.1 perseant struct buf *bp;
229 1.1 perseant struct inode *ip;
230 1.1 perseant
231 1.1 perseant KASSERT(have_finfop != NULL);
232 1.1 perseant
233 1.1 perseant /* Look up current location of this block. */
234 1.1 perseant error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL);
235 1.1 perseant if (error)
236 1.1 perseant return error;
237 1.1 perseant
238 1.1 perseant /* Skip any block that is not here. */
239 1.1 perseant if (offset != 0 && LFS_DBTOFSB(fs, daddr) != offset)
240 1.1 perseant return ESTALE;
241 1.1 perseant
242 1.1 perseant /*
243 1.1 perseant * It is (was recently) here. Read the block.
244 1.1 perseant */
245 1.1 perseant //size = lfs_blksize(fs, VTOI(vp), lbn);
246 1.1 perseant error = bread(vp, lbn, size, 0, &bp);
247 1.1 perseant if (error)
248 1.1 perseant return error;
249 1.1 perseant
250 1.1 perseant if (vp == fs->lfs_ivnode) {
251 1.1 perseant VOP_BWRITE(vp, bp);
252 1.1 perseant } else {
253 1.1 perseant /* Get ready to write. */
254 1.1 perseant if (!*have_finfop) {
255 1.1 perseant ip = VTOI(vp);
256 1.1 perseant lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
257 1.1 perseant fs->lfs_sp->vp = vp;
258 1.1 perseant *have_finfop = 1;
259 1.1 perseant }
260 1.1 perseant
261 1.1 perseant KASSERT(bp->b_vp == vp);
262 1.1 perseant /* bp->b_cflags |= BC_INVAL; */ /* brelse will kill the buffer */
263 1.1 perseant lfs_bwrite_ext(bp, BW_CLEAN);
264 1.1 perseant KASSERT(bp->b_vp == vp);
265 1.1 perseant mutex_enter(&bufcache_lock);
266 1.1 perseant while (lfs_gatherblock(fs->lfs_sp, bp, &bufcache_lock)) {
267 1.1 perseant KASSERT(bp->b_vp != NULL);
268 1.1 perseant }
269 1.1 perseant mutex_exit(&bufcache_lock);
270 1.1 perseant
271 1.1 perseant KASSERT(bp->b_flags & B_GATHERED);
272 1.1 perseant KASSERT(fs->lfs_sp->cbpp[-1] == bp);
273 1.1 perseant }
274 1.1 perseant return 0;
275 1.1 perseant }
276 1.1 perseant
277 1.1 perseant static int
278 1.1 perseant finfo_func_rewrite(struct lfs_finfofuncarg *lffa)
279 1.1 perseant {
280 1.1 perseant struct lfs *fs;
281 1.1 perseant FINFO *fip;
282 1.1 perseant daddr_t *offsetp;
283 1.1 perseant int j, have_finfo, error;
284 1.1 perseant size_t size, bytes;
285 1.1 perseant ino_t ino;
286 1.1 perseant uint32_t gen;
287 1.1 perseant struct vnode *vp;
288 1.1 perseant daddr_t lbn;
289 1.1 perseant int *fragsp;
290 1.1 perseant
291 1.1 perseant fs = lffa->fs;
292 1.1 perseant fip = lffa->finfop;
293 1.1 perseant offsetp = lffa->offsetp;
294 1.1 perseant fragsp = (int *)lffa->arg;
295 1.1 perseant
296 1.1 perseant /* Get the inode and check its version. */
297 1.1 perseant ino = lfs_fi_getino(fs, fip);
298 1.1 perseant gen = lfs_fi_getversion(fs, fip);
299 1.1 perseant error = 0;
300 1.1 perseant if (ino == LFS_IFILE_INUM)
301 1.1 perseant vp = fs->lfs_ivnode;
302 1.1 perseant else {
303 1.1 perseant LFS_ASSERT_MAXINO(fs, ino);
304 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
305 1.1 perseant LK_EXCLUSIVE|LK_NOWAIT, &vp);
306 1.1 perseant }
307 1.1 perseant
308 1.1 perseant /*
309 1.1 perseant * If we can't, or if version is wrong, or it has dirop blocks on it,
310 1.1 perseant * we can't relocate its blocks; but we still have to count
311 1.1 perseant * blocks through the partial segment to return the right offset.
312 1.1 perseant * XXX actually we can move DIROP vnodes' *old* data, as long
313 1.1 perseant * XXX as we are sure that we are moving *only* the old data---?
314 1.1 perseant */
315 1.1 perseant if (error || VTOI(vp)->i_gen != gen || (vp->v_uflag & VU_DIROP)) {
316 1.1 perseant if (error == 0)
317 1.1 perseant error = ESTALE;
318 1.1 perseant
319 1.1 perseant if (vp != NULL && vp != fs->lfs_ivnode) {
320 1.1 perseant VOP_UNLOCK(vp);
321 1.1 perseant vrele(vp);
322 1.1 perseant }
323 1.1 perseant vp = NULL;
324 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs))
325 1.1 perseant + lfs_fi_getlastlength(fs, fip);
326 1.1 perseant *offsetp += lfs_btofsb(fs, bytes);
327 1.1 perseant
328 1.1 perseant return error;
329 1.1 perseant }
330 1.1 perseant
331 1.1 perseant /*
332 1.1 perseant * We have the vnode and its version is correct.
333 1.1 perseant * Take a cleaning reference; and loop through the blocks
334 1.1 perseant * and rewrite them.
335 1.1 perseant */
336 1.1 perseant lfs_setclean(fs, vp);
337 1.1 perseant size = lfs_sb_getbsize(fs);
338 1.1 perseant have_finfo = 0;
339 1.1 perseant for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
340 1.1 perseant if (j == lfs_fi_getnblocks(fs, fip) - 1)
341 1.1 perseant size = lfs_fi_getlastlength(fs, fip);
342 1.1 perseant /*
343 1.1 perseant * An error of ESTALE indicates that there was nothing
344 1.1 perseant * to rewrite; this is not a problem. Any other error
345 1.1 perseant * causes us to skip the rest of this FINFO.
346 1.1 perseant */
347 1.1 perseant if (vp != NULL && error == 0) {
348 1.1 perseant lbn = lfs_fi_getblock(fs, fip, j);
349 1.1 perseant error = rewrite_block(fs, vp, lbn, *offsetp,
350 1.1 perseant size, &have_finfo);
351 1.1 perseant if (error == ESTALE)
352 1.1 perseant error = 0;
353 1.1 perseant if (fragsp != NULL && error == 0)
354 1.1 perseant *fragsp += lfs_btofsb(fs, size);
355 1.1 perseant }
356 1.1 perseant *offsetp += lfs_btofsb(fs, size);
357 1.1 perseant }
358 1.1 perseant
359 1.1 perseant /*
360 1.1 perseant * If we acquired finfo, release it and write the blocks.
361 1.1 perseant */
362 1.1 perseant if (have_finfo) {
363 1.1 perseant lfs_updatemeta(fs->lfs_sp);
364 1.1 perseant fs->lfs_sp->vp = NULL;
365 1.1 perseant lfs_release_finfo(fs);
366 1.1 perseant lfs_writeinode(fs, fs->lfs_sp, VTOI(vp));
367 1.1 perseant }
368 1.1 perseant
369 1.1 perseant /* Release vnode */
370 1.1 perseant if (vp != fs->lfs_ivnode) {
371 1.1 perseant VOP_UNLOCK(vp);
372 1.1 perseant vrele(vp);
373 1.1 perseant }
374 1.1 perseant
375 1.1 perseant return error;
376 1.1 perseant }
377 1.1 perseant
378 1.1 perseant static int
379 1.1 perseant finfo_func_setclean(struct lfs_finfofuncarg *lffa)
380 1.1 perseant {
381 1.1 perseant struct lfs *fs;
382 1.1 perseant FINFO *fip;
383 1.1 perseant daddr_t *offsetp;
384 1.1 perseant int error;
385 1.1 perseant size_t bytes;
386 1.1 perseant ino_t ino;
387 1.1 perseant uint32_t gen;
388 1.1 perseant struct vnode *vp;
389 1.1 perseant
390 1.1 perseant fs = lffa->fs;
391 1.1 perseant fip = lffa->finfop;
392 1.1 perseant offsetp = lffa->offsetp;
393 1.1 perseant
394 1.1 perseant /* Get the inode and check its version. */
395 1.1 perseant ino = lfs_fi_getino(fs, fip);
396 1.1 perseant gen = lfs_fi_getversion(fs, fip);
397 1.1 perseant error = 0;
398 1.1 perseant if (ino == LFS_IFILE_INUM)
399 1.1 perseant vp = fs->lfs_ivnode;
400 1.1 perseant else {
401 1.1 perseant LFS_ASSERT_MAXINO(fs, ino);
402 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
403 1.1 perseant LK_EXCLUSIVE|LK_NOWAIT, &vp);
404 1.1 perseant }
405 1.1 perseant
406 1.1 perseant /* If we have it and its version is right, take a cleaning reference */
407 1.1 perseant if (error == 0 && VTOI(vp)->i_gen == gen)
408 1.1 perseant lfs_setclean(fs, vp);
409 1.1 perseant
410 1.1 perseant if (vp == fs->lfs_ivnode)
411 1.1 perseant vp = NULL;
412 1.1 perseant else if (vp != NULL) {
413 1.1 perseant VOP_UNLOCK(vp);
414 1.1 perseant vrele(vp);
415 1.1 perseant vp = NULL;
416 1.1 perseant }
417 1.1 perseant
418 1.1 perseant /* Skip to the next block */
419 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs))
420 1.1 perseant + lfs_fi_getlastlength(fs, fip);
421 1.1 perseant *offsetp += lfs_btofsb(fs, bytes);
422 1.1 perseant
423 1.1 perseant return error;
424 1.1 perseant }
425 1.1 perseant
426 1.1 perseant /*
427 1.1 perseant * Use the partial-segment parser to rewrite (clean) a segment.
428 1.1 perseant */
429 1.1 perseant int
430 1.1 perseant lfs_rewrite_segment(struct lfs *fs, int sn, int *fragsp, kauth_cred_t cred, struct lwp *l)
431 1.1 perseant {
432 1.1 perseant daddr_t ooffset, offset, endpseg;
433 1.1 perseant
434 1.1 perseant ASSERT_SEGLOCK(fs);
435 1.1 perseant
436 1.1 perseant offset = lfs_sntod(fs, sn);
437 1.1 perseant lfs_skip_superblock(fs, &offset);
438 1.1 perseant endpseg = lfs_sntod(fs, sn + 1);
439 1.1 perseant
440 1.1 perseant while (offset > 0 && offset != endpseg) {
441 1.1 perseant /* First check summary validity (XXX unnecessary?) */
442 1.1 perseant ooffset = offset;
443 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
444 1.1 perseant NULL, NULL, CKSEG_CKSUM, NULL);
445 1.1 perseant if (offset == ooffset)
446 1.1 perseant break;
447 1.1 perseant
448 1.1 perseant /*
449 1.1 perseant * Valid, proceed.
450 1.1 perseant *
451 1.1 perseant * First write the file blocks, marking their
452 1.1 perseant * inodes IN_CLEANING.
453 1.1 perseant */
454 1.1 perseant offset = ooffset;
455 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
456 1.1 perseant NULL, finfo_func_rewrite,
457 1.1 perseant CKSEG_NONE, fragsp);
458 1.1 perseant
459 1.1 perseant /*
460 1.1 perseant * Now go back and pick up any inodes that
461 1.1 perseant * were not already marked IN_CLEANING, and
462 1.1 perseant * write them as well.
463 1.1 perseant */
464 1.1 perseant offset = ooffset;
465 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
466 1.1 perseant ino_func_rewrite, NULL,
467 1.1 perseant CKSEG_NONE, fragsp);
468 1.1 perseant }
469 1.1 perseant return 0;
470 1.1 perseant }
471 1.1 perseant
472 1.1 perseant /*
473 1.1 perseant * Rewrite the contents of one or more segments, in preparation for
474 1.1 perseant * marking them clean.
475 1.1 perseant */
476 1.1 perseant int
477 1.1 perseant lfs_rewrite_segments(struct lfs *fs, int *snn, int len, int *directp, int *offsetp, struct lwp *l)
478 1.1 perseant {
479 1.1 perseant kauth_cred_t cred;
480 1.1 perseant int i, error;
481 1.1 perseant struct buf *bp;
482 1.1 perseant SEGUSE *sup;
483 1.1 perseant daddr_t offset, endpseg;
484 1.1 perseant
485 1.1 perseant ASSERT_NO_SEGLOCK(fs);
486 1.1 perseant
487 1.1 perseant cred = l ? l->l_cred : NOCRED;
488 1.1 perseant
489 1.1 perseant /* Prevent new dirops and acquire the cleaner lock. */
490 1.1 perseant lfs_writer_enter(fs, "rewritesegs");
491 1.1 perseant if ((error = lfs_cleanerlock(fs)) != 0) {
492 1.1 perseant lfs_writer_leave(fs);
493 1.1 perseant return error;
494 1.1 perseant }
495 1.1 perseant
496 1.1 perseant /*
497 1.1 perseant * Pre-reference vnodes now that we have cleaner lock
498 1.1 perseant * but before we take the segment lock. We don't want to
499 1.1 perseant * mix cleaning blocks with flushed vnodes.
500 1.1 perseant */
501 1.1 perseant for (i = 0; i < len; i++) {
502 1.1 perseant error = 0;
503 1.1 perseant /* Refuse to clean segments that are ACTIVE */
504 1.1 perseant LFS_SEGENTRY(sup, fs, snn[i], bp);
505 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE
506 1.1 perseant || !(sup->su_flags & SEGUSE_DIRTY))
507 1.1 perseant error = EINVAL;
508 1.1 perseant
509 1.1 perseant brelse(bp, 0);
510 1.1 perseant if (error)
511 1.1 perseant break;
512 1.1 perseant
513 1.1 perseant offset = lfs_sntod(fs, snn[i]);
514 1.1 perseant lfs_skip_superblock(fs, &offset);
515 1.1 perseant endpseg = lfs_sntod(fs, snn[i] + 1);
516 1.1 perseant
517 1.1 perseant while (offset > 0 && offset != endpseg) {
518 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
519 1.1 perseant ino_func_setclean, finfo_func_setclean,
520 1.1 perseant CKSEG_NONE, NULL);
521 1.1 perseant }
522 1.1 perseant }
523 1.1 perseant
524 1.1 perseant /*
525 1.1 perseant * Actually rewrite the contents of the segment.
526 1.1 perseant */
527 1.1 perseant lfs_seglock(fs, SEGM_CLEAN);
528 1.1 perseant
529 1.1 perseant for (i = 0; i < len; i++) {
530 1.1 perseant error = 0;
531 1.1 perseant /* Refuse to clean segments that are ACTIVE */
532 1.1 perseant LFS_SEGENTRY(sup, fs, snn[i], bp);
533 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE
534 1.1 perseant || !(sup->su_flags & SEGUSE_DIRTY))
535 1.1 perseant error = EINVAL;
536 1.1 perseant
537 1.1 perseant brelse(bp, 0);
538 1.1 perseant if (error)
539 1.1 perseant break;
540 1.1 perseant
541 1.1 perseant error = lfs_rewrite_segment(fs, snn[i], directp, cred, l);
542 1.1 perseant if (error) {
543 1.1 perseant printf(" rewrite_segment returned %d\n", error);
544 1.1 perseant break;
545 1.1 perseant }
546 1.1 perseant }
547 1.1 perseant while (lfs_writeseg(fs, fs->lfs_sp))
548 1.1 perseant ;
549 1.1 perseant
550 1.1 perseant *offsetp = lfs_btofsb(fs, fs->lfs_sp->bytes_written);
551 1.1 perseant lfs_segunlock(fs);
552 1.1 perseant lfs_cleanerunlock(fs);
553 1.1 perseant lfs_writer_leave(fs);
554 1.1 perseant
555 1.1 perseant return error;
556 1.1 perseant }
557 1.1 perseant
#if 0
/*
 * NOTE: everything down to the matching #endif is compiled out.
 */

/*
 * Sequentiality predicate passed to ulfs_bmaparray() below: true when
 * lbn2 equals lbn1 plus the value returned by lfs_sb_getfrag().
 */
static bool
lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2)
{
	return lbn2 == lbn1 + lfs_sb_getfrag(__UNCONST(fs));
}

/*
 * Rewrite the contents of a file in order to coalesce it.
 * We don't bother rewriting indirect blocks because they will have to
 * be rewritten anyway when we rewrite the direct blocks.
 *
 * NOTE(review): this disabled code does not match the current helpers:
 *  - rewrite_block() is called with a NULL have_finfop, which
 *    rewrite_block() asserts against;
 *  - the function asserts that the segment lock is already held on
 *    entry, yet calls lfs_segunlock() before returning;
 *  - the error from ulfs_bmaparray() is overwritten without being
 *    checked, and the vnode from VFS_VGET is never unlocked/released.
 * Resolve these before re-enabling.
 */
int
lfs_rewrite_file(struct lfs *fs, ino_t ino, struct lwp *l)
{
	daddr_t lbn, hiblk, daddr;
	int i, error, num, run;
	struct vnode *vp;
	struct indir indirs[ULFS_NIADDR+2];
	size_t size;

	ASSERT_SEGLOCK(fs);

	LFS_ASSERT_MAXINO(fs, ino);

	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
	if (error)
		return error;

	lfs_acquire_finfo(fs, ino, VTOI(vp)->i_gen);
	for (lbn = 0, hiblk = VTOI(vp)->i_lfs_hiblk; lbn < hiblk; ++lbn) {
		error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, &run,
		    lfs_isseq);
		/* Holes have nothing to rewrite. */
		if (daddr == UNASSIGNED)
			continue;
		/* Rewrite the whole run reported by ulfs_bmaparray(). */
		for (i = 0; i <= run; i++) {
			size = lfs_blksize(fs, VTOI(vp), lbn);
			error = rewrite_block(fs, vp, lbn++, 0x0, size, NULL);
			if (error)
				break;
		}
	}
	lfs_release_finfo(fs);
	while (lfs_writeseg(fs, fs->lfs_sp))
		;
	lfs_segunlock(fs);

	return error;
}
#endif /* 0 */
608 1.1 perseant
609 1.1 perseant
610 1.1 perseant static int
611 1.1 perseant ino_func_checkempty(struct lfs_inofuncarg *lifa)
612 1.1 perseant {
613 1.1 perseant struct lfs *fs;
614 1.1 perseant daddr_t offset;
615 1.1 perseant struct vnode *devvp;
616 1.1 perseant union lfs_dinode *dip;
617 1.1 perseant struct buf *dbp, *ibp;
618 1.1 perseant int error;
619 1.1 perseant IFILE *ifp;
620 1.1 perseant unsigned i, num;
621 1.1 perseant daddr_t true_addr;
622 1.1 perseant ino_t ino;
623 1.1 perseant
624 1.1 perseant fs = lifa->fs;
625 1.1 perseant offset = lifa->offset;
626 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp;
627 1.1 perseant
628 1.1 perseant /* Read inode block */
629 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
630 1.1 perseant 0, &dbp);
631 1.1 perseant if (error) {
632 1.1 perseant DLOG((DLOG_RF, "ino_func_checkempty: bread returned %d\n",
633 1.1 perseant error));
634 1.1 perseant return error;
635 1.1 perseant }
636 1.1 perseant
637 1.1 perseant /* Check each inode against ifile entry */
638 1.1 perseant num = LFS_INOPB(fs);
639 1.1 perseant for (i = num; i-- > 0; ) {
640 1.1 perseant dip = DINO_IN_BLOCK(fs, dbp->b_data, i);
641 1.1 perseant ino = lfs_dino_getinumber(fs, dip);
642 1.1 perseant if (ino == LFS_IFILE_INUM) {
643 1.1 perseant /* Check address against superblock */
644 1.1 perseant true_addr = lfs_sb_getidaddr(fs);
645 1.1 perseant } else {
646 1.1 perseant /* Not ifile. Check address against ifile. */
647 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp);
648 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp);
649 1.1 perseant brelse(ibp, 0);
650 1.1 perseant }
651 1.1 perseant if (offset == true_addr) {
652 1.1 perseant error = EEXIST;
653 1.1 perseant break;
654 1.1 perseant }
655 1.1 perseant }
656 1.1 perseant brelse(dbp, BC_AGE);
657 1.1 perseant
658 1.1 perseant return error;
659 1.1 perseant }
660 1.1 perseant
661 1.1 perseant static int
662 1.1 perseant finfo_func_checkempty(struct lfs_finfofuncarg *lffa)
663 1.1 perseant {
664 1.1 perseant struct lfs *fs;
665 1.1 perseant FINFO *fip;
666 1.1 perseant daddr_t *offsetp;
667 1.1 perseant int j, error;
668 1.1 perseant size_t size, bytes;
669 1.1 perseant ino_t ino;
670 1.1 perseant uint32_t gen;
671 1.1 perseant struct vnode *vp;
672 1.1 perseant daddr_t lbn, daddr;
673 1.1 perseant
674 1.1 perseant fs = lffa->fs;
675 1.1 perseant fip = lffa->finfop;
676 1.1 perseant offsetp = lffa->offsetp;
677 1.1 perseant
678 1.1 perseant /* Get the inode and check its version. */
679 1.1 perseant ino = lfs_fi_getino(fs, fip);
680 1.1 perseant gen = lfs_fi_getversion(fs, fip);
681 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
682 1.1 perseant
683 1.1 perseant /*
684 1.1 perseant * If we can't, or if version is wrong, this FINFO does not refer
685 1.1 perseant * to a live file. Skip over it and continue.
686 1.1 perseant */
687 1.1 perseant if (error || VTOI(vp)->i_gen != gen) {
688 1.1 perseant if (error == 0)
689 1.1 perseant error = ESTALE;
690 1.1 perseant
691 1.1 perseant if (vp != NULL) {
692 1.1 perseant VOP_UNLOCK(vp);
693 1.1 perseant vrele(vp);
694 1.1 perseant vp = NULL;
695 1.1 perseant }
696 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1)
697 1.1 perseant << lfs_sb_getbshift(fs))
698 1.1 perseant + lfs_fi_getlastlength(fs, fip);
699 1.1 perseant *offsetp += lfs_btofsb(fs, bytes);
700 1.1 perseant
701 1.1 perseant return error;
702 1.1 perseant }
703 1.1 perseant
704 1.1 perseant /*
705 1.1 perseant * We have the vnode and its version is correct.
706 1.1 perseant * Loop through the blocks and check their currency.
707 1.1 perseant */
708 1.1 perseant size = lfs_sb_getbsize(fs);
709 1.1 perseant for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
710 1.1 perseant if (j == lfs_fi_getnblocks(fs, fip) - 1)
711 1.1 perseant size = lfs_fi_getlastlength(fs, fip);
712 1.1 perseant if (vp != NULL) {
713 1.1 perseant lbn = lfs_fi_getblock(fs, fip, j);
714 1.1 perseant
715 1.1 perseant /* Look up current location of this block. */
716 1.1 perseant error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL);
717 1.1 perseant if (error)
718 1.1 perseant break;
719 1.1 perseant
720 1.1 perseant /* If it is here, the segment is not empty. */
721 1.1 perseant if (LFS_DBTOFSB(fs, daddr) == *offsetp) {
722 1.1 perseant error = EEXIST;
723 1.1 perseant break;
724 1.1 perseant }
725 1.1 perseant }
726 1.1 perseant *offsetp += lfs_btofsb(fs, size);
727 1.1 perseant }
728 1.1 perseant
729 1.1 perseant /* Release vnode */
730 1.1 perseant VOP_UNLOCK(vp);
731 1.1 perseant vrele(vp);
732 1.1 perseant
733 1.1 perseant return error;
734 1.1 perseant }
735 1.1 perseant
736 1.1 perseant int
737 1.1 perseant lfs_checkempty(struct lfs *fs, int sn, kauth_cred_t cred, struct lwp *l)
738 1.1 perseant {
739 1.1 perseant daddr_t offset, endpseg;
740 1.1 perseant int error;
741 1.1 perseant
742 1.1 perseant ASSERT_SEGLOCK(fs);
743 1.1 perseant
744 1.1 perseant offset = lfs_sntod(fs, sn);
745 1.1 perseant lfs_skip_superblock(fs, &offset);
746 1.1 perseant endpseg = lfs_sntod(fs, sn + 1);
747 1.1 perseant
748 1.1 perseant while (offset > 0 && offset < endpseg) {
749 1.1 perseant error = lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
750 1.1 perseant ino_func_checkempty,
751 1.1 perseant finfo_func_checkempty,
752 1.1 perseant CKSEG_NONE, NULL);
753 1.1 perseant if (error)
754 1.1 perseant return error;
755 1.1 perseant }
756 1.1 perseant return 0;
757 1.1 perseant }
758 1.1 perseant
759 1.1 perseant static long
760 1.1 perseant segselect_greedy(struct lfs *fs, int sn, SEGUSE *sup)
761 1.1 perseant {
762 1.1 perseant return lfs_sb_getssize(fs) - sup->su_nbytes;
763 1.1 perseant }
764 1.1 perseant
765 1.1 perseant __inline static long
766 1.1 perseant segselect_cb_rosenblum(struct lfs *fs, int sn, SEGUSE *sup, long age)
767 1.1 perseant {
768 1.1 perseant long benefit, cost;
769 1.1 perseant
770 1.1 perseant benefit = (int64_t)lfs_sb_getssize(fs) - sup->su_nbytes -
771 1.1 perseant (sup->su_nsums + 1) * lfs_sb_getfsize(fs);
772 1.1 perseant if (sup->su_flags & SEGUSE_SUPERBLOCK)
773 1.1 perseant benefit -= LFS_SBPAD;
774 1.1 perseant if (lfs_sb_getbsize(fs) > lfs_sb_getfsize(fs)) /* fragmentation */
775 1.1 perseant benefit -= (lfs_sb_getbsize(fs) / 2);
776 1.1 perseant if (benefit <= 0) {
777 1.1 perseant return 0;
778 1.1 perseant }
779 1.1 perseant
780 1.1 perseant cost = lfs_sb_getssize(fs) + sup->su_nbytes;
781 1.1 perseant return (256 * benefit * age) / cost;
782 1.1 perseant }
783 1.1 perseant
784 1.1 perseant static long
785 1.1 perseant segselect_cb_time(struct lfs *fs, int sn, SEGUSE *sup)
786 1.1 perseant {
787 1.1 perseant long age;
788 1.1 perseant
789 1.1 perseant age = time_second - sup->su_lastmod;
790 1.1 perseant if (age < 0)
791 1.1 perseant age = 0;
792 1.1 perseant return segselect_cb_rosenblum(fs, sn, sup, age);
793 1.1 perseant }
794 1.1 perseant
#if 0
/*
 * Same as the time comparator, but fetch the serial number from the
 * segment header to compare.
 *
 * This is ugly.  Whether serial number or wall time is better is a
 * worthy question, but if we want to use serial number to compute
 * age, we should record the serial number in su_lastmod instead of
 * the time.
 *
 * Returns 0 if the summary read from the segment does not carry
 * SS_MAGIC; otherwise the cost-benefit score computed with
 * age = superblock serial - summary serial.
 *
 * NOTE(review): this code is compiled out.  The return value of bread()
 * is not checked before bp->b_data is dereferenced, and the uint64_t
 * age is passed to a `long' parameter.  Fix both before re-enabling.
 */
static long
segselect_cb_serial(struct lfs *fs, int sn, SEGUSE *sup)
{
	struct buf *bp;
	uint32_t magic;
	uint64_t age, serial;
	daddr_t addr;

	/* Read the summary at the start of the segment, past any superblock. */
	addr = lfs_segtod(fs, sn);
	lfs_skip_superblock(fs, &addr);
	bread(fs->lfs_devvp, LFS_FSBTODB(fs, addr),
	    lfs_sb_getsumsize(fs), 0, &bp);
	magic = lfs_ss_getmagic(fs, ((SEGSUM *)bp->b_data));
	serial = lfs_ss_getserial(fs, ((SEGSUM *)bp->b_data));
	brelse(bp, 0);

	if (magic != SS_MAGIC)
		return 0;

	age = lfs_sb_getserial(fs) - serial;
	return segselect_cb_rosenblum(fs, sn, sup, age);
}
#endif
828 1.1 perseant
829 1.1 perseant void
830 1.1 perseant lfs_cleanerd(void *arg)
831 1.1 perseant {
832 1.1 perseant mount_iterator_t *iter;
833 1.1 perseant struct mount *mp;
834 1.1 perseant struct lfs *fs;
835 1.1 perseant struct vfsops *vfs = NULL;
836 1.1 perseant int lfsc;
837 1.1 perseant int cleaned_something = 0;
838 1.1 perseant
839 1.1 perseant mutex_enter(&lfs_lock);
840 1.1 perseant KASSERTMSG(lfs_cleaner_daemon == NULL,
841 1.1 perseant "more than one LFS cleaner daemon");
842 1.1 perseant lfs_cleaner_daemon = curlwp;
843 1.1 perseant mutex_exit(&lfs_lock);
844 1.1 perseant
845 1.1 perseant /* Take an extra reference to the LFS vfsops. */
846 1.1 perseant vfs = vfs_getopsbyname(MOUNT_LFS);
847 1.1 perseant
848 1.1 perseant mutex_enter(&lfs_lock);
849 1.1 perseant for (;;) {
850 1.1 perseant KASSERT(mutex_owned(&lfs_lock));
851 1.1 perseant if (cleaned_something == 0)
852 1.1 perseant cv_timedwait(&lfs_allclean_wakeup, &lfs_lock, hz/10 + 1);
853 1.1 perseant KASSERT(mutex_owned(&lfs_lock));
854 1.1 perseant cleaned_something = 0;
855 1.1 perseant
856 1.1 perseant KASSERT(mutex_owned(&lfs_lock));
857 1.1 perseant mutex_exit(&lfs_lock);
858 1.1 perseant
859 1.1 perseant /*
860 1.1 perseant * Look through the list of LFSs to see if any of them
861 1.1 perseant * need cleaning.
862 1.1 perseant */
863 1.1 perseant mountlist_iterator_init(&iter);
864 1.1 perseant lfsc = 0;
865 1.1 perseant while ((mp = mountlist_iterator_next(iter)) != NULL) {
866 1.1 perseant KASSERT(!mutex_owned(&lfs_lock));
867 1.1 perseant if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS,
868 1.1 perseant sizeof(mp->mnt_stat.f_fstypename)) == 0) {
869 1.1 perseant fs = VFSTOULFS(mp)->um_lfs;
870 1.1 perseant
871 1.1 perseant mutex_enter(&lfs_lock);
872 1.1 perseant if (fs->lfs_clean_selector != NULL)
873 1.1 perseant ++lfsc;
874 1.1 perseant mutex_exit(&lfs_lock);
875 1.1 perseant cleaned_something += clean(fs);
876 1.1 perseant }
877 1.1 perseant }
878 1.1 perseant if (lfsc == 0) {
879 1.1 perseant mutex_enter(&lfs_lock);
880 1.1 perseant lfs_cleaner_daemon = NULL;
881 1.1 perseant mutex_exit(&lfs_lock);
882 1.1 perseant mountlist_iterator_destroy(iter);
883 1.1 perseant break;
884 1.1 perseant }
885 1.1 perseant mountlist_iterator_destroy(iter);
886 1.1 perseant
887 1.1 perseant mutex_enter(&lfs_lock);
888 1.1 perseant }
889 1.1 perseant KASSERT(!mutex_owned(&lfs_lock));
890 1.1 perseant
891 1.1 perseant /* Give up our extra reference so the module can be unloaded. */
892 1.1 perseant mutex_enter(&vfs_list_lock);
893 1.1 perseant if (vfs != NULL)
894 1.1 perseant vfs->vfs_refcount--;
895 1.1 perseant mutex_exit(&vfs_list_lock);
896 1.1 perseant
897 1.1 perseant /* Done! */
898 1.1 perseant kthread_exit(0);
899 1.1 perseant }
900 1.1 perseant
901 1.1 perseant /*
902 1.1 perseant * Look at the file system to see whether it needs cleaning, and if it does,
903 1.1 perseant * clean a segment.
904 1.1 perseant */
905 1.1 perseant static int
906 1.1 perseant clean(struct lfs *fs)
907 1.1 perseant {
908 1.1 perseant struct buf *bp;
909 1.1 perseant SEGUSE *sup;
910 1.1 perseant int sn, maxsn, nclean, nready, nempty, nerror, nzero, again, target;
911 1.1 perseant long prio, maxprio, maxeprio, thresh;
912 1.1 perseant long (*func)(struct lfs *, int, SEGUSE *);
913 1.1 perseant uint32_t __debugused segflags = 0;
914 1.1 perseant daddr_t oldsn, bfree, avail;
915 1.1 perseant int direct, offset;
916 1.1 perseant
917 1.1 perseant func = fs->lfs_clean_selector;
918 1.1 perseant if (func == NULL)
919 1.1 perseant return 0;
920 1.1 perseant
921 1.1 perseant thresh = fs->lfs_autoclean.thresh;
922 1.1 perseant if (fs->lfs_flags & LFS_MUSTCLEAN)
923 1.1 perseant thresh = 0;
924 1.1 perseant else if (thresh < 0) {
925 1.1 perseant /*
926 1.1 perseant * Compute a priority threshold based on availability ratio.
927 1.1 perseant * XXX These numbers only makes sense for the greedy cleaner.
928 1.1 perseant * What is an appropriate threshold for the cost-benefit
929 1.1 perseant * cleaner?
930 1.1 perseant */
931 1.1 perseant bfree = lfs_sb_getbfree(fs)
932 1.1 perseant + lfs_segtod(fs, 1) * lfs_sb_getminfree(fs);
933 1.1 perseant avail = lfs_sb_getavail(fs) - fs->lfs_ravail - fs->lfs_favail;
934 1.1 perseant if (avail > bfree)
935 1.1 perseant return 0;
936 1.1 perseant thresh = lfs_sb_getssize(fs) * (bfree - avail)
937 1.1 perseant / (lfs_sb_getsize(fs) - avail);
938 1.1 perseant if (thresh > lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs))
939 1.1 perseant thresh = lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs);
940 1.1 perseant if (thresh > lfs_sb_getssize(fs) - lfs_sb_getbsize(fs))
941 1.1 perseant return 0;
942 1.1 perseant }
943 1.1 perseant
944 1.1 perseant target = fs->lfs_autoclean.target;
945 1.1 perseant if (target <= 0) {
946 1.1 perseant /* Default to half a segment target */
947 1.1 perseant target = lfs_segtod(fs, 1) / 2;
948 1.1 perseant }
949 1.1 perseant
950 1.1 perseant oldsn = lfs_dtosn(fs, lfs_sb_getoffset(fs));
951 1.1 perseant
952 1.1 perseant again = 0;
953 1.1 perseant maxprio = maxeprio = -1;
954 1.1 perseant nzero = nclean = nready = nempty = nerror = 0;
955 1.1 perseant for (sn = 0; sn < lfs_sb_getnseg(fs); sn++) {
956 1.1 perseant
957 1.1 perseant prio = 0;
958 1.1 perseant LFS_SEGENTRY(sup, fs, sn, bp);
959 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE)
960 1.1 perseant prio = 0;
961 1.1 perseant else if (!(sup->su_flags & SEGUSE_DIRTY))
962 1.1 perseant ++nclean;
963 1.1 perseant else if (sup->su_flags & SEGUSE_READY)
964 1.1 perseant ++nready;
965 1.1 perseant else if (sup->su_flags & SEGUSE_EMPTY)
966 1.1 perseant ++nempty;
967 1.1 perseant else if (sup->su_nbytes == 0)
968 1.1 perseant ++nzero;
969 1.1 perseant else
970 1.1 perseant prio = (*func)(fs, sn, sup);
971 1.1 perseant
972 1.1 perseant if (sup->su_flags & SEGUSE_ERROR) {
973 1.1 perseant if (prio > maxeprio)
974 1.1 perseant maxeprio = prio;
975 1.1 perseant prio = 0;
976 1.1 perseant ++nerror;
977 1.1 perseant }
978 1.1 perseant
979 1.1 perseant if (prio > maxprio) {
980 1.1 perseant maxprio = prio;
981 1.1 perseant maxsn = sn;
982 1.1 perseant segflags = sup->su_flags;
983 1.1 perseant }
984 1.1 perseant brelse(bp, 0);
985 1.1 perseant }
986 1.1 perseant DLOG((DLOG_CLEAN, "%s clean=%d/%d zero=%d empty=%d ready=%d maxsn=%d maxprio=%ld/%ld segflags=0x%lx\n",
987 1.1 perseant (maxprio > thresh ? "YES" : "NO "),
988 1.1 perseant nclean, (int)lfs_sb_getnseg(fs), nzero, nempty, nready,
989 1.1 perseant maxsn, maxprio, (unsigned long)thresh,
990 1.1 perseant (unsigned long)segflags));
991 1.1 perseant
992 1.1 perseant /*
993 1.1 perseant * If we are trying to clean the segment we cleaned last,
994 1.1 perseant * cleaning did not work. Mark this segment SEGUSE_ERROR
995 1.1 perseant * and try again.
996 1.1 perseant */
997 1.1 perseant if (maxprio > 0 && fs->lfs_lastcleaned == maxsn) {
998 1.1 perseant LFS_SEGENTRY(sup, fs, maxsn, bp);
999 1.1 perseant sup->su_flags |= SEGUSE_ERROR;
1000 1.1 perseant LFS_WRITESEGENTRY(sup, fs, sn, bp);
1001 1.1 perseant return 1;
1002 1.1 perseant }
1003 1.1 perseant
1004 1.1 perseant /*
1005 1.1 perseant * If there were nothing but error segments, clear error.
1006 1.1 perseant * We will wait to try again.
1007 1.1 perseant */
1008 1.1 perseant if (maxprio == 0 && maxeprio > 0) {
1009 1.1 perseant DLOG((DLOG_CLEAN, "clear error on %d segments, try again\n",
1010 1.1 perseant nerror));
1011 1.1 perseant lfs_seguse_clrflag_all(fs, SEGUSE_ERROR);
1012 1.1 perseant }
1013 1.1 perseant
1014 1.1 perseant /* Rewrite the highest-priority segment */
1015 1.1 perseant if (maxprio > thresh) {
1016 1.1 perseant direct = offset = 0;
1017 1.1 perseant (void)lfs_rewrite_segments(fs, &maxsn, 1,
1018 1.1 perseant &direct, &offset, curlwp);
1019 1.1 perseant DLOG((DLOG_CLEAN, " direct=%d offset=%d\n", direct, offset));
1020 1.1 perseant again += direct;
1021 1.1 perseant fs->lfs_clean_accum += offset;
1022 1.1 perseant
1023 1.1 perseant /* Don't clean this again immediately */
1024 1.1 perseant fs->lfs_lastcleaned = maxsn;
1025 1.1 perseant }
1026 1.1 perseant
1027 1.1 perseant /*
1028 1.1 perseant * If we are in dire straits but we have segments already
1029 1.1 perseant * empty, force a double checkpoint to reclaim them.
1030 1.1 perseant */
1031 1.1 perseant if (fs->lfs_flags & LFS_MUSTCLEAN) {
1032 1.1 perseant if (nready + nempty > 0) {
1033 1.1 perseant printf("force checkpoint with nready=%d nempty=%d nzero=%d\n",
1034 1.1 perseant nready, nempty, nzero);
1035 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount,
1036 1.1 perseant SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC);
1037 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount,
1038 1.1 perseant SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC);
1039 1.1 perseant ++again;
1040 1.1 perseant }
1041 1.1 perseant } else if (fs->lfs_clean_accum > target) {
1042 1.1 perseant DLOG((DLOG_CLEAN, "checkpoint to flush\n"));
1043 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP);
1044 1.1 perseant fs->lfs_clean_accum = 0;
1045 1.1 perseant } else if (lfs_dtosn(fs, lfs_sb_getoffset(fs)) != oldsn
1046 1.1 perseant || nempty + nready > LFS_MAX_ACTIVE) { /* XXX arbitrary */
1047 1.1 perseant DLOG((DLOG_CLEAN, "write to promote empty segments\n"));
1048 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP);
1049 1.1 perseant fs->lfs_clean_accum = 0;
1050 1.1 perseant }
1051 1.1 perseant
1052 1.1 perseant return again;
1053 1.1 perseant }
1054 1.1 perseant
1055 1.1 perseant /*
1056 1.1 perseant * Rewrite a file in its entirety.
1057 1.1 perseant *
1058 1.1 perseant * Generally this would be done to coalesce a file that is scattered
1059 1.1 perseant * around the disk; but if the "scramble" flag is set, instead rewrite
1060 1.1 perseant * only the even-numbered blocks, which provides the opposite effect
1061 1.1 perseant * for testing purposes.
1062 1.1 perseant *
1063 1.1 perseant * It is the caller's responsibility to check the bounds of the inode
1064 1.1 perseant * numbers.
1065 1.1 perseant */
1066 1.1 perseant int
1067 1.1 perseant lfs_rewrite_file(struct lfs *fs, ino_t *inoa, int len, bool scramble,
1068 1.1 perseant int *directp, int *offsetp)
1069 1.1 perseant {
1070 1.1 perseant daddr_t hiblk, lbn;
1071 1.1 perseant struct vnode *vp;
1072 1.1 perseant struct inode *ip;
1073 1.1 perseant struct buf *bp;
1074 1.1 perseant int i, error, flags;
1075 1.1 perseant
1076 1.1 perseant *directp = 0;
1077 1.1 perseant if ((error = lfs_cleanerlock(fs)) != 0)
1078 1.1 perseant return error;
1079 1.1 perseant flags = SEGM_PROT;
1080 1.1 perseant lfs_seglock(fs, flags);
1081 1.1 perseant for (i = 0; i < len; ++i) {
1082 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, inoa[i], LK_EXCLUSIVE, &vp);
1083 1.1 perseant if (error)
1084 1.1 perseant goto out;
1085 1.1 perseant
1086 1.1 perseant ip = VTOI(vp);
1087 1.1 perseant if ((vp->v_uflag & VU_DIROP) || (ip->i_flags & IN_ADIROP)) {
1088 1.1 perseant VOP_UNLOCK(vp);
1089 1.1 perseant vrele(vp);
1090 1.1 perseant error = EAGAIN;
1091 1.1 perseant goto out;
1092 1.1 perseant }
1093 1.1 perseant
1094 1.1 perseant /* Highest block in this inode */
1095 1.1 perseant hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
1096 1.1 perseant
1097 1.1 perseant for (lbn = 0; lbn <= hiblk; ++lbn) {
1098 1.1 perseant if (scramble && (lbn & 0x01))
1099 1.1 perseant continue;
1100 1.1 perseant
1101 1.1 perseant if (lfs_needsflush(fs)) {
1102 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
1103 1.1 perseant }
1104 1.1 perseant
1105 1.1 perseant error = bread(vp, lbn, lfs_blksize(fs, ip, lbn), 0, &bp);
1106 1.1 perseant if (error)
1107 1.1 perseant break;
1108 1.1 perseant
1109 1.1 perseant /* bp->b_cflags |= BC_INVAL; */
1110 1.1 perseant lfs_bwrite_ext(bp, (flags & SEGM_CLEAN ? BW_CLEAN : 0));
1111 1.1 perseant *directp += lfs_btofsb(fs, bp->b_bcount);
1112 1.1 perseant }
1113 1.1 perseant
1114 1.1 perseant /* Done with this vnode */
1115 1.1 perseant VOP_UNLOCK(vp);
1116 1.1 perseant vrele(vp);
1117 1.1 perseant if (error)
1118 1.1 perseant break;
1119 1.1 perseant }
1120 1.1 perseant out:
1121 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
1122 1.1 perseant *offsetp += lfs_btofsb(fs, fs->lfs_sp->bytes_written);
1123 1.1 perseant lfs_segunlock(fs);
1124 1.1 perseant lfs_cleanerunlock(fs);
1125 1.1 perseant
1126 1.1 perseant return error;
1127 1.1 perseant }
1128 1.1 perseant
1129 1.1 perseant int
1130 1.1 perseant lfs_cleanctl(struct lfs *fs, struct lfs_autoclean_params *params)
1131 1.1 perseant {
1132 1.1 perseant long (*cleanfunc)(struct lfs *, int, SEGUSE *);
1133 1.1 perseant
1134 1.1 perseant fs->lfs_autoclean = *params;
1135 1.1 perseant
1136 1.1 perseant cleanfunc = NULL;
1137 1.1 perseant switch (fs->lfs_autoclean.mode) {
1138 1.1 perseant case LFS_CLEANMODE_NONE:
1139 1.1 perseant cleanfunc = NULL;
1140 1.1 perseant break;
1141 1.1 perseant
1142 1.1 perseant case LFS_CLEANMODE_GREEDY:
1143 1.1 perseant cleanfunc = segselect_greedy;
1144 1.1 perseant break;
1145 1.1 perseant
1146 1.1 perseant case LFS_CLEANMODE_CB:
1147 1.1 perseant cleanfunc = segselect_cb_time;
1148 1.1 perseant break;
1149 1.1 perseant
1150 1.1 perseant default:
1151 1.1 perseant return EINVAL;
1152 1.1 perseant }
1153 1.1 perseant
1154 1.1 perseant mutex_enter(&lfs_lock);
1155 1.1 perseant if (fs->lfs_clean_selector == NULL && cleanfunc != NULL)
1156 1.1 perseant if (++lfs_ncleaners == 1) {
1157 1.1 perseant printf("Starting cleaner thread\n");
1158 1.1 perseant if (lfs_cleaner_daemon == NULL &&
1159 1.1 perseant kthread_create(PRI_BIO, 0, NULL,
1160 1.1 perseant lfs_cleanerd, NULL, NULL,
1161 1.1 perseant "lfs_cleaner") != 0)
1162 1.1 perseant panic("fork lfs_cleaner");
1163 1.1 perseant }
1164 1.1 perseant if (fs->lfs_clean_selector != NULL && cleanfunc == NULL)
1165 1.1 perseant if (--lfs_ncleaners == 0) {
1166 1.1 perseant printf("Stopping cleaner thread\n");
1167 1.1 perseant kthread_join(lfs_cleaner_daemon);
1168 1.1 perseant }
1169 1.1 perseant fs->lfs_clean_selector = cleanfunc;
1170 1.1 perseant mutex_exit(&lfs_lock);
1171 1.1 perseant
1172 1.1 perseant return 0;
1173 1.1 perseant }
1174