1 1.1 perseant /* $NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $ */ 2 1.1 perseant 3 1.1 perseant /*- 4 1.1 perseant * Copyright (c) 2025 The NetBSD Foundation, Inc. 5 1.1 perseant * All rights reserved. 6 1.1 perseant * 7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation 8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 1.1 perseant * 10 1.1 perseant * Redistribution and use in source and binary forms, with or without 11 1.1 perseant * modification, are permitted provided that the following conditions 12 1.1 perseant * are met: 13 1.1 perseant * 1. Redistributions of source code must retain the above copyright 14 1.1 perseant * notice, this list of conditions and the following disclaimer. 15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 perseant * notice, this list of conditions and the following disclaimer in the 17 1.1 perseant * documentation and/or other materials provided with the distribution. 18 1.1 perseant * 19 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 perseant * POSSIBILITY OF SUCH DAMAGE. 30 1.1 perseant */ 31 1.1 perseant 32 1.1 perseant #include <sys/cdefs.h> 33 1.1 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $"); 34 1.1 perseant 35 1.1 perseant #include <sys/param.h> 36 1.1 perseant #include <sys/systm.h> 37 1.1 perseant #include <sys/namei.h> 38 1.1 perseant #include <sys/proc.h> 39 1.1 perseant #include <sys/kernel.h> 40 1.1 perseant #include <sys/vnode.h> 41 1.1 perseant #include <sys/conf.h> 42 1.1 perseant #include <sys/kauth.h> 43 1.1 perseant #include <sys/buf.h> 44 1.1 perseant #include <sys/kthread.h> 45 1.1 perseant 46 1.1 perseant #include <ufs/lfs/ulfs_inode.h> 47 1.1 perseant #include <ufs/lfs/ulfsmount.h> 48 1.1 perseant #include <ufs/lfs/ulfs_extern.h> 49 1.1 perseant 50 1.1 perseant #include <ufs/lfs/lfs.h> 51 1.1 perseant #include <ufs/lfs/lfs_accessors.h> 52 1.1 perseant #include <ufs/lfs/lfs_kernel.h> 53 1.1 perseant #include <ufs/lfs/lfs_extern.h> 54 1.1 perseant 55 1.1 perseant static int ino_func_setclean(struct lfs_inofuncarg *); 56 1.1 perseant static int finfo_func_rewrite(struct lfs_finfofuncarg *); 57 1.1 perseant static int finfo_func_setclean(struct lfs_finfofuncarg *); 58 1.1 perseant static int rewrite_block(struct lfs *, struct vnode *, daddr_t, daddr_t, 59 1.1 perseant size_t, int *); 60 1.1 perseant 61 1.1 perseant static int clean(struct lfs *); 62 1.1 perseant static long segselect_cb_rosenblum(struct lfs *, int, SEGUSE *, long); 63 1.1 perseant static long segselect_greedy(struct lfs *, int, SEGUSE *); 64 1.1 perseant static long segselect_cb_time(struct lfs *, int, SEGUSE *); 65 1.1 perseant #if 0 66 1.1 perseant static long segselect_cb_serial(struct lfs *, int, SEGUSE *); 67 1.1 perseant #endif 68 1.1 perseant 69 1.1 perseant struct lwp * lfs_cleaner_daemon = NULL; 70 1.1 perseant extern kcondvar_t lfs_allclean_wakeup; 71 1.1 perseant static int lfs_ncleaners = 0; 72 1.1 perseant 73 1.1 perseant static int 74 1.1 perseant ino_func_setclean(struct lfs_inofuncarg *lifa) 75 1.1 perseant { 76 1.1 perseant struct lfs *fs; 77 1.1 perseant daddr_t offset; 78 1.1 perseant struct vnode *devvp, *vp; 79 1.1 perseant union lfs_dinode *dip; 80 1.1 perseant struct buf *dbp, *ibp; 81 1.1 perseant int error; 82 1.1 perseant IFILE *ifp; 83 1.1 perseant unsigned i, num; 84 1.1 perseant daddr_t true_addr; 85 1.1 perseant ino_t ino; 86 1.1 perseant 87 1.1 perseant fs = lifa->fs; 88 1.1 perseant offset = lifa->offset; 89 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 90 1.1 perseant 91 1.1 perseant /* Read inode block */ 92 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 93 1.1 perseant 0, &dbp); 94 1.1 perseant if (error) { 95 1.1 perseant DLOG((DLOG_RF, "ino_func_setclean: bread returned %d\n", 96 1.1 perseant error)); 97 1.1 perseant return error; 98 1.1 perseant } 99 1.1 perseant memcpy(lifa->buf, dbp->b_data, dbp->b_bcount); 100 1.1 perseant brelse(dbp, BC_AGE); 101 1.1 perseant 102 1.1 perseant /* Check each inode against ifile entry */ 103 1.1 perseant num = LFS_INOPB(fs); 104 1.1 perseant for (i = num; i-- > 0; ) { 105 1.1 perseant dip = DINO_IN_BLOCK(fs, lifa->buf, i); 106 1.1 perseant ino = lfs_dino_getinumber(fs, dip); 107 1.1 perseant if (ino == LFS_IFILE_INUM) { 108 1.1 perseant /* Check address against superblock */ 109 1.1 perseant true_addr = lfs_sb_getidaddr(fs); 110 1.1 perseant } else { 111 1.1 perseant /* Not ifile. Check address against ifile. */ 112 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp); 113 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp); 114 1.1 perseant brelse(ibp, 0); 115 1.1 perseant } 116 1.1 perseant if (offset != true_addr) 117 1.1 perseant continue; 118 1.1 perseant 119 1.1 perseant LFS_ASSERT_MAXINO(fs, ino); 120 1.1 perseant 121 1.1 perseant /* XXX We can use fastvget here! */ 122 1.1 perseant 123 1.1 perseant /* 124 1.1 perseant * An inode we need to relocate. 125 1.1 perseant * Get it if we can. 126 1.1 perseant */ 127 1.1 perseant if (ino == LFS_IFILE_INUM) 128 1.1 perseant vp = fs->lfs_ivnode; 129 1.1 perseant else 130 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, 131 1.1 perseant LK_EXCLUSIVE | LK_NOWAIT, &vp); 132 1.1 perseant if (error) 133 1.1 perseant continue; 134 1.1 perseant 135 1.1 perseant KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip)); 136 1.1 perseant lfs_setclean(fs, vp); 137 1.1 perseant if (vp != fs->lfs_ivnode) { 138 1.1 perseant VOP_UNLOCK(vp); 139 1.1 perseant vrele(vp); 140 1.1 perseant } 141 1.1 perseant } 142 1.1 perseant 143 1.1 perseant return error; 144 1.1 perseant } 145 1.1 perseant 146 1.1 perseant static int 147 1.1 perseant ino_func_rewrite(struct lfs_inofuncarg *lifa) 148 1.1 perseant { 149 1.1 perseant struct lfs *fs; 150 1.1 perseant daddr_t offset; 151 1.1 perseant struct vnode *devvp, *vp; 152 1.1 perseant union lfs_dinode *dip; 153 1.1 perseant struct buf *dbp, *ibp; 154 1.1 perseant int error; 155 1.1 perseant IFILE *ifp; 156 1.1 perseant unsigned i, num; 157 1.1 perseant daddr_t true_addr; 158 1.1 perseant ino_t ino; 159 1.1 perseant 160 1.1 perseant fs = lifa->fs; 161 1.1 perseant offset = lifa->offset; 162 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 163 1.1 perseant 164 1.1 perseant /* Read inode block */ 165 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 166 1.1 perseant 0, &dbp); 167 1.1 perseant if (error) { 168 1.1 perseant DLOG((DLOG_RF, "ino_func_rewrite: bread returned %d\n", 169 1.1 perseant error)); 170 1.1 perseant return error; 171 1.1 perseant } 172 1.1 perseant memcpy(lifa->buf, dbp->b_data, dbp->b_bcount); 173 1.1 perseant brelse(dbp, BC_AGE); 174 1.1 perseant 175 1.1 perseant /* Check each inode against ifile entry */ 176 1.1 perseant num = LFS_INOPB(fs); 177 1.1 perseant for (i = num; i-- > 0; ) { 178 1.1 perseant dip = DINO_IN_BLOCK(fs, lifa->buf, i); 179 1.1 perseant ino = lfs_dino_getinumber(fs, dip); 180 1.1 perseant if (ino == LFS_IFILE_INUM) { 181 1.1 perseant /* Check address against superblock */ 182 1.1 perseant true_addr = lfs_sb_getidaddr(fs); 183 1.1 perseant } else { 184 1.1 perseant /* Not ifile. Check address against ifile. */ 185 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp); 186 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp); 187 1.1 perseant brelse(ibp, 0); 188 1.1 perseant } 189 1.1 perseant if (offset != true_addr) 190 1.1 perseant continue; 191 1.1 perseant 192 1.1 perseant if (ino == LFS_IFILE_INUM) 193 1.1 perseant continue; 194 1.1 perseant 195 1.1 perseant LFS_ASSERT_MAXINO(fs, ino); 196 1.1 perseant 197 1.1 perseant /* XXX We can use fastvget here! */ 198 1.1 perseant 199 1.1 perseant /* 200 1.1 perseant * An inode we need to relocate. 201 1.1 perseant * Get it if we can. 202 1.1 perseant */ 203 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, 204 1.1 perseant LK_EXCLUSIVE | LK_NOWAIT, &vp); 205 1.1 perseant if (error) 206 1.1 perseant continue; 207 1.1 perseant 208 1.1 perseant KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip)); 209 1.1 perseant 210 1.1 perseant if (!(VTOI(vp)->i_state & IN_CLEANING)) { 211 1.1 perseant lfs_setclean(fs, vp); 212 1.1 perseant lfs_writeinode(fs, fs->lfs_sp, VTOI(vp)); 213 1.1 perseant } 214 1.1 perseant 215 1.1 perseant VOP_UNLOCK(vp); 216 1.1 perseant vrele(vp); 217 1.1 perseant 218 1.1 perseant } 219 1.1 perseant 220 1.1 perseant return error; 221 1.1 perseant } 222 1.1 perseant 223 1.1 perseant static int 224 1.1 perseant rewrite_block(struct lfs *fs, struct vnode *vp, daddr_t lbn, daddr_t offset, size_t size, int *have_finfop) 225 1.1 perseant { 226 1.1 perseant daddr_t daddr; 227 1.1 perseant int error; 228 1.1 perseant struct buf *bp; 229 1.1 perseant struct inode *ip; 230 1.1 perseant 231 1.1 perseant KASSERT(have_finfop != NULL); 232 1.1 perseant 233 1.1 perseant /* Look up current location of this block. */ 234 1.1 perseant error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL); 235 1.1 perseant if (error) 236 1.1 perseant return error; 237 1.1 perseant 238 1.1 perseant /* Skip any block that is not here. */ 239 1.1 perseant if (offset != 0 && LFS_DBTOFSB(fs, daddr) != offset) 240 1.1 perseant return ESTALE; 241 1.1 perseant 242 1.1 perseant /* 243 1.1 perseant * It is (was recently) here. Read the block. 244 1.1 perseant */ 245 1.1 perseant //size = lfs_blksize(fs, VTOI(vp), lbn); 246 1.1 perseant error = bread(vp, lbn, size, 0, &bp); 247 1.1 perseant if (error) 248 1.1 perseant return error; 249 1.1 perseant 250 1.1 perseant if (vp == fs->lfs_ivnode) { 251 1.1 perseant VOP_BWRITE(vp, bp); 252 1.1 perseant } else { 253 1.1 perseant /* Get ready to write. */ 254 1.1 perseant if (!*have_finfop) { 255 1.1 perseant ip = VTOI(vp); 256 1.1 perseant lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); 257 1.1 perseant fs->lfs_sp->vp = vp; 258 1.1 perseant *have_finfop = 1; 259 1.1 perseant } 260 1.1 perseant 261 1.1 perseant KASSERT(bp->b_vp == vp); 262 1.1 perseant /* bp->b_cflags |= BC_INVAL; */ /* brelse will kill the buffer */ 263 1.1 perseant lfs_bwrite_ext(bp, BW_CLEAN); 264 1.1 perseant KASSERT(bp->b_vp == vp); 265 1.1 perseant mutex_enter(&bufcache_lock); 266 1.1 perseant while (lfs_gatherblock(fs->lfs_sp, bp, &bufcache_lock)) { 267 1.1 perseant KASSERT(bp->b_vp != NULL); 268 1.1 perseant } 269 1.1 perseant mutex_exit(&bufcache_lock); 270 1.1 perseant 271 1.1 perseant KASSERT(bp->b_flags & B_GATHERED); 272 1.1 perseant KASSERT(fs->lfs_sp->cbpp[-1] == bp); 273 1.1 perseant } 274 1.1 perseant return 0; 275 1.1 perseant } 276 1.1 perseant 277 1.1 perseant static int 278 1.1 perseant finfo_func_rewrite(struct lfs_finfofuncarg *lffa) 279 1.1 perseant { 280 1.1 perseant struct lfs *fs; 281 1.1 perseant FINFO *fip; 282 1.1 perseant daddr_t *offsetp; 283 1.1 perseant int j, have_finfo, error; 284 1.1 perseant size_t size, bytes; 285 1.1 perseant ino_t ino; 286 1.1 perseant uint32_t gen; 287 1.1 perseant struct vnode *vp; 288 1.1 perseant daddr_t lbn; 289 1.1 perseant int *fragsp; 290 1.1 perseant 291 1.1 perseant fs = lffa->fs; 292 1.1 perseant fip = lffa->finfop; 293 1.1 perseant offsetp = lffa->offsetp; 294 1.1 perseant fragsp = (int *)lffa->arg; 295 1.1 perseant 296 1.1 perseant /* Get the inode and check its version. */ 297 1.1 perseant ino = lfs_fi_getino(fs, fip); 298 1.1 perseant gen = lfs_fi_getversion(fs, fip); 299 1.1 perseant error = 0; 300 1.1 perseant if (ino == LFS_IFILE_INUM) 301 1.1 perseant vp = fs->lfs_ivnode; 302 1.1 perseant else { 303 1.1 perseant LFS_ASSERT_MAXINO(fs, ino); 304 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, 305 1.1 perseant LK_EXCLUSIVE|LK_NOWAIT, &vp); 306 1.1 perseant } 307 1.1 perseant 308 1.1 perseant /* 309 1.1 perseant * If we can't, or if version is wrong, or it has dirop blocks on it, 310 1.1 perseant * we can't relocate its blocks; but we still have to count 311 1.1 perseant * blocks through the partial segment to return the right offset. 312 1.1 perseant * XXX actually we can move DIROP vnodes' *old* data, as long 313 1.1 perseant * XXX as we are sure that we are moving *only* the old data---? 314 1.1 perseant */ 315 1.1 perseant if (error || VTOI(vp)->i_gen != gen || (vp->v_uflag & VU_DIROP)) { 316 1.1 perseant if (error == 0) 317 1.1 perseant error = ESTALE; 318 1.1 perseant 319 1.1 perseant if (vp != NULL && vp != fs->lfs_ivnode) { 320 1.1 perseant VOP_UNLOCK(vp); 321 1.1 perseant vrele(vp); 322 1.1 perseant } 323 1.1 perseant vp = NULL; 324 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs)) 325 1.1 perseant + lfs_fi_getlastlength(fs, fip); 326 1.1 perseant *offsetp += lfs_btofsb(fs, bytes); 327 1.1 perseant 328 1.1 perseant return error; 329 1.1 perseant } 330 1.1 perseant 331 1.1 perseant /* 332 1.1 perseant * We have the vnode and its version is correct. 333 1.1 perseant * Take a cleaning reference; and loop through the blocks 334 1.1 perseant * and rewrite them. 335 1.1 perseant */ 336 1.1 perseant lfs_setclean(fs, vp); 337 1.1 perseant size = lfs_sb_getbsize(fs); 338 1.1 perseant have_finfo = 0; 339 1.1 perseant for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { 340 1.1 perseant if (j == lfs_fi_getnblocks(fs, fip) - 1) 341 1.1 perseant size = lfs_fi_getlastlength(fs, fip); 342 1.1 perseant /* 343 1.1 perseant * An error of ESTALE indicates that there was nothing 344 1.1 perseant * to rewrite; this is not a problem. Any other error 345 1.1 perseant * causes us to skip the rest of this FINFO. 346 1.1 perseant */ 347 1.1 perseant if (vp != NULL && error == 0) { 348 1.1 perseant lbn = lfs_fi_getblock(fs, fip, j); 349 1.1 perseant error = rewrite_block(fs, vp, lbn, *offsetp, 350 1.1 perseant size, &have_finfo); 351 1.1 perseant if (error == ESTALE) 352 1.1 perseant error = 0; 353 1.1 perseant if (fragsp != NULL && error == 0) 354 1.1 perseant *fragsp += lfs_btofsb(fs, size); 355 1.1 perseant } 356 1.1 perseant *offsetp += lfs_btofsb(fs, size); 357 1.1 perseant } 358 1.1 perseant 359 1.1 perseant /* 360 1.1 perseant * If we acquired finfo, release it and write the blocks. 361 1.1 perseant */ 362 1.1 perseant if (have_finfo) { 363 1.1 perseant lfs_updatemeta(fs->lfs_sp); 364 1.1 perseant fs->lfs_sp->vp = NULL; 365 1.1 perseant lfs_release_finfo(fs); 366 1.1 perseant lfs_writeinode(fs, fs->lfs_sp, VTOI(vp)); 367 1.1 perseant } 368 1.1 perseant 369 1.1 perseant /* Release vnode */ 370 1.1 perseant if (vp != fs->lfs_ivnode) { 371 1.1 perseant VOP_UNLOCK(vp); 372 1.1 perseant vrele(vp); 373 1.1 perseant } 374 1.1 perseant 375 1.1 perseant return error; 376 1.1 perseant } 377 1.1 perseant 378 1.1 perseant static int 379 1.1 perseant finfo_func_setclean(struct lfs_finfofuncarg *lffa) 380 1.1 perseant { 381 1.1 perseant struct lfs *fs; 382 1.1 perseant FINFO *fip; 383 1.1 perseant daddr_t *offsetp; 384 1.1 perseant int error; 385 1.1 perseant size_t bytes; 386 1.1 perseant ino_t ino; 387 1.1 perseant uint32_t gen; 388 1.1 perseant struct vnode *vp; 389 1.1 perseant 390 1.1 perseant fs = lffa->fs; 391 1.1 perseant fip = lffa->finfop; 392 1.1 perseant offsetp = lffa->offsetp; 393 1.1 perseant 394 1.1 perseant /* Get the inode and check its version. */ 395 1.1 perseant ino = lfs_fi_getino(fs, fip); 396 1.1 perseant gen = lfs_fi_getversion(fs, fip); 397 1.1 perseant error = 0; 398 1.1 perseant if (ino == LFS_IFILE_INUM) 399 1.1 perseant vp = fs->lfs_ivnode; 400 1.1 perseant else { 401 1.1 perseant LFS_ASSERT_MAXINO(fs, ino); 402 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, 403 1.1 perseant LK_EXCLUSIVE|LK_NOWAIT, &vp); 404 1.1 perseant } 405 1.1 perseant 406 1.1 perseant /* If we have it and its version is right, take a cleaning reference */ 407 1.1 perseant if (error == 0 && VTOI(vp)->i_gen == gen) 408 1.1 perseant lfs_setclean(fs, vp); 409 1.1 perseant 410 1.1 perseant if (vp == fs->lfs_ivnode) 411 1.1 perseant vp = NULL; 412 1.1 perseant else if (vp != NULL) { 413 1.1 perseant VOP_UNLOCK(vp); 414 1.1 perseant vrele(vp); 415 1.1 perseant vp = NULL; 416 1.1 perseant } 417 1.1 perseant 418 1.1 perseant /* Skip to the next block */ 419 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs)) 420 1.1 perseant + lfs_fi_getlastlength(fs, fip); 421 1.1 perseant *offsetp += lfs_btofsb(fs, bytes); 422 1.1 perseant 423 1.1 perseant return error; 424 1.1 perseant } 425 1.1 perseant 426 1.1 perseant /* 427 1.1 perseant * Use the partial-segment parser to rewrite (clean) a segment. 428 1.1 perseant */ 429 1.1 perseant int 430 1.1 perseant lfs_rewrite_segment(struct lfs *fs, int sn, int *fragsp, kauth_cred_t cred, struct lwp *l) 431 1.1 perseant { 432 1.1 perseant daddr_t ooffset, offset, endpseg; 433 1.1 perseant 434 1.1 perseant ASSERT_SEGLOCK(fs); 435 1.1 perseant 436 1.1 perseant offset = lfs_sntod(fs, sn); 437 1.1 perseant lfs_skip_superblock(fs, &offset); 438 1.1 perseant endpseg = lfs_sntod(fs, sn + 1); 439 1.1 perseant 440 1.1 perseant while (offset > 0 && offset != endpseg) { 441 1.1 perseant /* First check summary validity (XXX unnecessary?) */ 442 1.1 perseant ooffset = offset; 443 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, 444 1.1 perseant NULL, NULL, CKSEG_CKSUM, NULL); 445 1.1 perseant if (offset == ooffset) 446 1.1 perseant break; 447 1.1 perseant 448 1.1 perseant /* 449 1.1 perseant * Valid, proceed. 450 1.1 perseant * 451 1.1 perseant * First write the file blocks, marking their 452 1.1 perseant * inodes IN_CLEANING. 453 1.1 perseant */ 454 1.1 perseant offset = ooffset; 455 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, 456 1.1 perseant NULL, finfo_func_rewrite, 457 1.1 perseant CKSEG_NONE, fragsp); 458 1.1 perseant 459 1.1 perseant /* 460 1.1 perseant * Now go back and pick up any inodes that 461 1.1 perseant * were not already marked IN_CLEANING, and 462 1.1 perseant * write them as well. 463 1.1 perseant */ 464 1.1 perseant offset = ooffset; 465 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, 466 1.1 perseant ino_func_rewrite, NULL, 467 1.1 perseant CKSEG_NONE, fragsp); 468 1.1 perseant } 469 1.1 perseant return 0; 470 1.1 perseant } 471 1.1 perseant 472 1.1 perseant /* 473 1.1 perseant * Rewrite the contents of one or more segments, in preparation for 474 1.1 perseant * marking them clean. 475 1.1 perseant */ 476 1.1 perseant int 477 1.1 perseant lfs_rewrite_segments(struct lfs *fs, int *snn, int len, int *directp, int *offsetp, struct lwp *l) 478 1.1 perseant { 479 1.1 perseant kauth_cred_t cred; 480 1.1 perseant int i, error; 481 1.1 perseant struct buf *bp; 482 1.1 perseant SEGUSE *sup; 483 1.1 perseant daddr_t offset, endpseg; 484 1.1 perseant 485 1.1 perseant ASSERT_NO_SEGLOCK(fs); 486 1.1 perseant 487 1.1 perseant cred = l ? l->l_cred : NOCRED; 488 1.1 perseant 489 1.1 perseant /* Prevent new dirops and acquire the cleaner lock. */ 490 1.1 perseant lfs_writer_enter(fs, "rewritesegs"); 491 1.1 perseant if ((error = lfs_cleanerlock(fs)) != 0) { 492 1.1 perseant lfs_writer_leave(fs); 493 1.1 perseant return error; 494 1.1 perseant } 495 1.1 perseant 496 1.1 perseant /* 497 1.1 perseant * Pre-reference vnodes now that we have cleaner lock 498 1.1 perseant * but before we take the segment lock. We don't want to 499 1.1 perseant * mix cleaning blocks with flushed vnodes. 500 1.1 perseant */ 501 1.1 perseant for (i = 0; i < len; i++) { 502 1.1 perseant error = 0; 503 1.1 perseant /* Refuse to clean segments that are ACTIVE */ 504 1.1 perseant LFS_SEGENTRY(sup, fs, snn[i], bp); 505 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE 506 1.1 perseant || !(sup->su_flags & SEGUSE_DIRTY)) 507 1.1 perseant error = EINVAL; 508 1.1 perseant 509 1.1 perseant brelse(bp, 0); 510 1.1 perseant if (error) 511 1.1 perseant break; 512 1.1 perseant 513 1.1 perseant offset = lfs_sntod(fs, snn[i]); 514 1.1 perseant lfs_skip_superblock(fs, &offset); 515 1.1 perseant endpseg = lfs_sntod(fs, snn[i] + 1); 516 1.1 perseant 517 1.1 perseant while (offset > 0 && offset != endpseg) { 518 1.1 perseant lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, 519 1.1 perseant ino_func_setclean, finfo_func_setclean, 520 1.1 perseant CKSEG_NONE, NULL); 521 1.1 perseant } 522 1.1 perseant } 523 1.1 perseant 524 1.1 perseant /* 525 1.1 perseant * Actually rewrite the contents of the segment. 526 1.1 perseant */ 527 1.1 perseant lfs_seglock(fs, SEGM_CLEAN); 528 1.1 perseant 529 1.1 perseant for (i = 0; i < len; i++) { 530 1.1 perseant error = 0; 531 1.1 perseant /* Refuse to clean segments that are ACTIVE */ 532 1.1 perseant LFS_SEGENTRY(sup, fs, snn[i], bp); 533 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE 534 1.1 perseant || !(sup->su_flags & SEGUSE_DIRTY)) 535 1.1 perseant error = EINVAL; 536 1.1 perseant 537 1.1 perseant brelse(bp, 0); 538 1.1 perseant if (error) 539 1.1 perseant break; 540 1.1 perseant 541 1.1 perseant error = lfs_rewrite_segment(fs, snn[i], directp, cred, l); 542 1.1 perseant if (error) { 543 1.1 perseant printf(" rewrite_segment returned %d\n", error); 544 1.1 perseant break; 545 1.1 perseant } 546 1.1 perseant } 547 1.1 perseant while (lfs_writeseg(fs, fs->lfs_sp)) 548 1.1 perseant ; 549 1.1 perseant 550 1.1 perseant *offsetp = lfs_btofsb(fs, fs->lfs_sp->bytes_written); 551 1.1 perseant lfs_segunlock(fs); 552 1.1 perseant lfs_cleanerunlock(fs); 553 1.1 perseant lfs_writer_leave(fs); 554 1.1 perseant 555 1.1 perseant return error; 556 1.1 perseant } 557 1.1 perseant 558 1.1 perseant #if 0 559 1.1 perseant static bool 560 1.1 perseant lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2) 561 1.1 perseant { 562 1.1 perseant return lbn2 == lbn1 + lfs_sb_getfrag(__UNCONST(fs)); 563 1.1 perseant } 564 1.1 perseant 565 1.1 perseant /* 566 1.1 perseant * Rewrite the contents of a file in order to coalesce it. 567 1.1 perseant * We don't bother rewriting indirect blocks because they will have to 568 1.1 perseant * be rewritten anyway when we rewrite the direct blocks. 569 1.1 perseant */ 570 1.1 perseant int 571 1.1 perseant lfs_rewrite_file(struct lfs *fs, ino_t ino, struct lwp *l) 572 1.1 perseant { 573 1.1 perseant daddr_t lbn, hiblk, daddr; 574 1.1 perseant int i, error, num, run; 575 1.1 perseant struct vnode *vp; 576 1.1 perseant struct indir indirs[ULFS_NIADDR+2]; 577 1.1 perseant size_t size; 578 1.1 perseant 579 1.1 perseant ASSERT_SEGLOCK(fs); 580 1.1 perseant 581 1.1 perseant LFS_ASSERT_MAXINO(fs, ino); 582 1.1 perseant 583 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp); 584 1.1 perseant if (error) 585 1.1 perseant return error; 586 1.1 perseant 587 1.1 perseant lfs_acquire_finfo(fs, ino, VTOI(vp)->i_gen); 588 1.1 perseant for (lbn = 0, hiblk = VTOI(vp)->i_lfs_hiblk; lbn < hiblk; ++lbn) { 589 1.1 perseant error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, &run, 590 1.1 perseant lfs_isseq); 591 1.1 perseant if (daddr == UNASSIGNED) 592 1.1 perseant continue; 593 1.1 perseant for (i = 0; i <= run; i++) { 594 1.1 perseant size = lfs_blksize(fs, VTOI(vp), lbn); 595 1.1 perseant error = rewrite_block(fs, vp, lbn++, 0x0, size, NULL); 596 1.1 perseant if (error) 597 1.1 perseant break; 598 1.1 perseant } 599 1.1 perseant } 600 1.1 perseant lfs_release_finfo(fs); 601 1.1 perseant while (lfs_writeseg(fs, fs->lfs_sp)) 602 1.1 perseant ; 603 1.1 perseant lfs_segunlock(fs); 604 1.1 perseant 605 1.1 perseant return error; 606 1.1 perseant } 607 1.1 perseant #endif /* 0 */ 608 1.1 perseant 609 1.1 perseant 610 1.1 perseant static int 611 1.1 perseant ino_func_checkempty(struct lfs_inofuncarg *lifa) 612 1.1 perseant { 613 1.1 perseant struct lfs *fs; 614 1.1 perseant daddr_t offset; 615 1.1 perseant struct vnode *devvp; 616 1.1 perseant union lfs_dinode *dip; 617 1.1 perseant struct buf *dbp, *ibp; 618 1.1 perseant int error; 619 1.1 perseant IFILE *ifp; 620 1.1 perseant unsigned i, num; 621 1.1 perseant daddr_t true_addr; 622 1.1 perseant ino_t ino; 623 1.1 perseant 624 1.1 perseant fs = lifa->fs; 625 1.1 perseant offset = lifa->offset; 626 1.1 perseant devvp = VTOI(fs->lfs_ivnode)->i_devvp; 627 1.1 perseant 628 1.1 perseant /* Read inode block */ 629 1.1 perseant error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs), 630 1.1 perseant 0, &dbp); 631 1.1 perseant if (error) { 632 1.1 perseant DLOG((DLOG_RF, "ino_func_checkempty: bread returned %d\n", 633 1.1 perseant error)); 634 1.1 perseant return error; 635 1.1 perseant } 636 1.1 perseant 637 1.1 perseant /* Check each inode against ifile entry */ 638 1.1 perseant num = LFS_INOPB(fs); 639 1.1 perseant for (i = num; i-- > 0; ) { 640 1.1 perseant dip = DINO_IN_BLOCK(fs, dbp->b_data, i); 641 1.1 perseant ino = lfs_dino_getinumber(fs, dip); 642 1.1 perseant if (ino == LFS_IFILE_INUM) { 643 1.1 perseant /* Check address against superblock */ 644 1.1 perseant true_addr = lfs_sb_getidaddr(fs); 645 1.1 perseant } else { 646 1.1 perseant /* Not ifile. Check address against ifile. */ 647 1.1 perseant LFS_IENTRY(ifp, fs, ino, ibp); 648 1.1 perseant true_addr = lfs_if_getdaddr(fs, ifp); 649 1.1 perseant brelse(ibp, 0); 650 1.1 perseant } 651 1.1 perseant if (offset == true_addr) { 652 1.1 perseant error = EEXIST; 653 1.1 perseant break; 654 1.1 perseant } 655 1.1 perseant } 656 1.1 perseant brelse(dbp, BC_AGE); 657 1.1 perseant 658 1.1 perseant return error; 659 1.1 perseant } 660 1.1 perseant 661 1.1 perseant static int 662 1.1 perseant finfo_func_checkempty(struct lfs_finfofuncarg *lffa) 663 1.1 perseant { 664 1.1 perseant struct lfs *fs; 665 1.1 perseant FINFO *fip; 666 1.1 perseant daddr_t *offsetp; 667 1.1 perseant int j, error; 668 1.1 perseant size_t size, bytes; 669 1.1 perseant ino_t ino; 670 1.1 perseant uint32_t gen; 671 1.1 perseant struct vnode *vp; 672 1.1 perseant daddr_t lbn, daddr; 673 1.1 perseant 674 1.1 perseant fs = lffa->fs; 675 1.1 perseant fip = lffa->finfop; 676 1.1 perseant offsetp = lffa->offsetp; 677 1.1 perseant 678 1.1 perseant /* Get the inode and check its version. */ 679 1.1 perseant ino = lfs_fi_getino(fs, fip); 680 1.1 perseant gen = lfs_fi_getversion(fs, fip); 681 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp); 682 1.1 perseant 683 1.1 perseant /* 684 1.1 perseant * If we can't, or if version is wrong, this FINFO does not refer 685 1.1 perseant * to a live file. Skip over it and continue. 686 1.1 perseant */ 687 1.1 perseant if (error || VTOI(vp)->i_gen != gen) { 688 1.1 perseant if (error == 0) 689 1.1 perseant error = ESTALE; 690 1.1 perseant 691 1.1 perseant if (vp != NULL) { 692 1.1 perseant VOP_UNLOCK(vp); 693 1.1 perseant vrele(vp); 694 1.1 perseant vp = NULL; 695 1.1 perseant } 696 1.1 perseant bytes = ((lfs_fi_getnblocks(fs, fip) - 1) 697 1.1 perseant << lfs_sb_getbshift(fs)) 698 1.1 perseant + lfs_fi_getlastlength(fs, fip); 699 1.1 perseant *offsetp += lfs_btofsb(fs, bytes); 700 1.1 perseant 701 1.1 perseant return error; 702 1.1 perseant } 703 1.1 perseant 704 1.1 perseant /* 705 1.1 perseant * We have the vnode and its version is correct. 706 1.1 perseant * Loop through the blocks and check their currency. 707 1.1 perseant */ 708 1.1 perseant size = lfs_sb_getbsize(fs); 709 1.1 perseant for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) { 710 1.1 perseant if (j == lfs_fi_getnblocks(fs, fip) - 1) 711 1.1 perseant size = lfs_fi_getlastlength(fs, fip); 712 1.1 perseant if (vp != NULL) { 713 1.1 perseant lbn = lfs_fi_getblock(fs, fip, j); 714 1.1 perseant 715 1.1 perseant /* Look up current location of this block. */ 716 1.1 perseant error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL); 717 1.1 perseant if (error) 718 1.1 perseant break; 719 1.1 perseant 720 1.1 perseant /* If it is here, the segment is not empty. */ 721 1.1 perseant if (LFS_DBTOFSB(fs, daddr) == *offsetp) { 722 1.1 perseant error = EEXIST; 723 1.1 perseant break; 724 1.1 perseant } 725 1.1 perseant } 726 1.1 perseant *offsetp += lfs_btofsb(fs, size); 727 1.1 perseant } 728 1.1 perseant 729 1.1 perseant /* Release vnode */ 730 1.1 perseant VOP_UNLOCK(vp); 731 1.1 perseant vrele(vp); 732 1.1 perseant 733 1.1 perseant return error; 734 1.1 perseant } 735 1.1 perseant 736 1.1 perseant int 737 1.1 perseant lfs_checkempty(struct lfs *fs, int sn, kauth_cred_t cred, struct lwp *l) 738 1.1 perseant { 739 1.1 perseant daddr_t offset, endpseg; 740 1.1 perseant int error; 741 1.1 perseant 742 1.1 perseant ASSERT_SEGLOCK(fs); 743 1.1 perseant 744 1.1 perseant offset = lfs_sntod(fs, sn); 745 1.1 perseant lfs_skip_superblock(fs, &offset); 746 1.1 perseant endpseg = lfs_sntod(fs, sn + 1); 747 1.1 perseant 748 1.1 perseant while (offset > 0 && offset < endpseg) { 749 1.1 perseant error = lfs_parse_pseg(fs, &offset, 0, cred, NULL, l, 750 1.1 perseant ino_func_checkempty, 751 1.1 perseant finfo_func_checkempty, 752 1.1 perseant CKSEG_NONE, NULL); 753 1.1 perseant if (error) 754 1.1 perseant return error; 755 1.1 perseant } 756 1.1 perseant return 0; 757 1.1 perseant } 758 1.1 perseant 759 1.1 perseant static long 760 1.1 perseant segselect_greedy(struct lfs *fs, int sn, SEGUSE *sup) 761 1.1 perseant { 762 1.1 perseant return lfs_sb_getssize(fs) - sup->su_nbytes; 763 1.1 perseant } 764 1.1 perseant 765 1.1 perseant __inline static long 766 1.1 perseant segselect_cb_rosenblum(struct lfs *fs, int sn, SEGUSE *sup, long age) 767 1.1 perseant { 768 1.1 perseant long benefit, cost; 769 1.1 perseant 770 1.1 perseant benefit = (int64_t)lfs_sb_getssize(fs) - sup->su_nbytes - 771 1.1 perseant (sup->su_nsums + 1) * lfs_sb_getfsize(fs); 772 1.1 perseant if (sup->su_flags & SEGUSE_SUPERBLOCK) 773 1.1 perseant benefit -= LFS_SBPAD; 774 1.1 perseant if (lfs_sb_getbsize(fs) > lfs_sb_getfsize(fs)) /* fragmentation */ 775 1.1 perseant benefit -= (lfs_sb_getbsize(fs) / 2); 776 1.1 perseant if (benefit <= 0) { 777 1.1 perseant return 0; 778 1.1 perseant } 779 1.1 perseant 780 1.1 perseant cost = lfs_sb_getssize(fs) + sup->su_nbytes; 781 1.1 perseant return (256 * benefit * age) / cost; 782 1.1 perseant } 783 1.1 perseant 784 1.1 perseant static long 785 1.1 perseant segselect_cb_time(struct lfs *fs, int sn, SEGUSE *sup) 786 1.1 perseant { 787 1.1 perseant long age; 788 1.1 perseant 789 1.1 perseant age = time_second - sup->su_lastmod; 790 1.1 perseant if (age < 0) 791 1.1 perseant age = 0; 792 1.1 perseant return segselect_cb_rosenblum(fs, sn, sup, age); 793 1.1 perseant } 794 1.1 perseant 795 1.1 perseant #if 0 796 1.1 perseant /* 797 1.1 perseant * Same as the time comparator, but fetch the serial number from the 798 1.1 perseant * segment header to compare. 799 1.1 perseant * 800 1.1 perseant * This is ugly. Whether serial number or wall time is better is a 801 1.1 perseant * worthy question, but if we want to use serial number to compute 802 1.1 perseant * age, we should record the serial number in su_lastmod instead of 803 1.1 perseant * the time. 804 1.1 perseant */ 805 1.1 perseant static long 806 1.1 perseant segselect_cb_serial(struct lfs *fs, int sn, SEGUSE *sup) 807 1.1 perseant { 808 1.1 perseant struct buf *bp; 809 1.1 perseant uint32_t magic; 810 1.1 perseant uint64_t age, serial; 811 1.1 perseant daddr_t addr; 812 1.1 perseant 813 1.1 perseant addr = lfs_segtod(fs, sn); 814 1.1 perseant lfs_skip_superblock(fs, &addr); 815 1.1 perseant bread(fs->lfs_devvp, LFS_FSBTODB(fs, addr), 816 1.1 perseant lfs_sb_getsumsize(fs), 0, &bp); 817 1.1 perseant magic = lfs_ss_getmagic(fs, ((SEGSUM *)bp->b_data)); 818 1.1 perseant serial = lfs_ss_getserial(fs, ((SEGSUM *)bp->b_data)); 819 1.1 perseant brelse(bp, 0); 820 1.1 perseant 821 1.1 perseant if (magic != SS_MAGIC) 822 1.1 perseant return 0; 823 1.1 perseant 824 1.1 perseant age = lfs_sb_getserial(fs) - serial; 825 1.1 perseant return segselect_cb_rosenblum(fs, sn, sup, age); 826 1.1 perseant } 827 1.1 perseant #endif 828 1.1 perseant 829 1.1 perseant void 830 1.1 perseant lfs_cleanerd(void *arg) 831 1.1 perseant { 832 1.1 perseant mount_iterator_t *iter; 833 1.1 perseant struct mount *mp; 834 1.1 perseant struct lfs *fs; 835 1.1 perseant struct vfsops *vfs = NULL; 836 1.1 perseant int lfsc; 837 1.1 perseant int cleaned_something = 0; 838 1.1 perseant 839 1.1 perseant mutex_enter(&lfs_lock); 840 1.1 perseant KASSERTMSG(lfs_cleaner_daemon == NULL, 841 1.1 perseant "more than one LFS cleaner daemon"); 842 1.1 perseant lfs_cleaner_daemon = curlwp; 843 1.1 perseant mutex_exit(&lfs_lock); 844 1.1 perseant 845 1.1 perseant /* Take an extra reference to the LFS vfsops. */ 846 1.1 perseant vfs = vfs_getopsbyname(MOUNT_LFS); 847 1.1 perseant 848 1.1 perseant mutex_enter(&lfs_lock); 849 1.1 perseant for (;;) { 850 1.1 perseant KASSERT(mutex_owned(&lfs_lock)); 851 1.1 perseant if (cleaned_something == 0) 852 1.1 perseant cv_timedwait(&lfs_allclean_wakeup, &lfs_lock, hz/10 + 1); 853 1.1 perseant KASSERT(mutex_owned(&lfs_lock)); 854 1.1 perseant cleaned_something = 0; 855 1.1 perseant 856 1.1 perseant KASSERT(mutex_owned(&lfs_lock)); 857 1.1 perseant mutex_exit(&lfs_lock); 858 1.1 perseant 859 1.1 perseant /* 860 1.1 perseant * Look through the list of LFSs to see if any of them 861 1.1 perseant * need cleaning. 862 1.1 perseant */ 863 1.1 perseant mountlist_iterator_init(&iter); 864 1.1 perseant lfsc = 0; 865 1.1 perseant while ((mp = mountlist_iterator_next(iter)) != NULL) { 866 1.1 perseant KASSERT(!mutex_owned(&lfs_lock)); 867 1.1 perseant if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, 868 1.1 perseant sizeof(mp->mnt_stat.f_fstypename)) == 0) { 869 1.1 perseant fs = VFSTOULFS(mp)->um_lfs; 870 1.1 perseant 871 1.1 perseant mutex_enter(&lfs_lock); 872 1.1 perseant if (fs->lfs_clean_selector != NULL) 873 1.1 perseant ++lfsc; 874 1.1 perseant mutex_exit(&lfs_lock); 875 1.1 perseant cleaned_something += clean(fs); 876 1.1 perseant } 877 1.1 perseant } 878 1.1 perseant if (lfsc == 0) { 879 1.1 perseant mutex_enter(&lfs_lock); 880 1.1 perseant lfs_cleaner_daemon = NULL; 881 1.1 perseant mutex_exit(&lfs_lock); 882 1.1 perseant mountlist_iterator_destroy(iter); 883 1.1 perseant break; 884 1.1 perseant } 885 1.1 perseant mountlist_iterator_destroy(iter); 886 1.1 perseant 887 1.1 perseant mutex_enter(&lfs_lock); 888 1.1 perseant } 889 1.1 perseant KASSERT(!mutex_owned(&lfs_lock)); 890 1.1 perseant 891 1.1 perseant /* Give up our extra reference so the module can be unloaded. */ 892 1.1 perseant mutex_enter(&vfs_list_lock); 893 1.1 perseant if (vfs != NULL) 894 1.1 perseant vfs->vfs_refcount--; 895 1.1 perseant mutex_exit(&vfs_list_lock); 896 1.1 perseant 897 1.1 perseant /* Done! */ 898 1.1 perseant kthread_exit(0); 899 1.1 perseant } 900 1.1 perseant 901 1.1 perseant /* 902 1.1 perseant * Look at the file system to see whether it needs cleaning, and if it does, 903 1.1 perseant * clean a segment. 904 1.1 perseant */ 905 1.1 perseant static int 906 1.1 perseant clean(struct lfs *fs) 907 1.1 perseant { 908 1.1 perseant struct buf *bp; 909 1.1 perseant SEGUSE *sup; 910 1.1 perseant int sn, maxsn, nclean, nready, nempty, nerror, nzero, again, target; 911 1.1 perseant long prio, maxprio, maxeprio, thresh; 912 1.1 perseant long (*func)(struct lfs *, int, SEGUSE *); 913 1.1 perseant uint32_t __debugused segflags = 0; 914 1.1 perseant daddr_t oldsn, bfree, avail; 915 1.1 perseant int direct, offset; 916 1.1 perseant 917 1.1 perseant func = fs->lfs_clean_selector; 918 1.1 perseant if (func == NULL) 919 1.1 perseant return 0; 920 1.1 perseant 921 1.1 perseant thresh = fs->lfs_autoclean.thresh; 922 1.1 perseant if (fs->lfs_flags & LFS_MUSTCLEAN) 923 1.1 perseant thresh = 0; 924 1.1 perseant else if (thresh < 0) { 925 1.1 perseant /* 926 1.1 perseant * Compute a priority threshold based on availability ratio. 927 1.1 perseant * XXX These numbers only makes sense for the greedy cleaner. 928 1.1 perseant * What is an appropriate threshold for the cost-benefit 929 1.1 perseant * cleaner? 930 1.1 perseant */ 931 1.1 perseant bfree = lfs_sb_getbfree(fs) 932 1.1 perseant + lfs_segtod(fs, 1) * lfs_sb_getminfree(fs); 933 1.1 perseant avail = lfs_sb_getavail(fs) - fs->lfs_ravail - fs->lfs_favail; 934 1.1 perseant if (avail > bfree) 935 1.1 perseant return 0; 936 1.1 perseant thresh = lfs_sb_getssize(fs) * (bfree - avail) 937 1.1 perseant / (lfs_sb_getsize(fs) - avail); 938 1.1 perseant if (thresh > lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs)) 939 1.1 perseant thresh = lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs); 940 1.1 perseant if (thresh > lfs_sb_getssize(fs) - lfs_sb_getbsize(fs)) 941 1.1 perseant return 0; 942 1.1 perseant } 943 1.1 perseant 944 1.1 perseant target = fs->lfs_autoclean.target; 945 1.1 perseant if (target <= 0) { 946 1.1 perseant /* Default to half a segment target */ 947 1.1 perseant target = lfs_segtod(fs, 1) / 2; 948 1.1 perseant } 949 1.1 perseant 950 1.1 perseant oldsn = lfs_dtosn(fs, lfs_sb_getoffset(fs)); 951 1.1 perseant 952 1.1 perseant again = 0; 953 1.1 perseant maxprio = maxeprio = -1; 954 1.1 perseant nzero = nclean = nready = nempty = nerror = 0; 955 1.1 perseant for (sn = 0; sn < lfs_sb_getnseg(fs); sn++) { 956 1.1 perseant 957 1.1 perseant prio = 0; 958 1.1 perseant LFS_SEGENTRY(sup, fs, sn, bp); 959 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE) 960 1.1 perseant prio = 0; 961 1.1 perseant else if (!(sup->su_flags & SEGUSE_DIRTY)) 962 1.1 perseant ++nclean; 963 1.1 perseant else if (sup->su_flags & SEGUSE_READY) 964 1.1 perseant ++nready; 965 1.1 perseant else if (sup->su_flags & SEGUSE_EMPTY) 966 1.1 perseant ++nempty; 967 1.1 perseant else if (sup->su_nbytes == 0) 968 1.1 perseant ++nzero; 969 1.1 perseant else 970 1.1 perseant prio = (*func)(fs, sn, sup); 971 1.1 perseant 972 1.1 perseant if (sup->su_flags & SEGUSE_ERROR) { 973 1.1 perseant if (prio > maxeprio) 974 1.1 perseant maxeprio = prio; 975 1.1 perseant prio = 0; 976 1.1 perseant ++nerror; 977 1.1 perseant } 978 1.1 perseant 979 1.1 perseant if (prio > maxprio) { 980 1.1 perseant maxprio = prio; 981 1.1 perseant maxsn = sn; 982 1.1 perseant segflags = sup->su_flags; 983 1.1 perseant } 984 1.1 perseant brelse(bp, 0); 985 1.1 perseant } 986 1.1 perseant DLOG((DLOG_CLEAN, "%s clean=%d/%d zero=%d empty=%d ready=%d maxsn=%d maxprio=%ld/%ld segflags=0x%lx\n", 987 1.1 perseant (maxprio > thresh ? "YES" : "NO "), 988 1.1 perseant nclean, (int)lfs_sb_getnseg(fs), nzero, nempty, nready, 989 1.1 perseant maxsn, maxprio, (unsigned long)thresh, 990 1.1 perseant (unsigned long)segflags)); 991 1.1 perseant 992 1.1 perseant /* 993 1.1 perseant * If we are trying to clean the segment we cleaned last, 994 1.1 perseant * cleaning did not work. Mark this segment SEGUSE_ERROR 995 1.1 perseant * and try again. 996 1.1 perseant */ 997 1.1 perseant if (maxprio > 0 && fs->lfs_lastcleaned == maxsn) { 998 1.1 perseant LFS_SEGENTRY(sup, fs, maxsn, bp); 999 1.1 perseant sup->su_flags |= SEGUSE_ERROR; 1000 1.1 perseant LFS_WRITESEGENTRY(sup, fs, sn, bp); 1001 1.1 perseant return 1; 1002 1.1 perseant } 1003 1.1 perseant 1004 1.1 perseant /* 1005 1.1 perseant * If there were nothing but error segments, clear error. 1006 1.1 perseant * We will wait to try again. 1007 1.1 perseant */ 1008 1.1 perseant if (maxprio == 0 && maxeprio > 0) { 1009 1.1 perseant DLOG((DLOG_CLEAN, "clear error on %d segments, try again\n", 1010 1.1 perseant nerror)); 1011 1.1 perseant lfs_seguse_clrflag_all(fs, SEGUSE_ERROR); 1012 1.1 perseant } 1013 1.1 perseant 1014 1.1 perseant /* Rewrite the highest-priority segment */ 1015 1.1 perseant if (maxprio > thresh) { 1016 1.1 perseant direct = offset = 0; 1017 1.1 perseant (void)lfs_rewrite_segments(fs, &maxsn, 1, 1018 1.1 perseant &direct, &offset, curlwp); 1019 1.1 perseant DLOG((DLOG_CLEAN, " direct=%d offset=%d\n", direct, offset)); 1020 1.1 perseant again += direct; 1021 1.1 perseant fs->lfs_clean_accum += offset; 1022 1.1 perseant 1023 1.1 perseant /* Don't clean this again immediately */ 1024 1.1 perseant fs->lfs_lastcleaned = maxsn; 1025 1.1 perseant } 1026 1.1 perseant 1027 1.1 perseant /* 1028 1.1 perseant * If we are in dire straits but we have segments already 1029 1.1 perseant * empty, force a double checkpoint to reclaim them. 1030 1.1 perseant */ 1031 1.1 perseant if (fs->lfs_flags & LFS_MUSTCLEAN) { 1032 1.1 perseant if (nready + nempty > 0) { 1033 1.1 perseant printf("force checkpoint with nready=%d nempty=%d nzero=%d\n", 1034 1.1 perseant nready, nempty, nzero); 1035 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, 1036 1.1 perseant SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC); 1037 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, 1038 1.1 perseant SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC); 1039 1.1 perseant ++again; 1040 1.1 perseant } 1041 1.1 perseant } else if (fs->lfs_clean_accum > target) { 1042 1.1 perseant DLOG((DLOG_CLEAN, "checkpoint to flush\n")); 1043 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP); 1044 1.1 perseant fs->lfs_clean_accum = 0; 1045 1.1 perseant } else if (lfs_dtosn(fs, lfs_sb_getoffset(fs)) != oldsn 1046 1.1 perseant || nempty + nready > LFS_MAX_ACTIVE) { /* XXX arbitrary */ 1047 1.1 perseant DLOG((DLOG_CLEAN, "write to promote empty segments\n")); 1048 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP); 1049 1.1 perseant fs->lfs_clean_accum = 0; 1050 1.1 perseant } 1051 1.1 perseant 1052 1.1 perseant return again; 1053 1.1 perseant } 1054 1.1 perseant 1055 1.1 perseant /* 1056 1.1 perseant * Rewrite a file in its entirety. 1057 1.1 perseant * 1058 1.1 perseant * Generally this would be done to coalesce a file that is scattered 1059 1.1 perseant * around the disk; but if the "scramble" flag is set, instead rewrite 1060 1.1 perseant * only the even-numbered blocks, which provides the opposite effect 1061 1.1 perseant * for testing purposes. 1062 1.1 perseant * 1063 1.1 perseant * It is the caller's responsibility to check the bounds of the inode 1064 1.1 perseant * numbers. 1065 1.1 perseant */ 1066 1.1 perseant int 1067 1.1 perseant lfs_rewrite_file(struct lfs *fs, ino_t *inoa, int len, bool scramble, 1068 1.1 perseant int *directp, int *offsetp) 1069 1.1 perseant { 1070 1.1 perseant daddr_t hiblk, lbn; 1071 1.1 perseant struct vnode *vp; 1072 1.1 perseant struct inode *ip; 1073 1.1 perseant struct buf *bp; 1074 1.1 perseant int i, error, flags; 1075 1.1 perseant 1076 1.1 perseant *directp = 0; 1077 1.1 perseant if ((error = lfs_cleanerlock(fs)) != 0) 1078 1.1 perseant return error; 1079 1.1 perseant flags = SEGM_PROT; 1080 1.1 perseant lfs_seglock(fs, flags); 1081 1.1 perseant for (i = 0; i < len; ++i) { 1082 1.1 perseant error = VFS_VGET(fs->lfs_ivnode->v_mount, inoa[i], LK_EXCLUSIVE, &vp); 1083 1.1 perseant if (error) 1084 1.1 perseant goto out; 1085 1.1 perseant 1086 1.1 perseant ip = VTOI(vp); 1087 1.1 perseant if ((vp->v_uflag & VU_DIROP) || (ip->i_flags & IN_ADIROP)) { 1088 1.1 perseant VOP_UNLOCK(vp); 1089 1.1 perseant vrele(vp); 1090 1.1 perseant error = EAGAIN; 1091 1.1 perseant goto out; 1092 1.1 perseant } 1093 1.1 perseant 1094 1.1 perseant /* Highest block in this inode */ 1095 1.1 perseant hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1; 1096 1.1 perseant 1097 1.1 perseant for (lbn = 0; lbn <= hiblk; ++lbn) { 1098 1.1 perseant if (scramble && (lbn & 0x01)) 1099 1.1 perseant continue; 1100 1.1 perseant 1101 1.1 perseant if (lfs_needsflush(fs)) { 1102 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, flags); 1103 1.1 perseant } 1104 1.1 perseant 1105 1.1 perseant error = bread(vp, lbn, lfs_blksize(fs, ip, lbn), 0, &bp); 1106 1.1 perseant if (error) 1107 1.1 perseant break; 1108 1.1 perseant 1109 1.1 perseant /* bp->b_cflags |= BC_INVAL; */ 1110 1.1 perseant lfs_bwrite_ext(bp, (flags & SEGM_CLEAN ? BW_CLEAN : 0)); 1111 1.1 perseant *directp += lfs_btofsb(fs, bp->b_bcount); 1112 1.1 perseant } 1113 1.1 perseant 1114 1.1 perseant /* Done with this vnode */ 1115 1.1 perseant VOP_UNLOCK(vp); 1116 1.1 perseant vrele(vp); 1117 1.1 perseant if (error) 1118 1.1 perseant break; 1119 1.1 perseant } 1120 1.1 perseant out: 1121 1.1 perseant lfs_segwrite(fs->lfs_ivnode->v_mount, flags); 1122 1.1 perseant *offsetp += lfs_btofsb(fs, fs->lfs_sp->bytes_written); 1123 1.1 perseant lfs_segunlock(fs); 1124 1.1 perseant lfs_cleanerunlock(fs); 1125 1.1 perseant 1126 1.1 perseant return error; 1127 1.1 perseant } 1128 1.1 perseant 1129 1.1 perseant int 1130 1.1 perseant lfs_cleanctl(struct lfs *fs, struct lfs_autoclean_params *params) 1131 1.1 perseant { 1132 1.1 perseant long (*cleanfunc)(struct lfs *, int, SEGUSE *); 1133 1.1 perseant 1134 1.1 perseant fs->lfs_autoclean = *params; 1135 1.1 perseant 1136 1.1 perseant cleanfunc = NULL; 1137 1.1 perseant switch (fs->lfs_autoclean.mode) { 1138 1.1 perseant case LFS_CLEANMODE_NONE: 1139 1.1 perseant cleanfunc = NULL; 1140 1.1 perseant break; 1141 1.1 perseant 1142 1.1 perseant case LFS_CLEANMODE_GREEDY: 1143 1.1 perseant cleanfunc = segselect_greedy; 1144 1.1 perseant break; 1145 1.1 perseant 1146 1.1 perseant case LFS_CLEANMODE_CB: 1147 1.1 perseant cleanfunc = segselect_cb_time; 1148 1.1 perseant break; 1149 1.1 perseant 1150 1.1 perseant default: 1151 1.1 perseant return EINVAL; 1152 1.1 perseant } 1153 1.1 perseant 1154 1.1 perseant mutex_enter(&lfs_lock); 1155 1.1 perseant if (fs->lfs_clean_selector == NULL && cleanfunc != NULL) 1156 1.1 perseant if (++lfs_ncleaners == 1) { 1157 1.1 perseant printf("Starting cleaner thread\n"); 1158 1.1 perseant if (lfs_cleaner_daemon == NULL && 1159 1.1 perseant kthread_create(PRI_BIO, 0, NULL, 1160 1.1 perseant lfs_cleanerd, NULL, NULL, 1161 1.1 perseant "lfs_cleaner") != 0) 1162 1.1 perseant panic("fork lfs_cleaner"); 1163 1.1 perseant } 1164 1.1 perseant if (fs->lfs_clean_selector != NULL && cleanfunc == NULL) 1165 1.1 perseant if (--lfs_ncleaners == 0) { 1166 1.1 perseant printf("Stopping cleaner thread\n"); 1167 1.1 perseant kthread_join(lfs_cleaner_daemon); 1168 1.1 perseant } 1169 1.1 perseant fs->lfs_clean_selector = cleanfunc; 1170 1.1 perseant mutex_exit(&lfs_lock); 1171 1.1 perseant 1172 1.1 perseant return 0; 1173 1.1 perseant } 1174