Home | History | Annotate | Line # | Download | only in lfs
      1  1.1  perseant /*	$NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $	*/
      2  1.1  perseant 
      3  1.1  perseant /*-
      4  1.1  perseant  * Copyright (c) 2025 The NetBSD Foundation, Inc.
      5  1.1  perseant  * All rights reserved.
      6  1.1  perseant  *
      7  1.1  perseant  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  perseant  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  1.1  perseant  *
     10  1.1  perseant  * Redistribution and use in source and binary forms, with or without
     11  1.1  perseant  * modification, are permitted provided that the following conditions
     12  1.1  perseant  * are met:
     13  1.1  perseant  * 1. Redistributions of source code must retain the above copyright
     14  1.1  perseant  *    notice, this list of conditions and the following disclaimer.
     15  1.1  perseant  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  perseant  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  perseant  *    documentation and/or other materials provided with the distribution.
     18  1.1  perseant  *
     19  1.1  perseant  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.1  perseant  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.1  perseant  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.1  perseant  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.1  perseant  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.1  perseant  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.1  perseant  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.1  perseant  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.1  perseant  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.1  perseant  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.1  perseant  * POSSIBILITY OF SUCH DAMAGE.
     30  1.1  perseant  */
     31  1.1  perseant 
     32  1.1  perseant #include <sys/cdefs.h>
     33  1.1  perseant __KERNEL_RCSID(0, "$NetBSD: lfs_kclean.c,v 1.1 2025/11/06 15:54:27 perseant Exp $");
     34  1.1  perseant 
     35  1.1  perseant #include <sys/param.h>
     36  1.1  perseant #include <sys/systm.h>
     37  1.1  perseant #include <sys/namei.h>
     38  1.1  perseant #include <sys/proc.h>
     39  1.1  perseant #include <sys/kernel.h>
     40  1.1  perseant #include <sys/vnode.h>
     41  1.1  perseant #include <sys/conf.h>
     42  1.1  perseant #include <sys/kauth.h>
     43  1.1  perseant #include <sys/buf.h>
     44  1.1  perseant #include <sys/kthread.h>
     45  1.1  perseant 
     46  1.1  perseant #include <ufs/lfs/ulfs_inode.h>
     47  1.1  perseant #include <ufs/lfs/ulfsmount.h>
     48  1.1  perseant #include <ufs/lfs/ulfs_extern.h>
     49  1.1  perseant 
     50  1.1  perseant #include <ufs/lfs/lfs.h>
     51  1.1  perseant #include <ufs/lfs/lfs_accessors.h>
     52  1.1  perseant #include <ufs/lfs/lfs_kernel.h>
     53  1.1  perseant #include <ufs/lfs/lfs_extern.h>
     54  1.1  perseant 
     55  1.1  perseant static int ino_func_setclean(struct lfs_inofuncarg *);
     56  1.1  perseant static int finfo_func_rewrite(struct lfs_finfofuncarg *);
     57  1.1  perseant static int finfo_func_setclean(struct lfs_finfofuncarg *);
     58  1.1  perseant static int rewrite_block(struct lfs *, struct vnode *, daddr_t, daddr_t,
     59  1.1  perseant 			 size_t, int *);
     60  1.1  perseant 
     61  1.1  perseant static int clean(struct lfs *);
     62  1.1  perseant static long segselect_cb_rosenblum(struct lfs *, int, SEGUSE *, long);
     63  1.1  perseant static long segselect_greedy(struct lfs *, int, SEGUSE *);
     64  1.1  perseant static long segselect_cb_time(struct lfs *, int, SEGUSE *);
     65  1.1  perseant #if 0
     66  1.1  perseant static long segselect_cb_serial(struct lfs *, int, SEGUSE *);
     67  1.1  perseant #endif
     68  1.1  perseant 
     69  1.1  perseant struct lwp * lfs_cleaner_daemon = NULL;
     70  1.1  perseant extern kcondvar_t	lfs_allclean_wakeup;
     71  1.1  perseant static int lfs_ncleaners = 0;
     72  1.1  perseant 
     73  1.1  perseant static int
     74  1.1  perseant ino_func_setclean(struct lfs_inofuncarg *lifa)
     75  1.1  perseant {
     76  1.1  perseant 	struct lfs *fs;
     77  1.1  perseant 	daddr_t offset;
     78  1.1  perseant 	struct vnode *devvp, *vp;
     79  1.1  perseant 	union lfs_dinode *dip;
     80  1.1  perseant 	struct buf *dbp, *ibp;
     81  1.1  perseant 	int error;
     82  1.1  perseant 	IFILE *ifp;
     83  1.1  perseant 	unsigned i, num;
     84  1.1  perseant 	daddr_t true_addr;
     85  1.1  perseant 	ino_t ino;
     86  1.1  perseant 
     87  1.1  perseant 	fs = lifa->fs;
     88  1.1  perseant 	offset = lifa->offset;
     89  1.1  perseant 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
     90  1.1  perseant 
     91  1.1  perseant 	/* Read inode block */
     92  1.1  perseant 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
     93  1.1  perseant 	    0, &dbp);
     94  1.1  perseant 	if (error) {
     95  1.1  perseant 		DLOG((DLOG_RF, "ino_func_setclean: bread returned %d\n",
     96  1.1  perseant 		      error));
     97  1.1  perseant 		return error;
     98  1.1  perseant 	}
     99  1.1  perseant 	memcpy(lifa->buf, dbp->b_data, dbp->b_bcount);
    100  1.1  perseant 	brelse(dbp, BC_AGE);
    101  1.1  perseant 
    102  1.1  perseant 	/* Check each inode against ifile entry */
    103  1.1  perseant 	num = LFS_INOPB(fs);
    104  1.1  perseant 	for (i = num; i-- > 0; ) {
    105  1.1  perseant 		dip = DINO_IN_BLOCK(fs, lifa->buf, i);
    106  1.1  perseant 		ino = lfs_dino_getinumber(fs, dip);
    107  1.1  perseant 		if (ino == LFS_IFILE_INUM) {
    108  1.1  perseant 			/* Check address against superblock */
    109  1.1  perseant 			true_addr = lfs_sb_getidaddr(fs);
    110  1.1  perseant 		} else {
    111  1.1  perseant 			/* Not ifile.  Check address against ifile. */
    112  1.1  perseant 			LFS_IENTRY(ifp, fs, ino, ibp);
    113  1.1  perseant 			true_addr = lfs_if_getdaddr(fs, ifp);
    114  1.1  perseant 			brelse(ibp, 0);
    115  1.1  perseant 		}
    116  1.1  perseant 		if (offset != true_addr)
    117  1.1  perseant 			continue;
    118  1.1  perseant 
    119  1.1  perseant 		LFS_ASSERT_MAXINO(fs, ino);
    120  1.1  perseant 
    121  1.1  perseant 		/* XXX We can use fastvget here! */
    122  1.1  perseant 
    123  1.1  perseant 		/*
    124  1.1  perseant 		 * An inode we need to relocate.
    125  1.1  perseant 		 * Get it if we can.
    126  1.1  perseant 		 */
    127  1.1  perseant 		if (ino == LFS_IFILE_INUM)
    128  1.1  perseant 			vp = fs->lfs_ivnode;
    129  1.1  perseant 		else
    130  1.1  perseant 			error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
    131  1.1  perseant 					 LK_EXCLUSIVE | LK_NOWAIT, &vp);
    132  1.1  perseant 		if (error)
    133  1.1  perseant 			continue;
    134  1.1  perseant 
    135  1.1  perseant 		KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip));
    136  1.1  perseant 		lfs_setclean(fs, vp);
    137  1.1  perseant 		if (vp != fs->lfs_ivnode) {
    138  1.1  perseant 			VOP_UNLOCK(vp);
    139  1.1  perseant 			vrele(vp);
    140  1.1  perseant 		}
    141  1.1  perseant 	}
    142  1.1  perseant 
    143  1.1  perseant 	return error;
    144  1.1  perseant }
    145  1.1  perseant 
    146  1.1  perseant static int
    147  1.1  perseant ino_func_rewrite(struct lfs_inofuncarg *lifa)
    148  1.1  perseant {
    149  1.1  perseant 	struct lfs *fs;
    150  1.1  perseant 	daddr_t offset;
    151  1.1  perseant 	struct vnode *devvp, *vp;
    152  1.1  perseant 	union lfs_dinode *dip;
    153  1.1  perseant 	struct buf *dbp, *ibp;
    154  1.1  perseant 	int error;
    155  1.1  perseant 	IFILE *ifp;
    156  1.1  perseant 	unsigned i, num;
    157  1.1  perseant 	daddr_t true_addr;
    158  1.1  perseant 	ino_t ino;
    159  1.1  perseant 
    160  1.1  perseant 	fs = lifa->fs;
    161  1.1  perseant 	offset = lifa->offset;
    162  1.1  perseant 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
    163  1.1  perseant 
    164  1.1  perseant 	/* Read inode block */
    165  1.1  perseant 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
    166  1.1  perseant 	    0, &dbp);
    167  1.1  perseant 	if (error) {
    168  1.1  perseant 		DLOG((DLOG_RF, "ino_func_rewrite: bread returned %d\n",
    169  1.1  perseant 		      error));
    170  1.1  perseant 		return error;
    171  1.1  perseant 	}
    172  1.1  perseant 	memcpy(lifa->buf, dbp->b_data, dbp->b_bcount);
    173  1.1  perseant 	brelse(dbp, BC_AGE);
    174  1.1  perseant 
    175  1.1  perseant 	/* Check each inode against ifile entry */
    176  1.1  perseant 	num = LFS_INOPB(fs);
    177  1.1  perseant 	for (i = num; i-- > 0; ) {
    178  1.1  perseant 		dip = DINO_IN_BLOCK(fs, lifa->buf, i);
    179  1.1  perseant 		ino = lfs_dino_getinumber(fs, dip);
    180  1.1  perseant 		if (ino == LFS_IFILE_INUM) {
    181  1.1  perseant 			/* Check address against superblock */
    182  1.1  perseant 			true_addr = lfs_sb_getidaddr(fs);
    183  1.1  perseant 		} else {
    184  1.1  perseant 			/* Not ifile.  Check address against ifile. */
    185  1.1  perseant 			LFS_IENTRY(ifp, fs, ino, ibp);
    186  1.1  perseant 			true_addr = lfs_if_getdaddr(fs, ifp);
    187  1.1  perseant 			brelse(ibp, 0);
    188  1.1  perseant 		}
    189  1.1  perseant 		if (offset != true_addr)
    190  1.1  perseant 			continue;
    191  1.1  perseant 
    192  1.1  perseant 		if (ino == LFS_IFILE_INUM)
    193  1.1  perseant 			continue;
    194  1.1  perseant 
    195  1.1  perseant 		LFS_ASSERT_MAXINO(fs, ino);
    196  1.1  perseant 
    197  1.1  perseant 		/* XXX We can use fastvget here! */
    198  1.1  perseant 
    199  1.1  perseant 		/*
    200  1.1  perseant 		 * An inode we need to relocate.
    201  1.1  perseant 		 * Get it if we can.
    202  1.1  perseant 		 */
    203  1.1  perseant 		error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
    204  1.1  perseant 				 LK_EXCLUSIVE | LK_NOWAIT, &vp);
    205  1.1  perseant 		if (error)
    206  1.1  perseant 			continue;
    207  1.1  perseant 
    208  1.1  perseant 		KASSERT(VTOI(vp)->i_gen == lfs_dino_getgen(fs, dip));
    209  1.1  perseant 
    210  1.1  perseant 		if (!(VTOI(vp)->i_state & IN_CLEANING)) {
    211  1.1  perseant 			lfs_setclean(fs, vp);
    212  1.1  perseant 			lfs_writeinode(fs, fs->lfs_sp, VTOI(vp));
    213  1.1  perseant 		}
    214  1.1  perseant 
    215  1.1  perseant 		VOP_UNLOCK(vp);
    216  1.1  perseant 		vrele(vp);
    217  1.1  perseant 
    218  1.1  perseant 	}
    219  1.1  perseant 
    220  1.1  perseant 	return error;
    221  1.1  perseant }
    222  1.1  perseant 
    223  1.1  perseant static int
    224  1.1  perseant rewrite_block(struct lfs *fs, struct vnode *vp, daddr_t lbn, daddr_t offset, size_t size, int *have_finfop)
    225  1.1  perseant {
    226  1.1  perseant 	daddr_t daddr;
    227  1.1  perseant 	int error;
    228  1.1  perseant 	struct buf *bp;
    229  1.1  perseant 	struct inode *ip;
    230  1.1  perseant 
    231  1.1  perseant 	KASSERT(have_finfop != NULL);
    232  1.1  perseant 
    233  1.1  perseant 	/* Look up current location of this block. */
    234  1.1  perseant 	error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL);
    235  1.1  perseant 	if (error)
    236  1.1  perseant 		return error;
    237  1.1  perseant 
    238  1.1  perseant 	/* Skip any block that is not here. */
    239  1.1  perseant 	if (offset != 0 && LFS_DBTOFSB(fs, daddr) != offset)
    240  1.1  perseant 		return ESTALE;
    241  1.1  perseant 
    242  1.1  perseant 	/*
    243  1.1  perseant 	 * It is (was recently) here.  Read the block.
    244  1.1  perseant 	 */
    245  1.1  perseant 	//size = lfs_blksize(fs, VTOI(vp), lbn);
    246  1.1  perseant 	error = bread(vp, lbn, size, 0, &bp);
    247  1.1  perseant 	if (error)
    248  1.1  perseant 		return error;
    249  1.1  perseant 
    250  1.1  perseant 	if (vp == fs->lfs_ivnode) {
    251  1.1  perseant 		VOP_BWRITE(vp, bp);
    252  1.1  perseant 	} else {
    253  1.1  perseant 		/* Get ready to write. */
    254  1.1  perseant 		if (!*have_finfop) {
    255  1.1  perseant 			ip = VTOI(vp);
    256  1.1  perseant 			lfs_acquire_finfo(fs, ip->i_number, ip->i_gen);
    257  1.1  perseant 			fs->lfs_sp->vp = vp;
    258  1.1  perseant 			*have_finfop = 1;
    259  1.1  perseant 		}
    260  1.1  perseant 
    261  1.1  perseant 		KASSERT(bp->b_vp == vp);
    262  1.1  perseant 		/* bp->b_cflags |= BC_INVAL; */ /* brelse will kill the buffer */
    263  1.1  perseant 		lfs_bwrite_ext(bp, BW_CLEAN);
    264  1.1  perseant 		KASSERT(bp->b_vp == vp);
    265  1.1  perseant 		mutex_enter(&bufcache_lock);
    266  1.1  perseant 		while (lfs_gatherblock(fs->lfs_sp, bp, &bufcache_lock)) {
    267  1.1  perseant 			KASSERT(bp->b_vp != NULL);
    268  1.1  perseant 		}
    269  1.1  perseant 		mutex_exit(&bufcache_lock);
    270  1.1  perseant 
    271  1.1  perseant 		KASSERT(bp->b_flags & B_GATHERED);
    272  1.1  perseant 		KASSERT(fs->lfs_sp->cbpp[-1] == bp);
    273  1.1  perseant 	}
    274  1.1  perseant 	return 0;
    275  1.1  perseant }
    276  1.1  perseant 
    277  1.1  perseant static int
    278  1.1  perseant finfo_func_rewrite(struct lfs_finfofuncarg *lffa)
    279  1.1  perseant {
    280  1.1  perseant 	struct lfs *fs;
    281  1.1  perseant 	FINFO *fip;
    282  1.1  perseant 	daddr_t *offsetp;
    283  1.1  perseant 	int j, have_finfo, error;
    284  1.1  perseant 	size_t size, bytes;
    285  1.1  perseant 	ino_t ino;
    286  1.1  perseant 	uint32_t gen;
    287  1.1  perseant 	struct vnode *vp;
    288  1.1  perseant 	daddr_t lbn;
    289  1.1  perseant 	int *fragsp;
    290  1.1  perseant 
    291  1.1  perseant 	fs = lffa->fs;
    292  1.1  perseant 	fip = lffa->finfop;
    293  1.1  perseant 	offsetp = lffa->offsetp;
    294  1.1  perseant 	fragsp = (int *)lffa->arg;
    295  1.1  perseant 
    296  1.1  perseant 	/* Get the inode and check its version. */
    297  1.1  perseant 	ino = lfs_fi_getino(fs, fip);
    298  1.1  perseant 	gen = lfs_fi_getversion(fs, fip);
    299  1.1  perseant 	error = 0;
    300  1.1  perseant 	if (ino == LFS_IFILE_INUM)
    301  1.1  perseant 		vp = fs->lfs_ivnode;
    302  1.1  perseant 	else {
    303  1.1  perseant 		LFS_ASSERT_MAXINO(fs, ino);
    304  1.1  perseant 		error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
    305  1.1  perseant 				 LK_EXCLUSIVE|LK_NOWAIT, &vp);
    306  1.1  perseant 	}
    307  1.1  perseant 
    308  1.1  perseant 	/*
    309  1.1  perseant 	 * If we can't, or if version is wrong, or it has dirop blocks on it,
    310  1.1  perseant 	 * we can't relocate its blocks; but we still have to count
    311  1.1  perseant 	 * blocks through the partial segment to return the right offset.
    312  1.1  perseant 	 * XXX actually we can move DIROP vnodes' *old* data, as long
    313  1.1  perseant 	 * XXX as we are sure that we are moving *only* the old data---?
    314  1.1  perseant 	 */
    315  1.1  perseant 	if (error || VTOI(vp)->i_gen != gen || (vp->v_uflag & VU_DIROP)) {
    316  1.1  perseant 		if (error == 0)
    317  1.1  perseant 			error = ESTALE;
    318  1.1  perseant 
    319  1.1  perseant 		if (vp != NULL && vp != fs->lfs_ivnode) {
    320  1.1  perseant 			VOP_UNLOCK(vp);
    321  1.1  perseant 			vrele(vp);
    322  1.1  perseant 		}
    323  1.1  perseant 		vp = NULL;
    324  1.1  perseant 		bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs))
    325  1.1  perseant 			+ lfs_fi_getlastlength(fs, fip);
    326  1.1  perseant 		*offsetp += lfs_btofsb(fs, bytes);
    327  1.1  perseant 
    328  1.1  perseant 		return error;
    329  1.1  perseant 	}
    330  1.1  perseant 
    331  1.1  perseant 	/*
    332  1.1  perseant 	 * We have the vnode and its version is correct.
    333  1.1  perseant 	 * Take a cleaning reference; and loop through the blocks
    334  1.1  perseant 	 * and rewrite them.
    335  1.1  perseant 	 */
    336  1.1  perseant 	lfs_setclean(fs, vp);
    337  1.1  perseant 	size = lfs_sb_getbsize(fs);
    338  1.1  perseant 	have_finfo = 0;
    339  1.1  perseant 	for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
    340  1.1  perseant 		if (j == lfs_fi_getnblocks(fs, fip) - 1)
    341  1.1  perseant 			size = lfs_fi_getlastlength(fs, fip);
    342  1.1  perseant 		/*
    343  1.1  perseant 		 * An error of ESTALE indicates that there was nothing
    344  1.1  perseant 		 * to rewrite; this is not a problem.  Any other error
    345  1.1  perseant 		 * causes us to skip the rest of this FINFO.
    346  1.1  perseant 		 */
    347  1.1  perseant 		if (vp != NULL && error == 0) {
    348  1.1  perseant 			lbn = lfs_fi_getblock(fs, fip, j);
    349  1.1  perseant 			error = rewrite_block(fs, vp, lbn, *offsetp,
    350  1.1  perseant 					      size, &have_finfo);
    351  1.1  perseant 			if (error == ESTALE)
    352  1.1  perseant 				error = 0;
    353  1.1  perseant 			if (fragsp != NULL && error == 0)
    354  1.1  perseant 				*fragsp += lfs_btofsb(fs, size);
    355  1.1  perseant 		}
    356  1.1  perseant 		*offsetp += lfs_btofsb(fs, size);
    357  1.1  perseant 	}
    358  1.1  perseant 
    359  1.1  perseant 	/*
    360  1.1  perseant 	 * If we acquired finfo, release it and write the blocks.
    361  1.1  perseant 	 */
    362  1.1  perseant 	if (have_finfo) {
    363  1.1  perseant 		lfs_updatemeta(fs->lfs_sp);
    364  1.1  perseant 		fs->lfs_sp->vp = NULL;
    365  1.1  perseant 		lfs_release_finfo(fs);
    366  1.1  perseant 		lfs_writeinode(fs, fs->lfs_sp, VTOI(vp));
    367  1.1  perseant 	}
    368  1.1  perseant 
    369  1.1  perseant 	/* Release vnode */
    370  1.1  perseant 	if (vp != fs->lfs_ivnode) {
    371  1.1  perseant 		VOP_UNLOCK(vp);
    372  1.1  perseant 		vrele(vp);
    373  1.1  perseant 	}
    374  1.1  perseant 
    375  1.1  perseant 	return error;
    376  1.1  perseant }
    377  1.1  perseant 
    378  1.1  perseant static int
    379  1.1  perseant finfo_func_setclean(struct lfs_finfofuncarg *lffa)
    380  1.1  perseant {
    381  1.1  perseant 	struct lfs *fs;
    382  1.1  perseant 	FINFO *fip;
    383  1.1  perseant 	daddr_t *offsetp;
    384  1.1  perseant 	int error;
    385  1.1  perseant 	size_t bytes;
    386  1.1  perseant 	ino_t ino;
    387  1.1  perseant 	uint32_t gen;
    388  1.1  perseant 	struct vnode *vp;
    389  1.1  perseant 
    390  1.1  perseant 	fs = lffa->fs;
    391  1.1  perseant 	fip = lffa->finfop;
    392  1.1  perseant 	offsetp = lffa->offsetp;
    393  1.1  perseant 
    394  1.1  perseant 	/* Get the inode and check its version. */
    395  1.1  perseant 	ino = lfs_fi_getino(fs, fip);
    396  1.1  perseant 	gen = lfs_fi_getversion(fs, fip);
    397  1.1  perseant 	error = 0;
    398  1.1  perseant 	if (ino == LFS_IFILE_INUM)
    399  1.1  perseant 		vp = fs->lfs_ivnode;
    400  1.1  perseant 	else {
    401  1.1  perseant 		LFS_ASSERT_MAXINO(fs, ino);
    402  1.1  perseant 		error = VFS_VGET(fs->lfs_ivnode->v_mount, ino,
    403  1.1  perseant 				 LK_EXCLUSIVE|LK_NOWAIT, &vp);
    404  1.1  perseant 	}
    405  1.1  perseant 
    406  1.1  perseant 	/* If we have it and its version is right, take a cleaning reference */
    407  1.1  perseant 	if (error == 0 && VTOI(vp)->i_gen == gen)
    408  1.1  perseant 		lfs_setclean(fs, vp);
    409  1.1  perseant 
    410  1.1  perseant 	if (vp == fs->lfs_ivnode)
    411  1.1  perseant 		vp = NULL;
    412  1.1  perseant 	else if (vp != NULL) {
    413  1.1  perseant 		VOP_UNLOCK(vp);
    414  1.1  perseant 		vrele(vp);
    415  1.1  perseant 		vp = NULL;
    416  1.1  perseant 	}
    417  1.1  perseant 
    418  1.1  perseant 	/* Skip to the next block */
    419  1.1  perseant 	bytes = ((lfs_fi_getnblocks(fs, fip) - 1) << lfs_sb_getbshift(fs))
    420  1.1  perseant 		+ lfs_fi_getlastlength(fs, fip);
    421  1.1  perseant 	*offsetp += lfs_btofsb(fs, bytes);
    422  1.1  perseant 
    423  1.1  perseant 	return error;
    424  1.1  perseant }
    425  1.1  perseant 
    426  1.1  perseant /*
    427  1.1  perseant  * Use the partial-segment parser to rewrite (clean) a segment.
    428  1.1  perseant  */
    429  1.1  perseant int
    430  1.1  perseant lfs_rewrite_segment(struct lfs *fs, int sn, int *fragsp, kauth_cred_t cred, struct lwp *l)
    431  1.1  perseant {
    432  1.1  perseant 	daddr_t ooffset, offset, endpseg;
    433  1.1  perseant 
    434  1.1  perseant 	ASSERT_SEGLOCK(fs);
    435  1.1  perseant 
    436  1.1  perseant 	offset = lfs_sntod(fs, sn);
    437  1.1  perseant 	lfs_skip_superblock(fs, &offset);
    438  1.1  perseant 	endpseg = lfs_sntod(fs, sn + 1);
    439  1.1  perseant 
    440  1.1  perseant 	while (offset > 0 && offset != endpseg) {
    441  1.1  perseant 		/* First check summary validity (XXX unnecessary?) */
    442  1.1  perseant 		ooffset = offset;
    443  1.1  perseant 		lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
    444  1.1  perseant 			     NULL, NULL, CKSEG_CKSUM, NULL);
    445  1.1  perseant 		if (offset == ooffset)
    446  1.1  perseant 			break;
    447  1.1  perseant 
    448  1.1  perseant 		/*
    449  1.1  perseant 		 * Valid, proceed.
    450  1.1  perseant 		 *
    451  1.1  perseant 		 * First write the file blocks, marking their
    452  1.1  perseant 		 * inodes IN_CLEANING.
    453  1.1  perseant 		 */
    454  1.1  perseant 		offset = ooffset;
    455  1.1  perseant 		lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
    456  1.1  perseant 			       NULL, finfo_func_rewrite,
    457  1.1  perseant 			       CKSEG_NONE, fragsp);
    458  1.1  perseant 
    459  1.1  perseant 		/*
    460  1.1  perseant 		 * Now go back and pick up any inodes that
    461  1.1  perseant 		 * were not already marked IN_CLEANING, and
    462  1.1  perseant 		 * write them as well.
    463  1.1  perseant 		 */
    464  1.1  perseant 		offset = ooffset;
    465  1.1  perseant 		lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
    466  1.1  perseant 			       ino_func_rewrite, NULL,
    467  1.1  perseant 			       CKSEG_NONE, fragsp);
    468  1.1  perseant 	}
    469  1.1  perseant 	return 0;
    470  1.1  perseant }
    471  1.1  perseant 
    472  1.1  perseant /*
    473  1.1  perseant  * Rewrite the contents of one or more segments, in preparation for
    474  1.1  perseant  * marking them clean.
    475  1.1  perseant  */
    476  1.1  perseant int
    477  1.1  perseant lfs_rewrite_segments(struct lfs *fs, int *snn, int len, int *directp, int *offsetp, struct lwp *l)
    478  1.1  perseant {
    479  1.1  perseant 	kauth_cred_t cred;
    480  1.1  perseant 	int i, error;
    481  1.1  perseant 	struct buf *bp;
    482  1.1  perseant 	SEGUSE *sup;
    483  1.1  perseant 	daddr_t offset, endpseg;
    484  1.1  perseant 
    485  1.1  perseant 	ASSERT_NO_SEGLOCK(fs);
    486  1.1  perseant 
    487  1.1  perseant 	cred = l ? l->l_cred : NOCRED;
    488  1.1  perseant 
    489  1.1  perseant 	/* Prevent new dirops and acquire the cleaner lock. */
    490  1.1  perseant 	lfs_writer_enter(fs, "rewritesegs");
    491  1.1  perseant 	if ((error = lfs_cleanerlock(fs)) != 0) {
    492  1.1  perseant 		lfs_writer_leave(fs);
    493  1.1  perseant 		return error;
    494  1.1  perseant 	}
    495  1.1  perseant 
    496  1.1  perseant 	/*
    497  1.1  perseant 	 * Pre-reference vnodes now that we have cleaner lock
    498  1.1  perseant 	 * but before we take the segment lock.  We don't want to
    499  1.1  perseant 	 * mix cleaning blocks with flushed vnodes.
    500  1.1  perseant 	 */
    501  1.1  perseant 	for (i = 0; i < len; i++) {
    502  1.1  perseant 		error = 0;
    503  1.1  perseant 		/* Refuse to clean segments that are ACTIVE */
    504  1.1  perseant 		LFS_SEGENTRY(sup, fs, snn[i], bp);
    505  1.1  perseant 		if (sup->su_flags & SEGUSE_ACTIVE
    506  1.1  perseant 		    || !(sup->su_flags & SEGUSE_DIRTY))
    507  1.1  perseant 			error = EINVAL;
    508  1.1  perseant 
    509  1.1  perseant 		brelse(bp, 0);
    510  1.1  perseant 		if (error)
    511  1.1  perseant 			break;
    512  1.1  perseant 
    513  1.1  perseant 		offset = lfs_sntod(fs, snn[i]);
    514  1.1  perseant 		lfs_skip_superblock(fs, &offset);
    515  1.1  perseant 		endpseg = lfs_sntod(fs, snn[i] + 1);
    516  1.1  perseant 
    517  1.1  perseant 		while (offset > 0 && offset != endpseg) {
    518  1.1  perseant 			lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
    519  1.1  perseant 				       ino_func_setclean, finfo_func_setclean,
    520  1.1  perseant 				       CKSEG_NONE, NULL);
    521  1.1  perseant 		}
    522  1.1  perseant 	}
    523  1.1  perseant 
    524  1.1  perseant 	/*
    525  1.1  perseant 	 * Actually rewrite the contents of the segment.
    526  1.1  perseant 	 */
    527  1.1  perseant 	lfs_seglock(fs, SEGM_CLEAN);
    528  1.1  perseant 
    529  1.1  perseant 	for (i = 0; i < len; i++) {
    530  1.1  perseant 		error = 0;
    531  1.1  perseant 		/* Refuse to clean segments that are ACTIVE */
    532  1.1  perseant 		LFS_SEGENTRY(sup, fs, snn[i], bp);
    533  1.1  perseant 		if (sup->su_flags & SEGUSE_ACTIVE
    534  1.1  perseant 		    || !(sup->su_flags & SEGUSE_DIRTY))
    535  1.1  perseant 			error = EINVAL;
    536  1.1  perseant 
    537  1.1  perseant 		brelse(bp, 0);
    538  1.1  perseant 		if (error)
    539  1.1  perseant 			break;
    540  1.1  perseant 
    541  1.1  perseant 		error = lfs_rewrite_segment(fs, snn[i], directp, cred, l);
    542  1.1  perseant 		if (error) {
    543  1.1  perseant 			printf("  rewrite_segment returned %d\n", error);
    544  1.1  perseant 			break;
    545  1.1  perseant 		}
    546  1.1  perseant 	}
    547  1.1  perseant 	while (lfs_writeseg(fs, fs->lfs_sp))
    548  1.1  perseant 		;
    549  1.1  perseant 
    550  1.1  perseant 	*offsetp = lfs_btofsb(fs, fs->lfs_sp->bytes_written);
    551  1.1  perseant 	lfs_segunlock(fs);
    552  1.1  perseant 	lfs_cleanerunlock(fs);
    553  1.1  perseant 	lfs_writer_leave(fs);
    554  1.1  perseant 
    555  1.1  perseant 	return error;
    556  1.1  perseant }
    557  1.1  perseant 
#if 0
/*
 * Sequentiality predicate handed to ulfs_bmaparray(): true when lbn2 is
 * exactly lfs_sb_getfrag(fs) past lbn1.
 */
static bool
lfs_isseq(const struct lfs *fs, long int lbn1, long int lbn2)
{
	return lbn2 == lbn1 + lfs_sb_getfrag(__UNCONST(fs));
}

/*
 * Rewrite the contents of a file in order to coalesce it.
 * We don't bother rewriting indirect blocks because they will have to
 * be rewritten anyway when we rewrite the direct blocks.
 *
 * The segment lock must be held on entry; it is released with
 * lfs_segunlock() before returning, except when the vnode cannot be
 * obtained.  Returns 0, the VFS_VGET() error, or the first error from
 * ulfs_bmaparray() or rewrite_block().
 *
 * NOTE(review): this code is compiled out and has not been exercised.
 * The FINFO handling follows what finfo_func_rewrite() does after its
 * own rewrite_block() calls; confirm before enabling.
 */
int
lfs_rewrite_file(struct lfs *fs, ino_t ino, struct lwp *l)
{
	daddr_t lbn, hiblk, daddr;
	int i, error, num, run, have_finfo;
	struct vnode *vp;
	struct indir indirs[ULFS_NIADDR+2];
	size_t size;

	ASSERT_SEGLOCK(fs);

	LFS_ASSERT_MAXINO(fs, ino);

	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
	if (error)
		return error;

	/*
	 * rewrite_block() asserts that it is given a flag pointer and
	 * acquires the FINFO itself the first time the flag is clear.
	 */
	have_finfo = 0;
	for (lbn = 0, hiblk = VTOI(vp)->i_lfs_hiblk; lbn < hiblk; ++lbn) {
		error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, &run,
				       lfs_isseq);
		if (error)
			break;
		if (daddr == UNASSIGNED)
			continue;
		for (i = 0; i <= run; i++) {
			size = lfs_blksize(fs, VTOI(vp), lbn + i);
			error = rewrite_block(fs, vp, lbn + i, 0x0, size,
					      &have_finfo);
			if (error)
				break;
		}
		if (error)
			break;
		/*
		 * Step over the run just handled; the loop increment
		 * then moves to the first block after it.
		 */
		lbn += run;
	}

	/* If a FINFO was acquired, finish it off and write the inode. */
	if (have_finfo) {
		lfs_updatemeta(fs->lfs_sp);
		fs->lfs_sp->vp = NULL;
		lfs_release_finfo(fs);
		lfs_writeinode(fs, fs->lfs_sp, VTOI(vp));
	}
	while (lfs_writeseg(fs, fs->lfs_sp))
		;
	lfs_segunlock(fs);

	/* Drop the lock and reference taken by VFS_VGET(). */
	VOP_UNLOCK(vp);
	vrele(vp);

	return error;
}
#endif /* 0 */
    608  1.1  perseant 
    609  1.1  perseant 
    610  1.1  perseant static int
    611  1.1  perseant ino_func_checkempty(struct lfs_inofuncarg *lifa)
    612  1.1  perseant {
    613  1.1  perseant 	struct lfs *fs;
    614  1.1  perseant 	daddr_t offset;
    615  1.1  perseant 	struct vnode *devvp;
    616  1.1  perseant 	union lfs_dinode *dip;
    617  1.1  perseant 	struct buf *dbp, *ibp;
    618  1.1  perseant 	int error;
    619  1.1  perseant 	IFILE *ifp;
    620  1.1  perseant 	unsigned i, num;
    621  1.1  perseant 	daddr_t true_addr;
    622  1.1  perseant 	ino_t ino;
    623  1.1  perseant 
    624  1.1  perseant 	fs = lifa->fs;
    625  1.1  perseant 	offset = lifa->offset;
    626  1.1  perseant 	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
    627  1.1  perseant 
    628  1.1  perseant 	/* Read inode block */
    629  1.1  perseant 	error = bread(devvp, LFS_FSBTODB(fs, offset), lfs_sb_getibsize(fs),
    630  1.1  perseant 	    0, &dbp);
    631  1.1  perseant 	if (error) {
    632  1.1  perseant 		DLOG((DLOG_RF, "ino_func_checkempty: bread returned %d\n",
    633  1.1  perseant 		      error));
    634  1.1  perseant 		return error;
    635  1.1  perseant 	}
    636  1.1  perseant 
    637  1.1  perseant 	/* Check each inode against ifile entry */
    638  1.1  perseant 	num = LFS_INOPB(fs);
    639  1.1  perseant 	for (i = num; i-- > 0; ) {
    640  1.1  perseant 		dip = DINO_IN_BLOCK(fs, dbp->b_data, i);
    641  1.1  perseant 		ino = lfs_dino_getinumber(fs, dip);
    642  1.1  perseant 		if (ino == LFS_IFILE_INUM) {
    643  1.1  perseant 			/* Check address against superblock */
    644  1.1  perseant 			true_addr = lfs_sb_getidaddr(fs);
    645  1.1  perseant 		} else {
    646  1.1  perseant 			/* Not ifile.  Check address against ifile. */
    647  1.1  perseant 			LFS_IENTRY(ifp, fs, ino, ibp);
    648  1.1  perseant 			true_addr = lfs_if_getdaddr(fs, ifp);
    649  1.1  perseant 			brelse(ibp, 0);
    650  1.1  perseant 		}
    651  1.1  perseant 		if (offset == true_addr) {
    652  1.1  perseant 			error = EEXIST;
    653  1.1  perseant 			break;
    654  1.1  perseant 		}
    655  1.1  perseant 	}
    656  1.1  perseant 	brelse(dbp, BC_AGE);
    657  1.1  perseant 
    658  1.1  perseant 	return error;
    659  1.1  perseant }
    660  1.1  perseant 
    661  1.1  perseant static int
    662  1.1  perseant finfo_func_checkempty(struct lfs_finfofuncarg *lffa)
    663  1.1  perseant {
    664  1.1  perseant 	struct lfs *fs;
    665  1.1  perseant 	FINFO *fip;
    666  1.1  perseant 	daddr_t *offsetp;
    667  1.1  perseant 	int j, error;
    668  1.1  perseant 	size_t size, bytes;
    669  1.1  perseant 	ino_t ino;
    670  1.1  perseant 	uint32_t gen;
    671  1.1  perseant 	struct vnode *vp;
    672  1.1  perseant 	daddr_t lbn, daddr;
    673  1.1  perseant 
    674  1.1  perseant 	fs = lffa->fs;
    675  1.1  perseant 	fip = lffa->finfop;
    676  1.1  perseant 	offsetp = lffa->offsetp;
    677  1.1  perseant 
    678  1.1  perseant 	/* Get the inode and check its version. */
    679  1.1  perseant 	ino = lfs_fi_getino(fs, fip);
    680  1.1  perseant 	gen = lfs_fi_getversion(fs, fip);
    681  1.1  perseant 	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE, &vp);
    682  1.1  perseant 
    683  1.1  perseant 	/*
    684  1.1  perseant 	 * If we can't, or if version is wrong, this FINFO does not refer
    685  1.1  perseant 	 * to a live file.  Skip over it and continue.
    686  1.1  perseant 	 */
    687  1.1  perseant 	if (error || VTOI(vp)->i_gen != gen) {
    688  1.1  perseant 		if (error == 0)
    689  1.1  perseant 			error = ESTALE;
    690  1.1  perseant 
    691  1.1  perseant 		if (vp != NULL) {
    692  1.1  perseant 			VOP_UNLOCK(vp);
    693  1.1  perseant 			vrele(vp);
    694  1.1  perseant 			vp = NULL;
    695  1.1  perseant 		}
    696  1.1  perseant 		bytes = ((lfs_fi_getnblocks(fs, fip) - 1)
    697  1.1  perseant 			 << lfs_sb_getbshift(fs))
    698  1.1  perseant 			+ lfs_fi_getlastlength(fs, fip);
    699  1.1  perseant 		*offsetp += lfs_btofsb(fs, bytes);
    700  1.1  perseant 
    701  1.1  perseant 		return error;
    702  1.1  perseant 	}
    703  1.1  perseant 
    704  1.1  perseant 	/*
    705  1.1  perseant 	 * We have the vnode and its version is correct.
    706  1.1  perseant 	 * Loop through the blocks and check their currency.
    707  1.1  perseant 	 */
    708  1.1  perseant 	size = lfs_sb_getbsize(fs);
    709  1.1  perseant 	for (j = 0; j < lfs_fi_getnblocks(fs, fip); ++j) {
    710  1.1  perseant 		if (j == lfs_fi_getnblocks(fs, fip) - 1)
    711  1.1  perseant 			size = lfs_fi_getlastlength(fs, fip);
    712  1.1  perseant 		if (vp != NULL) {
    713  1.1  perseant 			lbn = lfs_fi_getblock(fs, fip, j);
    714  1.1  perseant 
    715  1.1  perseant 			/* Look up current location of this block. */
    716  1.1  perseant 			error = VOP_BMAP(vp, lbn, NULL, &daddr, NULL);
    717  1.1  perseant 			if (error)
    718  1.1  perseant 				break;
    719  1.1  perseant 
    720  1.1  perseant 			/* If it is here, the segment is not empty. */
    721  1.1  perseant 			if (LFS_DBTOFSB(fs, daddr) == *offsetp) {
    722  1.1  perseant 				error = EEXIST;
    723  1.1  perseant 				break;
    724  1.1  perseant 			}
    725  1.1  perseant 		}
    726  1.1  perseant 		*offsetp += lfs_btofsb(fs, size);
    727  1.1  perseant 	}
    728  1.1  perseant 
    729  1.1  perseant 	/* Release vnode */
    730  1.1  perseant 	VOP_UNLOCK(vp);
    731  1.1  perseant 	vrele(vp);
    732  1.1  perseant 
    733  1.1  perseant 	return error;
    734  1.1  perseant }
    735  1.1  perseant 
    736  1.1  perseant int
    737  1.1  perseant lfs_checkempty(struct lfs *fs, int sn, kauth_cred_t cred, struct lwp *l)
    738  1.1  perseant {
    739  1.1  perseant 	daddr_t offset, endpseg;
    740  1.1  perseant 	int error;
    741  1.1  perseant 
    742  1.1  perseant 	ASSERT_SEGLOCK(fs);
    743  1.1  perseant 
    744  1.1  perseant 	offset = lfs_sntod(fs, sn);
    745  1.1  perseant 	lfs_skip_superblock(fs, &offset);
    746  1.1  perseant 	endpseg = lfs_sntod(fs, sn + 1);
    747  1.1  perseant 
    748  1.1  perseant 	while (offset > 0 && offset < endpseg) {
    749  1.1  perseant 		error = lfs_parse_pseg(fs, &offset, 0, cred, NULL, l,
    750  1.1  perseant 				     ino_func_checkempty,
    751  1.1  perseant 				     finfo_func_checkempty,
    752  1.1  perseant 				     CKSEG_NONE, NULL);
    753  1.1  perseant 		if (error)
    754  1.1  perseant 			return error;
    755  1.1  perseant 	}
    756  1.1  perseant 	return 0;
    757  1.1  perseant }
    758  1.1  perseant 
    759  1.1  perseant static long
    760  1.1  perseant segselect_greedy(struct lfs *fs, int sn, SEGUSE *sup)
    761  1.1  perseant {
    762  1.1  perseant 	return lfs_sb_getssize(fs) - sup->su_nbytes;
    763  1.1  perseant }
    764  1.1  perseant 
    765  1.1  perseant __inline static long
    766  1.1  perseant segselect_cb_rosenblum(struct lfs *fs, int sn, SEGUSE *sup, long age)
    767  1.1  perseant {
    768  1.1  perseant 	long benefit, cost;
    769  1.1  perseant 
    770  1.1  perseant 	benefit = (int64_t)lfs_sb_getssize(fs) - sup->su_nbytes -
    771  1.1  perseant 		(sup->su_nsums + 1) * lfs_sb_getfsize(fs);
    772  1.1  perseant 	if (sup->su_flags & SEGUSE_SUPERBLOCK)
    773  1.1  perseant 		benefit -= LFS_SBPAD;
    774  1.1  perseant 	if (lfs_sb_getbsize(fs) > lfs_sb_getfsize(fs)) /* fragmentation */
    775  1.1  perseant 		benefit -= (lfs_sb_getbsize(fs) / 2);
    776  1.1  perseant 	if (benefit <= 0) {
    777  1.1  perseant 		return 0;
    778  1.1  perseant 	}
    779  1.1  perseant 
    780  1.1  perseant 	cost = lfs_sb_getssize(fs) + sup->su_nbytes;
    781  1.1  perseant 	return (256 * benefit * age) / cost;
    782  1.1  perseant }
    783  1.1  perseant 
    784  1.1  perseant static long
    785  1.1  perseant segselect_cb_time(struct lfs *fs, int sn, SEGUSE *sup)
    786  1.1  perseant {
    787  1.1  perseant 	long age;
    788  1.1  perseant 
    789  1.1  perseant 	age = time_second - sup->su_lastmod;
    790  1.1  perseant 	if (age < 0)
    791  1.1  perseant 		age = 0;
    792  1.1  perseant 	return segselect_cb_rosenblum(fs, sn, sup, age);
    793  1.1  perseant }
    794  1.1  perseant 
    795  1.1  perseant #if 0
    796  1.1  perseant /*
    797  1.1  perseant  * Same as the time comparator, but fetch the serial number from the
    798  1.1  perseant  * segment header to compare.
    799  1.1  perseant  *
    800  1.1  perseant  * This is ugly.  Whether serial number or wall time is better is a
    801  1.1  perseant  * worthy question, but if we want to use serial number to compute
    802  1.1  perseant  * age, we should record the serial number in su_lastmod instead of
    803  1.1  perseant  * the time.
    804  1.1  perseant  */
    805  1.1  perseant static long
    806  1.1  perseant segselect_cb_serial(struct lfs *fs, int sn, SEGUSE *sup)
    807  1.1  perseant {
    808  1.1  perseant 	struct buf *bp;
    809  1.1  perseant 	uint32_t magic;
    810  1.1  perseant 	uint64_t age, serial;
    811  1.1  perseant 	daddr_t addr;
    812  1.1  perseant 
    813  1.1  perseant 	addr = lfs_segtod(fs, sn);
    814  1.1  perseant 	lfs_skip_superblock(fs, &addr);
    815  1.1  perseant 	bread(fs->lfs_devvp, LFS_FSBTODB(fs, addr),
    816  1.1  perseant 	      lfs_sb_getsumsize(fs), 0, &bp);
    817  1.1  perseant 	magic = lfs_ss_getmagic(fs, ((SEGSUM *)bp->b_data));
    818  1.1  perseant 	serial = lfs_ss_getserial(fs, ((SEGSUM *)bp->b_data));
    819  1.1  perseant 	brelse(bp, 0);
    820  1.1  perseant 
    821  1.1  perseant 	if (magic != SS_MAGIC)
    822  1.1  perseant 		return 0;
    823  1.1  perseant 
    824  1.1  perseant 	age = lfs_sb_getserial(fs) - serial;
    825  1.1  perseant 	return segselect_cb_rosenblum(fs, sn, sup, age);
    826  1.1  perseant }
    827  1.1  perseant #endif
    828  1.1  perseant 
    829  1.1  perseant void
    830  1.1  perseant lfs_cleanerd(void *arg)
    831  1.1  perseant {
    832  1.1  perseant 	mount_iterator_t *iter;
    833  1.1  perseant  	struct mount *mp;
    834  1.1  perseant  	struct lfs *fs;
    835  1.1  perseant 	struct vfsops *vfs = NULL;
    836  1.1  perseant 	int lfsc;
    837  1.1  perseant 	int cleaned_something = 0;
    838  1.1  perseant 
    839  1.1  perseant 	mutex_enter(&lfs_lock);
    840  1.1  perseant 	KASSERTMSG(lfs_cleaner_daemon == NULL,
    841  1.1  perseant 		   "more than one LFS cleaner daemon");
    842  1.1  perseant 	lfs_cleaner_daemon = curlwp;
    843  1.1  perseant 	mutex_exit(&lfs_lock);
    844  1.1  perseant 
    845  1.1  perseant 	/* Take an extra reference to the LFS vfsops. */
    846  1.1  perseant 	vfs = vfs_getopsbyname(MOUNT_LFS);
    847  1.1  perseant 
    848  1.1  perseant  	mutex_enter(&lfs_lock);
    849  1.1  perseant  	for (;;) {
    850  1.1  perseant 		KASSERT(mutex_owned(&lfs_lock));
    851  1.1  perseant 		if (cleaned_something == 0)
    852  1.1  perseant 			cv_timedwait(&lfs_allclean_wakeup, &lfs_lock, hz/10 + 1);
    853  1.1  perseant 		KASSERT(mutex_owned(&lfs_lock));
    854  1.1  perseant 		cleaned_something = 0;
    855  1.1  perseant 
    856  1.1  perseant 		KASSERT(mutex_owned(&lfs_lock));
    857  1.1  perseant 		mutex_exit(&lfs_lock);
    858  1.1  perseant 
    859  1.1  perseant  		/*
    860  1.1  perseant  		 * Look through the list of LFSs to see if any of them
    861  1.1  perseant 		 * need cleaning.
    862  1.1  perseant  		 */
    863  1.1  perseant  		mountlist_iterator_init(&iter);
    864  1.1  perseant 		lfsc = 0;
    865  1.1  perseant 		while ((mp = mountlist_iterator_next(iter)) != NULL) {
    866  1.1  perseant 			KASSERT(!mutex_owned(&lfs_lock));
    867  1.1  perseant  			if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS,
    868  1.1  perseant  			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
    869  1.1  perseant  				fs = VFSTOULFS(mp)->um_lfs;
    870  1.1  perseant 
    871  1.1  perseant 				mutex_enter(&lfs_lock);
    872  1.1  perseant 				if (fs->lfs_clean_selector != NULL)
    873  1.1  perseant 					++lfsc;
    874  1.1  perseant 				mutex_exit(&lfs_lock);
    875  1.1  perseant 				cleaned_something += clean(fs);
    876  1.1  perseant 			}
    877  1.1  perseant  		}
    878  1.1  perseant 		if (lfsc == 0) {
    879  1.1  perseant 			mutex_enter(&lfs_lock);
    880  1.1  perseant 			lfs_cleaner_daemon = NULL;
    881  1.1  perseant 			mutex_exit(&lfs_lock);
    882  1.1  perseant 			mountlist_iterator_destroy(iter);
    883  1.1  perseant 			break;
    884  1.1  perseant 		}
    885  1.1  perseant  		mountlist_iterator_destroy(iter);
    886  1.1  perseant 
    887  1.1  perseant  		mutex_enter(&lfs_lock);
    888  1.1  perseant  	}
    889  1.1  perseant 	KASSERT(!mutex_owned(&lfs_lock));
    890  1.1  perseant 
    891  1.1  perseant 	/* Give up our extra reference so the module can be unloaded. */
    892  1.1  perseant 	mutex_enter(&vfs_list_lock);
    893  1.1  perseant 	if (vfs != NULL)
    894  1.1  perseant 		vfs->vfs_refcount--;
    895  1.1  perseant 	mutex_exit(&vfs_list_lock);
    896  1.1  perseant 
    897  1.1  perseant 	/* Done! */
    898  1.1  perseant 	kthread_exit(0);
    899  1.1  perseant }
    900  1.1  perseant 
    901  1.1  perseant /*
    902  1.1  perseant  * Look at the file system to see whether it needs cleaning, and if it does,
    903  1.1  perseant  * clean a segment.
    904  1.1  perseant  */
    905  1.1  perseant static int
    906  1.1  perseant clean(struct lfs *fs)
    907  1.1  perseant {
    908  1.1  perseant 	struct buf *bp;
    909  1.1  perseant 	SEGUSE *sup;
    910  1.1  perseant 	int sn, maxsn, nclean, nready, nempty, nerror, nzero, again, target;
    911  1.1  perseant 	long prio, maxprio, maxeprio, thresh;
    912  1.1  perseant 	long (*func)(struct lfs *, int, SEGUSE *);
    913  1.1  perseant 	uint32_t __debugused segflags = 0;
    914  1.1  perseant 	daddr_t oldsn, bfree, avail;
    915  1.1  perseant 	int direct, offset;
    916  1.1  perseant 
    917  1.1  perseant 	func = fs->lfs_clean_selector;
    918  1.1  perseant 	if (func == NULL)
    919  1.1  perseant 		return 0;
    920  1.1  perseant 
    921  1.1  perseant 	thresh = fs->lfs_autoclean.thresh;
    922  1.1  perseant 	if (fs->lfs_flags & LFS_MUSTCLEAN)
    923  1.1  perseant 		thresh = 0;
    924  1.1  perseant 	else if (thresh < 0) {
    925  1.1  perseant 		/*
    926  1.1  perseant 		 * Compute a priority threshold based on availability ratio.
    927  1.1  perseant 		 * XXX These numbers only makes sense for the greedy cleaner.
    928  1.1  perseant 		 * What is an appropriate threshold for the cost-benefit
    929  1.1  perseant 		 * cleaner?
    930  1.1  perseant 		 */
    931  1.1  perseant 		bfree = lfs_sb_getbfree(fs)
    932  1.1  perseant 			+ lfs_segtod(fs, 1) * lfs_sb_getminfree(fs);
    933  1.1  perseant 		avail = lfs_sb_getavail(fs) - fs->lfs_ravail - fs->lfs_favail;
    934  1.1  perseant 		if (avail > bfree)
    935  1.1  perseant 			return 0;
    936  1.1  perseant 		thresh = lfs_sb_getssize(fs) * (bfree - avail)
    937  1.1  perseant 			/ (lfs_sb_getsize(fs) - avail);
    938  1.1  perseant 		if (thresh > lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs))
    939  1.1  perseant 			thresh = lfs_sb_getsumsize(fs) + 5 * lfs_sb_getbsize(fs);
    940  1.1  perseant 		if (thresh > lfs_sb_getssize(fs) - lfs_sb_getbsize(fs))
    941  1.1  perseant 			return 0;
    942  1.1  perseant 	}
    943  1.1  perseant 
    944  1.1  perseant 	target = fs->lfs_autoclean.target;
    945  1.1  perseant 	if (target <= 0) {
    946  1.1  perseant 		/* Default to half a segment target */
    947  1.1  perseant 		target = lfs_segtod(fs, 1) / 2;
    948  1.1  perseant 	}
    949  1.1  perseant 
    950  1.1  perseant 	oldsn = lfs_dtosn(fs, lfs_sb_getoffset(fs));
    951  1.1  perseant 
    952  1.1  perseant 	again = 0;
    953  1.1  perseant 	maxprio = maxeprio = -1;
    954  1.1  perseant 	nzero = nclean = nready = nempty = nerror = 0;
    955  1.1  perseant 	for (sn = 0; sn < lfs_sb_getnseg(fs); sn++) {
    956  1.1  perseant 
    957  1.1  perseant 		prio = 0;
    958  1.1  perseant 		LFS_SEGENTRY(sup, fs, sn, bp);
    959  1.1  perseant 		if (sup->su_flags & SEGUSE_ACTIVE)
    960  1.1  perseant 			prio = 0;
    961  1.1  perseant 		else if (!(sup->su_flags & SEGUSE_DIRTY))
    962  1.1  perseant 			++nclean;
    963  1.1  perseant 		else if (sup->su_flags & SEGUSE_READY)
    964  1.1  perseant 			++nready;
    965  1.1  perseant 		else if (sup->su_flags & SEGUSE_EMPTY)
    966  1.1  perseant 			++nempty;
    967  1.1  perseant 		else if (sup->su_nbytes == 0)
    968  1.1  perseant 			++nzero;
    969  1.1  perseant 		else
    970  1.1  perseant 			prio = (*func)(fs, sn, sup);
    971  1.1  perseant 
    972  1.1  perseant 		if (sup->su_flags & SEGUSE_ERROR) {
    973  1.1  perseant 			if (prio > maxeprio)
    974  1.1  perseant 				maxeprio = prio;
    975  1.1  perseant 			prio = 0;
    976  1.1  perseant 			++nerror;
    977  1.1  perseant 		}
    978  1.1  perseant 
    979  1.1  perseant 		if (prio > maxprio) {
    980  1.1  perseant 			maxprio = prio;
    981  1.1  perseant 			maxsn = sn;
    982  1.1  perseant 			segflags = sup->su_flags;
    983  1.1  perseant 		}
    984  1.1  perseant 		brelse(bp, 0);
    985  1.1  perseant 	}
    986  1.1  perseant 	DLOG((DLOG_CLEAN, "%s clean=%d/%d zero=%d empty=%d ready=%d maxsn=%d maxprio=%ld/%ld segflags=0x%lx\n",
    987  1.1  perseant 	       (maxprio > thresh ? "YES" : "NO "),
    988  1.1  perseant 	       nclean, (int)lfs_sb_getnseg(fs), nzero, nempty, nready,
    989  1.1  perseant 	       maxsn, maxprio, (unsigned long)thresh,
    990  1.1  perseant 	       (unsigned long)segflags));
    991  1.1  perseant 
    992  1.1  perseant 	/*
    993  1.1  perseant 	 * If we are trying to clean the segment we cleaned last,
    994  1.1  perseant 	 * cleaning did not work.  Mark this segment SEGUSE_ERROR
    995  1.1  perseant 	 * and try again.
    996  1.1  perseant 	 */
    997  1.1  perseant 	if (maxprio > 0 && fs->lfs_lastcleaned == maxsn) {
    998  1.1  perseant 		LFS_SEGENTRY(sup, fs, maxsn, bp);
    999  1.1  perseant 		sup->su_flags |= SEGUSE_ERROR;
   1000  1.1  perseant 		LFS_WRITESEGENTRY(sup, fs, sn, bp);
   1001  1.1  perseant 		return 1;
   1002  1.1  perseant 	}
   1003  1.1  perseant 
   1004  1.1  perseant 	/*
   1005  1.1  perseant 	 * If there were nothing but error segments, clear error.
   1006  1.1  perseant 	 * We will wait to try again.
   1007  1.1  perseant 	 */
   1008  1.1  perseant 	if (maxprio == 0 && maxeprio > 0) {
   1009  1.1  perseant 		DLOG((DLOG_CLEAN, "clear error on %d segments, try again\n",
   1010  1.1  perseant 		      nerror));
   1011  1.1  perseant 		lfs_seguse_clrflag_all(fs, SEGUSE_ERROR);
   1012  1.1  perseant 	}
   1013  1.1  perseant 
   1014  1.1  perseant 	/* Rewrite the highest-priority segment */
   1015  1.1  perseant 	if (maxprio > thresh) {
   1016  1.1  perseant 		direct = offset = 0;
   1017  1.1  perseant 		(void)lfs_rewrite_segments(fs, &maxsn, 1,
   1018  1.1  perseant 					   &direct, &offset, curlwp);
   1019  1.1  perseant 		DLOG((DLOG_CLEAN, "  direct=%d offset=%d\n", direct, offset));
   1020  1.1  perseant 		again += direct;
   1021  1.1  perseant 		fs->lfs_clean_accum += offset;
   1022  1.1  perseant 
   1023  1.1  perseant 		/* Don't clean this again immediately */
   1024  1.1  perseant 		fs->lfs_lastcleaned = maxsn;
   1025  1.1  perseant 	}
   1026  1.1  perseant 
   1027  1.1  perseant 	/*
   1028  1.1  perseant 	 * If we are in dire straits but we have segments already
   1029  1.1  perseant 	 * empty, force a double checkpoint to reclaim them.
   1030  1.1  perseant 	 */
   1031  1.1  perseant 	if (fs->lfs_flags & LFS_MUSTCLEAN) {
   1032  1.1  perseant 		if (nready + nempty > 0) {
   1033  1.1  perseant 			printf("force checkpoint with nready=%d nempty=%d nzero=%d\n",
   1034  1.1  perseant 			       nready, nempty, nzero);
   1035  1.1  perseant 			lfs_segwrite(fs->lfs_ivnode->v_mount,
   1036  1.1  perseant 				     SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC);
   1037  1.1  perseant 			lfs_segwrite(fs->lfs_ivnode->v_mount,
   1038  1.1  perseant 				     SEGM_CKP | SEGM_FORCE_CKP | SEGM_SYNC);
   1039  1.1  perseant 			++again;
   1040  1.1  perseant 		}
   1041  1.1  perseant 	} else if (fs->lfs_clean_accum > target) {
   1042  1.1  perseant 		DLOG((DLOG_CLEAN, "checkpoint to flush\n"));
   1043  1.1  perseant 		lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP);
   1044  1.1  perseant 		fs->lfs_clean_accum = 0;
   1045  1.1  perseant 	} else if (lfs_dtosn(fs, lfs_sb_getoffset(fs)) != oldsn
   1046  1.1  perseant 		   || nempty + nready > LFS_MAX_ACTIVE) { /* XXX arbitrary */
   1047  1.1  perseant 		DLOG((DLOG_CLEAN, "write to promote empty segments\n"));
   1048  1.1  perseant 		lfs_segwrite(fs->lfs_ivnode->v_mount, SEGM_CKP);
   1049  1.1  perseant 		fs->lfs_clean_accum = 0;
   1050  1.1  perseant 	}
   1051  1.1  perseant 
   1052  1.1  perseant 	return again;
   1053  1.1  perseant }
   1054  1.1  perseant 
   1055  1.1  perseant /*
   1056  1.1  perseant  * Rewrite a file in its entirety.
   1057  1.1  perseant  *
   1058  1.1  perseant  * Generally this would be done to coalesce a file that is scattered
   1059  1.1  perseant  * around the disk; but if the "scramble" flag is set, instead rewrite
   1060  1.1  perseant  * only the even-numbered blocks, which provides the opposite effect
   1061  1.1  perseant  * for testing purposes.
   1062  1.1  perseant  *
   1063  1.1  perseant  * It is the caller's responsibility to check the bounds of the inode
   1064  1.1  perseant  * numbers.
   1065  1.1  perseant  */
   1066  1.1  perseant int
   1067  1.1  perseant lfs_rewrite_file(struct lfs *fs, ino_t *inoa, int len, bool scramble,
   1068  1.1  perseant 		 int *directp, int *offsetp)
   1069  1.1  perseant {
   1070  1.1  perseant 	daddr_t hiblk, lbn;
   1071  1.1  perseant 	struct vnode *vp;
   1072  1.1  perseant 	struct inode *ip;
   1073  1.1  perseant 	struct buf *bp;
   1074  1.1  perseant 	int i, error, flags;
   1075  1.1  perseant 
   1076  1.1  perseant 	*directp = 0;
   1077  1.1  perseant 	if ((error = lfs_cleanerlock(fs)) != 0)
   1078  1.1  perseant 		return error;
   1079  1.1  perseant 	flags = SEGM_PROT;
   1080  1.1  perseant 	lfs_seglock(fs, flags);
   1081  1.1  perseant 	for (i = 0; i < len; ++i) {
   1082  1.1  perseant 		error = VFS_VGET(fs->lfs_ivnode->v_mount, inoa[i], LK_EXCLUSIVE, &vp);
   1083  1.1  perseant 		if (error)
   1084  1.1  perseant 			goto out;
   1085  1.1  perseant 
   1086  1.1  perseant 		ip = VTOI(vp);
   1087  1.1  perseant 		if ((vp->v_uflag & VU_DIROP) || (ip->i_flags & IN_ADIROP)) {
   1088  1.1  perseant 			VOP_UNLOCK(vp);
   1089  1.1  perseant 			vrele(vp);
   1090  1.1  perseant 			error = EAGAIN;
   1091  1.1  perseant 			goto out;
   1092  1.1  perseant 		}
   1093  1.1  perseant 
   1094  1.1  perseant 		/* Highest block in this inode */
   1095  1.1  perseant 		hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
   1096  1.1  perseant 
   1097  1.1  perseant 		for (lbn = 0; lbn <= hiblk; ++lbn) {
   1098  1.1  perseant 			if (scramble && (lbn & 0x01))
   1099  1.1  perseant 				continue;
   1100  1.1  perseant 
   1101  1.1  perseant 			if (lfs_needsflush(fs)) {
   1102  1.1  perseant 				lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
   1103  1.1  perseant 			}
   1104  1.1  perseant 
   1105  1.1  perseant 			error = bread(vp, lbn, lfs_blksize(fs, ip, lbn), 0, &bp);
   1106  1.1  perseant 			if (error)
   1107  1.1  perseant 				break;
   1108  1.1  perseant 
   1109  1.1  perseant 			/* bp->b_cflags |= BC_INVAL; */
   1110  1.1  perseant 			lfs_bwrite_ext(bp, (flags & SEGM_CLEAN ? BW_CLEAN : 0));
   1111  1.1  perseant 			*directp += lfs_btofsb(fs, bp->b_bcount);
   1112  1.1  perseant 		}
   1113  1.1  perseant 
   1114  1.1  perseant 		/* Done with this vnode */
   1115  1.1  perseant 		VOP_UNLOCK(vp);
   1116  1.1  perseant 		vrele(vp);
   1117  1.1  perseant 		if (error)
   1118  1.1  perseant 			break;
   1119  1.1  perseant 	}
   1120  1.1  perseant out:
   1121  1.1  perseant 	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
   1122  1.1  perseant 	*offsetp += lfs_btofsb(fs, fs->lfs_sp->bytes_written);
   1123  1.1  perseant 	lfs_segunlock(fs);
   1124  1.1  perseant 	lfs_cleanerunlock(fs);
   1125  1.1  perseant 
   1126  1.1  perseant 	return error;
   1127  1.1  perseant }
   1128  1.1  perseant 
   1129  1.1  perseant int
   1130  1.1  perseant lfs_cleanctl(struct lfs *fs, struct lfs_autoclean_params *params)
   1131  1.1  perseant {
   1132  1.1  perseant 	long (*cleanfunc)(struct lfs *, int, SEGUSE *);
   1133  1.1  perseant 
   1134  1.1  perseant 	fs->lfs_autoclean = *params;
   1135  1.1  perseant 
   1136  1.1  perseant 	cleanfunc = NULL;
   1137  1.1  perseant 	switch (fs->lfs_autoclean.mode) {
   1138  1.1  perseant 	case LFS_CLEANMODE_NONE:
   1139  1.1  perseant 		cleanfunc = NULL;
   1140  1.1  perseant 		break;
   1141  1.1  perseant 
   1142  1.1  perseant 	case LFS_CLEANMODE_GREEDY:
   1143  1.1  perseant 		cleanfunc = segselect_greedy;
   1144  1.1  perseant 		break;
   1145  1.1  perseant 
   1146  1.1  perseant 	case LFS_CLEANMODE_CB:
   1147  1.1  perseant 		cleanfunc = segselect_cb_time;
   1148  1.1  perseant 		break;
   1149  1.1  perseant 
   1150  1.1  perseant 	default:
   1151  1.1  perseant 		return EINVAL;
   1152  1.1  perseant 	}
   1153  1.1  perseant 
   1154  1.1  perseant 	mutex_enter(&lfs_lock);
   1155  1.1  perseant 	if (fs->lfs_clean_selector == NULL && cleanfunc != NULL)
   1156  1.1  perseant 		if (++lfs_ncleaners == 1) {
   1157  1.1  perseant 			printf("Starting cleaner thread\n");
   1158  1.1  perseant 			if (lfs_cleaner_daemon == NULL &&
   1159  1.1  perseant 			    kthread_create(PRI_BIO, 0, NULL,
   1160  1.1  perseant 					   lfs_cleanerd, NULL, NULL,
   1161  1.1  perseant 					   "lfs_cleaner") != 0)
   1162  1.1  perseant 				panic("fork lfs_cleaner");
   1163  1.1  perseant 		}
   1164  1.1  perseant 	if (fs->lfs_clean_selector != NULL && cleanfunc == NULL)
   1165  1.1  perseant 		if (--lfs_ncleaners == 0) {
   1166  1.1  perseant 			printf("Stopping cleaner thread\n");
   1167  1.1  perseant 			kthread_join(lfs_cleaner_daemon);
   1168  1.1  perseant 		}
   1169  1.1  perseant 	fs->lfs_clean_selector = cleanfunc;
   1170  1.1  perseant 	mutex_exit(&lfs_lock);
   1171  1.1  perseant 
   1172  1.1  perseant 	return 0;
   1173  1.1  perseant }
   1174