Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.2
      1  1.2  perseant /*      $NetBSD: coalesce.c,v 1.2 2002/06/14 00:58:40 perseant Exp $  */
      2  1.1  perseant 
      3  1.1  perseant /*-
      4  1.1  perseant  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  1.1  perseant  * All rights reserved.
      6  1.1  perseant  *
      7  1.1  perseant  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  perseant  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  1.1  perseant  *
     10  1.1  perseant  * Redistribution and use in source and binary forms, with or without
     11  1.1  perseant  * modification, are permitted provided that the following conditions
     12  1.1  perseant  * are met:
     13  1.1  perseant  * 1. Redistributions of source code must retain the above copyright
     14  1.1  perseant  *    notice, this list of conditions and the following disclaimer.
     15  1.1  perseant  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  perseant  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  perseant  *    documentation and/or other materials provided with the distribution.
     18  1.1  perseant  * 3. All advertising materials mentioning features or use of this software
     19  1.1  perseant  *    must display the following acknowledgement:
     20  1.1  perseant  *      This product includes software developed by the NetBSD
     21  1.1  perseant  *      Foundation, Inc. and its contributors.
     22  1.1  perseant  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  1.1  perseant  *    contributors may be used to endorse or promote products derived
     24  1.1  perseant  *    from this software without specific prior written permission.
     25  1.1  perseant  *
     26  1.1  perseant  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  1.1  perseant  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  1.1  perseant  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  1.1  perseant  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  1.1  perseant  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  1.1  perseant  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  1.1  perseant  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  1.1  perseant  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  1.1  perseant  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  1.1  perseant  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  1.1  perseant  * POSSIBILITY OF SUCH DAMAGE.
     37  1.1  perseant  */
     38  1.1  perseant 
     39  1.1  perseant #include <sys/param.h>
     40  1.1  perseant #include <sys/mount.h>
     41  1.1  perseant #include <sys/time.h>
     42  1.1  perseant #include <sys/resource.h>
     43  1.1  perseant #include <sys/types.h>
     44  1.1  perseant #include <sys/wait.h>
     45  1.1  perseant #include <sys/mman.h>
     46  1.1  perseant 
     47  1.1  perseant #include <ufs/ufs/dinode.h>
     48  1.1  perseant #include <ufs/lfs/lfs.h>
     49  1.1  perseant 
     50  1.1  perseant #include <fcntl.h>
     51  1.1  perseant #include <signal.h>
     52  1.1  perseant #include <stdio.h>
     53  1.1  perseant #include <stdlib.h>
     54  1.1  perseant #include <string.h>
     55  1.1  perseant #include <time.h>
     56  1.1  perseant #include <unistd.h>
     57  1.1  perseant #include <util.h>
     58  1.1  perseant #include <errno.h>
     59  1.1  perseant #include <err.h>
     60  1.1  perseant 
     61  1.1  perseant #include <syslog.h>
     62  1.1  perseant 
     63  1.1  perseant #include "clean.h"
     64  1.1  perseant 
     65  1.2  perseant extern int debug, do_mmap;
     66  1.1  perseant 
     67  1.1  perseant static int
     68  1.1  perseant tossdead(const void *client, const void *a, const void *b)
     69  1.1  perseant {
     70  1.2  perseant 	return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
     71  1.1  perseant 		((BLOCK_INFO_15 *)a)->bi_size == 0);
     72  1.1  perseant }
     73  1.1  perseant 
/*
 * Integer base-2 logarithm, rounded down.
 * Returns floor(log2(n)) for n > 0 and -1 for n <= 0.
 */
static int log2int(int n)
{
	int halvings;

	/* Start one below zero so that a single halving (n == 1) yields 0. */
	for (halvings = -1; n > 0; n /= 2)
		halvings++;
	return halvings;
}
     85  1.2  perseant 
     86  1.1  perseant /*
     87  1.1  perseant  * Find out if this inode's data blocks are discontinuous; if they are,
     88  1.1  perseant  * rewrite them using lfs_markv.  Return the number of inodes rewritten.
     89  1.1  perseant  */
     90  1.1  perseant int clean_inode(struct fs_info *fsp, ino_t ino)
     91  1.1  perseant {
     92  1.1  perseant 	int i, error;
     93  1.1  perseant 	BLOCK_INFO_15 *bip, *tbip;
     94  1.1  perseant 	struct dinode *dip;
     95  1.2  perseant 	int nb, onb, noff;
     96  1.1  perseant 	ufs_daddr_t toff;
     97  1.1  perseant 	struct lfs *lfsp;
     98  1.1  perseant 	int bps;
     99  1.1  perseant         SEGUSE *sup;
    100  1.1  perseant 
    101  1.1  perseant 	lfsp = &fsp->fi_lfs;
    102  1.1  perseant 
    103  1.1  perseant         dip = get_dinode(fsp, ino);
    104  1.1  perseant 	if (dip == NULL)
    105  1.1  perseant 		return 0;
    106  1.1  perseant 
    107  1.1  perseant 	/* Compute file block size, set up for lfs_bmapv */
    108  1.2  perseant 	onb = nb = btofsb(lfsp, dip->di_size);
    109  1.2  perseant 
    110  1.2  perseant 	/* XXX for now, don't do any file small enough to have fragments */
    111  1.2  perseant 	if (nb < NDADDR)
    112  1.2  perseant 		return 0;
    113  1.2  perseant 
    114  1.2  perseant 	/* Sanity checks */
    115  1.2  perseant 	if (dip->di_size < 0) {
    116  1.2  perseant 		syslog(LOG_WARNING, "ino %d, negative size (%lld)",
    117  1.2  perseant 			ino, (long long)dip->di_size);
    118  1.2  perseant 		return -1;
    119  1.2  perseant 	}
    120  1.1  perseant 	if (nb > dip->di_blocks) {
    121  1.1  perseant 		syslog(LOG_WARNING, "ino %d, computed blocks %d > held blocks %d",
    122  1.1  perseant 			ino, nb, dip->di_blocks);
    123  1.1  perseant 		return -1;
    124  1.1  perseant 	}
    125  1.2  perseant 
    126  1.1  perseant 	bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
    127  1.1  perseant 	if (bip == NULL) {
    128  1.1  perseant 		syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
    129  1.1  perseant 		return -1;
    130  1.1  perseant 	}
    131  1.1  perseant 	for (i = 0; i < nb; i++) {
    132  1.1  perseant 		memset(bip + i, 0, sizeof(BLOCK_INFO_15));
    133  1.1  perseant 		bip[i].bi_inode = ino;
    134  1.1  perseant 		bip[i].bi_lbn = i;
    135  1.2  perseant 		bip[i].bi_version = dip->di_gen;
    136  1.1  perseant 		/* Don't set the size, but let lfs_bmap fill it in */
    137  1.1  perseant 	}
    138  1.1  perseant 	if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
    139  1.2  perseant                 syslog(LOG_WARNING, "lfs_bmapv: %m");
    140  1.1  perseant 		free(bip);
    141  1.1  perseant 		return -1;
    142  1.1  perseant 	}
    143  1.1  perseant 	noff = toff = 0;
    144  1.1  perseant 	for (i = 1; i < nb; i++) {
    145  1.1  perseant 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + 1)
    146  1.1  perseant 			++noff;
    147  1.1  perseant 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr - 1);
    148  1.1  perseant 	}
    149  1.1  perseant 
    150  1.1  perseant 	/*
    151  1.1  perseant 	 * If this file is not discontinuous, there's no point in rewriting it.
    152  1.1  perseant          *
    153  1.1  perseant          * Explicitly allow a certain amount of discontinuity, since large
    154  1.1  perseant          * files will be broken among segments and medium-sized files
    155  1.1  perseant          * can have a break or two and it's okay.
    156  1.1  perseant 	 */
    157  1.2  perseant 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    158  1.2  perseant 	    segtod(lfsp, noff) * 2 < nb) {
    159  1.1  perseant 		free(bip);
    160  1.1  perseant 		return 0;
    161  1.1  perseant 	} else if (debug)
    162  1.1  perseant 		syslog(LOG_DEBUG, "ino %d total discontinuity "
    163  1.1  perseant 			"%d (%d) for %d blocks", ino, noff, toff, nb);
    164  1.1  perseant 
    165  1.1  perseant 	/* Search for blocks in active segments; don't move them. */
    166  1.1  perseant 	for (i = 0; i < nb; i++) {
    167  1.1  perseant 		if (bip[i].bi_daddr <= 0)
    168  1.1  perseant 			continue;
    169  1.1  perseant 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
    170  1.1  perseant 				dtosn(lfsp, bip[i].bi_daddr));
    171  1.1  perseant 		if (sup->su_flags & SEGUSE_ACTIVE)
    172  1.1  perseant 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    173  1.1  perseant 	}
    174  1.1  perseant         /*
    175  1.1  perseant 	 * Get rid of any we've marked dead.  If this is an older
    176  1.1  perseant 	 * kernel that doesn't have lfs_bmapv fill in the block
    177  1.1  perseant 	 * sizes, we'll toss everything here.
    178  1.1  perseant 	 */
    179  1.1  perseant 	toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
    180  1.1  perseant         if (nb && tossdead(NULL, bip + nb - 1, NULL))
    181  1.1  perseant                 --nb;
    182  1.1  perseant         if (nb == 0) {
    183  1.1  perseant 		free(bip);
    184  1.1  perseant 		return 0;
    185  1.1  perseant 	}
    186  1.1  perseant 
    187  1.2  perseant #if 0
    188  1.2  perseant 	/*
    189  1.2  perseant 	 * Double-check that we've tossed everything invalid. (wtf?!)
    190  1.2  perseant 	 */
    191  1.2  perseant 	for (i = 0; i < nb; i++) {
    192  1.2  perseant 		if (bip[i].bi_daddr <= 0) {
    193  1.2  perseant 			syslog(LOG_ERR, "negative daddr not tossed, bombing");
    194  1.2  perseant 			free(bip);
    195  1.2  perseant 			return 0;
    196  1.2  perseant 		}
    197  1.2  perseant 	}
    198  1.2  perseant #endif
    199  1.2  perseant 
    200  1.1  perseant 	/*
    201  1.2  perseant 	 * We may have tossed enough blocks that it is no longer worthwhile
    202  1.2  perseant 	 * to rewrite this inode.
    203  1.1  perseant 	 */
    204  1.2  perseant 	if ((1 << (onb - nb)) > onb) {
    205  1.2  perseant 		syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    206  1.1  perseant 		return 0;
    207  1.1  perseant 	}
    208  1.1  perseant 
    209  1.1  perseant         /*
    210  1.1  perseant 	 * We are going to rewrite this inode.
    211  1.1  perseant 	 * For any remaining blocks, read in their contents.
    212  1.1  perseant 	 */
    213  1.1  perseant 	for (i = 0; i < nb; i++) {
    214  1.1  perseant 		bip[i].bi_bp = malloc(bip[i].bi_size);
    215  1.1  perseant                 get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
    216  1.1  perseant 	}
    217  1.1  perseant 	if (debug)
    218  1.1  perseant 		syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
    219  1.1  perseant 
    220  1.2  perseant 	/*
    221  1.2  perseant 	 * Write in segment-sized chunks.  If at any point we'd write more
    222  1.2  perseant 	 * than half of the available segments, sleep until that's not
    223  1.2  perseant 	 * true any more.
    224  1.2  perseant 	 */
    225  1.1  perseant 	bps = segtod(lfsp, 1);
    226  1.1  perseant 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    227  1.2  perseant 		while (fsp->fi_cip->clean < 4) {
    228  1.2  perseant 			lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
    229  1.2  perseant 			reread_fs_info(fsp, do_mmap);
    230  1.2  perseant 			/* XXX start over? */
    231  1.2  perseant 		}
    232  1.1  perseant 		lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
    233  1.1  perseant                           (tbip + bps < bip + nb ? bps : nb % bps));
    234  1.1  perseant 	}
    235  1.1  perseant 
    236  1.1  perseant 	for (i = 0; i < nb; i++)
    237  1.1  perseant 		if (bip[i].bi_bp)
    238  1.1  perseant 			free(bip[i].bi_bp);
    239  1.1  perseant 	free(bip);
    240  1.1  perseant 	return 1;
    241  1.1  perseant }
    242  1.1  perseant 
    243  1.1  perseant /*
    244  1.1  perseant  * Try coalescing every inode in the filesystem.
    245  1.1  perseant  * Return the number of inodes actually altered.
    246  1.1  perseant  */
    247  1.1  perseant int clean_all_inodes(struct fs_info *fsp)
    248  1.1  perseant {
    249  1.1  perseant 	int i;
    250  1.1  perseant 	int r, tot;
    251  1.1  perseant 
    252  1.1  perseant 	tot = 0;
    253  1.1  perseant 	for (i = 0; i < fsp->fi_ifile_count; i++) {
    254  1.1  perseant 		r = clean_inode(fsp, i);
    255  1.1  perseant 		if (r > 0)
    256  1.1  perseant 			tot += r;
    257  1.1  perseant 	}
    258  1.1  perseant 	return tot;
    259  1.1  perseant }
    260  1.1  perseant 
    261  1.1  perseant int fork_coalesce(struct fs_info *fsp)
    262  1.1  perseant {
    263  1.1  perseant 	static pid_t childpid;
    264  1.2  perseant 	int num;
    265  1.2  perseant 
    266  1.2  perseant 	reread_fs_info(fsp, do_mmap);
    267  1.1  perseant 
    268  1.1  perseant 	if (childpid) {
    269  1.1  perseant      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    270  1.1  perseant 			childpid = 0;
    271  1.1  perseant 	}
    272  1.1  perseant 	if (childpid && kill(childpid, 0) >= 0) {
    273  1.1  perseant 		/* already running a coalesce process */
    274  1.2  perseant 		if (debug)
    275  1.2  perseant 			syslog(LOG_DEBUG, "coalescing already in progress");
    276  1.1  perseant 		return 0;
    277  1.1  perseant 	}
    278  1.1  perseant 	childpid = fork();
    279  1.1  perseant 	if (childpid < 0) {
    280  1.1  perseant 		syslog(LOG_ERR, "fork: %m");
    281  1.1  perseant 		return 0;
    282  1.1  perseant 	} else if (childpid == 0) {
    283  1.2  perseant 		syslog(LOG_NOTICE, "new coalescing process (%d)", childpid);
    284  1.2  perseant 		num = clean_all_inodes(fsp);
    285  1.2  perseant 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
    286  1.1  perseant 		exit(0);
    287  1.1  perseant 	}
    288  1.1  perseant 	return 0;
    289  1.1  perseant }
    290