Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.3
      1  1.3  perseant /*      $NetBSD: coalesce.c,v 1.3 2002/06/14 05:21:21 perseant Exp $  */
      2  1.1  perseant 
      3  1.1  perseant /*-
      4  1.1  perseant  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  1.1  perseant  * All rights reserved.
      6  1.1  perseant  *
      7  1.1  perseant  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  perseant  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  1.1  perseant  *
     10  1.1  perseant  * Redistribution and use in source and binary forms, with or without
     11  1.1  perseant  * modification, are permitted provided that the following conditions
     12  1.1  perseant  * are met:
     13  1.1  perseant  * 1. Redistributions of source code must retain the above copyright
     14  1.1  perseant  *    notice, this list of conditions and the following disclaimer.
     15  1.1  perseant  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  perseant  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  perseant  *    documentation and/or other materials provided with the distribution.
     18  1.1  perseant  * 3. All advertising materials mentioning features or use of this software
     19  1.1  perseant  *    must display the following acknowledgement:
     20  1.1  perseant  *      This product includes software developed by the NetBSD
     21  1.1  perseant  *      Foundation, Inc. and its contributors.
     22  1.1  perseant  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  1.1  perseant  *    contributors may be used to endorse or promote products derived
     24  1.1  perseant  *    from this software without specific prior written permission.
     25  1.1  perseant  *
     26  1.1  perseant  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  1.1  perseant  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  1.1  perseant  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  1.1  perseant  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  1.1  perseant  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  1.1  perseant  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  1.1  perseant  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  1.1  perseant  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  1.1  perseant  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  1.1  perseant  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  1.1  perseant  * POSSIBILITY OF SUCH DAMAGE.
     37  1.1  perseant  */
     38  1.1  perseant 
     39  1.1  perseant #include <sys/param.h>
     40  1.1  perseant #include <sys/mount.h>
     41  1.1  perseant #include <sys/time.h>
     42  1.1  perseant #include <sys/resource.h>
     43  1.1  perseant #include <sys/types.h>
     44  1.1  perseant #include <sys/wait.h>
     45  1.1  perseant #include <sys/mman.h>
     46  1.1  perseant 
     47  1.1  perseant #include <ufs/ufs/dinode.h>
     48  1.1  perseant #include <ufs/lfs/lfs.h>
     49  1.1  perseant 
     50  1.1  perseant #include <fcntl.h>
     51  1.1  perseant #include <signal.h>
     52  1.1  perseant #include <stdio.h>
     53  1.1  perseant #include <stdlib.h>
     54  1.1  perseant #include <string.h>
     55  1.1  perseant #include <time.h>
     56  1.1  perseant #include <unistd.h>
     57  1.1  perseant #include <util.h>
     58  1.1  perseant #include <errno.h>
     59  1.1  perseant #include <err.h>
     60  1.1  perseant 
     61  1.1  perseant #include <syslog.h>
     62  1.1  perseant 
     63  1.1  perseant #include "clean.h"
     64  1.1  perseant 
     65  1.2  perseant extern int debug, do_mmap;
     66  1.1  perseant 
     67  1.1  perseant static int
     68  1.1  perseant tossdead(const void *client, const void *a, const void *b)
     69  1.1  perseant {
     70  1.2  perseant 	return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
     71  1.1  perseant 		((BLOCK_INFO_15 *)a)->bi_size == 0);
     72  1.1  perseant }
     73  1.1  perseant 
     74  1.2  perseant static int log2int(int n)
     75  1.2  perseant {
     76  1.2  perseant 	int log;
     77  1.2  perseant 
     78  1.2  perseant 	log = 0;
     79  1.2  perseant 	while (n > 0) {
     80  1.2  perseant 		++log;
     81  1.2  perseant 		n /= 2;
     82  1.2  perseant 	}
     83  1.2  perseant 	return log - 1;
     84  1.2  perseant }
     85  1.2  perseant 
     86  1.3  perseant enum coalesce_returncodes {
     87  1.3  perseant 	COALESCE_OK = 0,
     88  1.3  perseant 	COALESCE_NOINODE,
     89  1.3  perseant 	COALESCE_TOOSMALL,
     90  1.3  perseant 	COALESCE_BADSIZE,
     91  1.3  perseant 	COALESCE_BADBLOCKSIZE,
     92  1.3  perseant 	COALESCE_NOMEM,
     93  1.3  perseant 	COALESCE_BADBMAPV,
     94  1.3  perseant 	COALESCE_NOTWORTHIT,
     95  1.3  perseant 	COALESCE_NOTHINGLEFT,
     96  1.3  perseant 	COALESCE_NOTHINGLEFT2,
     97  1.3  perseant 
     98  1.3  perseant 	COALESCE_MAXERROR
     99  1.3  perseant };
    100  1.3  perseant 
    101  1.3  perseant char *coalesce_return[] = {
    102  1.3  perseant 	"Successfully coalesced",
    103  1.3  perseant 	"File not in use or inode not found",
    104  1.3  perseant 	"Not large enough to coalesce",
    105  1.3  perseant 	"Negative size",
    106  1.3  perseant 	"Not enough blocks to account for size",
    107  1.3  perseant 	"Malloc failed",
    108  1.3  perseant 	"lfs_bmapv failed",
    109  1.3  perseant 	"Not broken enough to fix",
    110  1.3  perseant 	"Too many blocks not found",
    111  1.3  perseant 	"Too many blocks found in active segments",
    112  1.3  perseant 
    113  1.3  perseant 	"No such error"
    114  1.3  perseant };
    115  1.3  perseant 
    116  1.1  perseant /*
    117  1.1  perseant  * Find out if this inode's data blocks are discontinuous; if they are,
    118  1.1  perseant  * rewrite them using lfs_markv.  Return the number of inodes rewritten.
    119  1.1  perseant  */
    120  1.1  perseant int clean_inode(struct fs_info *fsp, ino_t ino)
    121  1.1  perseant {
    122  1.1  perseant 	int i, error;
    123  1.1  perseant 	BLOCK_INFO_15 *bip, *tbip;
    124  1.1  perseant 	struct dinode *dip;
    125  1.2  perseant 	int nb, onb, noff;
    126  1.1  perseant 	ufs_daddr_t toff;
    127  1.1  perseant 	struct lfs *lfsp;
    128  1.1  perseant 	int bps;
    129  1.1  perseant         SEGUSE *sup;
    130  1.1  perseant 
    131  1.1  perseant 	lfsp = &fsp->fi_lfs;
    132  1.1  perseant 
    133  1.1  perseant         dip = get_dinode(fsp, ino);
    134  1.1  perseant 	if (dip == NULL)
    135  1.3  perseant 		return COALESCE_NOINODE;
    136  1.1  perseant 
    137  1.1  perseant 	/* Compute file block size, set up for lfs_bmapv */
    138  1.2  perseant 	onb = nb = btofsb(lfsp, dip->di_size);
    139  1.2  perseant 
    140  1.2  perseant 	/* XXX for now, don't do any file small enough to have fragments */
    141  1.2  perseant 	if (nb < NDADDR)
    142  1.3  perseant 		return COALESCE_TOOSMALL;
    143  1.2  perseant 
    144  1.2  perseant 	/* Sanity checks */
    145  1.2  perseant 	if (dip->di_size < 0) {
    146  1.3  perseant 		if (debug)
    147  1.3  perseant 			syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
    148  1.3  perseant 				ino, (long long)dip->di_size);
    149  1.3  perseant 		return COALESCE_BADSIZE;
    150  1.2  perseant 	}
    151  1.1  perseant 	if (nb > dip->di_blocks) {
    152  1.3  perseant 		if (debug)
    153  1.3  perseant 			syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
    154  1.3  perseant 				ino, nb, dip->di_blocks);
    155  1.3  perseant 		return COALESCE_BADBLOCKSIZE;
    156  1.1  perseant 	}
    157  1.2  perseant 
    158  1.1  perseant 	bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
    159  1.1  perseant 	if (bip == NULL) {
    160  1.1  perseant 		syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
    161  1.3  perseant 		return COALESCE_NOMEM;
    162  1.1  perseant 	}
    163  1.1  perseant 	for (i = 0; i < nb; i++) {
    164  1.1  perseant 		memset(bip + i, 0, sizeof(BLOCK_INFO_15));
    165  1.1  perseant 		bip[i].bi_inode = ino;
    166  1.1  perseant 		bip[i].bi_lbn = i;
    167  1.2  perseant 		bip[i].bi_version = dip->di_gen;
    168  1.1  perseant 		/* Don't set the size, but let lfs_bmap fill it in */
    169  1.1  perseant 	}
    170  1.1  perseant 	if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
    171  1.2  perseant                 syslog(LOG_WARNING, "lfs_bmapv: %m");
    172  1.1  perseant 		free(bip);
    173  1.3  perseant 		return COALESCE_BADBMAPV;
    174  1.1  perseant 	}
    175  1.1  perseant 	noff = toff = 0;
    176  1.1  perseant 	for (i = 1; i < nb; i++) {
    177  1.1  perseant 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + 1)
    178  1.1  perseant 			++noff;
    179  1.1  perseant 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr - 1);
    180  1.1  perseant 	}
    181  1.1  perseant 
    182  1.1  perseant 	/*
    183  1.1  perseant 	 * If this file is not discontinuous, there's no point in rewriting it.
    184  1.1  perseant          *
    185  1.1  perseant          * Explicitly allow a certain amount of discontinuity, since large
    186  1.1  perseant          * files will be broken among segments and medium-sized files
    187  1.1  perseant          * can have a break or two and it's okay.
    188  1.1  perseant 	 */
    189  1.2  perseant 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    190  1.2  perseant 	    segtod(lfsp, noff) * 2 < nb) {
    191  1.1  perseant 		free(bip);
    192  1.3  perseant 		return COALESCE_NOTWORTHIT;
    193  1.1  perseant 	} else if (debug)
    194  1.1  perseant 		syslog(LOG_DEBUG, "ino %d total discontinuity "
    195  1.1  perseant 			"%d (%d) for %d blocks", ino, noff, toff, nb);
    196  1.1  perseant 
    197  1.1  perseant 	/* Search for blocks in active segments; don't move them. */
    198  1.1  perseant 	for (i = 0; i < nb; i++) {
    199  1.1  perseant 		if (bip[i].bi_daddr <= 0)
    200  1.1  perseant 			continue;
    201  1.1  perseant 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
    202  1.1  perseant 				dtosn(lfsp, bip[i].bi_daddr));
    203  1.1  perseant 		if (sup->su_flags & SEGUSE_ACTIVE)
    204  1.1  perseant 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    205  1.1  perseant 	}
    206  1.1  perseant         /*
    207  1.1  perseant 	 * Get rid of any we've marked dead.  If this is an older
    208  1.1  perseant 	 * kernel that doesn't have lfs_bmapv fill in the block
    209  1.1  perseant 	 * sizes, we'll toss everything here.
    210  1.1  perseant 	 */
    211  1.1  perseant 	toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
    212  1.1  perseant         if (nb && tossdead(NULL, bip + nb - 1, NULL))
    213  1.1  perseant                 --nb;
    214  1.1  perseant         if (nb == 0) {
    215  1.1  perseant 		free(bip);
    216  1.3  perseant 		return COALESCE_NOTHINGLEFT;
    217  1.2  perseant 	}
    218  1.2  perseant 
    219  1.1  perseant 	/*
    220  1.2  perseant 	 * We may have tossed enough blocks that it is no longer worthwhile
    221  1.2  perseant 	 * to rewrite this inode.
    222  1.1  perseant 	 */
    223  1.3  perseant 	if (onb - nb > log2int(onb)) {
    224  1.3  perseant 		if (debug)
    225  1.3  perseant 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    226  1.3  perseant 		return COALESCE_NOTHINGLEFT2;
    227  1.1  perseant 	}
    228  1.1  perseant 
    229  1.1  perseant         /*
    230  1.1  perseant 	 * We are going to rewrite this inode.
    231  1.1  perseant 	 * For any remaining blocks, read in their contents.
    232  1.1  perseant 	 */
    233  1.1  perseant 	for (i = 0; i < nb; i++) {
    234  1.1  perseant 		bip[i].bi_bp = malloc(bip[i].bi_size);
    235  1.1  perseant                 get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
    236  1.1  perseant 	}
    237  1.1  perseant 	if (debug)
    238  1.1  perseant 		syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
    239  1.1  perseant 
    240  1.2  perseant 	/*
    241  1.2  perseant 	 * Write in segment-sized chunks.  If at any point we'd write more
    242  1.2  perseant 	 * than half of the available segments, sleep until that's not
    243  1.2  perseant 	 * true any more.
    244  1.2  perseant 	 */
    245  1.1  perseant 	bps = segtod(lfsp, 1);
    246  1.1  perseant 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    247  1.2  perseant 		while (fsp->fi_cip->clean < 4) {
    248  1.2  perseant 			lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
    249  1.2  perseant 			reread_fs_info(fsp, do_mmap);
    250  1.2  perseant 			/* XXX start over? */
    251  1.2  perseant 		}
    252  1.1  perseant 		lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
    253  1.1  perseant                           (tbip + bps < bip + nb ? bps : nb % bps));
    254  1.1  perseant 	}
    255  1.1  perseant 
    256  1.1  perseant 	for (i = 0; i < nb; i++)
    257  1.1  perseant 		if (bip[i].bi_bp)
    258  1.1  perseant 			free(bip[i].bi_bp);
    259  1.1  perseant 	free(bip);
    260  1.3  perseant 	return COALESCE_OK;
    261  1.1  perseant }
    262  1.1  perseant 
    263  1.1  perseant /*
    264  1.1  perseant  * Try coalescing every inode in the filesystem.
    265  1.1  perseant  * Return the number of inodes actually altered.
    266  1.1  perseant  */
    267  1.1  perseant int clean_all_inodes(struct fs_info *fsp)
    268  1.1  perseant {
    269  1.3  perseant 	int i, r;
    270  1.3  perseant 	int totals[COALESCE_MAXERROR];
    271  1.1  perseant 
    272  1.3  perseant 	memset(totals, 0, sizeof(totals));
    273  1.1  perseant 	for (i = 0; i < fsp->fi_ifile_count; i++) {
    274  1.1  perseant 		r = clean_inode(fsp, i);
    275  1.3  perseant 		++totals[r];
    276  1.1  perseant 	}
    277  1.3  perseant 
    278  1.3  perseant 	for (i = 0; i < COALESCE_MAXERROR; i++)
    279  1.3  perseant 		if (totals[i])
    280  1.3  perseant 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    281  1.3  perseant 				totals[i]);
    282  1.3  perseant 
    283  1.3  perseant 	return totals[COALESCE_OK];
    284  1.1  perseant }
    285  1.1  perseant 
    286  1.1  perseant int fork_coalesce(struct fs_info *fsp)
    287  1.1  perseant {
    288  1.1  perseant 	static pid_t childpid;
    289  1.2  perseant 	int num;
    290  1.2  perseant 
    291  1.2  perseant 	reread_fs_info(fsp, do_mmap);
    292  1.1  perseant 
    293  1.1  perseant 	if (childpid) {
    294  1.1  perseant      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    295  1.1  perseant 			childpid = 0;
    296  1.1  perseant 	}
    297  1.1  perseant 	if (childpid && kill(childpid, 0) >= 0) {
    298  1.1  perseant 		/* already running a coalesce process */
    299  1.2  perseant 		if (debug)
    300  1.2  perseant 			syslog(LOG_DEBUG, "coalescing already in progress");
    301  1.1  perseant 		return 0;
    302  1.1  perseant 	}
    303  1.1  perseant 	childpid = fork();
    304  1.1  perseant 	if (childpid < 0) {
    305  1.1  perseant 		syslog(LOG_ERR, "fork: %m");
    306  1.1  perseant 		return 0;
    307  1.1  perseant 	} else if (childpid == 0) {
    308  1.3  perseant 		syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
    309  1.2  perseant 		num = clean_all_inodes(fsp);
    310  1.2  perseant 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
    311  1.1  perseant 		exit(0);
    312  1.1  perseant 	}
    313  1.1  perseant 	return 0;
    314  1.1  perseant }
    315