Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.5
      1 /*      $NetBSD: coalesce.c,v 1.5 2002/12/15 08:38:17 yamt Exp $  */
      2 
      3 /*-
      4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/mount.h>
     41 #include <sys/time.h>
     42 #include <sys/resource.h>
     43 #include <sys/types.h>
     44 #include <sys/wait.h>
     45 #include <sys/mman.h>
     46 
     47 #include <ufs/ufs/dinode.h>
     48 #include <ufs/lfs/lfs.h>
     49 
     50 #include <fcntl.h>
     51 #include <signal.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 #include <time.h>
     56 #include <unistd.h>
     57 #include <util.h>
     58 #include <errno.h>
     59 #include <err.h>
     60 
     61 #include <syslog.h>
     62 
     63 #include "clean.h"
     64 
     65 extern int debug, do_mmap;
     66 
     67 static int
     68 tossdead(const void *client, const void *a, const void *b)
     69 {
     70 	return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
     71 		((BLOCK_INFO_15 *)a)->bi_size == 0);
     72 }
     73 
/*
 * Integer base-2 logarithm, rounded down: count how many times n can be
 * halved before it reaches zero, then subtract one.
 * Returns -1 when n is zero or negative.
 */
static int log2int(int n)
{
	int bits;

	for (bits = -1; n > 0; n /= 2)
		bits++;
	return bits;
}
     85 
/*
 * Status codes returned by clean_inode().  Each value is also the index
 * of the matching message in coalesce_return[], so the two must be kept
 * in the same order.  COALESCE_MAXERROR is not a status; it is the number
 * of real codes.
 */
enum coalesce_returncodes {
	COALESCE_OK            = 0,	/* blocks were rewritten */
	COALESCE_NOINODE       = 1,	/* inode could not be fetched */
	COALESCE_TOOSMALL      = 2,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE       = 3,	/* negative file size */
	COALESCE_BADBLOCKSIZE  = 4,	/* size implies more blocks than held */
	COALESCE_NOMEM         = 5,	/* allocation failure */
	COALESCE_BADBMAPV      = 6,	/* lfs_bmapv failed */
	COALESCE_NOTWORTHIT    = 7,	/* not discontinuous enough */
	COALESCE_NOTHINGLEFT   = 8,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2  = 9,	/* too many blocks were tossed */
	COALESCE_EIO           = 10,	/* raw block read failed */

	COALESCE_MAXERROR      = 11
};
    101 
/*
 * Human-readable text for each clean_inode() status code, indexed by the
 * numeric value of the code (shown in the trailing comments).  The final
 * entry sits at index COALESCE_MAXERROR and serves as a catch-all.
 */
char *coalesce_return[] = {
	[0]  = "Successfully coalesced",			/* COALESCE_OK */
	[1]  = "File not in use or inode not found",		/* COALESCE_NOINODE */
	[2]  = "Not large enough to coalesce",			/* COALESCE_TOOSMALL */
	[3]  = "Negative size",					/* COALESCE_BADSIZE */
	[4]  = "Not enough blocks to account for size",		/* COALESCE_BADBLOCKSIZE */
	[5]  = "Malloc failed",					/* COALESCE_NOMEM */
	[6]  = "lfs_bmapv failed",				/* COALESCE_BADBMAPV */
	[7]  = "Not broken enough to fix",			/* COALESCE_NOTWORTHIT */
	[8]  = "Too many blocks not found",			/* COALESCE_NOTHINGLEFT */
	[9]  = "Too many blocks found in active segments",	/* COALESCE_NOTHINGLEFT2 */
	[10] = "I/O error",					/* COALESCE_EIO */

	[11] = "No such error"					/* COALESCE_MAXERROR */
};
    117 
    118 /*
    119  * Find out if this inode's data blocks are discontinuous; if they are,
    120  * rewrite them using lfs_markv.  Return the number of inodes rewritten.
    121  */
    122 int clean_inode(struct fs_info *fsp, ino_t ino)
    123 {
    124 	int i, error;
    125 	BLOCK_INFO_15 *bip = NULL, *tbip;
    126 	struct dinode *dip;
    127 	int nb, onb, noff;
    128 	ufs_daddr_t toff;
    129 	struct lfs *lfsp;
    130 	int bps;
    131 	SEGUSE *sup;
    132 	int retval;
    133 
    134 	lfsp = &fsp->fi_lfs;
    135 
    136         dip = get_dinode(fsp, ino);
    137 	if (dip == NULL)
    138 		return COALESCE_NOINODE;
    139 
    140 	/* Compute file block size, set up for lfs_bmapv */
    141 	onb = nb = lblkno(lfsp, dip->di_size);
    142 
    143 	/* XXX for now, don't do any file small enough to have fragments */
    144 	if (nb < NDADDR)
    145 		return COALESCE_TOOSMALL;
    146 
    147 	/* Sanity checks */
    148 	if (dip->di_size < 0) {
    149 		if (debug)
    150 			syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
    151 				ino, (long long)dip->di_size);
    152 		return COALESCE_BADSIZE;
    153 	}
    154 	if (nb > dip->di_blocks) {
    155 		if (debug)
    156 			syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
    157 				ino, nb, dip->di_blocks);
    158 		return COALESCE_BADBLOCKSIZE;
    159 	}
    160 
    161 	bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
    162 	if (bip == NULL) {
    163 		syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
    164 		return COALESCE_NOMEM;
    165 	}
    166 	for (i = 0; i < nb; i++) {
    167 		memset(bip + i, 0, sizeof(BLOCK_INFO_15));
    168 		bip[i].bi_inode = ino;
    169 		bip[i].bi_lbn = i;
    170 		bip[i].bi_version = dip->di_gen;
    171 		/* Don't set the size, but let lfs_bmap fill it in */
    172 	}
    173 	if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
    174                 syslog(LOG_WARNING, "lfs_bmapv: %m");
    175 		retval = COALESCE_BADBMAPV;
    176 		goto out;
    177 	}
    178 #if 0
    179 	for (i = 0; i < nb; i++) {
    180 		printf("bi_size = %d, bi_ino = %d, "
    181 		    "bi_lbn = %d, bi_daddr = %d\n",
    182 		    bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
    183 		    bip[i].bi_daddr);
    184 	}
    185 #endif
    186 	noff = toff = 0;
    187 	for (i = 1; i < nb; i++) {
    188 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag)
    189 			++noff;
    190 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
    191 		    - lfsp->lfs_frag) >> lfsp->lfs_fbshift;
    192 	}
    193 
    194 	/*
    195 	 * If this file is not discontinuous, there's no point in rewriting it.
    196          *
    197          * Explicitly allow a certain amount of discontinuity, since large
    198          * files will be broken among segments and medium-sized files
    199          * can have a break or two and it's okay.
    200 	 */
    201 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    202 	    segtod(lfsp, noff) * 2 < nb) {
    203 		retval = COALESCE_NOTWORTHIT;
    204 		goto out;
    205 	} else if (debug)
    206 		syslog(LOG_DEBUG, "ino %d total discontinuity "
    207 			"%d (%d) for %d blocks", ino, noff, toff, nb);
    208 
    209 	/* Search for blocks in active segments; don't move them. */
    210 	for (i = 0; i < nb; i++) {
    211 		if (bip[i].bi_daddr <= 0)
    212 			continue;
    213 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
    214 				dtosn(lfsp, bip[i].bi_daddr));
    215 		if (sup->su_flags & SEGUSE_ACTIVE)
    216 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    217 	}
    218         /*
    219 	 * Get rid of any we've marked dead.  If this is an older
    220 	 * kernel that doesn't have lfs_bmapv fill in the block
    221 	 * sizes, we'll toss everything here.
    222 	 */
    223 	toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
    224         if (nb && tossdead(NULL, bip + nb - 1, NULL))
    225                 --nb;
    226         if (nb == 0) {
    227 		retval = COALESCE_NOTHINGLEFT;
    228 		goto out;
    229 	}
    230 
    231 	/*
    232 	 * We may have tossed enough blocks that it is no longer worthwhile
    233 	 * to rewrite this inode.
    234 	 */
    235 	if (onb - nb > log2int(onb)) {
    236 		if (debug)
    237 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    238 		return COALESCE_NOTHINGLEFT2;
    239 	}
    240 
    241         /*
    242 	 * We are going to rewrite this inode.
    243 	 * For any remaining blocks, read in their contents.
    244 	 */
    245 	for (i = 0; i < nb; i++) {
    246 		bip[i].bi_bp = malloc(bip[i].bi_size);
    247 		if (bip[i].bi_bp == NULL) {
    248 			syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
    249 			    bip[i].bi_size);
    250 			retval = COALESCE_NOMEM;
    251 			goto out;
    252 		}
    253                 if (get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size,
    254 		    bip[i].bi_daddr) != bip[i].bi_size) {
    255 			retval = COALESCE_EIO;
    256 			goto out;
    257 		}
    258 	}
    259 	if (debug)
    260 		syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
    261 
    262 	/*
    263 	 * Write in segment-sized chunks.  If at any point we'd write more
    264 	 * than half of the available segments, sleep until that's not
    265 	 * true any more.
    266 	 */
    267 	bps = segtod(lfsp, 1);
    268 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    269 		while (fsp->fi_cip->clean < 4) {
    270 			lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
    271 			reread_fs_info(fsp, do_mmap);
    272 			/* XXX start over? */
    273 		}
    274 		lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
    275                           (tbip + bps < bip + nb ? bps : nb % bps));
    276 	}
    277 
    278 	retval = COALESCE_OK;
    279 out:
    280 	if (bip) {
    281 		for (i = 0; i < onb; i++)
    282 			if (bip[i].bi_bp)
    283 				free(bip[i].bi_bp);
    284 		free(bip);
    285 	}
    286 	return retval;
    287 }
    288 
    289 /*
    290  * Try coalescing every inode in the filesystem.
    291  * Return the number of inodes actually altered.
    292  */
    293 int clean_all_inodes(struct fs_info *fsp)
    294 {
    295 	int i, r;
    296 	int totals[COALESCE_MAXERROR];
    297 
    298 	memset(totals, 0, sizeof(totals));
    299 	for (i = 0; i < fsp->fi_ifile_count; i++) {
    300 		r = clean_inode(fsp, i);
    301 		++totals[r];
    302 	}
    303 
    304 	for (i = 0; i < COALESCE_MAXERROR; i++)
    305 		if (totals[i])
    306 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    307 				totals[i]);
    308 
    309 	return totals[COALESCE_OK];
    310 }
    311 
    312 int fork_coalesce(struct fs_info *fsp)
    313 {
    314 	static pid_t childpid;
    315 	int num;
    316 
    317 	reread_fs_info(fsp, do_mmap);
    318 
    319 	if (childpid) {
    320      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    321 			childpid = 0;
    322 	}
    323 	if (childpid && kill(childpid, 0) >= 0) {
    324 		/* already running a coalesce process */
    325 		if (debug)
    326 			syslog(LOG_DEBUG, "coalescing already in progress");
    327 		return 0;
    328 	}
    329 	childpid = fork();
    330 	if (childpid < 0) {
    331 		syslog(LOG_ERR, "fork: %m");
    332 		return 0;
    333 	} else if (childpid == 0) {
    334 		syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
    335 		num = clean_all_inodes(fsp);
    336 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
    337 		exit(0);
    338 	}
    339 	return 0;
    340 }
    341