Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.10
      1 /*      $NetBSD: coalesce.c,v 1.10 2005/08/19 02:06:29 christos Exp $  */
      2 
      3 /*-
      4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/mount.h>
     41 #include <sys/time.h>
     42 #include <sys/resource.h>
     43 #include <sys/types.h>
     44 #include <sys/wait.h>
     45 #include <sys/mman.h>
     46 
     47 #include <ufs/ufs/dinode.h>
     48 #include <ufs/lfs/lfs.h>
     49 
     50 #include <fcntl.h>
     51 #include <signal.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 #include <time.h>
     56 #include <unistd.h>
     57 #include <util.h>
     58 #include <errno.h>
     59 #include <err.h>
     60 
     61 #include <syslog.h>
     62 
     63 #include "clean.h"
     64 
     65 extern int debug, do_mmap;
     66 
     67 static int
     68 tossdead(const void *client, const void *a, const void *b)
     69 {
     70 	return (((BLOCK_INFO *)a)->bi_daddr <= 0 ||
     71 		((BLOCK_INFO *)a)->bi_size == 0);
     72 }
     73 
/*
 * Integer base-2 logarithm, rounded down: the position of the highest
 * set bit of n.  Returns -1 when n is zero or negative.
 */
static int log2int(int n)
{
	int bits;

	/* n is strictly positive inside the loop, so >> 1 halves it. */
	for (bits = -1; n > 0; n >>= 1)
		bits++;
	return bits;
}
     85 
/*
 * Outcome codes for a single coalescing attempt.  Each code is also the
 * index of its human-readable description in coalesce_return[].
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,
	COALESCE_NOINODE,
	COALESCE_TOOSMALL,
	COALESCE_BADSIZE,
	COALESCE_BADBLOCKSIZE,
	COALESCE_NOMEM,
	COALESCE_BADBMAPV,
	COALESCE_NOTWORTHIT,
	COALESCE_NOTHINGLEFT,
	COALESCE_NOTHINGLEFT2,
	COALESCE_EIO,

	COALESCE_MAXERROR	/* number of real codes; not itself a result */
};

/*
 * Descriptions of the codes above, keyed explicitly by code so the two
 * lists cannot drift apart.  The extra slot at COALESCE_MAXERROR is a
 * catch-all message.
 */
char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "LFCNBMAPV failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks not found",
	[COALESCE_NOTHINGLEFT2]	= "Too many blocks found in active segments",
	[COALESCE_EIO]		= "I/O error",

	[COALESCE_MAXERROR]	= "No such error"
};
    117 
    118 /*
    119  * Find out if this inode's data blocks are discontinuous; if they are,
    120  * rewrite them using markv.  Return the number of inodes rewritten.
    121  */
    122 int clean_inode(struct fs_info *fsp, ino_t ino)
    123 {
    124 	int i, error;
    125 	BLOCK_INFO *bip = NULL, *tbip;
    126 	struct ufs1_dinode *dip;
    127 	int nb, onb, noff;
    128 	daddr_t toff;
    129 	struct lfs *lfsp;
    130 	int bps;
    131 	SEGUSE *sup;
    132 	int retval;
    133 
    134 	lfsp = &fsp->fi_lfs;
    135 
    136         dip = get_dinode(fsp, ino);
    137 	if (dip == NULL)
    138 		return COALESCE_NOINODE;
    139 
    140 	/* Compute file block size, set up for bmapv */
    141 	onb = nb = lblkno(lfsp, dip->di_size);
    142 
    143 	/* XXX for now, don't do any file small enough to have fragments */
    144 	if (nb < NDADDR)
    145 		return COALESCE_TOOSMALL;
    146 
    147 	/* Sanity checks */
    148 	if (dip->di_size < 0) {
    149 		if (debug)
    150 			syslog(LOG_DEBUG, "ino %llu, negative size (%lld)",
    151 			    (unsigned long long)ino, (long long)dip->di_size);
    152 		return COALESCE_BADSIZE;
    153 	}
    154 	if (nb > dip->di_blocks) {
    155 		if (debug)
    156 			syslog(LOG_DEBUG, "ino %llu, computed blocks %d "
    157 			    "> held blocks %d",
    158 			    (unsigned long long)ino, nb, dip->di_blocks);
    159 		return COALESCE_BADBLOCKSIZE;
    160 	}
    161 
    162 	bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
    163 	if (bip == NULL) {
    164 		syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
    165 		    (unsigned long long)ino, nb);
    166 		return COALESCE_NOMEM;
    167 	}
    168 	for (i = 0; i < nb; i++) {
    169 		memset(bip + i, 0, sizeof(BLOCK_INFO));
    170 		bip[i].bi_inode = ino;
    171 		bip[i].bi_lbn = i;
    172 		bip[i].bi_version = dip->di_gen;
    173 		/* Don't set the size, but let lfs_bmap fill it in */
    174 	}
    175 	if ((error = lfs_bmapv_emul(ifile_fd, bip, nb)) < 0) {
    176                 syslog(LOG_WARNING, "LFCNBMAPV: %m");
    177 		retval = COALESCE_BADBMAPV;
    178 		goto out;
    179 	}
    180 #if 0
    181 	for (i = 0; i < nb; i++) {
    182 		printf("bi_size = %d, bi_ino = %d, "
    183 		    "bi_lbn = %d, bi_daddr = %d\n",
    184 		    bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
    185 		    bip[i].bi_daddr);
    186 	}
    187 #endif
    188 	noff = toff = 0;
    189 	for (i = 1; i < nb; i++) {
    190 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag)
    191 			++noff;
    192 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
    193 		    - lfsp->lfs_frag) >> lfsp->lfs_fbshift;
    194 	}
    195 
    196 	/*
    197 	 * If this file is not discontinuous, there's no point in rewriting it.
    198          *
    199          * Explicitly allow a certain amount of discontinuity, since large
    200          * files will be broken among segments and medium-sized files
    201          * can have a break or two and it's okay.
    202 	 */
    203 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    204 	    segtod(lfsp, noff) * 2 < nb) {
    205 		retval = COALESCE_NOTWORTHIT;
    206 		goto out;
    207 	} else if (debug)
    208 		syslog(LOG_DEBUG, "ino %llu total discontinuity "
    209 		    "%d (%lld) for %d blocks", (unsigned long long)ino,
    210 		    noff, (long long)toff, nb);
    211 
    212 	/* Search for blocks in active segments; don't move them. */
    213 	for (i = 0; i < nb; i++) {
    214 		if (bip[i].bi_daddr <= 0)
    215 			continue;
    216 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
    217 				dtosn(lfsp, bip[i].bi_daddr));
    218 		if (sup->su_flags & SEGUSE_ACTIVE)
    219 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    220 	}
    221         /*
    222 	 * Get rid of any we've marked dead.  If this is an older
    223 	 * kernel that doesn't have bmapv fill in the block
    224 	 * sizes, we'll toss everything here.
    225 	 */
    226 	toss(bip, &nb, sizeof(BLOCK_INFO), tossdead, NULL);
    227         if (nb && tossdead(NULL, bip + nb - 1, NULL))
    228                 --nb;
    229         if (nb == 0) {
    230 		retval = COALESCE_NOTHINGLEFT;
    231 		goto out;
    232 	}
    233 
    234 	/*
    235 	 * We may have tossed enough blocks that it is no longer worthwhile
    236 	 * to rewrite this inode.
    237 	 */
    238 	if (onb - nb > log2int(onb)) {
    239 		if (debug)
    240 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    241 		return COALESCE_NOTHINGLEFT2;
    242 	}
    243 
    244         /*
    245 	 * We are going to rewrite this inode.
    246 	 * For any remaining blocks, read in their contents.
    247 	 */
    248 	for (i = 0; i < nb; i++) {
    249 		bip[i].bi_bp = malloc(bip[i].bi_size);
    250 		if (bip[i].bi_bp == NULL) {
    251 			syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
    252 			    bip[i].bi_size);
    253 			retval = COALESCE_NOMEM;
    254 			goto out;
    255 		}
    256                 if (get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size,
    257 		    bip[i].bi_daddr) != bip[i].bi_size) {
    258 			retval = COALESCE_EIO;
    259 			goto out;
    260 		}
    261 	}
    262 	if (debug)
    263 		syslog(LOG_DEBUG, "ino %llu markv %d blocks",
    264 		    (unsigned long long)ino, nb);
    265 
    266 	/*
    267 	 * Write in segment-sized chunks.  If at any point we'd write more
    268 	 * than half of the available segments, sleep until that's not
    269 	 * true any more.
    270 	 */
    271 	bps = segtod(lfsp, 1);
    272 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    273 		while (fsp->fi_cip->clean < 4) {
    274 			lfs_segwait_emul(ifile_fd, NULL);
    275 			reread_fs_info(fsp, do_mmap);
    276 			/* XXX start over? */
    277 		}
    278 		lfs_markv_emul(ifile_fd, tbip,
    279                           (tbip + bps < bip + nb ? bps : nb % bps));
    280 	}
    281 
    282 	retval = COALESCE_OK;
    283 out:
    284 	if (bip) {
    285 		for (i = 0; i < onb; i++)
    286 			if (bip[i].bi_bp)
    287 				free(bip[i].bi_bp);
    288 		free(bip);
    289 	}
    290 	return retval;
    291 }
    292 
    293 /*
    294  * Try coalescing every inode in the filesystem.
    295  * Return the number of inodes actually altered.
    296  */
    297 int clean_all_inodes(struct fs_info *fsp)
    298 {
    299 	int i, r;
    300 	int totals[COALESCE_MAXERROR];
    301 
    302 	memset(totals, 0, sizeof(totals));
    303 	for (i = 0; i < fsp->fi_ifile_count; i++) {
    304 		r = clean_inode(fsp, i);
    305 		++totals[r];
    306 	}
    307 
    308 	for (i = 0; i < COALESCE_MAXERROR; i++)
    309 		if (totals[i])
    310 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    311 				totals[i]);
    312 
    313 	return totals[COALESCE_OK];
    314 }
    315 
    316 int fork_coalesce(struct fs_info *fsp)
    317 {
    318 	static pid_t childpid;
    319 	int num;
    320 
    321 	reread_fs_info(fsp, do_mmap);
    322 
    323 	if (childpid) {
    324      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    325 			childpid = 0;
    326 	}
    327 	if (childpid && kill(childpid, 0) >= 0) {
    328 		/* already running a coalesce process */
    329 		if (debug)
    330 			syslog(LOG_DEBUG, "coalescing already in progress");
    331 		return 0;
    332 	}
    333 	childpid = fork();
    334 	if (childpid < 0) {
    335 		syslog(LOG_ERR, "fork: %m");
    336 		return 0;
    337 	} else if (childpid == 0) {
    338 		syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
    339 		num = clean_all_inodes(fsp);
    340 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
    341 		exit(0);
    342 	}
    343 	return 0;
    344 }
    345