Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.3
      1 /*      $NetBSD: coalesce.c,v 1.3 2002/06/14 05:21:21 perseant Exp $  */
      2 
      3 /*-
      4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/mount.h>
     41 #include <sys/time.h>
     42 #include <sys/resource.h>
     43 #include <sys/types.h>
     44 #include <sys/wait.h>
     45 #include <sys/mman.h>
     46 
     47 #include <ufs/ufs/dinode.h>
     48 #include <ufs/lfs/lfs.h>
     49 
     50 #include <fcntl.h>
     51 #include <signal.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 #include <time.h>
     56 #include <unistd.h>
     57 #include <util.h>
     58 #include <errno.h>
     59 #include <err.h>
     60 
     61 #include <syslog.h>
     62 
     63 #include "clean.h"
     64 
/*
 * Flags defined outside this file.  In this file `debug' gates the
 * LOG_DEBUG syslog messages, and `do_mmap' is only passed through to
 * reread_fs_info().
 */
extern int debug, do_mmap;
     66 
     67 static int
     68 tossdead(const void *client, const void *a, const void *b)
     69 {
     70 	return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
     71 		((BLOCK_INFO_15 *)a)->bi_size == 0);
     72 }
     73 
/*
 * Integer base-2 logarithm: returns floor(log2(n)) for n > 0,
 * and -1 when n is zero or negative.
 */
static int log2int(int n)
{
	int halvings;

	/* Count how many times n can be halved before it reaches zero. */
	for (halvings = 0; n > 0; n /= 2)
		halvings++;

	return halvings - 1;
}
     85 
/*
 * Result codes returned by clean_inode().  clean_all_inodes() uses each
 * value as an index into its totals[] array and into coalesce_return[],
 * so the numbering must stay dense and start at zero.
 */
enum coalesce_returncodes {
	COALESCE_OK		= 0,	/* blocks were handed to lfs_markv */
	COALESCE_NOINODE	= 1,	/* get_dinode() found nothing */
	COALESCE_TOOSMALL	= 2,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE	= 3,	/* negative file size */
	COALESCE_BADBLOCKSIZE	= 4,	/* computed blocks > held blocks */
	COALESCE_NOMEM		= 5,	/* allocation failure */
	COALESCE_BADBMAPV	= 6,	/* lfs_bmapv() failed */
	COALESCE_NOTWORTHIT	= 7,	/* not discontinuous enough */
	COALESCE_NOTHINGLEFT	= 8,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2	= 9,	/* too many blocks were tossed */

	COALESCE_MAXERROR	= 10	/* number of real codes above */
};
    100 
/*
 * Human-readable description for each coalesce_returncodes value, in the
 * same order as the enum.  clean_all_inodes() prints these next to the
 * per-code totals.  The final entry sits at index COALESCE_MAXERROR.
 */
char *coalesce_return[] = {
	"Successfully coalesced",			/* COALESCE_OK */
	"File not in use or inode not found",		/* COALESCE_NOINODE */
	"Not large enough to coalesce",			/* COALESCE_TOOSMALL */
	"Negative size",				/* COALESCE_BADSIZE */
	"Not enough blocks to account for size",	/* COALESCE_BADBLOCKSIZE */
	"Malloc failed",				/* COALESCE_NOMEM */
	"lfs_bmapv failed",				/* COALESCE_BADBMAPV */
	"Not broken enough to fix",			/* COALESCE_NOTWORTHIT */
	"Too many blocks not found",			/* COALESCE_NOTHINGLEFT */
	"Too many blocks found in active segments",	/* COALESCE_NOTHINGLEFT2 */

	"No such error"					/* COALESCE_MAXERROR */
};
    115 
    116 /*
    117  * Find out if this inode's data blocks are discontinuous; if they are,
    118  * rewrite them using lfs_markv.  Return the number of inodes rewritten.
    119  */
    120 int clean_inode(struct fs_info *fsp, ino_t ino)
    121 {
    122 	int i, error;
    123 	BLOCK_INFO_15 *bip, *tbip;
    124 	struct dinode *dip;
    125 	int nb, onb, noff;
    126 	ufs_daddr_t toff;
    127 	struct lfs *lfsp;
    128 	int bps;
    129         SEGUSE *sup;
    130 
    131 	lfsp = &fsp->fi_lfs;
    132 
    133         dip = get_dinode(fsp, ino);
    134 	if (dip == NULL)
    135 		return COALESCE_NOINODE;
    136 
    137 	/* Compute file block size, set up for lfs_bmapv */
    138 	onb = nb = btofsb(lfsp, dip->di_size);
    139 
    140 	/* XXX for now, don't do any file small enough to have fragments */
    141 	if (nb < NDADDR)
    142 		return COALESCE_TOOSMALL;
    143 
    144 	/* Sanity checks */
    145 	if (dip->di_size < 0) {
    146 		if (debug)
    147 			syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
    148 				ino, (long long)dip->di_size);
    149 		return COALESCE_BADSIZE;
    150 	}
    151 	if (nb > dip->di_blocks) {
    152 		if (debug)
    153 			syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
    154 				ino, nb, dip->di_blocks);
    155 		return COALESCE_BADBLOCKSIZE;
    156 	}
    157 
    158 	bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
    159 	if (bip == NULL) {
    160 		syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
    161 		return COALESCE_NOMEM;
    162 	}
    163 	for (i = 0; i < nb; i++) {
    164 		memset(bip + i, 0, sizeof(BLOCK_INFO_15));
    165 		bip[i].bi_inode = ino;
    166 		bip[i].bi_lbn = i;
    167 		bip[i].bi_version = dip->di_gen;
    168 		/* Don't set the size, but let lfs_bmap fill it in */
    169 	}
    170 	if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
    171                 syslog(LOG_WARNING, "lfs_bmapv: %m");
    172 		free(bip);
    173 		return COALESCE_BADBMAPV;
    174 	}
    175 	noff = toff = 0;
    176 	for (i = 1; i < nb; i++) {
    177 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + 1)
    178 			++noff;
    179 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr - 1);
    180 	}
    181 
    182 	/*
    183 	 * If this file is not discontinuous, there's no point in rewriting it.
    184          *
    185          * Explicitly allow a certain amount of discontinuity, since large
    186          * files will be broken among segments and medium-sized files
    187          * can have a break or two and it's okay.
    188 	 */
    189 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    190 	    segtod(lfsp, noff) * 2 < nb) {
    191 		free(bip);
    192 		return COALESCE_NOTWORTHIT;
    193 	} else if (debug)
    194 		syslog(LOG_DEBUG, "ino %d total discontinuity "
    195 			"%d (%d) for %d blocks", ino, noff, toff, nb);
    196 
    197 	/* Search for blocks in active segments; don't move them. */
    198 	for (i = 0; i < nb; i++) {
    199 		if (bip[i].bi_daddr <= 0)
    200 			continue;
    201 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
    202 				dtosn(lfsp, bip[i].bi_daddr));
    203 		if (sup->su_flags & SEGUSE_ACTIVE)
    204 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    205 	}
    206         /*
    207 	 * Get rid of any we've marked dead.  If this is an older
    208 	 * kernel that doesn't have lfs_bmapv fill in the block
    209 	 * sizes, we'll toss everything here.
    210 	 */
    211 	toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
    212         if (nb && tossdead(NULL, bip + nb - 1, NULL))
    213                 --nb;
    214         if (nb == 0) {
    215 		free(bip);
    216 		return COALESCE_NOTHINGLEFT;
    217 	}
    218 
    219 	/*
    220 	 * We may have tossed enough blocks that it is no longer worthwhile
    221 	 * to rewrite this inode.
    222 	 */
    223 	if (onb - nb > log2int(onb)) {
    224 		if (debug)
    225 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    226 		return COALESCE_NOTHINGLEFT2;
    227 	}
    228 
    229         /*
    230 	 * We are going to rewrite this inode.
    231 	 * For any remaining blocks, read in their contents.
    232 	 */
    233 	for (i = 0; i < nb; i++) {
    234 		bip[i].bi_bp = malloc(bip[i].bi_size);
    235                 get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
    236 	}
    237 	if (debug)
    238 		syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
    239 
    240 	/*
    241 	 * Write in segment-sized chunks.  If at any point we'd write more
    242 	 * than half of the available segments, sleep until that's not
    243 	 * true any more.
    244 	 */
    245 	bps = segtod(lfsp, 1);
    246 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    247 		while (fsp->fi_cip->clean < 4) {
    248 			lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
    249 			reread_fs_info(fsp, do_mmap);
    250 			/* XXX start over? */
    251 		}
    252 		lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
    253                           (tbip + bps < bip + nb ? bps : nb % bps));
    254 	}
    255 
    256 	for (i = 0; i < nb; i++)
    257 		if (bip[i].bi_bp)
    258 			free(bip[i].bi_bp);
    259 	free(bip);
    260 	return COALESCE_OK;
    261 }
    262 
    263 /*
    264  * Try coalescing every inode in the filesystem.
    265  * Return the number of inodes actually altered.
    266  */
    267 int clean_all_inodes(struct fs_info *fsp)
    268 {
    269 	int i, r;
    270 	int totals[COALESCE_MAXERROR];
    271 
    272 	memset(totals, 0, sizeof(totals));
    273 	for (i = 0; i < fsp->fi_ifile_count; i++) {
    274 		r = clean_inode(fsp, i);
    275 		++totals[r];
    276 	}
    277 
    278 	for (i = 0; i < COALESCE_MAXERROR; i++)
    279 		if (totals[i])
    280 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    281 				totals[i]);
    282 
    283 	return totals[COALESCE_OK];
    284 }
    285 
    286 int fork_coalesce(struct fs_info *fsp)
    287 {
    288 	static pid_t childpid;
    289 	int num;
    290 
    291 	reread_fs_info(fsp, do_mmap);
    292 
    293 	if (childpid) {
    294      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    295 			childpid = 0;
    296 	}
    297 	if (childpid && kill(childpid, 0) >= 0) {
    298 		/* already running a coalesce process */
    299 		if (debug)
    300 			syslog(LOG_DEBUG, "coalescing already in progress");
    301 		return 0;
    302 	}
    303 	childpid = fork();
    304 	if (childpid < 0) {
    305 		syslog(LOG_ERR, "fork: %m");
    306 		return 0;
    307 	} else if (childpid == 0) {
    308 		syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
    309 		num = clean_all_inodes(fsp);
    310 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
    311 		exit(0);
    312 	}
    313 	return 0;
    314 }
    315