Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.14
      1 /*      $NetBSD: coalesce.c,v 1.14 2007/10/08 21:41:12 ad Exp $  */
      2 
      3 /*-
      4  * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/mount.h>
     41 #include <sys/time.h>
     42 #include <sys/resource.h>
     43 #include <sys/types.h>
     44 #include <sys/wait.h>
     45 #include <sys/mman.h>
     46 
     47 #include <ufs/ufs/dinode.h>
     48 #include <ufs/lfs/lfs.h>
     49 
     50 #include <fcntl.h>
     51 #include <signal.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 #include <time.h>
     56 #include <unistd.h>
     57 #include <util.h>
     58 #include <errno.h>
     59 #include <err.h>
     60 
     61 #include <syslog.h>
     62 
     63 #include "bufcache.h"
     64 #include "vnode.h"
     65 #include "cleaner.h"
     66 
     67 extern int debug, do_mmap;
     68 
/*
 * Integer base-2 logarithm, rounded down: the index of the highest set
 * bit of a positive n.  Returns -1 when n is zero or negative.
 */
int log2int(int n)
{
	int bits;

	/* Start one below zero so that each halving of n adds one bit. */
	for (bits = -1; n > 0; n >>= 1)
		bits++;

	return bits;
}
     80 
/*
 * Result codes produced by clean_inode().  COALESCE_MAXERROR is not a
 * real result; it counts the codes and sizes the per-code tallies.
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,
	COALESCE_NOINODE,
	COALESCE_TOOSMALL,
	COALESCE_BADSIZE,
	COALESCE_BADBLOCKSIZE,
	COALESCE_NOMEM,
	COALESCE_BADBMAPV,
	COALESCE_BADMARKV,
	COALESCE_NOTWORTHIT,
	COALESCE_NOTHINGLEFT,
	COALESCE_EIO,

	COALESCE_MAXERROR
};

/*
 * Human-readable description of each result code, indexed by
 * enum coalesce_returncodes.
 *
 * Each entry is bound to its code with a designated initializer so the
 * table cannot drift out of step with the enum.  (The positional table
 * this replaces had no entry for COALESCE_BADMARKV, so every message
 * from that index on described the wrong condition.)
 */
char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "LFCNBMAPV failed",
	[COALESCE_BADMARKV]	= "LFCNMARKV failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks found in active segments",
	[COALESCE_EIO]		= "I/O error",

	[COALESCE_MAXERROR]	= "No such error"
};
    112 
    113 static struct ufs1_dinode *
    114 get_dinode(struct clfs *fs, ino_t ino)
    115 {
    116 	IFILE *ifp;
    117 	daddr_t daddr;
    118 	struct ubuf *bp;
    119 	struct ufs1_dinode *dip, *r;
    120 
    121 	lfs_ientry(&ifp, fs, ino, &bp);
    122 	daddr = ifp->if_daddr;
    123 	brelse(bp, 0);
    124 
    125 	if (daddr == 0x0)
    126 		return NULL;
    127 
    128 	bread(fs->clfs_devvp, daddr, fs->lfs_ibsize, NOCRED, &bp);
    129 	for (dip = (struct ufs1_dinode *)bp->b_data;
    130 	     dip < (struct ufs1_dinode *)(bp->b_data + fs->lfs_ibsize); dip++)
    131 		if (dip->di_inumber == ino) {
    132 			r = (struct ufs1_dinode *)malloc(sizeof(*r));
    133 			memcpy(r, dip, sizeof(*r));
    134 			brelse(bp, 0);
    135 			return r;
    136 		}
    137 	brelse(bp, 0);
    138 	return NULL;
    139 }
    140 
    141 /*
    142  * Find out if this inode's data blocks are discontinuous; if they are,
    143  * rewrite them using markv.  Return the number of inodes rewritten.
    144  */
    145 static int
    146 clean_inode(struct clfs *fs, ino_t ino)
    147 {
    148 	BLOCK_INFO *bip = NULL, *tbip;
    149 	CLEANERINFO cip;
    150 	struct ubuf *bp;
    151 	struct ufs1_dinode *dip;
    152 	struct clfs_seguse *sup;
    153 	struct lfs_fcntl_markv /* {
    154 		BLOCK_INFO *blkiov;
    155 		int blkcnt;
    156 	} */ lim;
    157 	daddr_t toff;
    158 	int i;
    159 	int nb, onb, noff;
    160 	int retval;
    161 	int bps;
    162 
    163 	dip = get_dinode(fs, ino);
    164 	if (dip == NULL)
    165 		return COALESCE_NOINODE;
    166 
    167 	/* Compute file block size, set up for bmapv */
    168 	onb = nb = lblkno(fs, dip->di_size);
    169 
    170 	/* XXX for now, don't do any file small enough to have fragments */
    171 	if (nb < NDADDR) {
    172 		free(dip);
    173 		return COALESCE_TOOSMALL;
    174 	}
    175 
    176 	/* Sanity checks */
    177 	if (dip->di_size < 0) {
    178 		dlog("ino %d, negative size (%" PRId64 ")", ino, dip->di_size);
    179 		free(dip);
    180 		return COALESCE_BADSIZE;
    181 	}
    182 	if (nb > dip->di_blocks) {
    183 		dlog("ino %d, computed blocks %d > held blocks %d", ino, nb,
    184 		     dip->di_blocks);
    185 		free(dip);
    186 		return COALESCE_BADBLOCKSIZE;
    187 	}
    188 
    189 	bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
    190 	if (bip == NULL) {
    191 		syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
    192 		    (unsigned long long)ino, nb);
    193 		free(dip);
    194 		return COALESCE_NOMEM;
    195 	}
    196 	for (i = 0; i < nb; i++) {
    197 		memset(bip + i, 0, sizeof(BLOCK_INFO));
    198 		bip[i].bi_inode = ino;
    199 		bip[i].bi_lbn = i;
    200 		bip[i].bi_version = dip->di_gen;
    201 		/* Don't set the size, but let lfs_bmap fill it in */
    202 	}
    203 	lim.blkiov = bip;
    204 	lim.blkcnt = nb;
    205 	if (fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim) < 0) {
    206 		syslog(LOG_WARNING, "%s: coalesce: LFCNBMAPV: %m",
    207 		       fs->lfs_fsmnt);
    208 		retval = COALESCE_BADBMAPV;
    209 		goto out;
    210 	}
    211 #if 0
    212 	for (i = 0; i < nb; i++) {
    213 		printf("bi_size = %d, bi_ino = %d, "
    214 		    "bi_lbn = %d, bi_daddr = %d\n",
    215 		    bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
    216 		    bip[i].bi_daddr);
    217 	}
    218 #endif
    219 	noff = toff = 0;
    220 	for (i = 1; i < nb; i++) {
    221 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + fs->lfs_frag)
    222 			++noff;
    223 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
    224 		    - fs->lfs_frag) >> fs->lfs_fbshift;
    225 	}
    226 
    227 	/*
    228 	 * If this file is not discontinuous, there's no point in rewriting it.
    229 	 *
    230 	 * Explicitly allow a certain amount of discontinuity, since large
    231 	 * files will be broken among segments and medium-sized files
    232 	 * can have a break or two and it's okay.
    233 	 */
    234 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    235 	    segtod(fs, noff) * 2 < nb) {
    236 		retval = COALESCE_NOTWORTHIT;
    237 		goto out;
    238 	} else if (debug)
    239 		syslog(LOG_DEBUG, "ino %llu total discontinuity "
    240 		    "%d (%lld) for %d blocks", (unsigned long long)ino,
    241 		    noff, (long long)toff, nb);
    242 
    243 	/* Search for blocks in active segments; don't move them. */
    244 	for (i = 0; i < nb; i++) {
    245 		if (bip[i].bi_daddr <= 0)
    246 			continue;
    247 		sup = &fs->clfs_segtab[dtosn(fs, bip[i].bi_daddr)];
    248 		if (sup->flags & SEGUSE_ACTIVE)
    249 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    250 	}
    251 
    252 	/*
    253 	 * Get rid of any blocks we've marked dead.  If this is an older
    254 	 * kernel that doesn't have bmapv fill in the block sizes, we'll
    255 	 * toss everything here.
    256 	 */
    257 	onb = nb;
    258 	toss_old_blocks(fs, &bip, &nb, NULL);
    259 	nb = i;
    260 
    261 	/*
    262 	 * We may have tossed enough blocks that it is no longer worthwhile
    263 	 * to rewrite this inode.
    264 	 */
    265 	if (nb == 0 || onb - nb > log2int(onb)) {
    266 		if (debug)
    267 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    268 		retval = COALESCE_NOTHINGLEFT;
    269 		goto out;
    270 	}
    271 
    272 	/*
    273 	 * We are going to rewrite this inode.
    274 	 * For any remaining blocks, read in their contents.
    275 	 */
    276 	for (i = 0; i < nb; i++) {
    277 		bip[i].bi_bp = malloc(bip[i].bi_size);
    278 		if (bip[i].bi_bp == NULL) {
    279 			syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
    280 			    bip[i].bi_size);
    281 			retval = COALESCE_NOMEM;
    282 			goto out;
    283 		}
    284 
    285 		if (pread(fs->clfs_devfd, bip[i].bi_bp, bip[i].bi_size,
    286 			  fsbtob(fs, bip[i].bi_daddr)) < 0) {
    287 			retval = COALESCE_EIO;
    288 			goto out;
    289 		}
    290 	}
    291 	if (debug)
    292 		syslog(LOG_DEBUG, "ino %llu markv %d blocks",
    293 		    (unsigned long long)ino, nb);
    294 
    295 	/*
    296 	 * Write in segment-sized chunks.  If at any point we'd write more
    297 	 * than half of the available segments, sleep until that's not
    298 	 * true any more.
    299 	 */
    300 	bps = segtod(fs, 1);
    301 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    302 		do {
    303 			bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, &bp);
    304 			cip = *(CLEANERINFO *)bp->b_data;
    305 			brelse(bp, B_INVAL);
    306 
    307 			if (cip.clean < 4) /* XXX magic number 4 */
    308 				fcntl(fs->clfs_ifilefd, LFCNSEGWAIT, NULL);
    309 		} while(cip.clean < 4);
    310 
    311 		lim.blkiov = tbip;
    312 		lim.blkcnt = (tbip + bps < bip + nb ? bps : nb % bps);
    313 		if (fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim) < 0) {
    314 			retval = COALESCE_BADMARKV;
    315 			goto out;
    316 		}
    317 	}
    318 
    319 	retval = COALESCE_OK;
    320 out:
    321 	free(dip);
    322 	if (bip) {
    323 		for (i = 0; i < onb; i++)
    324 			if (bip[i].bi_bp)
    325 				free(bip[i].bi_bp);
    326 		free(bip);
    327 	}
    328 	return retval;
    329 }
    330 
    331 /*
    332  * Try coalescing every inode in the filesystem.
    333  * Return the number of inodes actually altered.
    334  */
    335 int clean_all_inodes(struct clfs *fs)
    336 {
    337 	int i, r, maxino;
    338 	int totals[COALESCE_MAXERROR];
    339 	struct stat st;
    340 
    341 	memset(totals, 0, sizeof(totals));
    342 
    343 	fstat(fs->clfs_ifilefd, &st);
    344 	maxino = fs->lfs_ifpb * (st.st_size >> fs->lfs_bshift) -
    345 		fs->lfs_segtabsz - fs->lfs_cleansz;
    346 
    347 	for (i = 0; i < maxino; i++) {
    348 		r = clean_inode(fs, i);
    349 		++totals[r];
    350 	}
    351 
    352 	for (i = 0; i < COALESCE_MAXERROR; i++)
    353 		if (totals[i])
    354 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    355 			       totals[i]);
    356 
    357 	return totals[COALESCE_OK];
    358 }
    359 
    360 /*
    361  * Fork a child process to coalesce this fs.
    362  */
    363 int
    364 fork_coalesce(struct clfs *fs)
    365 {
    366 	static pid_t childpid;
    367 	int num;
    368 
    369 	/*
    370 	 * If already running a coalescing child, don't start a new one.
    371 	 */
    372 	if (childpid) {
    373 		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    374 			childpid = 0;
    375 	}
    376 	if (childpid && kill(childpid, 0) >= 0) {
    377 		/* already running a coalesce process */
    378 		if (debug)
    379 			syslog(LOG_DEBUG, "coalescing already in progress");
    380 		return 0;
    381 	}
    382 
    383 	/*
    384 	 * Fork a child and let the child coalease
    385 	 */
    386 	childpid = fork();
    387 	if (childpid < 0) {
    388 		syslog(LOG_ERR, "%s: fork to coaleasce: %m", fs->lfs_fsmnt);
    389 		return 0;
    390 	} else if (childpid == 0) {
    391 		syslog(LOG_NOTICE, "%s: new coalescing process, pid %d",
    392 		       fs->lfs_fsmnt, getpid());
    393 		num = clean_all_inodes(fs);
    394 		syslog(LOG_NOTICE, "%s: coalesced %d discontiguous inodes",
    395 		       fs->lfs_fsmnt, num);
    396 		exit(0);
    397 	}
    398 
    399 	return 0;
    400 }
    401