Home | History | Annotate | Line # | Download | only in lfs_cleanerd
coalesce.c revision 1.11
      1 /*      $NetBSD: coalesce.c,v 1.11 2006/03/30 19:10:13 perseant Exp $  */
      2 
      3 /*-
      4  * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Konrad E. Schroder <perseant (at) hhhh.org>.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed by the NetBSD
     21  *      Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/param.h>
     40 #include <sys/mount.h>
     41 #include <sys/time.h>
     42 #include <sys/resource.h>
     43 #include <sys/types.h>
     44 #include <sys/wait.h>
     45 #include <sys/mman.h>
     46 
     47 #include <ufs/ufs/dinode.h>
     48 #include <ufs/lfs/lfs.h>
     49 
     50 #include <fcntl.h>
     51 #include <signal.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 #include <time.h>
     56 #include <unistd.h>
     57 #include <util.h>
     58 #include <errno.h>
     59 #include <err.h>
     60 
     61 #include <syslog.h>
     62 
     63 #include "bufcache.h"
     64 #include "vnode.h"
     65 #include "cleaner.h"
     66 
     67 extern int debug, do_mmap;
     68 
/*
 * Integer base-2 logarithm, rounded down.
 *
 * Counts how many right shifts it takes to drive n to zero and
 * subtracts one, so log2int(1) == 0 and log2int(8) == 3.  Any n <= 0
 * never enters the loop and yields -1.
 */
int log2int(int n)
{
	int shifts;

	for (shifts = 0; n > 0; n >>= 1)
		shifts++;

	return shifts - 1;
}
     80 
/*
 * Status codes returned by clean_inode().  COALESCE_MAXERROR is not a
 * real status; it is the number of codes and is used to size the
 * per-code tally in clean_all_inodes().
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,
	COALESCE_NOINODE,
	COALESCE_TOOSMALL,
	COALESCE_BADSIZE,
	COALESCE_BADBLOCKSIZE,
	COALESCE_NOMEM,
	COALESCE_BADBMAPV,
	COALESCE_BADMARKV,
	COALESCE_NOTWORTHIT,
	COALESCE_NOTHINGLEFT,
	COALESCE_EIO,

	COALESCE_MAXERROR
};

/*
 * Human-readable description of each status code, indexed by the code.
 *
 * Each string is tied to its enumerator with a designated initializer
 * so the table cannot silently drift out of step with the enum (the
 * positional version had no entry for COALESCE_BADMARKV, which shifted
 * the descriptions of the codes after it).
 */
char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "LFCNBMAPV failed",
	[COALESCE_BADMARKV]	= "LFCNMARKV failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks found in active segments",
	[COALESCE_EIO]		= "I/O error",

	[COALESCE_MAXERROR]	= "No such error"
};
    112 
    113 static struct ufs1_dinode *
    114 get_dinode(struct clfs *fs, ino_t ino)
    115 {
    116 	IFILE *ifp;
    117 	daddr_t daddr;
    118 	struct ubuf *bp;
    119 	struct ufs1_dinode *dip, *r;
    120 
    121 	lfs_ientry(&ifp, fs, ino, &bp);
    122 	daddr = ifp->if_daddr;
    123 	brelse(bp);
    124 
    125 	if (daddr == 0x0)
    126 		return NULL;
    127 
    128 	bread(fs->clfs_devvp, daddr, fs->lfs_ibsize, NOCRED, &bp);
    129 	for (dip = (struct ufs1_dinode *)bp->b_data;
    130 	     dip < (struct ufs1_dinode *)(bp->b_data + fs->lfs_ibsize); dip++)
    131 		if (dip->di_inumber == ino) {
    132 			r = (struct ufs1_dinode *)malloc(sizeof(*r));
    133 			memcpy(r, dip, sizeof(*r));
    134 			brelse(bp);
    135 			return r;
    136 		}
    137 	brelse(bp);
    138 	return NULL;
    139 }
    140 
    141 /*
    142  * Find out if this inode's data blocks are discontinuous; if they are,
    143  * rewrite them using markv.  Return the number of inodes rewritten.
    144  */
    145 static int
    146 clean_inode(struct clfs *fs, ino_t ino)
    147 {
    148 	BLOCK_INFO *bip = NULL, *tbip;
    149 	CLEANERINFO cip;
    150 	struct ubuf *bp;
    151 	struct ufs1_dinode *dip;
    152 	struct clfs_seguse *sup;
    153 	struct lfs_fcntl_markv /* {
    154 		BLOCK_INFO *blkiov;
    155 		int blkcnt;
    156 	} */ lim;
    157 	daddr_t toff;
    158 	int i;
    159 	int nb, onb, noff;
    160 	int retval;
    161 	int bps;
    162 
    163 	dip = get_dinode(fs, ino);
    164 	if (dip == NULL)
    165 		return COALESCE_NOINODE;
    166 
    167 	/* Compute file block size, set up for bmapv */
    168 	onb = nb = lblkno(fs, dip->di_size);
    169 
    170 	/* XXX for now, don't do any file small enough to have fragments */
    171 	if (nb < NDADDR)
    172 		return COALESCE_TOOSMALL;
    173 
    174 	/* Sanity checks */
    175 	if (dip->di_size < 0) {
    176 		dlog("ino %d, negative size (%" PRId64 ")", ino, dip->di_size);
    177 		return COALESCE_BADSIZE;
    178 	}
    179 	if (nb > dip->di_blocks) {
    180 		dlog("ino %d, computed blocks %d > held blocks %d", ino, nb,
    181 		     dip->di_blocks);
    182 		return COALESCE_BADBLOCKSIZE;
    183 	}
    184 
    185 	bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
    186 	if (bip == NULL) {
    187 		syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
    188 		    (unsigned long long)ino, nb);
    189 		return COALESCE_NOMEM;
    190 	}
    191 	for (i = 0; i < nb; i++) {
    192 		memset(bip + i, 0, sizeof(BLOCK_INFO));
    193 		bip[i].bi_inode = ino;
    194 		bip[i].bi_lbn = i;
    195 		bip[i].bi_version = dip->di_gen;
    196 		/* Don't set the size, but let lfs_bmap fill it in */
    197 	}
    198 	lim.blkiov = bip;
    199 	lim.blkcnt = nb;
    200 	if (fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim) < 0) {
    201 		syslog(LOG_WARNING, "%s: coalesce: LFCNBMAPV: %m",
    202 		       fs->lfs_fsmnt);
    203 		retval = COALESCE_BADBMAPV;
    204 		goto out;
    205 	}
    206 #if 0
    207 	for (i = 0; i < nb; i++) {
    208 		printf("bi_size = %d, bi_ino = %d, "
    209 		    "bi_lbn = %d, bi_daddr = %d\n",
    210 		    bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
    211 		    bip[i].bi_daddr);
    212 	}
    213 #endif
    214 	noff = toff = 0;
    215 	for (i = 1; i < nb; i++) {
    216 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + fs->lfs_frag)
    217 			++noff;
    218 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
    219 		    - fs->lfs_frag) >> fs->lfs_fbshift;
    220 	}
    221 
    222 	/*
    223 	 * If this file is not discontinuous, there's no point in rewriting it.
    224 	 *
    225 	 * Explicitly allow a certain amount of discontinuity, since large
    226 	 * files will be broken among segments and medium-sized files
    227 	 * can have a break or two and it's okay.
    228 	 */
    229 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
    230 	    segtod(fs, noff) * 2 < nb) {
    231 		retval = COALESCE_NOTWORTHIT;
    232 		goto out;
    233 	} else if (debug)
    234 		syslog(LOG_DEBUG, "ino %llu total discontinuity "
    235 		    "%d (%lld) for %d blocks", (unsigned long long)ino,
    236 		    noff, (long long)toff, nb);
    237 
    238 	/* Search for blocks in active segments; don't move them. */
    239 	for (i = 0; i < nb; i++) {
    240 		if (bip[i].bi_daddr <= 0)
    241 			continue;
    242 		sup = &fs->clfs_segtab[dtosn(fs, bip[i].bi_daddr)];
    243 		if (sup->flags & SEGUSE_ACTIVE)
    244 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
    245 	}
    246 
    247 	/*
    248 	 * Get rid of any blocks we've marked dead.  If this is an older
    249 	 * kernel that doesn't have bmapv fill in the block sizes, we'll
    250 	 * toss everything here.
    251 	 */
    252 	onb = nb;
    253 	toss_old_blocks(fs, &bip, &nb);
    254 	nb = i;
    255 
    256 	/*
    257 	 * We may have tossed enough blocks that it is no longer worthwhile
    258 	 * to rewrite this inode.
    259 	 */
    260 	if (nb == 0 || onb - nb > log2int(onb)) {
    261 		if (debug)
    262 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
    263 		retval = COALESCE_NOTHINGLEFT;
    264 		goto out;
    265 	}
    266 
    267 	/*
    268 	 * We are going to rewrite this inode.
    269 	 * For any remaining blocks, read in their contents.
    270 	 */
    271 	for (i = 0; i < nb; i++) {
    272 		bip[i].bi_bp = malloc(bip[i].bi_size);
    273 		if (bip[i].bi_bp == NULL) {
    274 			syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
    275 			    bip[i].bi_size);
    276 			retval = COALESCE_NOMEM;
    277 			goto out;
    278 		}
    279 
    280 		if (pread(fs->clfs_devfd, bip[i].bi_bp, bip[i].bi_size,
    281 			  fsbtob(fs, bip[i].bi_daddr)) < 0) {
    282 			retval = COALESCE_EIO;
    283 			goto out;
    284 		}
    285 	}
    286 	if (debug)
    287 		syslog(LOG_DEBUG, "ino %llu markv %d blocks",
    288 		    (unsigned long long)ino, nb);
    289 
    290 	/*
    291 	 * Write in segment-sized chunks.  If at any point we'd write more
    292 	 * than half of the available segments, sleep until that's not
    293 	 * true any more.
    294 	 */
    295 	bps = segtod(fs, 1);
    296 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
    297 		do {
    298 			bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, &bp);
    299 			cip = *(CLEANERINFO *)bp->b_data;
    300 			bp->b_flags |= B_INVAL;
    301 			brelse(bp);
    302 
    303 			if (cip.clean < 4) /* XXX magic number 4 */
    304 				fcntl(fs->clfs_ifilefd, LFCNSEGWAIT, NULL);
    305 		} while(cip.clean < 4);
    306 
    307 		lim.blkiov = tbip;
    308 		lim.blkcnt = (tbip + bps < bip + nb ? bps : nb % bps);
    309 		if (fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim) < 0) {
    310 			retval = COALESCE_BADMARKV;
    311 			goto out;
    312 		}
    313 	}
    314 
    315 	retval = COALESCE_OK;
    316 out:
    317 	free(dip);
    318 	if (bip) {
    319 		for (i = 0; i < onb; i++)
    320 			if (bip[i].bi_bp)
    321 				free(bip[i].bi_bp);
    322 		free(bip);
    323 	}
    324 	return retval;
    325 }
    326 
    327 /*
    328  * Try coalescing every inode in the filesystem.
    329  * Return the number of inodes actually altered.
    330  */
    331 int clean_all_inodes(struct clfs *fs)
    332 {
    333 	int i, r, maxino;
    334 	int totals[COALESCE_MAXERROR];
    335 	struct stat st;
    336 
    337 	memset(totals, 0, sizeof(totals));
    338 
    339 	fstat(fs->clfs_ifilefd, &st);
    340 	maxino = fs->lfs_ifpb * (st.st_size >> fs->lfs_bshift) -
    341 		fs->lfs_segtabsz - fs->lfs_cleansz;
    342 
    343 	for (i = 0; i < maxino; i++) {
    344 		r = clean_inode(fs, i);
    345 		++totals[r];
    346 	}
    347 
    348 	for (i = 0; i < COALESCE_MAXERROR; i++)
    349 		if (totals[i])
    350 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
    351 			       totals[i]);
    352 
    353 	return totals[COALESCE_OK];
    354 }
    355 
    356 /*
    357  * Fork a child process to coalesce this fs.
    358  */
    359 int
    360 fork_coalesce(struct clfs *fs)
    361 {
    362 	static pid_t childpid;
    363 	int num;
    364 
    365 	/*
    366 	 * If already running a coalescing child, don't start a new one.
    367 	 */
    368 	if (childpid) {
    369 		if (waitpid(childpid, NULL, WNOHANG) == childpid)
    370 			childpid = 0;
    371 	}
    372 	if (childpid && kill(childpid, 0) >= 0) {
    373 		/* already running a coalesce process */
    374 		if (debug)
    375 			syslog(LOG_DEBUG, "coalescing already in progress");
    376 		return 0;
    377 	}
    378 
    379 	/*
    380 	 * Fork a child and let the child coalease
    381 	 */
    382 	childpid = fork();
    383 	if (childpid < 0) {
    384 		syslog(LOG_ERR, "%s: fork to coaleasce: %m", fs->lfs_fsmnt);
    385 		return 0;
    386 	} else if (childpid == 0) {
    387 		syslog(LOG_NOTICE, "%s: new coalescing process, pid %d",
    388 		       fs->lfs_fsmnt, getpid());
    389 		num = clean_all_inodes(fs);
    390 		syslog(LOG_NOTICE, "%s: coalesced %d discontiguous inodes",
    391 		       fs->lfs_fsmnt, num);
    392 		exit(0);
    393 	}
    394 
    395 	return 0;
    396 }
    397