coalesce.c revision 1.16 1 /* $NetBSD: coalesce.c,v 1.16 2008/05/16 09:21:59 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/param.h>
33 #include <sys/mount.h>
34 #include <sys/time.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <sys/mman.h>
39
40 #include <ufs/ufs/dinode.h>
41 #include <ufs/lfs/lfs.h>
42
43 #include <fcntl.h>
44 #include <signal.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <time.h>
49 #include <unistd.h>
50 #include <util.h>
51 #include <errno.h>
52 #include <err.h>
53
54 #include <syslog.h>
55
56 #include "bufcache.h"
57 #include "vnode.h"
58 #include "cleaner.h"
59
60 extern int debug, do_mmap;
61
/*
 * Integer base-2 logarithm, rounded down.
 *
 * Returns the index of the highest set bit of n, or -1 when n is zero
 * or negative.
 */
int log2int(int n)
{
	int highbit = -1;

	/* One step per significant bit; the last step lands on its index. */
	for (; n > 0; n >>= 1)
		highbit++;

	return highbit;
}
73
/*
 * Result codes produced by clean_inode().  The values are used directly
 * as array indices (into coalesce_return[] and into the per-result tally
 * kept by clean_all_inodes()), so they must stay dense and start at zero.
 * COALESCE_MAXERROR is a count/sentinel, not a real result.
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,
	COALESCE_NOINODE,
	COALESCE_TOOSMALL,
	COALESCE_BADSIZE,
	COALESCE_BADBLOCKSIZE,
	COALESCE_NOMEM,
	COALESCE_BADBMAPV,
	COALESCE_BADMARKV,
	COALESCE_NOTWORTHIT,
	COALESCE_NOTHINGLEFT,
	COALESCE_EIO,

	COALESCE_MAXERROR
};

/*
 * Human-readable description of each result code, indexed by
 * enum coalesce_returncodes.  Designated initializers tie every string
 * to its code so the table cannot silently drift out of step with the
 * enum (the positional table had no entry for COALESCE_BADMARKV, which
 * shifted the messages for the codes after COALESCE_BADBMAPV).
 */
char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "LFCNBMAPV failed",
	[COALESCE_BADMARKV]	= "LFCNMARKV failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks found in active segments",
	[COALESCE_EIO]		= "I/O error",

	[COALESCE_MAXERROR]	= "No such error"
};
105
106 static struct ufs1_dinode *
107 get_dinode(struct clfs *fs, ino_t ino)
108 {
109 IFILE *ifp;
110 daddr_t daddr;
111 struct ubuf *bp;
112 struct ufs1_dinode *dip, *r;
113
114 lfs_ientry(&ifp, fs, ino, &bp);
115 daddr = ifp->if_daddr;
116 brelse(bp, 0);
117
118 if (daddr == 0x0)
119 return NULL;
120
121 bread(fs->clfs_devvp, daddr, fs->lfs_ibsize, NOCRED, 0, &bp);
122 for (dip = (struct ufs1_dinode *)bp->b_data;
123 dip < (struct ufs1_dinode *)(bp->b_data + fs->lfs_ibsize); dip++)
124 if (dip->di_inumber == ino) {
125 r = (struct ufs1_dinode *)malloc(sizeof(*r));
126 memcpy(r, dip, sizeof(*r));
127 brelse(bp, 0);
128 return r;
129 }
130 brelse(bp, 0);
131 return NULL;
132 }
133
134 /*
135 * Find out if this inode's data blocks are discontinuous; if they are,
136 * rewrite them using markv. Return the number of inodes rewritten.
137 */
138 static int
139 clean_inode(struct clfs *fs, ino_t ino)
140 {
141 BLOCK_INFO *bip = NULL, *tbip;
142 CLEANERINFO cip;
143 struct ubuf *bp;
144 struct ufs1_dinode *dip;
145 struct clfs_seguse *sup;
146 struct lfs_fcntl_markv /* {
147 BLOCK_INFO *blkiov;
148 int blkcnt;
149 } */ lim;
150 daddr_t toff;
151 int i;
152 int nb, onb, noff;
153 int retval;
154 int bps;
155
156 dip = get_dinode(fs, ino);
157 if (dip == NULL)
158 return COALESCE_NOINODE;
159
160 /* Compute file block size, set up for bmapv */
161 onb = nb = lblkno(fs, dip->di_size);
162
163 /* XXX for now, don't do any file small enough to have fragments */
164 if (nb < NDADDR) {
165 free(dip);
166 return COALESCE_TOOSMALL;
167 }
168
169 /* Sanity checks */
170 if (dip->di_size < 0) {
171 dlog("ino %d, negative size (%" PRId64 ")", ino, dip->di_size);
172 free(dip);
173 return COALESCE_BADSIZE;
174 }
175 if (nb > dip->di_blocks) {
176 dlog("ino %d, computed blocks %d > held blocks %d", ino, nb,
177 dip->di_blocks);
178 free(dip);
179 return COALESCE_BADBLOCKSIZE;
180 }
181
182 bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
183 if (bip == NULL) {
184 syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
185 (unsigned long long)ino, nb);
186 free(dip);
187 return COALESCE_NOMEM;
188 }
189 for (i = 0; i < nb; i++) {
190 memset(bip + i, 0, sizeof(BLOCK_INFO));
191 bip[i].bi_inode = ino;
192 bip[i].bi_lbn = i;
193 bip[i].bi_version = dip->di_gen;
194 /* Don't set the size, but let lfs_bmap fill it in */
195 }
196 lim.blkiov = bip;
197 lim.blkcnt = nb;
198 if (fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim) < 0) {
199 syslog(LOG_WARNING, "%s: coalesce: LFCNBMAPV: %m",
200 fs->lfs_fsmnt);
201 retval = COALESCE_BADBMAPV;
202 goto out;
203 }
204 #if 0
205 for (i = 0; i < nb; i++) {
206 printf("bi_size = %d, bi_ino = %d, "
207 "bi_lbn = %d, bi_daddr = %d\n",
208 bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
209 bip[i].bi_daddr);
210 }
211 #endif
212 noff = toff = 0;
213 for (i = 1; i < nb; i++) {
214 if (bip[i].bi_daddr != bip[i - 1].bi_daddr + fs->lfs_frag)
215 ++noff;
216 toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
217 - fs->lfs_frag) >> fs->lfs_fbshift;
218 }
219
220 /*
221 * If this file is not discontinuous, there's no point in rewriting it.
222 *
223 * Explicitly allow a certain amount of discontinuity, since large
224 * files will be broken among segments and medium-sized files
225 * can have a break or two and it's okay.
226 */
227 if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
228 segtod(fs, noff) * 2 < nb) {
229 retval = COALESCE_NOTWORTHIT;
230 goto out;
231 } else if (debug)
232 syslog(LOG_DEBUG, "ino %llu total discontinuity "
233 "%d (%lld) for %d blocks", (unsigned long long)ino,
234 noff, (long long)toff, nb);
235
236 /* Search for blocks in active segments; don't move them. */
237 for (i = 0; i < nb; i++) {
238 if (bip[i].bi_daddr <= 0)
239 continue;
240 sup = &fs->clfs_segtab[dtosn(fs, bip[i].bi_daddr)];
241 if (sup->flags & SEGUSE_ACTIVE)
242 bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
243 }
244
245 /*
246 * Get rid of any blocks we've marked dead. If this is an older
247 * kernel that doesn't have bmapv fill in the block sizes, we'll
248 * toss everything here.
249 */
250 onb = nb;
251 toss_old_blocks(fs, &bip, &nb, NULL);
252 nb = i;
253
254 /*
255 * We may have tossed enough blocks that it is no longer worthwhile
256 * to rewrite this inode.
257 */
258 if (nb == 0 || onb - nb > log2int(onb)) {
259 if (debug)
260 syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
261 retval = COALESCE_NOTHINGLEFT;
262 goto out;
263 }
264
265 /*
266 * We are going to rewrite this inode.
267 * For any remaining blocks, read in their contents.
268 */
269 for (i = 0; i < nb; i++) {
270 bip[i].bi_bp = malloc(bip[i].bi_size);
271 if (bip[i].bi_bp == NULL) {
272 syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
273 bip[i].bi_size);
274 retval = COALESCE_NOMEM;
275 goto out;
276 }
277
278 if (pread(fs->clfs_devfd, bip[i].bi_bp, bip[i].bi_size,
279 fsbtob(fs, bip[i].bi_daddr)) < 0) {
280 retval = COALESCE_EIO;
281 goto out;
282 }
283 }
284 if (debug)
285 syslog(LOG_DEBUG, "ino %llu markv %d blocks",
286 (unsigned long long)ino, nb);
287
288 /*
289 * Write in segment-sized chunks. If at any point we'd write more
290 * than half of the available segments, sleep until that's not
291 * true any more.
292 */
293 bps = segtod(fs, 1);
294 for (tbip = bip; tbip < bip + nb; tbip += bps) {
295 do {
296 bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp);
297 cip = *(CLEANERINFO *)bp->b_data;
298 brelse(bp, B_INVAL);
299
300 if (cip.clean < 4) /* XXX magic number 4 */
301 fcntl(fs->clfs_ifilefd, LFCNSEGWAIT, NULL);
302 } while(cip.clean < 4);
303
304 lim.blkiov = tbip;
305 lim.blkcnt = (tbip + bps < bip + nb ? bps : nb % bps);
306 if (fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim) < 0) {
307 retval = COALESCE_BADMARKV;
308 goto out;
309 }
310 }
311
312 retval = COALESCE_OK;
313 out:
314 free(dip);
315 if (bip) {
316 for (i = 0; i < onb; i++)
317 if (bip[i].bi_bp)
318 free(bip[i].bi_bp);
319 free(bip);
320 }
321 return retval;
322 }
323
324 /*
325 * Try coalescing every inode in the filesystem.
326 * Return the number of inodes actually altered.
327 */
328 int clean_all_inodes(struct clfs *fs)
329 {
330 int i, r, maxino;
331 int totals[COALESCE_MAXERROR];
332 struct stat st;
333
334 memset(totals, 0, sizeof(totals));
335
336 fstat(fs->clfs_ifilefd, &st);
337 maxino = fs->lfs_ifpb * (st.st_size >> fs->lfs_bshift) -
338 fs->lfs_segtabsz - fs->lfs_cleansz;
339
340 for (i = 0; i < maxino; i++) {
341 r = clean_inode(fs, i);
342 ++totals[r];
343 }
344
345 for (i = 0; i < COALESCE_MAXERROR; i++)
346 if (totals[i])
347 syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
348 totals[i]);
349
350 return totals[COALESCE_OK];
351 }
352
353 /*
354 * Fork a child process to coalesce this fs.
355 */
356 int
357 fork_coalesce(struct clfs *fs)
358 {
359 static pid_t childpid;
360 int num;
361
362 /*
363 * If already running a coalescing child, don't start a new one.
364 */
365 if (childpid) {
366 if (waitpid(childpid, NULL, WNOHANG) == childpid)
367 childpid = 0;
368 }
369 if (childpid && kill(childpid, 0) >= 0) {
370 /* already running a coalesce process */
371 if (debug)
372 syslog(LOG_DEBUG, "coalescing already in progress");
373 return 0;
374 }
375
376 /*
377 * Fork a child and let the child coalease
378 */
379 childpid = fork();
380 if (childpid < 0) {
381 syslog(LOG_ERR, "%s: fork to coaleasce: %m", fs->lfs_fsmnt);
382 return 0;
383 } else if (childpid == 0) {
384 syslog(LOG_NOTICE, "%s: new coalescing process, pid %d",
385 fs->lfs_fsmnt, getpid());
386 num = clean_all_inodes(fs);
387 syslog(LOG_NOTICE, "%s: coalesced %d discontiguous inodes",
388 fs->lfs_fsmnt, num);
389 exit(0);
390 }
391
392 return 0;
393 }
394