coalesce.c revision 1.10 1 /* $NetBSD: coalesce.c,v 1.10 2005/08/19 02:06:29 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/param.h>
40 #include <sys/mount.h>
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 #include <sys/types.h>
44 #include <sys/wait.h>
45 #include <sys/mman.h>
46
47 #include <ufs/ufs/dinode.h>
48 #include <ufs/lfs/lfs.h>
49
50 #include <fcntl.h>
51 #include <signal.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <time.h>
56 #include <unistd.h>
57 #include <util.h>
58 #include <errno.h>
59 #include <err.h>
60
61 #include <syslog.h>
62
63 #include "clean.h"
64
65 extern int debug, do_mmap;
66
67 static int
68 tossdead(const void *client, const void *a, const void *b)
69 {
70 return (((BLOCK_INFO *)a)->bi_daddr <= 0 ||
71 ((BLOCK_INFO *)a)->bi_size == 0);
72 }
73
/*
 * Integer base-2 logarithm, rounded down: the number of times n can be
 * halved before reaching zero, minus one.  Returns -1 for n <= 0.
 */
static int log2int(int n)
{
	int bits;

	/* Start at -1 so that the first halving step yields log2(1) == 0. */
	for (bits = -1; n > 0; n /= 2)
		bits++;
	return bits;
}
85
/*
 * Result codes returned by clean_inode().  The values are consecutive
 * starting at zero because they are used as indices into both
 * coalesce_return[] and the per-code tally in clean_all_inodes().
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,	/* blocks were handed to markv */
	COALESCE_NOINODE,	/* get_dinode() found no inode */
	COALESCE_TOOSMALL,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE,	/* inode size is negative */
	COALESCE_BADBLOCKSIZE,	/* computed blocks exceed held blocks */
	COALESCE_NOMEM,		/* malloc failed */
	COALESCE_BADBMAPV,	/* bmapv call failed */
	COALESCE_NOTWORTHIT,	/* file is not discontinuous enough */
	COALESCE_NOTHINGLEFT,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2,	/* too many blocks were tossed */
	COALESCE_EIO,		/* short read of a data block */

	COALESCE_MAXERROR	/* number of codes above; not a result */
};
101
/*
 * Human-readable text for each clean_inode() result code, indexed by
 * the code's numeric value.  The order here must track the order of
 * enum coalesce_returncodes; the final entry lines up with
 * COALESCE_MAXERROR.
 */
char *coalesce_return[] = {
	/* COALESCE_OK */		"Successfully coalesced",
	/* COALESCE_NOINODE */		"File not in use or inode not found",
	/* COALESCE_TOOSMALL */		"Not large enough to coalesce",
	/* COALESCE_BADSIZE */		"Negative size",
	/* COALESCE_BADBLOCKSIZE */	"Not enough blocks to account for size",
	/* COALESCE_NOMEM */		"Malloc failed",
	/* COALESCE_BADBMAPV */		"LFCNBMAPV failed",
	/* COALESCE_NOTWORTHIT */	"Not broken enough to fix",
	/* COALESCE_NOTHINGLEFT */	"Too many blocks not found",
	/* COALESCE_NOTHINGLEFT2 */	"Too many blocks found in active segments",
	/* COALESCE_EIO */		"I/O error",

	/* COALESCE_MAXERROR */		"No such error"
};
117
118 /*
119 * Find out if this inode's data blocks are discontinuous; if they are,
120 * rewrite them using markv. Return the number of inodes rewritten.
121 */
122 int clean_inode(struct fs_info *fsp, ino_t ino)
123 {
124 int i, error;
125 BLOCK_INFO *bip = NULL, *tbip;
126 struct ufs1_dinode *dip;
127 int nb, onb, noff;
128 daddr_t toff;
129 struct lfs *lfsp;
130 int bps;
131 SEGUSE *sup;
132 int retval;
133
134 lfsp = &fsp->fi_lfs;
135
136 dip = get_dinode(fsp, ino);
137 if (dip == NULL)
138 return COALESCE_NOINODE;
139
140 /* Compute file block size, set up for bmapv */
141 onb = nb = lblkno(lfsp, dip->di_size);
142
143 /* XXX for now, don't do any file small enough to have fragments */
144 if (nb < NDADDR)
145 return COALESCE_TOOSMALL;
146
147 /* Sanity checks */
148 if (dip->di_size < 0) {
149 if (debug)
150 syslog(LOG_DEBUG, "ino %llu, negative size (%lld)",
151 (unsigned long long)ino, (long long)dip->di_size);
152 return COALESCE_BADSIZE;
153 }
154 if (nb > dip->di_blocks) {
155 if (debug)
156 syslog(LOG_DEBUG, "ino %llu, computed blocks %d "
157 "> held blocks %d",
158 (unsigned long long)ino, nb, dip->di_blocks);
159 return COALESCE_BADBLOCKSIZE;
160 }
161
162 bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
163 if (bip == NULL) {
164 syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
165 (unsigned long long)ino, nb);
166 return COALESCE_NOMEM;
167 }
168 for (i = 0; i < nb; i++) {
169 memset(bip + i, 0, sizeof(BLOCK_INFO));
170 bip[i].bi_inode = ino;
171 bip[i].bi_lbn = i;
172 bip[i].bi_version = dip->di_gen;
173 /* Don't set the size, but let lfs_bmap fill it in */
174 }
175 if ((error = lfs_bmapv_emul(ifile_fd, bip, nb)) < 0) {
176 syslog(LOG_WARNING, "LFCNBMAPV: %m");
177 retval = COALESCE_BADBMAPV;
178 goto out;
179 }
180 #if 0
181 for (i = 0; i < nb; i++) {
182 printf("bi_size = %d, bi_ino = %d, "
183 "bi_lbn = %d, bi_daddr = %d\n",
184 bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
185 bip[i].bi_daddr);
186 }
187 #endif
188 noff = toff = 0;
189 for (i = 1; i < nb; i++) {
190 if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag)
191 ++noff;
192 toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
193 - lfsp->lfs_frag) >> lfsp->lfs_fbshift;
194 }
195
196 /*
197 * If this file is not discontinuous, there's no point in rewriting it.
198 *
199 * Explicitly allow a certain amount of discontinuity, since large
200 * files will be broken among segments and medium-sized files
201 * can have a break or two and it's okay.
202 */
203 if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
204 segtod(lfsp, noff) * 2 < nb) {
205 retval = COALESCE_NOTWORTHIT;
206 goto out;
207 } else if (debug)
208 syslog(LOG_DEBUG, "ino %llu total discontinuity "
209 "%d (%lld) for %d blocks", (unsigned long long)ino,
210 noff, (long long)toff, nb);
211
212 /* Search for blocks in active segments; don't move them. */
213 for (i = 0; i < nb; i++) {
214 if (bip[i].bi_daddr <= 0)
215 continue;
216 sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
217 dtosn(lfsp, bip[i].bi_daddr));
218 if (sup->su_flags & SEGUSE_ACTIVE)
219 bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
220 }
221 /*
222 * Get rid of any we've marked dead. If this is an older
223 * kernel that doesn't have bmapv fill in the block
224 * sizes, we'll toss everything here.
225 */
226 toss(bip, &nb, sizeof(BLOCK_INFO), tossdead, NULL);
227 if (nb && tossdead(NULL, bip + nb - 1, NULL))
228 --nb;
229 if (nb == 0) {
230 retval = COALESCE_NOTHINGLEFT;
231 goto out;
232 }
233
234 /*
235 * We may have tossed enough blocks that it is no longer worthwhile
236 * to rewrite this inode.
237 */
238 if (onb - nb > log2int(onb)) {
239 if (debug)
240 syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
241 return COALESCE_NOTHINGLEFT2;
242 }
243
244 /*
245 * We are going to rewrite this inode.
246 * For any remaining blocks, read in their contents.
247 */
248 for (i = 0; i < nb; i++) {
249 bip[i].bi_bp = malloc(bip[i].bi_size);
250 if (bip[i].bi_bp == NULL) {
251 syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
252 bip[i].bi_size);
253 retval = COALESCE_NOMEM;
254 goto out;
255 }
256 if (get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size,
257 bip[i].bi_daddr) != bip[i].bi_size) {
258 retval = COALESCE_EIO;
259 goto out;
260 }
261 }
262 if (debug)
263 syslog(LOG_DEBUG, "ino %llu markv %d blocks",
264 (unsigned long long)ino, nb);
265
266 /*
267 * Write in segment-sized chunks. If at any point we'd write more
268 * than half of the available segments, sleep until that's not
269 * true any more.
270 */
271 bps = segtod(lfsp, 1);
272 for (tbip = bip; tbip < bip + nb; tbip += bps) {
273 while (fsp->fi_cip->clean < 4) {
274 lfs_segwait_emul(ifile_fd, NULL);
275 reread_fs_info(fsp, do_mmap);
276 /* XXX start over? */
277 }
278 lfs_markv_emul(ifile_fd, tbip,
279 (tbip + bps < bip + nb ? bps : nb % bps));
280 }
281
282 retval = COALESCE_OK;
283 out:
284 if (bip) {
285 for (i = 0; i < onb; i++)
286 if (bip[i].bi_bp)
287 free(bip[i].bi_bp);
288 free(bip);
289 }
290 return retval;
291 }
292
293 /*
294 * Try coalescing every inode in the filesystem.
295 * Return the number of inodes actually altered.
296 */
297 int clean_all_inodes(struct fs_info *fsp)
298 {
299 int i, r;
300 int totals[COALESCE_MAXERROR];
301
302 memset(totals, 0, sizeof(totals));
303 for (i = 0; i < fsp->fi_ifile_count; i++) {
304 r = clean_inode(fsp, i);
305 ++totals[r];
306 }
307
308 for (i = 0; i < COALESCE_MAXERROR; i++)
309 if (totals[i])
310 syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
311 totals[i]);
312
313 return totals[COALESCE_OK];
314 }
315
316 int fork_coalesce(struct fs_info *fsp)
317 {
318 static pid_t childpid;
319 int num;
320
321 reread_fs_info(fsp, do_mmap);
322
323 if (childpid) {
324 if (waitpid(childpid, NULL, WNOHANG) == childpid)
325 childpid = 0;
326 }
327 if (childpid && kill(childpid, 0) >= 0) {
328 /* already running a coalesce process */
329 if (debug)
330 syslog(LOG_DEBUG, "coalescing already in progress");
331 return 0;
332 }
333 childpid = fork();
334 if (childpid < 0) {
335 syslog(LOG_ERR, "fork: %m");
336 return 0;
337 } else if (childpid == 0) {
338 syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
339 num = clean_all_inodes(fsp);
340 syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
341 exit(0);
342 }
343 return 0;
344 }
345