coalesce.c revision 1.5 1 /* $NetBSD: coalesce.c,v 1.5 2002/12/15 08:38:17 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/param.h>
40 #include <sys/mount.h>
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 #include <sys/types.h>
44 #include <sys/wait.h>
45 #include <sys/mman.h>
46
47 #include <ufs/ufs/dinode.h>
48 #include <ufs/lfs/lfs.h>
49
50 #include <fcntl.h>
51 #include <signal.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <time.h>
56 #include <unistd.h>
57 #include <util.h>
58 #include <errno.h>
59 #include <err.h>
60
61 #include <syslog.h>
62
63 #include "clean.h"
64
65 extern int debug, do_mmap;
66
67 static int
68 tossdead(const void *client, const void *a, const void *b)
69 {
70 return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
71 ((BLOCK_INFO_15 *)a)->bi_size == 0);
72 }
73
/*
 * Integer base-2 logarithm: returns floor(log2(n)) for n > 0,
 * and -1 when n is zero or negative.
 */
static int log2int(int n)
{
	int bits;

	/* Count how many times n can be halved before reaching zero. */
	for (bits = -1; n > 0; n >>= 1)
		bits++;

	return bits;
}
85
/*
 * Status codes returned by clean_inode().  The values index the
 * coalesce_return[] message table, so the two must be kept in the
 * same order.
 */
enum coalesce_returncodes {
	COALESCE_OK		= 0,	/* inode was rewritten */
	COALESCE_NOINODE	= 1,	/* get_dinode() found nothing */
	COALESCE_TOOSMALL	= 2,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE	= 3,	/* negative di_size */
	COALESCE_BADBLOCKSIZE	= 4,	/* computed blocks > di_blocks */
	COALESCE_NOMEM		= 5,	/* allocation failure */
	COALESCE_BADBMAPV	= 6,	/* lfs_bmapv() failed */
	COALESCE_NOTWORTHIT	= 7,	/* not discontinuous enough */
	COALESCE_NOTHINGLEFT	= 8,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2	= 9,	/* too many blocks were tossed */
	COALESCE_EIO		= 10,	/* reading a block failed */

	COALESCE_MAXERROR	= 11	/* number of codes; not a status */
};
101
/*
 * Human-readable text for each clean_inode() status code, indexed by
 * the code's numeric value.  The final entry sits one past the last
 * real code.
 */
char *coalesce_return[] = {
	/*  0 */ "Successfully coalesced",
	/*  1 */ "File not in use or inode not found",
	/*  2 */ "Not large enough to coalesce",
	/*  3 */ "Negative size",
	/*  4 */ "Not enough blocks to account for size",
	/*  5 */ "Malloc failed",
	/*  6 */ "lfs_bmapv failed",
	/*  7 */ "Not broken enough to fix",
	/*  8 */ "Too many blocks not found",
	/*  9 */ "Too many blocks found in active segments",
	/* 10 */ "I/O error",

	/* 11 */ "No such error"
};
117
118 /*
119 * Find out if this inode's data blocks are discontinuous; if they are,
120 * rewrite them using lfs_markv. Return the number of inodes rewritten.
121 */
122 int clean_inode(struct fs_info *fsp, ino_t ino)
123 {
124 int i, error;
125 BLOCK_INFO_15 *bip = NULL, *tbip;
126 struct dinode *dip;
127 int nb, onb, noff;
128 ufs_daddr_t toff;
129 struct lfs *lfsp;
130 int bps;
131 SEGUSE *sup;
132 int retval;
133
134 lfsp = &fsp->fi_lfs;
135
136 dip = get_dinode(fsp, ino);
137 if (dip == NULL)
138 return COALESCE_NOINODE;
139
140 /* Compute file block size, set up for lfs_bmapv */
141 onb = nb = lblkno(lfsp, dip->di_size);
142
143 /* XXX for now, don't do any file small enough to have fragments */
144 if (nb < NDADDR)
145 return COALESCE_TOOSMALL;
146
147 /* Sanity checks */
148 if (dip->di_size < 0) {
149 if (debug)
150 syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
151 ino, (long long)dip->di_size);
152 return COALESCE_BADSIZE;
153 }
154 if (nb > dip->di_blocks) {
155 if (debug)
156 syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
157 ino, nb, dip->di_blocks);
158 return COALESCE_BADBLOCKSIZE;
159 }
160
161 bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
162 if (bip == NULL) {
163 syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
164 return COALESCE_NOMEM;
165 }
166 for (i = 0; i < nb; i++) {
167 memset(bip + i, 0, sizeof(BLOCK_INFO_15));
168 bip[i].bi_inode = ino;
169 bip[i].bi_lbn = i;
170 bip[i].bi_version = dip->di_gen;
171 /* Don't set the size, but let lfs_bmap fill it in */
172 }
173 if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
174 syslog(LOG_WARNING, "lfs_bmapv: %m");
175 retval = COALESCE_BADBMAPV;
176 goto out;
177 }
178 #if 0
179 for (i = 0; i < nb; i++) {
180 printf("bi_size = %d, bi_ino = %d, "
181 "bi_lbn = %d, bi_daddr = %d\n",
182 bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
183 bip[i].bi_daddr);
184 }
185 #endif
186 noff = toff = 0;
187 for (i = 1; i < nb; i++) {
188 if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag)
189 ++noff;
190 toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
191 - lfsp->lfs_frag) >> lfsp->lfs_fbshift;
192 }
193
194 /*
195 * If this file is not discontinuous, there's no point in rewriting it.
196 *
197 * Explicitly allow a certain amount of discontinuity, since large
198 * files will be broken among segments and medium-sized files
199 * can have a break or two and it's okay.
200 */
201 if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
202 segtod(lfsp, noff) * 2 < nb) {
203 retval = COALESCE_NOTWORTHIT;
204 goto out;
205 } else if (debug)
206 syslog(LOG_DEBUG, "ino %d total discontinuity "
207 "%d (%d) for %d blocks", ino, noff, toff, nb);
208
209 /* Search for blocks in active segments; don't move them. */
210 for (i = 0; i < nb; i++) {
211 if (bip[i].bi_daddr <= 0)
212 continue;
213 sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
214 dtosn(lfsp, bip[i].bi_daddr));
215 if (sup->su_flags & SEGUSE_ACTIVE)
216 bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
217 }
218 /*
219 * Get rid of any we've marked dead. If this is an older
220 * kernel that doesn't have lfs_bmapv fill in the block
221 * sizes, we'll toss everything here.
222 */
223 toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
224 if (nb && tossdead(NULL, bip + nb - 1, NULL))
225 --nb;
226 if (nb == 0) {
227 retval = COALESCE_NOTHINGLEFT;
228 goto out;
229 }
230
231 /*
232 * We may have tossed enough blocks that it is no longer worthwhile
233 * to rewrite this inode.
234 */
235 if (onb - nb > log2int(onb)) {
236 if (debug)
237 syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
238 return COALESCE_NOTHINGLEFT2;
239 }
240
241 /*
242 * We are going to rewrite this inode.
243 * For any remaining blocks, read in their contents.
244 */
245 for (i = 0; i < nb; i++) {
246 bip[i].bi_bp = malloc(bip[i].bi_size);
247 if (bip[i].bi_bp == NULL) {
248 syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
249 bip[i].bi_size);
250 retval = COALESCE_NOMEM;
251 goto out;
252 }
253 if (get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size,
254 bip[i].bi_daddr) != bip[i].bi_size) {
255 retval = COALESCE_EIO;
256 goto out;
257 }
258 }
259 if (debug)
260 syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
261
262 /*
263 * Write in segment-sized chunks. If at any point we'd write more
264 * than half of the available segments, sleep until that's not
265 * true any more.
266 */
267 bps = segtod(lfsp, 1);
268 for (tbip = bip; tbip < bip + nb; tbip += bps) {
269 while (fsp->fi_cip->clean < 4) {
270 lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
271 reread_fs_info(fsp, do_mmap);
272 /* XXX start over? */
273 }
274 lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
275 (tbip + bps < bip + nb ? bps : nb % bps));
276 }
277
278 retval = COALESCE_OK;
279 out:
280 if (bip) {
281 for (i = 0; i < onb; i++)
282 if (bip[i].bi_bp)
283 free(bip[i].bi_bp);
284 free(bip);
285 }
286 return retval;
287 }
288
289 /*
290 * Try coalescing every inode in the filesystem.
291 * Return the number of inodes actually altered.
292 */
293 int clean_all_inodes(struct fs_info *fsp)
294 {
295 int i, r;
296 int totals[COALESCE_MAXERROR];
297
298 memset(totals, 0, sizeof(totals));
299 for (i = 0; i < fsp->fi_ifile_count; i++) {
300 r = clean_inode(fsp, i);
301 ++totals[r];
302 }
303
304 for (i = 0; i < COALESCE_MAXERROR; i++)
305 if (totals[i])
306 syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
307 totals[i]);
308
309 return totals[COALESCE_OK];
310 }
311
312 int fork_coalesce(struct fs_info *fsp)
313 {
314 static pid_t childpid;
315 int num;
316
317 reread_fs_info(fsp, do_mmap);
318
319 if (childpid) {
320 if (waitpid(childpid, NULL, WNOHANG) == childpid)
321 childpid = 0;
322 }
323 if (childpid && kill(childpid, 0) >= 0) {
324 /* already running a coalesce process */
325 if (debug)
326 syslog(LOG_DEBUG, "coalescing already in progress");
327 return 0;
328 }
329 childpid = fork();
330 if (childpid < 0) {
331 syslog(LOG_ERR, "fork: %m");
332 return 0;
333 } else if (childpid == 0) {
334 syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
335 num = clean_all_inodes(fsp);
336 syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
337 exit(0);
338 }
339 return 0;
340 }
341