coalesce.c revision 1.3 1 1.3 perseant /* $NetBSD: coalesce.c,v 1.3 2002/06/14 05:21:21 perseant Exp $ */
2 1.1 perseant
3 1.1 perseant /*-
4 1.1 perseant * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 1.1 perseant * All rights reserved.
6 1.1 perseant *
7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.1 perseant *
10 1.1 perseant * Redistribution and use in source and binary forms, with or without
11 1.1 perseant * modification, are permitted provided that the following conditions
12 1.1 perseant * are met:
13 1.1 perseant * 1. Redistributions of source code must retain the above copyright
14 1.1 perseant * notice, this list of conditions and the following disclaimer.
15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 perseant * notice, this list of conditions and the following disclaimer in the
17 1.1 perseant * documentation and/or other materials provided with the distribution.
18 1.1 perseant * 3. All advertising materials mentioning features or use of this software
19 1.1 perseant * must display the following acknowledgement:
20 1.1 perseant * This product includes software developed by the NetBSD
21 1.1 perseant * Foundation, Inc. and its contributors.
22 1.1 perseant * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 perseant * contributors may be used to endorse or promote products derived
24 1.1 perseant * from this software without specific prior written permission.
25 1.1 perseant *
26 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 perseant * POSSIBILITY OF SUCH DAMAGE.
37 1.1 perseant */
38 1.1 perseant
39 1.1 perseant #include <sys/param.h>
40 1.1 perseant #include <sys/mount.h>
41 1.1 perseant #include <sys/time.h>
42 1.1 perseant #include <sys/resource.h>
43 1.1 perseant #include <sys/types.h>
44 1.1 perseant #include <sys/wait.h>
45 1.1 perseant #include <sys/mman.h>
46 1.1 perseant
47 1.1 perseant #include <ufs/ufs/dinode.h>
48 1.1 perseant #include <ufs/lfs/lfs.h>
49 1.1 perseant
50 1.1 perseant #include <fcntl.h>
51 1.1 perseant #include <signal.h>
52 1.1 perseant #include <stdio.h>
53 1.1 perseant #include <stdlib.h>
54 1.1 perseant #include <string.h>
55 1.1 perseant #include <time.h>
56 1.1 perseant #include <unistd.h>
57 1.1 perseant #include <util.h>
58 1.1 perseant #include <errno.h>
59 1.1 perseant #include <err.h>
60 1.1 perseant
61 1.1 perseant #include <syslog.h>
62 1.1 perseant
63 1.1 perseant #include "clean.h"
64 1.1 perseant
65 1.2 perseant extern int debug, do_mmap;
66 1.1 perseant
67 1.1 perseant static int
68 1.1 perseant tossdead(const void *client, const void *a, const void *b)
69 1.1 perseant {
70 1.2 perseant return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
71 1.1 perseant ((BLOCK_INFO_15 *)a)->bi_size == 0);
72 1.1 perseant }
73 1.1 perseant
/*
 * Integer base-2 logarithm, rounded down: the index of the highest set
 * bit of `n'.  Returns -1 when `n' is zero or negative.
 */
static int
log2int(int n)
{
	int bits;

	/* Start one below zero so that a single halving step yields 0. */
	for (bits = -1; n > 0; n >>= 1)
		bits++;
	return bits;
}
85 1.2 perseant
/*
 * Outcome codes produced by clean_inode().  The values double as indices
 * into coalesce_return[] and into the per-outcome tally kept by
 * clean_all_inodes(), so they must stay dense and start at zero.
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,	/* blocks were rewritten */
	COALESCE_NOINODE,	/* get_dinode() found nothing */
	COALESCE_TOOSMALL,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE,	/* di_size is negative */
	COALESCE_BADBLOCKSIZE,	/* size implies more blocks than are held */
	COALESCE_NOMEM,		/* malloc failed */
	COALESCE_BADBMAPV,	/* lfs_bmapv failed */
	COALESCE_NOTWORTHIT,	/* file is contiguous enough already */
	COALESCE_NOTHINGLEFT,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2,	/* too many blocks were tossed */

	COALESCE_MAXERROR	/* number of real codes; not itself returned */
};

/*
 * Human-readable description of each outcome code, indexed by the code.
 * The slot at COALESCE_MAXERROR is a catch-all message.
 */
char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "lfs_bmapv failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks not found",
	[COALESCE_NOTHINGLEFT2]	= "Too many blocks found in active segments",

	[COALESCE_MAXERROR]	= "No such error"
};
115 1.3 perseant
116 1.1 perseant /*
117 1.1 perseant * Find out if this inode's data blocks are discontinuous; if they are,
118 1.1 perseant * rewrite them using lfs_markv. Return the number of inodes rewritten.
119 1.1 perseant */
120 1.1 perseant int clean_inode(struct fs_info *fsp, ino_t ino)
121 1.1 perseant {
122 1.1 perseant int i, error;
123 1.1 perseant BLOCK_INFO_15 *bip, *tbip;
124 1.1 perseant struct dinode *dip;
125 1.2 perseant int nb, onb, noff;
126 1.1 perseant ufs_daddr_t toff;
127 1.1 perseant struct lfs *lfsp;
128 1.1 perseant int bps;
129 1.1 perseant SEGUSE *sup;
130 1.1 perseant
131 1.1 perseant lfsp = &fsp->fi_lfs;
132 1.1 perseant
133 1.1 perseant dip = get_dinode(fsp, ino);
134 1.1 perseant if (dip == NULL)
135 1.3 perseant return COALESCE_NOINODE;
136 1.1 perseant
137 1.1 perseant /* Compute file block size, set up for lfs_bmapv */
138 1.2 perseant onb = nb = btofsb(lfsp, dip->di_size);
139 1.2 perseant
140 1.2 perseant /* XXX for now, don't do any file small enough to have fragments */
141 1.2 perseant if (nb < NDADDR)
142 1.3 perseant return COALESCE_TOOSMALL;
143 1.2 perseant
144 1.2 perseant /* Sanity checks */
145 1.2 perseant if (dip->di_size < 0) {
146 1.3 perseant if (debug)
147 1.3 perseant syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
148 1.3 perseant ino, (long long)dip->di_size);
149 1.3 perseant return COALESCE_BADSIZE;
150 1.2 perseant }
151 1.1 perseant if (nb > dip->di_blocks) {
152 1.3 perseant if (debug)
153 1.3 perseant syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
154 1.3 perseant ino, nb, dip->di_blocks);
155 1.3 perseant return COALESCE_BADBLOCKSIZE;
156 1.1 perseant }
157 1.2 perseant
158 1.1 perseant bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
159 1.1 perseant if (bip == NULL) {
160 1.1 perseant syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
161 1.3 perseant return COALESCE_NOMEM;
162 1.1 perseant }
163 1.1 perseant for (i = 0; i < nb; i++) {
164 1.1 perseant memset(bip + i, 0, sizeof(BLOCK_INFO_15));
165 1.1 perseant bip[i].bi_inode = ino;
166 1.1 perseant bip[i].bi_lbn = i;
167 1.2 perseant bip[i].bi_version = dip->di_gen;
168 1.1 perseant /* Don't set the size, but let lfs_bmap fill it in */
169 1.1 perseant }
170 1.1 perseant if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
171 1.2 perseant syslog(LOG_WARNING, "lfs_bmapv: %m");
172 1.1 perseant free(bip);
173 1.3 perseant return COALESCE_BADBMAPV;
174 1.1 perseant }
175 1.1 perseant noff = toff = 0;
176 1.1 perseant for (i = 1; i < nb; i++) {
177 1.1 perseant if (bip[i].bi_daddr != bip[i - 1].bi_daddr + 1)
178 1.1 perseant ++noff;
179 1.1 perseant toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr - 1);
180 1.1 perseant }
181 1.1 perseant
182 1.1 perseant /*
183 1.1 perseant * If this file is not discontinuous, there's no point in rewriting it.
184 1.1 perseant *
185 1.1 perseant * Explicitly allow a certain amount of discontinuity, since large
186 1.1 perseant * files will be broken among segments and medium-sized files
187 1.1 perseant * can have a break or two and it's okay.
188 1.1 perseant */
189 1.2 perseant if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
190 1.2 perseant segtod(lfsp, noff) * 2 < nb) {
191 1.1 perseant free(bip);
192 1.3 perseant return COALESCE_NOTWORTHIT;
193 1.1 perseant } else if (debug)
194 1.1 perseant syslog(LOG_DEBUG, "ino %d total discontinuity "
195 1.1 perseant "%d (%d) for %d blocks", ino, noff, toff, nb);
196 1.1 perseant
197 1.1 perseant /* Search for blocks in active segments; don't move them. */
198 1.1 perseant for (i = 0; i < nb; i++) {
199 1.1 perseant if (bip[i].bi_daddr <= 0)
200 1.1 perseant continue;
201 1.1 perseant sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
202 1.1 perseant dtosn(lfsp, bip[i].bi_daddr));
203 1.1 perseant if (sup->su_flags & SEGUSE_ACTIVE)
204 1.1 perseant bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
205 1.1 perseant }
206 1.1 perseant /*
207 1.1 perseant * Get rid of any we've marked dead. If this is an older
208 1.1 perseant * kernel that doesn't have lfs_bmapv fill in the block
209 1.1 perseant * sizes, we'll toss everything here.
210 1.1 perseant */
211 1.1 perseant toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
212 1.1 perseant if (nb && tossdead(NULL, bip + nb - 1, NULL))
213 1.1 perseant --nb;
214 1.1 perseant if (nb == 0) {
215 1.1 perseant free(bip);
216 1.3 perseant return COALESCE_NOTHINGLEFT;
217 1.2 perseant }
218 1.2 perseant
219 1.1 perseant /*
220 1.2 perseant * We may have tossed enough blocks that it is no longer worthwhile
221 1.2 perseant * to rewrite this inode.
222 1.1 perseant */
223 1.3 perseant if (onb - nb > log2int(onb)) {
224 1.3 perseant if (debug)
225 1.3 perseant syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
226 1.3 perseant return COALESCE_NOTHINGLEFT2;
227 1.1 perseant }
228 1.1 perseant
229 1.1 perseant /*
230 1.1 perseant * We are going to rewrite this inode.
231 1.1 perseant * For any remaining blocks, read in their contents.
232 1.1 perseant */
233 1.1 perseant for (i = 0; i < nb; i++) {
234 1.1 perseant bip[i].bi_bp = malloc(bip[i].bi_size);
235 1.1 perseant get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
236 1.1 perseant }
237 1.1 perseant if (debug)
238 1.1 perseant syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
239 1.1 perseant
240 1.2 perseant /*
241 1.2 perseant * Write in segment-sized chunks. If at any point we'd write more
242 1.2 perseant * than half of the available segments, sleep until that's not
243 1.2 perseant * true any more.
244 1.2 perseant */
245 1.1 perseant bps = segtod(lfsp, 1);
246 1.1 perseant for (tbip = bip; tbip < bip + nb; tbip += bps) {
247 1.2 perseant while (fsp->fi_cip->clean < 4) {
248 1.2 perseant lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
249 1.2 perseant reread_fs_info(fsp, do_mmap);
250 1.2 perseant /* XXX start over? */
251 1.2 perseant }
252 1.1 perseant lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
253 1.1 perseant (tbip + bps < bip + nb ? bps : nb % bps));
254 1.1 perseant }
255 1.1 perseant
256 1.1 perseant for (i = 0; i < nb; i++)
257 1.1 perseant if (bip[i].bi_bp)
258 1.1 perseant free(bip[i].bi_bp);
259 1.1 perseant free(bip);
260 1.3 perseant return COALESCE_OK;
261 1.1 perseant }
262 1.1 perseant
263 1.1 perseant /*
264 1.1 perseant * Try coalescing every inode in the filesystem.
265 1.1 perseant * Return the number of inodes actually altered.
266 1.1 perseant */
267 1.1 perseant int clean_all_inodes(struct fs_info *fsp)
268 1.1 perseant {
269 1.3 perseant int i, r;
270 1.3 perseant int totals[COALESCE_MAXERROR];
271 1.1 perseant
272 1.3 perseant memset(totals, 0, sizeof(totals));
273 1.1 perseant for (i = 0; i < fsp->fi_ifile_count; i++) {
274 1.1 perseant r = clean_inode(fsp, i);
275 1.3 perseant ++totals[r];
276 1.1 perseant }
277 1.3 perseant
278 1.3 perseant for (i = 0; i < COALESCE_MAXERROR; i++)
279 1.3 perseant if (totals[i])
280 1.3 perseant syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
281 1.3 perseant totals[i]);
282 1.3 perseant
283 1.3 perseant return totals[COALESCE_OK];
284 1.1 perseant }
285 1.1 perseant
286 1.1 perseant int fork_coalesce(struct fs_info *fsp)
287 1.1 perseant {
288 1.1 perseant static pid_t childpid;
289 1.2 perseant int num;
290 1.2 perseant
291 1.2 perseant reread_fs_info(fsp, do_mmap);
292 1.1 perseant
293 1.1 perseant if (childpid) {
294 1.1 perseant if (waitpid(childpid, NULL, WNOHANG) == childpid)
295 1.1 perseant childpid = 0;
296 1.1 perseant }
297 1.1 perseant if (childpid && kill(childpid, 0) >= 0) {
298 1.1 perseant /* already running a coalesce process */
299 1.2 perseant if (debug)
300 1.2 perseant syslog(LOG_DEBUG, "coalescing already in progress");
301 1.1 perseant return 0;
302 1.1 perseant }
303 1.1 perseant childpid = fork();
304 1.1 perseant if (childpid < 0) {
305 1.1 perseant syslog(LOG_ERR, "fork: %m");
306 1.1 perseant return 0;
307 1.1 perseant } else if (childpid == 0) {
308 1.3 perseant syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
309 1.2 perseant num = clean_all_inodes(fsp);
310 1.2 perseant syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
311 1.1 perseant exit(0);
312 1.1 perseant }
313 1.1 perseant return 0;
314 1.1 perseant }
315