rcache.c revision 1.6 1 /* $NetBSD: rcache.c,v 1.6 2001/05/27 14:17:57 lukem Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Martin J. Laubach <mjl (at) emsi.priv.at> and
9 * Manuel Bouyer <Manuel.Bouyer (at) lip6.fr>.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39 /*-----------------------------------------------------------------------*/
40 #include <sys/types.h>
41 #include <sys/uio.h>
42 #include <sys/mman.h>
43 #include <sys/param.h>
44 #include <sys/sysctl.h>
45 #include <ufs/ufs/dinode.h>
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <fcntl.h>
51 #include <errno.h>
52 #include <string.h>
53
54 #include "dump.h"
55
56 /*-----------------------------------------------------------------------*/
#define	MAXCACHEBUFS	512	/* never use more than 512 cache buffers */
#define	MAXMEMPART	6	/* use at most 1/6 (about 17%) of the user mem */
59
60 /*-----------------------------------------------------------------------*/
/*
 * Header placed at the very beginning of the shared cache region.
 */
struct cheader {
	/* running counter; its value is stamped on a buffer at each use */
	volatile size_t	count;
};
64
65 struct cdesc {
66 volatile daddr_t blkstart;
67 volatile daddr_t blkend;/* start + nblksread */
68 volatile daddr_t blocksRead;
69 volatile size_t time;
70 #ifdef DIAGNOSTICS
71 volatile pid_t owner;
72 #endif
73 };
74
static int	findlru(void);

/*
 * The shared region is laid out as
 *	struct cheader | cachebufs * struct cdesc | cachebufs * data buffer
 * and the pointers below address its three parts.
 */
static void		*shareBuffer;	/* start of the region, NULL = no cache */
static struct cheader	*cheader;	/* header at the start of the region */
static struct cdesc	*cdesc;		/* descriptor array */
static char		*cdata;		/* first data buffer */

static int	cachebufs;	/* number of cache buffers */
static int	nblksread;	/* blocks per cache buffer */

#ifdef STATS
/* Per-process read statistics, reported by printcachestats(). */
static int	nreads, nphysread;
static int64_t	readsize, physreadsize;
#endif

/* Address of the data area belonging to cache buffer number `i'. */
#define	CDATA(i)	(&cdata[(i) * nblksread * dev_bsize])
92
93 void
94 initcache(int cachesize, int readblksize)
95 {
96 size_t len;
97 size_t sharedSize;
98
99 nblksread = (readblksize + ufsib->ufs_bsize - 1) / ufsib->ufs_bsize;
100 if(cachesize == -1) { /* Compute from memory available */
101 int usermem;
102 int mib[2] = { CTL_HW, HW_USERMEM };
103
104 len = sizeof(usermem);
105 if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) {
106 msg("sysctl(hw.usermem) failed: %s\n", strerror(errno));
107 return;
108 }
109 cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize);
110 } else { /* User specified */
111 cachebufs = cachesize;
112 }
113
114 if(cachebufs) { /* Don't allocate if zero --> no caching */
115 if (cachebufs > MAXCACHEBUFS)
116 cachebufs = MAXCACHEBUFS;
117
118 sharedSize = sizeof(struct cheader) +
119 sizeof(struct cdesc) * cachebufs +
120 nblksread * cachebufs * dev_bsize;
121 #ifdef STATS
122 fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs,
123 sharedSize);
124 #endif
125 shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE,
126 MAP_ANON | MAP_SHARED, -1, 0);
127 if (shareBuffer == (void *)-1) {
128 msg("can't mmap shared memory for buffer: %s\n",
129 strerror(errno));
130 return;
131 }
132 cheader = shareBuffer;
133 cdesc = (struct cdesc *) (((char *) shareBuffer) +
134 sizeof(struct cheader));
135 cdata = ((char *) shareBuffer) + sizeof(struct cheader) +
136 sizeof(struct cdesc) * cachebufs;
137
138 memset(shareBuffer, '\0', sharedSize);
139 }
140 }
141
142 /*
143 * Find the cache buffer descriptor that shows the minimal access time
144 */
145 static int
146 findlru(void)
147 {
148 int i;
149 int minTime = cdesc[0].time;
150 int minIdx = 0;
151
152 for (i = 0; i < cachebufs; i++) {
153 if (cdesc[i].time < minTime) {
154 minIdx = i;
155 minTime = cdesc[i].time;
156 }
157 }
158
159 return minIdx;
160 }
161
162 /*
163 * Read data directly from disk, with smart error handling.
164 * Try to recover from hard errors by reading in sector sized pieces.
165 * Error recovery is attempted at most BREADEMAX times before seeking
166 * consent from the operator to continue.
167 */
168
169 static int breaderrors = 0;
170 #define BREADEMAX 32
171
172 void
173 rawread(daddr_t blkno, char *buf, int size)
174 {
175 int cnt, i;
176 #ifdef STATS
177 nphysread++;
178 physreadsize += size;
179 #endif
180
181 if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) {
182 msg("rawread: lseek fails\n");
183 goto err;
184 }
185 if ((cnt = read(diskfd, buf, size)) == size)
186 return;
187 if (cnt == -1)
188 msg("read error from %s: %s: [block %d]: count=%d\n",
189 disk, strerror(errno), blkno, size);
190 else
191 msg("short read error from %s: [block %d]: count=%d, got=%d\n",
192 disk, blkno, size, cnt);
193 err:
194 if (++breaderrors > BREADEMAX) {
195 msg("More than %d block read errors from %s\n",
196 BREADEMAX, disk);
197 broadcast("DUMP IS AILING!\n");
198 msg("This is an unrecoverable error.\n");
199 if (!query("Do you want to attempt to continue?")){
200 dumpabort(0);
201 /*NOTREACHED*/
202 } else
203 breaderrors = 0;
204 }
205 /*
206 * Zero buffer, then try to read each sector of buffer separately.
207 */
208 memset(buf, 0, size);
209 for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) {
210 if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) {
211 msg("rawread: lseek2 fails: %s!\n",
212 strerror(errno));
213 continue;
214 }
215 if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize)
216 continue;
217 if (cnt == -1) {
218 msg("read error from %s: %s: [sector %d]: count=%ld: "
219 "%s\n", disk, strerror(errno), blkno, dev_bsize,
220 strerror(errno));
221 continue;
222 }
223 msg("short read error from %s: [sector %d]: count=%ld, got=%d\n",
224 disk, blkno, dev_bsize, cnt);
225 }
226 }
227
228 void
229 bread(daddr_t blkno, char *buf, int size)
230 {
231 int osize = size;
232 daddr_t oblkno = blkno;
233 char *obuf = buf;
234 daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize;
235
236 #ifdef STATS
237 nreads++;
238 readsize += size;
239 #endif
240
241 if (!shareBuffer) {
242 rawread(blkno, buf, size);
243 return;
244 }
245
246 if (flock(diskfd, LOCK_EX)) {
247 msg("flock(LOCK_EX) failed: %s\n",
248 strerror(errno));
249 rawread(blkno, buf, size);
250 return;
251 }
252
253
254 retry:
255 while(size > 0) {
256 int i;
257
258 for (i = 0; i < cachebufs; i++) {
259 struct cdesc *curr = &cdesc[i];
260
261 #ifdef DIAGNOSTICS
262 if (curr->owner) {
263 fprintf(stderr, "Owner is set (%d, me=%d), can"
264 "not happen.\n", curr->owner, getpid());
265 }
266 #endif
267
268 if (curr->blkend == 0)
269 continue;
270 /*
271 * If we find a bit of the read in the buffers,
272 * now compute how many blocks we can copy,
273 * copy them out, adjust blkno, buf and size,
274 * and restart
275 */
276 if (curr->blkstart <= blkno &&
277 blkno < curr->blkend) {
278 /* Number of data blocks to be copied */
279 int toCopy = MIN(size,
280 (curr->blkend - blkno) * dev_bsize);
281 #ifdef DIAGNOSTICS
282 if (toCopy <= 0 ||
283 toCopy > nblksread * dev_bsize) {
284 fprintf(stderr, "toCopy %d !\n",
285 toCopy);
286 dumpabort(0);
287 }
288 if (CDATA(i) + (blkno - curr->blkstart) *
289 dev_bsize < CDATA(i) ||
290 CDATA(i) + (blkno - curr->blkstart) *
291 dev_bsize >
292 CDATA(i) + nblksread * dev_bsize) {
293 fprintf(stderr, "%p < %p !!!\n",
294 CDATA(i) + (blkno -
295 curr->blkstart) * dev_bsize,
296 CDATA(i));
297 fprintf(stderr, "cdesc[i].blkstart %d "
298 "blkno %d dev_bsize %ld\n",
299 curr->blkstart, blkno, dev_bsize);
300 dumpabort(0);
301 }
302 #endif
303 memcpy(buf, CDATA(i) +
304 (blkno - curr->blkstart) * dev_bsize,
305 toCopy);
306
307 buf += toCopy;
308 size -= toCopy;
309 blkno += (toCopy + dev_bsize - 1) / dev_bsize;
310 numBlocks -=
311 (toCopy + dev_bsize - 1) / dev_bsize;
312
313 curr->time = cheader->count++;
314
315 /*
316 * If all data of a cache block have been
317 * read, chances are good no more reads
318 * will occur, so expire the cache immediately
319 */
320
321 curr->blocksRead +=
322 (toCopy + dev_bsize -1) / dev_bsize;
323 if (curr->blocksRead >= nblksread)
324 curr->time = 0;
325
326 goto retry;
327 }
328 }
329
330 /* No more to do? */
331 if (size == 0)
332 break;
333
334 /*
335 * This does actually not happen if fs blocks are not greater
336 * than nblksread.
337 */
338 if (numBlocks > nblksread) {
339 rawread(oblkno, obuf, osize);
340 break;
341 } else {
342 int idx;
343 ssize_t rsize;
344 daddr_t blockBlkNo;
345
346 blockBlkNo = (blkno / nblksread) * nblksread;
347 idx = findlru();
348 rsize = MIN(nblksread,
349 ufsib->ufs_dsize - blockBlkNo) *
350 dev_bsize;
351
352 #ifdef DIAGNOSTICS
353 if (cdesc[idx].owner)
354 fprintf(stderr, "Owner is set (%d, me=%d), can"
355 "not happen(2).\n", cdesc[idx].owner,
356 getpid());
357 cdesc[idx].owner = getpid();
358 #endif
359 cdesc[idx].time = cheader->count++;
360 cdesc[idx].blkstart = blockBlkNo;
361 cdesc[idx].blocksRead = 0;
362
363 if (lseek(diskfd,
364 ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) {
365 msg("readBlocks: lseek fails: %s\n",
366 strerror(errno));
367 rsize = -1;
368 } else {
369 rsize = read(diskfd, CDATA(idx), rsize);
370 if (rsize < 0) {
371 msg("readBlocks: read fails: %s\n",
372 strerror(errno));
373 }
374 }
375
376 /* On errors, panic, punt, try to read without
377 * cache and let raw read routine do the rest.
378 */
379
380 if (rsize <= 0) {
381 rawread(oblkno, obuf, osize);
382 #ifdef DIAGNOSTICS
383 if (cdesc[idx].owner != getpid())
384 fprintf(stderr, "Owner changed from "
385 "%d to %d, can't happen\n",
386 getpid(), cdesc[idx].owner);
387 cdesc[idx].owner = 0;
388 #endif
389 break;
390 }
391
392 /* On short read, just note the fact and go on */
393 cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize;
394
395 #ifdef STATS
396 nphysread++;
397 physreadsize += rsize;
398 #endif
399 #ifdef DIAGNOSTICS
400 if (cdesc[idx].owner != getpid())
401 fprintf(stderr, "Owner changed from "
402 "%d to %d, can't happen\n",
403 getpid(), cdesc[idx].owner);
404 cdesc[idx].owner = 0;
405 #endif
406 /*
407 * We swapped some of data in, let the loop fetch
408 * them from cache
409 */
410 }
411 }
412
413 if (flock(diskfd, LOCK_UN))
414 msg("flock(LOCK_UN) failed: %s\n",
415 strerror(errno));
416 return;
417 }
418
/*
 * printcachestats --
 *	When built with STATS, print this process' read counters to stderr:
 *	logical and physical reads with their byte totals, the share of
 *	reads that needed no physical read and the extra data read relative
 *	to the data requested.  Without STATS this is a no-op.
 */
void
printcachestats(void)
{
#ifdef STATS
	int hitPct = 0;
	int overheadPct = 0;

	/* A process that has not read anything must not divide by zero. */
	if (nreads != 0)
		hitPct = (nreads - nphysread) * 100 / nreads;
	if (readsize != 0)
		overheadPct =
		    (int)(((physreadsize - readsize) * 100) / readsize);

	/* The byte counters are 64 bit wide; print them without truncation. */
	fprintf(stderr, "Pid %d: %d reads (%lld bytes) "
	    "%d physical reads (%lld bytes) %d%% hits, %d%% overhead\n",
	    (int)getpid(), nreads, (long long)readsize, nphysread,
	    (long long)physreadsize, hitPct, overheadPct);
#endif
}
430