Home | History | Annotate | Line # | Download | only in dump
rcache.c revision 1.1
      1 /*      $NetBSD: rcache.c,v 1.1 1999/03/23 14:22:59 bouyer Exp $       */
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Martin J. Laubach <mjl (at) emsi.priv.at> and
      9  *    Manuel Bouyer <Manuel.Bouyer (at) lip6.fr>.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *      This product includes software developed by the NetBSD
     22  *      Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 /*-----------------------------------------------------------------------*/
     40 #include <sys/types.h>
     41 #include <sys/uio.h>
     42 #include <sys/mman.h>
     43 #include <sys/param.h>
     44 #include <sys/sysctl.h>
     45 #include <ufs/ufs/dinode.h>
     46 #include <ufs/ffs/fs.h>
     47 
     48 #include <stdio.h>
     49 #include <stdlib.h>
     50 #include <unistd.h>
     51 #include <fcntl.h>
     52 #include <errno.h>
     53 #include <string.h>
     54 
     55 #include "dump.h"
     56 
     57 /*-----------------------------------------------------------------------*/
/* Hard upper bound on the number of cache buffers. */
#define MAXCACHEBUFS	512
/*
 * Divisor applied to the amount of user memory when the cache is sized
 * automatically: at most 1/MAXMEMPART (one sixth, about 16%) is used.
 */
#define MAXMEMPART	6
     60 
     61 /*-----------------------------------------------------------------------*/
/*
 * Header stored at the very beginning of the shared cache area.
 */
struct cheader {
	/* Running counter; post-incremented to time-stamp descriptor use. */
	volatile size_t count;
};
     65 
     66 struct cdesc {
     67 	volatile daddr_t blkstart;
     68 	volatile daddr_t blkend;/* start + nblksread */
     69 	volatile daddr_t blocksRead;
     70 	volatile size_t time;
     71 #ifdef DIAGNOSTICS
     72 	volatile pid_t owner;
     73 #endif
     74 };
     75 
static int findlru __P((void));

/*
 * Layout of the shared area: one struct cheader, then cachebufs
 * struct cdesc entries, then cachebufs data buffers of
 * nblksread * dev_bsize bytes each.
 */
static void *shareBuffer = NULL;	/* bread() bypasses the cache if NULL */
static struct cheader *cheader;		/* header at the start of the area */
static struct cdesc *cdesc;		/* descriptor array */
static char *cdata;			/* start of the data buffers */
/* Address of the data buffer that belongs to descriptor i. */
#define CDATA(i)	(cdata + ((i) * nblksread * dev_bsize))

static int cachebufs;			/* number of cache buffers */
static int nblksread;			/* blocks held by one cache buffer */

#ifdef STATS
static int nreads;			/* calls to bread() */
static int64_t readsize;		/* bytes requested through bread() */
static int nphysread;			/* reads actually issued to the disk */
static int64_t physreadsize;		/* bytes asked from the disk */
#endif
     93 
     94 /*-----------------------------------------------------------------------*/
     95 void
     96 initcache(cachesize, readblksize)
     97 	int cachesize;
     98 	int readblksize;
     99 {
    100 	size_t len;
    101 	size_t  sharedSize;
    102 
    103 	nblksread = (readblksize + sblock->fs_bsize - 1) / sblock->fs_bsize;
    104 	if(cachesize == -1) {	/* Compute from memory available */
    105 		int usermem;
    106 		int mib[2] = { CTL_HW, HW_USERMEM };
    107 
    108 		len = sizeof(usermem);
    109 		if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) {
    110 			msg("sysctl(hw.usermem) failed: %s\n", strerror(errno));
    111 			return;
    112 		}
    113 		cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize);
    114 	} else {		/* User specified */
    115 		cachebufs = cachesize;
    116 	}
    117 
    118 	if(cachebufs) {	/* Don't allocate if zero --> no caching */
    119 		if (cachebufs > MAXCACHEBUFS)
    120 			cachebufs = MAXCACHEBUFS;
    121 
    122 		sharedSize = sizeof(struct cheader) +
    123 	   	    sizeof(struct cdesc) * cachebufs +
    124 	   	    nblksread * cachebufs * dev_bsize;
    125 #ifdef STATS
    126 		fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs,
    127 	   	    sharedSize);
    128 #endif
    129 		shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE,
    130 	   	    MAP_ANON | MAP_SHARED, -1, 0);
    131 		if (shareBuffer == (void *)-1) {
    132 			msg("can't mmap shared memory for buffer: %s\n",
    133 			    strerror(errno));
    134 			return;
    135 		}
    136 		cheader = shareBuffer;
    137 		cdesc = (struct cdesc *) (((char *) shareBuffer) +
    138 		    sizeof(struct cheader));
    139 		cdata = ((char *) shareBuffer) + sizeof(struct cheader) +
    140 	   	    sizeof(struct cdesc) * cachebufs;
    141 
    142 		memset(shareBuffer, '\0', sharedSize);
    143 	}
    144 }
    145 /*-----------------------------------------------------------------------*/
    146 /* Find the cache buffer descriptor that shows the minimal access time */
    147 
    148 static int
    149 findlru()
    150 {
    151 	int     i;
    152 	int     minTime = cdesc[0].time;
    153 	int     minIdx = 0;
    154 
    155 	for (i = 0; i < cachebufs; i++) {
    156 		if (cdesc[i].time < minTime) {
    157 			minIdx = i;
    158 			minTime = cdesc[i].time;
    159 		}
    160 	}
    161 
    162 	return minIdx;
    163 }
    164 /*-----------------------------------------------------------------------*/
    165 /*
    166  * Read data directly from disk, with smart error handling.
    167  * Try to recover from hard errors by reading in sector sized pieces.
    168  * Error recovery is attempted at most BREADEMAX times before seeking
    169  * consent from the operator to continue.
    170  */
    171 
    172 
    173 static int breaderrors = 0;
    174 #define BREADEMAX 32
    175 
    176 void
    177 rawread(blkno, buf, size)
    178 	daddr_t blkno;
    179 	char *buf;
    180 	int size;
    181 {
    182 	int cnt, i;
    183 #ifdef STATS
    184 	nphysread++;
    185 	physreadsize += size;
    186 #endif
    187 
    188 	if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) {
    189 		msg("rawread: lseek fails\n");
    190 		goto err;
    191 	}
    192 	if ((cnt =  read(diskfd, buf, size)) == size)
    193 		return;
    194 	if (cnt == -1)
    195 		msg("read error from %s: %s: [block %d]: count=%d\n",
    196 			disk, strerror(errno), blkno, size);
    197 	else
    198 		msg("short read error from %s: [block %d]: count=%d, got=%d\n",
    199 			disk, blkno, size, cnt);
    200 err:
    201 	if (++breaderrors > BREADEMAX) {
    202 		msg("More than %d block read errors from %d\n",
    203 			BREADEMAX, disk);
    204 		broadcast("DUMP IS AILING!\n");
    205 		msg("This is an unrecoverable error.\n");
    206 		if (!query("Do you want to attempt to continue?")){
    207 			dumpabort(0);
    208 			/*NOTREACHED*/
    209 		} else
    210 			breaderrors = 0;
    211 	}
    212 	/*
    213 	 * Zero buffer, then try to read each sector of buffer separately.
    214 	 */
    215 	memset(buf, 0, size);
    216 	for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) {
    217 		if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) {
    218 			msg("rawread: lseek2 fails: %s!\n",
    219 			    strerror(errno));
    220 			continue;
    221 		}
    222 		if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize)
    223 			continue;
    224 		if (cnt == -1) {
    225 			msg("read error from %s: %s: [sector %d]: count=%d: "
    226 			    "%s\n", disk, strerror(errno), blkno, dev_bsize,
    227 			    strerror(errno));
    228 			continue;
    229 		}
    230 		msg("short read error from %s: [sector %d]: count=%d, got=%d\n",
    231 		    disk, blkno, dev_bsize, cnt);
    232 	}
    233 }
    234 
    235 /*-----------------------------------------------------------------------*/
    236 #define min(a,b)	(((a) < (b)) ? (a) : (b))
    237 
    238 void
    239 bread(blkno, buf, size)
    240 	daddr_t blkno;
    241 	char *buf;
    242 	int size;
    243 {
    244 	int     osize = size;
    245 	daddr_t oblkno = blkno;
    246 	char   *obuf = buf;
    247 	daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize;
    248 
    249 #ifdef STATS
    250 	nreads++;
    251 	readsize += size;
    252 #endif
    253 
    254 	if (!shareBuffer) {
    255 		rawread(blkno, buf, size);
    256 		return;
    257 	}
    258 
    259 	if (flock(diskfd, LOCK_EX)) {
    260 		msg("flock(LOCK_EX) failed: %s\n",
    261 		    strerror(errno));
    262 		rawread(blkno, buf, size);
    263 		return;
    264 	}
    265 
    266 
    267 retry:
    268 	while(size > 0) {
    269 		int     i;
    270 
    271 		for (i = 0; i < cachebufs; i++) {
    272 			struct cdesc *curr = &cdesc[i];
    273 
    274 #ifdef DIAGNOSTICS
    275 			if (curr->owner) {
    276 				fprintf(stderr, "Owner is set (%d, me=%d), can"
    277 				    "not happen.\n", curr->owner, getpid());
    278 			}
    279 #endif
    280 
    281 			if (curr->blkend == 0)
    282 				continue;
    283 			/*
    284 			 * If we find a bit of the read in the buffers,
    285 			 * now compute how many blocks we can copy,
    286 			 * copy them out, adjust blkno, buf and size,
    287 			 * and restart
    288 			 */
    289 			if (curr->blkstart <= blkno &&
    290 			    blkno < curr->blkend) {
    291 				/* Number of data blocks to be copied */
    292 				int toCopy = min(size,
    293 				    (curr->blkend - blkno) * dev_bsize);
    294 #ifdef DIAGNOSTICS
    295 				if (toCopy <= 0 ||
    296 				    toCopy > nblksread * dev_bsize) {
    297 					fprintf(stderr, "toCopy %d !\n",
    298 					    toCopy);
    299 					dumpabort(0);
    300 				}
    301 				if (CDATA(i) + (blkno - curr->blkstart) *
    302 			   	    dev_bsize < CDATA(i) ||
    303 			   	    CDATA(i) + (blkno - curr->blkstart) *
    304 			   	    dev_bsize >
    305 				    CDATA(i) + nblksread * dev_bsize) {
    306 					fprintf(stderr, "%p < %p !!!\n",
    307 				   	   CDATA(i) + (blkno -
    308 						curr->blkstart) * dev_bsize,
    309 					   CDATA(i));
    310 					fprintf(stderr, "cdesc[i].blkstart %d "
    311 					    "blkno %d dev_bsize %ld\n",
    312 				   	    curr->blkstart, blkno, dev_bsize);
    313 					dumpabort(0);
    314 				}
    315 #endif
    316 				memcpy(buf, CDATA(i) +
    317 				    (blkno - curr->blkstart) * dev_bsize,
    318 			   	    toCopy);
    319 
    320 				buf 	+= toCopy;
    321 				size 	-= toCopy;
    322 				blkno 	+= (toCopy + dev_bsize - 1) / dev_bsize;
    323 				numBlocks -=
    324 				    (toCopy  + dev_bsize - 1) / dev_bsize;
    325 
    326 				curr->time = cheader->count++;
    327 
    328 				/*
    329 				 * If all data of a cache block have been
    330 				 * read, chances are good no more reads
    331 				 * will occur, so expire the cache immediately
    332 				 */
    333 
    334 				curr->blocksRead +=
    335 				    (toCopy + dev_bsize -1) / dev_bsize;
    336 				if (curr->blocksRead >= nblksread)
    337 					curr->time = 0;
    338 
    339 				goto retry;
    340 			}
    341 		}
    342 
    343 		/* No more to do? */
    344 		if (size == 0)
    345 			break;
    346 
    347 		/*
    348 		 * This does actually not happen if fs blocks are not greater
    349 		 * than nblksread.
    350 		 */
    351 		if (numBlocks > nblksread) {
    352 			rawread(oblkno, obuf, osize);
    353 			break;
    354 		} else {
    355 			int     idx;
    356 			ssize_t rsize;
    357 			daddr_t blockBlkNo;
    358 
    359 			blockBlkNo = (blkno / nblksread) * nblksread;
    360 			idx = findlru();
    361 			rsize = min(nblksread,
    362 			    fsbtodb(sblock, sblock->fs_size) - blockBlkNo) *
    363 			    dev_bsize;
    364 
    365 #ifdef DIAGNOSTICS
    366 			if (cdesc[idx].owner)
    367 				fprintf(stderr, "Owner is set (%d, me=%d), can"
    368 				    "not happen(2).\n", cdesc[idx].owner,
    369 				    getpid());
    370 			cdesc[idx].owner = getpid();
    371 #endif
    372 			cdesc[idx].time = cheader->count++;
    373 			cdesc[idx].blkstart = blockBlkNo;
    374 			cdesc[idx].blocksRead = 0;
    375 
    376 			if (lseek(diskfd,
    377 			    ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) {
    378 				msg("readBlocks: lseek fails: %s\n",
    379 				    strerror(errno));
    380 				rsize = -1;
    381 			} else {
    382 				rsize = read(diskfd, CDATA(idx), rsize);
    383 				if (rsize < 0) {
    384 					msg("readBlocks: read fails: %s\n",
    385 					    strerror(errno));
    386 				}
    387 			}
    388 
    389 			/* On errors, panic, punt, try to read without
    390 			 * cache and let raw read routine do the rest.
    391 			 */
    392 
    393 			if (rsize <= 0) {
    394 				rawread(oblkno, obuf, osize);
    395 #ifdef DIAGNOSTICS
    396 				if (cdesc[idx].owner != getpid())
    397 					fprintf(stderr, "Owner changed from "
    398 					    "%d to %d, can't happen\n",
    399 					    getpid(), cdesc[idx].owner);
    400 				cdesc[idx].owner = 0;
    401 #endif
    402 				break;
    403 			}
    404 
    405 			/* On short read, just note the fact and go on */
    406 			cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize;
    407 
    408 #ifdef STATS
    409 			nphysread++;
    410 			physreadsize += rsize;
    411 #endif
    412 #ifdef DIAGNOSTICS
    413 			if (cdesc[idx].owner != getpid())
    414 				fprintf(stderr, "Owner changed from "
    415 				    "%d to %d, can't happen\n",
    416 				    getpid(), cdesc[idx].owner);
    417 			cdesc[idx].owner = 0;
    418 #endif
    419 			/*
    420 			 * We swapped some of data in, let the loop fetch
    421 			 * them from cache
    422 			 */
    423 		}
    424 	}
    425 
    426 	if (flock(diskfd, LOCK_UN))
    427 		msg("flock(LOCK_UN) failed: %s\n",
    428 		    strerror(errno));
    429 	return;
    430 }
    431 
    432 /*-----------------------------------------------------------------------*/
/*
 * Print this process's cache statistics to stderr.  Does nothing unless
 * the file is compiled with STATS defined.
 */
void
printcachestats()
{
#ifdef STATS
	int hits = 0;
	int overhead = 0;

	/* Guard both ratios: a process may not have read anything. */
	if (nreads != 0)
		hits = (int) (((int64_t) (nreads - nphysread) * 100) /
		    nreads);
	if (readsize != 0)
		overhead = (int) (((physreadsize - readsize) * 100) /
		    readsize);

	/* The byte counters are 64 bits wide; print them without truncation. */
	fprintf(stderr, "Pid %d: %d reads (%llu bytes) "
	    "%d physical reads (%llu bytes) %d%% hits, %d%% overhead\n",
	    (int) getpid(), nreads, (unsigned long long) readsize, nphysread,
	    (unsigned long long) physreadsize, hits, overhead);
#endif
}
    444 
    445 /*-----------------------------------------------------------------------*/
    446