Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.5
      1 /*	$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #if defined(LIBC_SCCS) && !defined(lint)
     37 #if 0
     38 static char sccsid[] = "@(#)mpool.c	8.2 (Berkeley) 2/21/94";
     39 #else
     40 static char rcsid[] = "$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $";
     41 #endif
     42 #endif /* LIBC_SCCS and not lint */
     43 
     44 #include <sys/param.h>
     45 #include <sys/stat.h>
     46 
     47 #include <errno.h>
     48 #include <stdio.h>
     49 #include <stdlib.h>
     50 #include <string.h>
     51 #include <unistd.h>
     52 
     53 #include <db.h>
     54 #define	__MPOOLINTERFACE_PRIVATE
     55 #include "mpool.h"
     56 
     57 static BKT *mpool_bkt __P((MPOOL *));
     58 static BKT *mpool_look __P((MPOOL *, pgno_t));
     59 static int  mpool_write __P((MPOOL *, BKT *));
     60 #ifdef DEBUG
     61 static void __mpoolerr __P((const char *fmt, ...));
     62 #endif
     63 
     64 /*
     65  * MPOOL_OPEN -- initialize a memory pool.
     66  *
     67  * Parameters:
     68  *	key:		Shared buffer key.
     69  *	fd:		File descriptor.
     70  *	pagesize:	File page size.
     71  *	maxcache:	Max number of cached pages.
     72  *
     73  * Returns:
     74  *	MPOOL pointer, NULL on error.
     75  */
     76 MPOOL *
     77 mpool_open(key, fd, pagesize, maxcache)
     78 	DBT *key;
     79 	int fd;
     80 	pgno_t pagesize, maxcache;
     81 {
     82 	struct stat sb;
     83 	MPOOL *mp;
     84 	int entry;
     85 
     86 	if (fstat(fd, &sb))
     87 		return (NULL);
     88 	/* XXX
     89 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
     90 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
     91 	 * fairly close.
     92 	 */
     93 	if (!S_ISREG(sb.st_mode)) {
     94 		errno = ESPIPE;
     95 		return (NULL);
     96 	}
     97 
     98 	if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL)
     99 		return (NULL);
    100 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
    101 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
    102 	for (entry = 0; entry < HASHSIZE; ++entry)
    103 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
    104 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
    105 		    (BKT *)&mp->hashtable[entry];
    106 	mp->curcache = 0;
    107 	mp->maxcache = maxcache;
    108 	mp->pagesize = pagesize;
    109 	mp->npages = sb.st_size / pagesize;
    110 	mp->fd = fd;
    111 	mp->pgcookie = NULL;
    112 	mp->pgin = mp->pgout = NULL;
    113 
    114 #ifdef STATISTICS
    115 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
    116 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
    117 	    mp->pagewrite = 0;
    118 #endif
    119 	return (mp);
    120 }
    121 
    122 /*
    123  * MPOOL_FILTER -- initialize input/output filters.
    124  *
    125  * Parameters:
    126  *	pgin:		Page in conversion routine.
    127  *	pgout:		Page out conversion routine.
    128  *	pgcookie:	Cookie for page in/out routines.
    129  */
    130 void
    131 mpool_filter(mp, pgin, pgout, pgcookie)
    132 	MPOOL *mp;
    133 	void (*pgin) __P((void *, pgno_t, void *));
    134 	void (*pgout) __P((void *, pgno_t, void *));
    135 	void *pgcookie;
    136 {
    137 	mp->pgin = pgin;
    138 	mp->pgout = pgout;
    139 	mp->pgcookie = pgcookie;
    140 }
    141 
    142 /*
    143  * MPOOL_NEW -- get a new page
    144  *
    145  * Parameters:
    146  *	mp:		mpool cookie
    147  *	pgnoadddr:	place to store new page number
    148  * Returns:
    149  *	RET_ERROR, RET_SUCCESS
    150  */
    151 void *
    152 mpool_new(mp, pgnoaddr)
    153 	MPOOL *mp;
    154 	pgno_t *pgnoaddr;
    155 {
    156 	BKT *b;
    157 	BKTHDR *hp;
    158 
    159 #ifdef STATISTICS
    160 	++mp->pagenew;
    161 #endif
    162 	/*
    163 	 * Get a BKT from the cache.  Assign a new page number, attach it to
    164 	 * the hash and lru chains and return.
    165 	 */
    166 	if ((b = mpool_bkt(mp)) == NULL)
    167 		return (NULL);
    168 	*pgnoaddr = b->pgno = mp->npages++;
    169 	b->flags = MPOOL_PINNED;
    170 	inshash(b, b->pgno);
    171 	inschain(b, &mp->lru);
    172 	return (b->page);
    173 }
    174 
    175 /*
    176  * MPOOL_GET -- get a page from the pool
    177  *
    178  * Parameters:
    179  *	mp:	mpool cookie
    180  *	pgno:	page number
    181  *	flags:	not used
    182  *
    183  * Returns:
    184  *	RET_ERROR, RET_SUCCESS
    185  */
    186 void *
    187 mpool_get(mp, pgno, flags)
    188 	MPOOL *mp;
    189 	pgno_t pgno;
    190 	u_int flags;		/* XXX not used? */
    191 {
    192 	BKT *b;
    193 	BKTHDR *hp;
    194 	off_t off;
    195 	int nr;
    196 
    197 	/*
    198 	 * If asking for a specific page that is already in the cache, find
    199 	 * it and return it.
    200 	 */
    201 	if (b = mpool_look(mp, pgno)) {
    202 #ifdef STATISTICS
    203 		++mp->pageget;
    204 #endif
    205 #ifdef DEBUG
    206 		if (b->flags & MPOOL_PINNED)
    207 			__mpoolerr("mpool_get: page %d already pinned",
    208 			    b->pgno);
    209 #endif
    210 		rmchain(b);
    211 		inschain(b, &mp->lru);
    212 		b->flags |= MPOOL_PINNED;
    213 		return (b->page);
    214 	}
    215 
    216 	/* Not allowed to retrieve a non-existent page. */
    217 	if (pgno >= mp->npages) {
    218 		errno = EINVAL;
    219 		return (NULL);
    220 	}
    221 
    222 	/* Get a page from the cache. */
    223 	if ((b = mpool_bkt(mp)) == NULL)
    224 		return (NULL);
    225 	b->pgno = pgno;
    226 	b->flags = MPOOL_PINNED;
    227 
    228 #ifdef STATISTICS
    229 	++mp->pageread;
    230 #endif
    231 	/* Read in the contents. */
    232 	off = mp->pagesize * pgno;
    233 	if (lseek(mp->fd, off, SEEK_SET) != off)
    234 		return (NULL);
    235 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
    236 		if (nr >= 0)
    237 			errno = EFTYPE;
    238 		return (NULL);
    239 	}
    240 	if (mp->pgin)
    241 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
    242 
    243 	inshash(b, b->pgno);
    244 	inschain(b, &mp->lru);
    245 #ifdef STATISTICS
    246 	++mp->pageget;
    247 #endif
    248 	return (b->page);
    249 }
    250 
    251 /*
    252  * MPOOL_PUT -- return a page to the pool
    253  *
    254  * Parameters:
    255  *	mp:	mpool cookie
    256  *	page:	page pointer
    257  *	pgno:	page number
    258  *
    259  * Returns:
    260  *	RET_ERROR, RET_SUCCESS
    261  */
    262 int
    263 mpool_put(mp, page, flags)
    264 	MPOOL *mp;
    265 	void *page;
    266 	u_int flags;
    267 {
    268 	BKT *baddr;
    269 #ifdef DEBUG
    270 	BKT *b;
    271 #endif
    272 
    273 #ifdef STATISTICS
    274 	++mp->pageput;
    275 #endif
    276 	baddr = (BKT *)((char *)page - sizeof(BKT));
    277 #ifdef DEBUG
    278 	if (!(baddr->flags & MPOOL_PINNED))
    279 		__mpoolerr("mpool_put: page %d not pinned", b->pgno);
    280 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
    281 		if (b == (BKT *)&mp->lru)
    282 			__mpoolerr("mpool_put: %0x: bad address", baddr);
    283 		if (b == baddr)
    284 			break;
    285 	}
    286 #endif
    287 	baddr->flags &= ~MPOOL_PINNED;
    288 	baddr->flags |= flags & MPOOL_DIRTY;
    289 	return (RET_SUCCESS);
    290 }
    291 
    292 /*
    293  * MPOOL_CLOSE -- close the buffer pool
    294  *
    295  * Parameters:
    296  *	mp:	mpool cookie
    297  *
    298  * Returns:
    299  *	RET_ERROR, RET_SUCCESS
    300  */
    301 int
    302 mpool_close(mp)
    303 	MPOOL *mp;
    304 {
    305 	BKT *b, *next;
    306 
    307 	/* Free up any space allocated to the lru pages. */
    308 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
    309 		next = b->cprev;
    310 		free(b);
    311 	}
    312 	free(mp);
    313 	return (RET_SUCCESS);
    314 }
    315 
    316 /*
    317  * MPOOL_SYNC -- sync the file to disk.
    318  *
    319  * Parameters:
    320  *	mp:	mpool cookie
    321  *
    322  * Returns:
    323  *	RET_ERROR, RET_SUCCESS
    324  */
    325 int
    326 mpool_sync(mp)
    327 	MPOOL *mp;
    328 {
    329 	BKT *b;
    330 
    331 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    332 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
    333 			return (RET_ERROR);
    334 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    335 }
    336 
    337 /*
    338  * MPOOL_BKT -- get/create a BKT from the cache
    339  *
    340  * Parameters:
    341  *	mp:	mpool cookie
    342  *
    343  * Returns:
    344  *	NULL on failure and a pointer to the BKT on success
    345  */
    346 static BKT *
    347 mpool_bkt(mp)
    348 	MPOOL *mp;
    349 {
    350 	BKT *b;
    351 
    352 	if (mp->curcache < mp->maxcache)
    353 		goto new;
    354 
    355 	/*
    356 	 * If the cache is maxxed out, search the lru list for a buffer we
    357 	 * can flush.  If we find one, write it if necessary and take it off
    358 	 * any lists.  If we don't find anything we grow the cache anyway.
    359 	 * The cache never shrinks.
    360 	 */
    361 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    362 		if (!(b->flags & MPOOL_PINNED)) {
    363 			if (b->flags & MPOOL_DIRTY &&
    364 			    mpool_write(mp, b) == RET_ERROR)
    365 				return (NULL);
    366 			rmhash(b);
    367 			rmchain(b);
    368 #ifdef STATISTICS
    369 			++mp->pageflush;
    370 #endif
    371 #ifdef DEBUG
    372 			{
    373 				void *spage;
    374 				spage = b->page;
    375 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    376 				b->page = spage;
    377 			}
    378 #endif
    379 			return (b);
    380 		}
    381 
    382 new:	if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
    383 		return (NULL);
    384 #ifdef STATISTICS
    385 	++mp->pagealloc;
    386 #endif
    387 #ifdef DEBUG
    388 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    389 #endif
    390 	b->page = (char *)b + sizeof(BKT);
    391 	++mp->curcache;
    392 	return (b);
    393 }
    394 
    395 /*
    396  * MPOOL_WRITE -- sync a page to disk
    397  *
    398  * Parameters:
    399  *	mp:	mpool cookie
    400  *
    401  * Returns:
    402  *	RET_ERROR, RET_SUCCESS
    403  */
    404 static int
    405 mpool_write(mp, b)
    406 	MPOOL *mp;
    407 	BKT *b;
    408 {
    409 	off_t off;
    410 
    411 	if (mp->pgout)
    412 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
    413 
    414 #ifdef STATISTICS
    415 	++mp->pagewrite;
    416 #endif
    417 	off = mp->pagesize * b->pgno;
    418 	if (lseek(mp->fd, off, SEEK_SET) != off)
    419 		return (RET_ERROR);
    420 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
    421 		return (RET_ERROR);
    422 	b->flags &= ~MPOOL_DIRTY;
    423 	return (RET_SUCCESS);
    424 }
    425 
    426 /*
    427  * MPOOL_LOOK -- lookup a page
    428  *
    429  * Parameters:
    430  *	mp:	mpool cookie
    431  *	pgno:	page number
    432  *
    433  * Returns:
    434  *	NULL on failure and a pointer to the BKT on success
    435  */
    436 static BKT *
    437 mpool_look(mp, pgno)
    438 	MPOOL *mp;
    439 	pgno_t pgno;
    440 {
    441 	register BKT *b;
    442 	register BKTHDR *tb;
    443 
    444 	/* XXX
    445 	 * If find the buffer, put it first on the hash chain so can
    446 	 * find it again quickly.
    447 	 */
    448 	tb = &mp->hashtable[HASHKEY(pgno)];
    449 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
    450 		if (b->pgno == pgno) {
    451 #ifdef STATISTICS
    452 			++mp->cachehit;
    453 #endif
    454 			return (b);
    455 		}
    456 #ifdef STATISTICS
    457 	++mp->cachemiss;
    458 #endif
    459 	return (NULL);
    460 }
    461 
#ifdef STATISTICS
/*
 * MPOOL_STAT -- print cache statistics on stderr
 *
 * Prints the counters kept in the pool, then the page number of every
 * bucket on the lru chain, ten per line, suffixed with "d" for dirty
 * and "P" for pinned.
 *
 * Parameters:
 *	mp:	mpool cookie
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *b;
	int cnt;
	const char *sep;

	/*
	 * Every value printed with %lu is cast explicitly, so the call is
	 * well-defined whatever integer types mpool.h gives the fields.
	 */
	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there were no lookups (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)b->pgno);
		if (b->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (b->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
    511 
#ifdef DEBUG
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

/*
 * __MPOOLERR -- report an internal consistency failure and abort
 *
 * Formats the message printf-style on stderr, appends a newline and
 * calls abort(3); it does not return.
 *
 * Parameters:
 *	fmt:	printf-style format, followed by its arguments
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputc('\n', stderr);
	abort();
	/* NOTREACHED */
}
#endif
    541