Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.4
      1 /*-
      2  * Copyright (c) 1990, 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
     34 #if defined(LIBC_SCCS) && !defined(lint)
     35 static char sccsid[] = "@(#)mpool.c	8.2 (Berkeley) 2/21/94";
     36 #endif /* LIBC_SCCS and not lint */
     37 
     38 #include <sys/param.h>
     39 #include <sys/stat.h>
     40 
     41 #include <errno.h>
     42 #include <stdio.h>
     43 #include <stdlib.h>
     44 #include <string.h>
     45 #include <unistd.h>
     46 
     47 #include <db.h>
     48 #define	__MPOOLINTERFACE_PRIVATE
     49 #include "mpool.h"
     50 
     51 static BKT *mpool_bkt __P((MPOOL *));
     52 static BKT *mpool_look __P((MPOOL *, pgno_t));
     53 static int  mpool_write __P((MPOOL *, BKT *));
     54 #ifdef DEBUG
     55 static void __mpoolerr __P((const char *fmt, ...));
     56 #endif
     57 
     58 /*
     59  * MPOOL_OPEN -- initialize a memory pool.
     60  *
     61  * Parameters:
     62  *	key:		Shared buffer key.
     63  *	fd:		File descriptor.
     64  *	pagesize:	File page size.
     65  *	maxcache:	Max number of cached pages.
     66  *
     67  * Returns:
     68  *	MPOOL pointer, NULL on error.
     69  */
     70 MPOOL *
     71 mpool_open(key, fd, pagesize, maxcache)
     72 	DBT *key;
     73 	int fd;
     74 	pgno_t pagesize, maxcache;
     75 {
     76 	struct stat sb;
     77 	MPOOL *mp;
     78 	int entry;
     79 
     80 	if (fstat(fd, &sb))
     81 		return (NULL);
     82 	/* XXX
     83 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
     84 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
     85 	 * fairly close.
     86 	 */
     87 	if (!S_ISREG(sb.st_mode)) {
     88 		errno = ESPIPE;
     89 		return (NULL);
     90 	}
     91 
     92 	if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL)
     93 		return (NULL);
     94 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
     95 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
     96 	for (entry = 0; entry < HASHSIZE; ++entry)
     97 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
     98 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
     99 		    (BKT *)&mp->hashtable[entry];
    100 	mp->curcache = 0;
    101 	mp->maxcache = maxcache;
    102 	mp->pagesize = pagesize;
    103 	mp->npages = sb.st_size / pagesize;
    104 	mp->fd = fd;
    105 	mp->pgcookie = NULL;
    106 	mp->pgin = mp->pgout = NULL;
    107 
    108 #ifdef STATISTICS
    109 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
    110 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
    111 	    mp->pagewrite = 0;
    112 #endif
    113 	return (mp);
    114 }
    115 
    116 /*
    117  * MPOOL_FILTER -- initialize input/output filters.
    118  *
    119  * Parameters:
    120  *	pgin:		Page in conversion routine.
    121  *	pgout:		Page out conversion routine.
    122  *	pgcookie:	Cookie for page in/out routines.
    123  */
    124 void
    125 mpool_filter(mp, pgin, pgout, pgcookie)
    126 	MPOOL *mp;
    127 	void (*pgin) __P((void *, pgno_t, void *));
    128 	void (*pgout) __P((void *, pgno_t, void *));
    129 	void *pgcookie;
    130 {
    131 	mp->pgin = pgin;
    132 	mp->pgout = pgout;
    133 	mp->pgcookie = pgcookie;
    134 }
    135 
    136 /*
    137  * MPOOL_NEW -- get a new page
    138  *
    139  * Parameters:
    140  *	mp:		mpool cookie
    141  *	pgnoadddr:	place to store new page number
    142  * Returns:
    143  *	RET_ERROR, RET_SUCCESS
    144  */
    145 void *
    146 mpool_new(mp, pgnoaddr)
    147 	MPOOL *mp;
    148 	pgno_t *pgnoaddr;
    149 {
    150 	BKT *b;
    151 	BKTHDR *hp;
    152 
    153 #ifdef STATISTICS
    154 	++mp->pagenew;
    155 #endif
    156 	/*
    157 	 * Get a BKT from the cache.  Assign a new page number, attach it to
    158 	 * the hash and lru chains and return.
    159 	 */
    160 	if ((b = mpool_bkt(mp)) == NULL)
    161 		return (NULL);
    162 	*pgnoaddr = b->pgno = mp->npages++;
    163 	b->flags = MPOOL_PINNED;
    164 	inshash(b, b->pgno);
    165 	inschain(b, &mp->lru);
    166 	return (b->page);
    167 }
    168 
    169 /*
    170  * MPOOL_GET -- get a page from the pool
    171  *
    172  * Parameters:
    173  *	mp:	mpool cookie
    174  *	pgno:	page number
    175  *	flags:	not used
    176  *
    177  * Returns:
    178  *	RET_ERROR, RET_SUCCESS
    179  */
    180 void *
    181 mpool_get(mp, pgno, flags)
    182 	MPOOL *mp;
    183 	pgno_t pgno;
    184 	u_int flags;		/* XXX not used? */
    185 {
    186 	BKT *b;
    187 	BKTHDR *hp;
    188 	off_t off;
    189 	int nr;
    190 
    191 	/*
    192 	 * If asking for a specific page that is already in the cache, find
    193 	 * it and return it.
    194 	 */
    195 	if (b = mpool_look(mp, pgno)) {
    196 #ifdef STATISTICS
    197 		++mp->pageget;
    198 #endif
    199 #ifdef DEBUG
    200 		if (b->flags & MPOOL_PINNED)
    201 			__mpoolerr("mpool_get: page %d already pinned",
    202 			    b->pgno);
    203 #endif
    204 		rmchain(b);
    205 		inschain(b, &mp->lru);
    206 		b->flags |= MPOOL_PINNED;
    207 		return (b->page);
    208 	}
    209 
    210 	/* Not allowed to retrieve a non-existent page. */
    211 	if (pgno >= mp->npages) {
    212 		errno = EINVAL;
    213 		return (NULL);
    214 	}
    215 
    216 	/* Get a page from the cache. */
    217 	if ((b = mpool_bkt(mp)) == NULL)
    218 		return (NULL);
    219 	b->pgno = pgno;
    220 	b->flags = MPOOL_PINNED;
    221 
    222 #ifdef STATISTICS
    223 	++mp->pageread;
    224 #endif
    225 	/* Read in the contents. */
    226 	off = mp->pagesize * pgno;
    227 	if (lseek(mp->fd, off, SEEK_SET) != off)
    228 		return (NULL);
    229 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
    230 		if (nr >= 0)
    231 			errno = EFTYPE;
    232 		return (NULL);
    233 	}
    234 	if (mp->pgin)
    235 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
    236 
    237 	inshash(b, b->pgno);
    238 	inschain(b, &mp->lru);
    239 #ifdef STATISTICS
    240 	++mp->pageget;
    241 #endif
    242 	return (b->page);
    243 }
    244 
    245 /*
    246  * MPOOL_PUT -- return a page to the pool
    247  *
    248  * Parameters:
    249  *	mp:	mpool cookie
    250  *	page:	page pointer
    251  *	pgno:	page number
    252  *
    253  * Returns:
    254  *	RET_ERROR, RET_SUCCESS
    255  */
    256 int
    257 mpool_put(mp, page, flags)
    258 	MPOOL *mp;
    259 	void *page;
    260 	u_int flags;
    261 {
    262 	BKT *baddr;
    263 #ifdef DEBUG
    264 	BKT *b;
    265 #endif
    266 
    267 #ifdef STATISTICS
    268 	++mp->pageput;
    269 #endif
    270 	baddr = (BKT *)((char *)page - sizeof(BKT));
    271 #ifdef DEBUG
    272 	if (!(baddr->flags & MPOOL_PINNED))
    273 		__mpoolerr("mpool_put: page %d not pinned", b->pgno);
    274 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
    275 		if (b == (BKT *)&mp->lru)
    276 			__mpoolerr("mpool_put: %0x: bad address", baddr);
    277 		if (b == baddr)
    278 			break;
    279 	}
    280 #endif
    281 	baddr->flags &= ~MPOOL_PINNED;
    282 	baddr->flags |= flags & MPOOL_DIRTY;
    283 	return (RET_SUCCESS);
    284 }
    285 
    286 /*
    287  * MPOOL_CLOSE -- close the buffer pool
    288  *
    289  * Parameters:
    290  *	mp:	mpool cookie
    291  *
    292  * Returns:
    293  *	RET_ERROR, RET_SUCCESS
    294  */
    295 int
    296 mpool_close(mp)
    297 	MPOOL *mp;
    298 {
    299 	BKT *b, *next;
    300 
    301 	/* Free up any space allocated to the lru pages. */
    302 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
    303 		next = b->cprev;
    304 		free(b);
    305 	}
    306 	free(mp);
    307 	return (RET_SUCCESS);
    308 }
    309 
    310 /*
    311  * MPOOL_SYNC -- sync the file to disk.
    312  *
    313  * Parameters:
    314  *	mp:	mpool cookie
    315  *
    316  * Returns:
    317  *	RET_ERROR, RET_SUCCESS
    318  */
    319 int
    320 mpool_sync(mp)
    321 	MPOOL *mp;
    322 {
    323 	BKT *b;
    324 
    325 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    326 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
    327 			return (RET_ERROR);
    328 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    329 }
    330 
    331 /*
    332  * MPOOL_BKT -- get/create a BKT from the cache
    333  *
    334  * Parameters:
    335  *	mp:	mpool cookie
    336  *
    337  * Returns:
    338  *	NULL on failure and a pointer to the BKT on success
    339  */
    340 static BKT *
    341 mpool_bkt(mp)
    342 	MPOOL *mp;
    343 {
    344 	BKT *b;
    345 
    346 	if (mp->curcache < mp->maxcache)
    347 		goto new;
    348 
    349 	/*
    350 	 * If the cache is maxxed out, search the lru list for a buffer we
    351 	 * can flush.  If we find one, write it if necessary and take it off
    352 	 * any lists.  If we don't find anything we grow the cache anyway.
    353 	 * The cache never shrinks.
    354 	 */
    355 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    356 		if (!(b->flags & MPOOL_PINNED)) {
    357 			if (b->flags & MPOOL_DIRTY &&
    358 			    mpool_write(mp, b) == RET_ERROR)
    359 				return (NULL);
    360 			rmhash(b);
    361 			rmchain(b);
    362 #ifdef STATISTICS
    363 			++mp->pageflush;
    364 #endif
    365 #ifdef DEBUG
    366 			{
    367 				void *spage;
    368 				spage = b->page;
    369 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    370 				b->page = spage;
    371 			}
    372 #endif
    373 			return (b);
    374 		}
    375 
    376 new:	if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
    377 		return (NULL);
    378 #ifdef STATISTICS
    379 	++mp->pagealloc;
    380 #endif
    381 #ifdef DEBUG
    382 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    383 #endif
    384 	b->page = (char *)b + sizeof(BKT);
    385 	++mp->curcache;
    386 	return (b);
    387 }
    388 
    389 /*
    390  * MPOOL_WRITE -- sync a page to disk
    391  *
    392  * Parameters:
    393  *	mp:	mpool cookie
    394  *
    395  * Returns:
    396  *	RET_ERROR, RET_SUCCESS
    397  */
    398 static int
    399 mpool_write(mp, b)
    400 	MPOOL *mp;
    401 	BKT *b;
    402 {
    403 	off_t off;
    404 
    405 	if (mp->pgout)
    406 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
    407 
    408 #ifdef STATISTICS
    409 	++mp->pagewrite;
    410 #endif
    411 	off = mp->pagesize * b->pgno;
    412 	if (lseek(mp->fd, off, SEEK_SET) != off)
    413 		return (RET_ERROR);
    414 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
    415 		return (RET_ERROR);
    416 	b->flags &= ~MPOOL_DIRTY;
    417 	return (RET_SUCCESS);
    418 }
    419 
    420 /*
    421  * MPOOL_LOOK -- lookup a page
    422  *
    423  * Parameters:
    424  *	mp:	mpool cookie
    425  *	pgno:	page number
    426  *
    427  * Returns:
    428  *	NULL on failure and a pointer to the BKT on success
    429  */
    430 static BKT *
    431 mpool_look(mp, pgno)
    432 	MPOOL *mp;
    433 	pgno_t pgno;
    434 {
    435 	register BKT *b;
    436 	register BKTHDR *tb;
    437 
    438 	/* XXX
    439 	 * If find the buffer, put it first on the hash chain so can
    440 	 * find it again quickly.
    441 	 */
    442 	tb = &mp->hashtable[HASHKEY(pgno)];
    443 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
    444 		if (b->pgno == pgno) {
    445 #ifdef STATISTICS
    446 			++mp->cachehit;
    447 #endif
    448 			return (b);
    449 		}
    450 #ifdef STATISTICS
    451 	++mp->cachemiss;
    452 #endif
    453 	return (NULL);
    454 }
    455 
    456 #ifdef STATISTICS
    457 /*
    458  * MPOOL_STAT -- cache statistics
    459  *
    460  * Parameters:
    461  *	mp:	mpool cookie
    462  */
    463 void
    464 mpool_stat(mp)
    465 	MPOOL *mp;
    466 {
    467 	BKT *b;
    468 	int cnt;
    469 	char *sep;
    470 
    471 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
    472 	(void)fprintf(stderr,
    473 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
    474 	    mp->pagesize, mp->curcache, mp->maxcache);
    475 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
    476 	    mp->pageput, mp->pageget, mp->pagenew);
    477 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
    478 	    mp->pagealloc, mp->pageflush);
    479 	if (mp->cachehit + mp->cachemiss)
    480 		(void)fprintf(stderr,
    481 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
    482 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
    483 		    * 100, mp->cachehit, mp->cachemiss);
    484 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
    485 	    mp->pageread, mp->pagewrite);
    486 
    487 	sep = "";
    488 	cnt = 0;
    489 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
    490 		(void)fprintf(stderr, "%s%d", sep, b->pgno);
    491 		if (b->flags & MPOOL_DIRTY)
    492 			(void)fprintf(stderr, "d");
    493 		if (b->flags & MPOOL_PINNED)
    494 			(void)fprintf(stderr, "P");
    495 		if (++cnt == 10) {
    496 			sep = "\n";
    497 			cnt = 0;
    498 		} else
    499 			sep = ", ";
    500 
    501 	}
    502 	(void)fprintf(stderr, "\n");
    503 }
    504 #endif
    505 
    506 #ifdef DEBUG
    507 #if __STDC__
    508 #include <stdarg.h>
    509 #else
    510 #include <varargs.h>
    511 #endif
    512 
/*
 * __MPOOLERR -- report a fatal pool error (DEBUG builds only).
 *
 * Parameters:
 *	fmt:	printf-style format, followed by its arguments
 *
 * Prints the formatted message and a newline on stderr, then calls
 * abort(); it does not return.
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

	/* <stdarg.h> and <varargs.h> start the argument walk differently. */
#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);

	(void)fprintf(stderr, "\n");
	abort();
	/* NOTREACHED */
}
    534 #endif
    535