Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.3
      1 /*-
      2  * Copyright (c) 1990, 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
     34 #if defined(LIBC_SCCS) && !defined(lint)
     35 /*static char *sccsid = "from: @(#)mpool.c	8.1 (Berkeley) 6/6/93";*/
     36 static char *rcsid = "$Id: mpool.c,v 1.3 1993/08/26 00:43:53 jtc Exp $";
     37 #endif /* LIBC_SCCS and not lint */
     38 
     39 #include <sys/param.h>
     40 #include <sys/stat.h>
     41 
     42 #include <errno.h>
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 #include <unistd.h>
     47 
     48 #include <db.h>
     49 #define	__MPOOLINTERFACE_PRIVATE
     50 #include "mpool.h"
     51 
     52 static BKT *mpool_bkt __P((MPOOL *));
     53 static BKT *mpool_look __P((MPOOL *, pgno_t));
     54 static int  mpool_write __P((MPOOL *, BKT *));
     55 #ifdef DEBUG
     56 static void __mpoolerr __P((const char *fmt, ...));
     57 #endif
     58 
     59 /*
     60  * MPOOL_OPEN -- initialize a memory pool.
     61  *
     62  * Parameters:
     63  *	key:		Shared buffer key.
     64  *	fd:		File descriptor.
     65  *	pagesize:	File page size.
     66  *	maxcache:	Max number of cached pages.
     67  *
     68  * Returns:
     69  *	MPOOL pointer, NULL on error.
     70  */
     71 MPOOL *
     72 mpool_open(key, fd, pagesize, maxcache)
     73 	DBT *key;
     74 	int fd;
     75 	pgno_t pagesize, maxcache;
     76 {
     77 	struct stat sb;
     78 	MPOOL *mp;
     79 	int entry;
     80 
     81 	if (fstat(fd, &sb))
     82 		return (NULL);
     83 	/* XXX
     84 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
     85 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
     86 	 * fairly close.
     87 	 */
     88 	if (!S_ISREG(sb.st_mode)) {
     89 		errno = ESPIPE;
     90 		return (NULL);
     91 	}
     92 
     93 	if ((mp = malloc(sizeof(MPOOL))) == NULL)
     94 		return (NULL);
     95 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
     96 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
     97 	for (entry = 0; entry < HASHSIZE; ++entry)
     98 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
     99 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
    100 		    (BKT *)&mp->hashtable[entry];
    101 	mp->curcache = 0;
    102 	mp->maxcache = maxcache;
    103 	mp->pagesize = pagesize;
    104 	mp->npages = sb.st_size / pagesize;
    105 	mp->fd = fd;
    106 	mp->pgcookie = NULL;
    107 	mp->pgin = mp->pgout = NULL;
    108 
    109 #ifdef STATISTICS
    110 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
    111 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
    112 	    mp->pagewrite = 0;
    113 #endif
    114 	return (mp);
    115 }
    116 
    117 /*
    118  * MPOOL_FILTER -- initialize input/output filters.
    119  *
    120  * Parameters:
    121  *	pgin:		Page in conversion routine.
    122  *	pgout:		Page out conversion routine.
    123  *	pgcookie:	Cookie for page in/out routines.
    124  */
    125 void
    126 mpool_filter(mp, pgin, pgout, pgcookie)
    127 	MPOOL *mp;
    128 	void (*pgin) __P((void *, pgno_t, void *));
    129 	void (*pgout) __P((void *, pgno_t, void *));
    130 	void *pgcookie;
    131 {
    132 	mp->pgin = pgin;
    133 	mp->pgout = pgout;
    134 	mp->pgcookie = pgcookie;
    135 }
    136 
    137 /*
    138  * MPOOL_NEW -- get a new page
    139  *
    140  * Parameters:
    141  *	mp:		mpool cookie
    142  *	pgnoadddr:	place to store new page number
    143  * Returns:
    144  *	RET_ERROR, RET_SUCCESS
    145  */
    146 void *
    147 mpool_new(mp, pgnoaddr)
    148 	MPOOL *mp;
    149 	pgno_t *pgnoaddr;
    150 {
    151 	BKT *b;
    152 	BKTHDR *hp;
    153 
    154 #ifdef STATISTICS
    155 	++mp->pagenew;
    156 #endif
    157 	/*
    158 	 * Get a BKT from the cache.  Assign a new page number, attach it to
    159 	 * the hash and lru chains and return.
    160 	 */
    161 	if ((b = mpool_bkt(mp)) == NULL)
    162 		return (NULL);
    163 	*pgnoaddr = b->pgno = mp->npages++;
    164 	b->flags = MPOOL_PINNED;
    165 	inshash(b, b->pgno);
    166 	inschain(b, &mp->lru);
    167 	return (b->page);
    168 }
    169 
    170 /*
    171  * MPOOL_GET -- get a page from the pool
    172  *
    173  * Parameters:
    174  *	mp:	mpool cookie
    175  *	pgno:	page number
    176  *	flags:	not used
    177  *
    178  * Returns:
    179  *	RET_ERROR, RET_SUCCESS
    180  */
    181 void *
    182 mpool_get(mp, pgno, flags)
    183 	MPOOL *mp;
    184 	pgno_t pgno;
    185 	u_int flags;		/* XXX not used? */
    186 {
    187 	BKT *b;
    188 	BKTHDR *hp;
    189 	off_t off;
    190 	int nr;
    191 
    192 	/*
    193 	 * If asking for a specific page that is already in the cache, find
    194 	 * it and return it.
    195 	 */
    196 	if (b = mpool_look(mp, pgno)) {
    197 #ifdef STATISTICS
    198 		++mp->pageget;
    199 #endif
    200 #ifdef DEBUG
    201 		if (b->flags & MPOOL_PINNED)
    202 			__mpoolerr("mpool_get: page %d already pinned",
    203 			    b->pgno);
    204 #endif
    205 		rmchain(b);
    206 		inschain(b, &mp->lru);
    207 		b->flags |= MPOOL_PINNED;
    208 		return (b->page);
    209 	}
    210 
    211 	/* Not allowed to retrieve a non-existent page. */
    212 	if (pgno >= mp->npages) {
    213 		errno = EINVAL;
    214 		return (NULL);
    215 	}
    216 
    217 	/* Get a page from the cache. */
    218 	if ((b = mpool_bkt(mp)) == NULL)
    219 		return (NULL);
    220 	b->pgno = pgno;
    221 	b->flags = MPOOL_PINNED;
    222 
    223 #ifdef STATISTICS
    224 	++mp->pageread;
    225 #endif
    226 	/* Read in the contents. */
    227 	off = mp->pagesize * pgno;
    228 	if (lseek(mp->fd, off, SEEK_SET) != off)
    229 		return (NULL);
    230 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
    231 		if (nr >= 0)
    232 			errno = EFTYPE;
    233 		return (NULL);
    234 	}
    235 	if (mp->pgin)
    236 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
    237 
    238 	inshash(b, b->pgno);
    239 	inschain(b, &mp->lru);
    240 #ifdef STATISTICS
    241 	++mp->pageget;
    242 #endif
    243 	return (b->page);
    244 }
    245 
    246 /*
    247  * MPOOL_PUT -- return a page to the pool
    248  *
    249  * Parameters:
    250  *	mp:	mpool cookie
    251  *	page:	page pointer
    252  *	pgno:	page number
    253  *
    254  * Returns:
    255  *	RET_ERROR, RET_SUCCESS
    256  */
    257 int
    258 mpool_put(mp, page, flags)
    259 	MPOOL *mp;
    260 	void *page;
    261 	u_int flags;
    262 {
    263 	BKT *baddr;
    264 #ifdef DEBUG
    265 	BKT *b;
    266 #endif
    267 
    268 #ifdef STATISTICS
    269 	++mp->pageput;
    270 #endif
    271 	baddr = (BKT *)((char *)page - sizeof(BKT));
    272 #ifdef DEBUG
    273 	if (!(baddr->flags & MPOOL_PINNED))
    274 		__mpoolerr("mpool_put: page %d not pinned", b->pgno);
    275 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
    276 		if (b == (BKT *)&mp->lru)
    277 			__mpoolerr("mpool_put: %0x: bad address", baddr);
    278 		if (b == baddr)
    279 			break;
    280 	}
    281 #endif
    282 	baddr->flags &= ~MPOOL_PINNED;
    283 	baddr->flags |= flags & MPOOL_DIRTY;
    284 	return (RET_SUCCESS);
    285 }
    286 
    287 /*
    288  * MPOOL_CLOSE -- close the buffer pool
    289  *
    290  * Parameters:
    291  *	mp:	mpool cookie
    292  *
    293  * Returns:
    294  *	RET_ERROR, RET_SUCCESS
    295  */
    296 int
    297 mpool_close(mp)
    298 	MPOOL *mp;
    299 {
    300 	BKT *b, *next;
    301 
    302 	/* Free up any space allocated to the lru pages. */
    303 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
    304 		next = b->cprev;
    305 		free(b);
    306 	}
    307 	free(mp);
    308 	return (RET_SUCCESS);
    309 }
    310 
    311 /*
    312  * MPOOL_SYNC -- sync the file to disk.
    313  *
    314  * Parameters:
    315  *	mp:	mpool cookie
    316  *
    317  * Returns:
    318  *	RET_ERROR, RET_SUCCESS
    319  */
    320 int
    321 mpool_sync(mp)
    322 	MPOOL *mp;
    323 {
    324 	BKT *b;
    325 
    326 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    327 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
    328 			return (RET_ERROR);
    329 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    330 }
    331 
    332 /*
    333  * MPOOL_BKT -- get/create a BKT from the cache
    334  *
    335  * Parameters:
    336  *	mp:	mpool cookie
    337  *
    338  * Returns:
    339  *	NULL on failure and a pointer to the BKT on success
    340  */
    341 static BKT *
    342 mpool_bkt(mp)
    343 	MPOOL *mp;
    344 {
    345 	BKT *b;
    346 
    347 	if (mp->curcache < mp->maxcache)
    348 		goto new;
    349 
    350 	/*
    351 	 * If the cache is maxxed out, search the lru list for a buffer we
    352 	 * can flush.  If we find one, write it if necessary and take it off
    353 	 * any lists.  If we don't find anything we grow the cache anyway.
    354 	 * The cache never shrinks.
    355 	 */
    356 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
    357 		if (!(b->flags & MPOOL_PINNED)) {
    358 			if (b->flags & MPOOL_DIRTY &&
    359 			    mpool_write(mp, b) == RET_ERROR)
    360 				return (NULL);
    361 			rmhash(b);
    362 			rmchain(b);
    363 #ifdef STATISTICS
    364 			++mp->pageflush;
    365 #endif
    366 #ifdef DEBUG
    367 			{
    368 				void *spage;
    369 				spage = b->page;
    370 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    371 				b->page = spage;
    372 			}
    373 #endif
    374 			return (b);
    375 		}
    376 
    377 new:	if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL)
    378 		return (NULL);
    379 #ifdef STATISTICS
    380 	++mp->pagealloc;
    381 #endif
    382 #ifdef DEBUG
    383 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
    384 #endif
    385 	b->page = (char *)b + sizeof(BKT);
    386 	++mp->curcache;
    387 	return (b);
    388 }
    389 
    390 /*
    391  * MPOOL_WRITE -- sync a page to disk
    392  *
    393  * Parameters:
    394  *	mp:	mpool cookie
    395  *
    396  * Returns:
    397  *	RET_ERROR, RET_SUCCESS
    398  */
    399 static int
    400 mpool_write(mp, b)
    401 	MPOOL *mp;
    402 	BKT *b;
    403 {
    404 	off_t off;
    405 
    406 	if (mp->pgout)
    407 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
    408 
    409 #ifdef STATISTICS
    410 	++mp->pagewrite;
    411 #endif
    412 	off = mp->pagesize * b->pgno;
    413 	if (lseek(mp->fd, off, SEEK_SET) != off)
    414 		return (RET_ERROR);
    415 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
    416 		return (RET_ERROR);
    417 	b->flags &= ~MPOOL_DIRTY;
    418 	return (RET_SUCCESS);
    419 }
    420 
    421 /*
    422  * MPOOL_LOOK -- lookup a page
    423  *
    424  * Parameters:
    425  *	mp:	mpool cookie
    426  *	pgno:	page number
    427  *
    428  * Returns:
    429  *	NULL on failure and a pointer to the BKT on success
    430  */
    431 static BKT *
    432 mpool_look(mp, pgno)
    433 	MPOOL *mp;
    434 	pgno_t pgno;
    435 {
    436 	register BKT *b;
    437 	register BKTHDR *tb;
    438 
    439 	/* XXX
    440 	 * If find the buffer, put it first on the hash chain so can
    441 	 * find it again quickly.
    442 	 */
    443 	tb = &mp->hashtable[HASHKEY(pgno)];
    444 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
    445 		if (b->pgno == pgno) {
    446 #ifdef STATISTICS
    447 			++mp->cachehit;
    448 #endif
    449 			return (b);
    450 		}
    451 #ifdef STATISTICS
    452 	++mp->cachemiss;
    453 #endif
    454 	return (NULL);
    455 }
    456 
#ifdef STATISTICS
/*
 * MPOOL_STAT -- cache statistics
 *
 * Parameters:
 *	mp:	mpool cookie
 *
 * Prints the pool counters on stderr, followed by the page numbers on
 * the lru chain, ten per line.  A page is suffixed with "d" when dirty
 * and "P" when pinned.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *b;
	int cnt;
	char *sep;

	/*
	 * Every counter is cast to unsigned long so the %lu conversions are
	 * correct whatever the underlying typedefs are.
	 */
	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there were no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)b->pgno);
		if (b->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (b->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Break the line after every tenth page. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
    506 
#ifdef DEBUG
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

/*
 * __MPOOLERR -- report a fatal consistency error
 *
 * Parameters:
 *	fmt:	printf(3)-style format, followed by its arguments
 *
 * Writes the formatted message and a newline to stderr, then calls
 * abort(); it does not return.
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fprintf(stderr, "\n");

	abort();
	/* NOTREACHED */
}
#endif /* DEBUG */
    536