Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.14
      1 /*	$NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 #if 0
     35 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
     36 #else
     37 __RCSID("$NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $");
     38 #endif
     39 #endif /* LIBC_SCCS and not lint */
     40 
     41 #include "namespace.h"
     42 #include <sys/queue.h>
     43 #include <sys/stat.h>
     44 
     45 #include <errno.h>
     46 #include <stdio.h>
     47 #include <stdlib.h>
     48 #include <string.h>
     49 #include <unistd.h>
     50 
     51 #include <db.h>
     52 
     53 #define	__MPOOLINTERFACE_PRIVATE
     54 #include <mpool.h>
     55 
     56 #ifdef __weak_alias	/* NOTE(review): presumably publishes each public name as a weak alias of its _-prefixed symbol -- confirm against namespace.h */
     57 __weak_alias(mpool_close,_mpool_close)
     58 __weak_alias(mpool_filter,_mpool_filter)
     59 __weak_alias(mpool_get,_mpool_get)
     60 __weak_alias(mpool_new,_mpool_new)
     61 __weak_alias(mpool_open,_mpool_open)
     62 __weak_alias(mpool_put,_mpool_put)
     63 __weak_alias(mpool_sync,_mpool_sync)
     64 #endif
     65 
     66 static BKT *mpool_bkt __P((MPOOL *));		/* obtain a bucket: recycle an unpinned one or allocate */
     67 static BKT *mpool_look __P((MPOOL *, pgno_t));	/* find a cached page by number; NULL if absent */
     68 static int  mpool_write __P((MPOOL *, BKT *));	/* write one bucket's page to the file */
     69 
     70 /*
     71  * mpool_open --
     72  *	Initialize a memory pool.
     73  */
     74 /*ARGSUSED*/
     75 MPOOL *
     76 mpool_open(key, fd, pagesize, maxcache)
     77 	void *key;
     78 	int fd;
     79 	pgno_t pagesize, maxcache;
     80 {
     81 	struct stat sb;
     82 	MPOOL *mp;
     83 	int entry;
     84 
     85 	/*
     86 	 * Get information about the file.
     87 	 *
     88 	 * XXX
     89 	 * We don't currently handle pipes, although we should.
     90 	 */
     91 	if (fstat(fd, &sb))
     92 		return (NULL);
     93 	if (!S_ISREG(sb.st_mode)) {
     94 		errno = ESPIPE;
     95 		return (NULL);
     96 	}
     97 
     98 	/* Allocate and initialize the MPOOL cookie. */
     99 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
    100 		return (NULL);
    101 	CIRCLEQ_INIT(&mp->lqh);
    102 	for (entry = 0; entry < HASHSIZE; ++entry)
    103 		CIRCLEQ_INIT(&mp->hqh[entry]);
    104 	mp->maxcache = maxcache;
    105 	mp->npages = (pgno_t)(sb.st_size / pagesize);
    106 	mp->pagesize = pagesize;
    107 	mp->fd = fd;
    108 	return (mp);
    109 }
    110 
    111 /*
    112  * mpool_filter --
    113  *	Initialize input/output filters.
    114  */
    115 void
    116 mpool_filter(mp, pgin, pgout, pgcookie)
    117 	MPOOL *mp;
    118 	void (*pgin) __P((void *, pgno_t, void *));
    119 	void (*pgout) __P((void *, pgno_t, void *));
    120 	void *pgcookie;
    121 {
    122 	mp->pgin = pgin;
    123 	mp->pgout = pgout;
    124 	mp->pgcookie = pgcookie;
    125 }
    126 
    127 /*
    128  * mpool_new --
    129  *	Get a new page of memory.
    130  */
    131 void *
    132 mpool_new(mp, pgnoaddr)
    133 	MPOOL *mp;
    134 	pgno_t *pgnoaddr;
    135 {
    136 	struct _hqh *head;
    137 	BKT *bp;
    138 
    139 	if (mp->npages == MAX_PAGE_NUMBER) {
    140 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
    141 		abort();
    142 	}
    143 #ifdef STATISTICS
    144 	++mp->pagenew;
    145 #endif
    146 	/*
    147 	 * Get a BKT from the cache.  Assign a new page number, attach
    148 	 * it to the head of the hash chain, the tail of the lru chain,
    149 	 * and return.
    150 	 */
    151 	if ((bp = mpool_bkt(mp)) == NULL)
    152 		return (NULL);
    153 	*pgnoaddr = bp->pgno = mp->npages++;
    154 	bp->flags = MPOOL_PINNED;
    155 
    156 	head = &mp->hqh[HASHKEY(bp->pgno)];
    157 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    158 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    159 	return (bp->page);
    160 }
    161 
    162 /*
    163  * mpool_get
    164  *	Get a page.
    165  */
    166 /*ARGSUSED*/
    167 void *
    168 mpool_get(mp, pgno, flags)
    169 	MPOOL *mp;
    170 	pgno_t pgno;
    171 	u_int flags;				/* XXX not used? */
    172 {
    173 	struct _hqh *head;
    174 	BKT *bp;
    175 	off_t off;
    176 	int nr;
    177 
    178 	/* Check for attempt to retrieve a non-existent page. */
    179 	if (pgno >= mp->npages) {
    180 		errno = EINVAL;
    181 		return (NULL);
    182 	}
    183 
    184 #ifdef STATISTICS
    185 	++mp->pageget;
    186 #endif
    187 
    188 	/* Check for a page that is cached. */
    189 	if ((bp = mpool_look(mp, pgno)) != NULL) {
    190 #ifdef DEBUG
    191 		if (bp->flags & MPOOL_PINNED) {
    192 			(void)fprintf(stderr,
    193 			    "mpool_get: page %d already pinned\n", bp->pgno);
    194 			abort();
    195 		}
    196 #endif
    197 		/*
    198 		 * Move the page to the head of the hash chain and the tail
    199 		 * of the lru chain.
    200 		 */
    201 		head = &mp->hqh[HASHKEY(bp->pgno)];
    202 		CIRCLEQ_REMOVE(head, bp, hq);
    203 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
    204 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    205 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    206 
    207 		/* Return a pinned page. */
    208 		bp->flags |= MPOOL_PINNED;
    209 		return (bp->page);
    210 	}
    211 
    212 	/* Get a page from the cache. */
    213 	if ((bp = mpool_bkt(mp)) == NULL)
    214 		return (NULL);
    215 
    216 	/* Read in the contents. */
    217 #ifdef STATISTICS
    218 	++mp->pageread;
    219 #endif
    220 	off = mp->pagesize * pgno;
    221 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
    222 		if (nr >= 0)
    223 			errno = EFTYPE;
    224 		return (NULL);
    225 	}
    226 
    227 	/* Set the page number, pin the page. */
    228 	bp->pgno = pgno;
    229 	bp->flags = MPOOL_PINNED;
    230 
    231 	/*
    232 	 * Add the page to the head of the hash chain and the tail
    233 	 * of the lru chain.
    234 	 */
    235 	head = &mp->hqh[HASHKEY(bp->pgno)];
    236 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    237 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    238 
    239 	/* Run through the user's filter. */
    240 	if (mp->pgin != NULL)
    241 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    242 
    243 	return (bp->page);
    244 }
    245 
    246 /*
    247  * mpool_put
    248  *	Return a page.
    249  */
    250 /*ARGSUSED*/
    251 int
    252 mpool_put(mp, page, flags)
    253 	MPOOL *mp;
    254 	void *page;
    255 	u_int flags;
    256 {
    257 	BKT *bp;
    258 
    259 #ifdef STATISTICS
    260 	++mp->pageput;
    261 #endif
    262 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
    263 #ifdef DEBUG
    264 	if (!(bp->flags & MPOOL_PINNED)) {
    265 		(void)fprintf(stderr,
    266 		    "mpool_put: page %d not pinned\n", bp->pgno);
    267 		abort();
    268 	}
    269 #endif
    270 	bp->flags &= ~MPOOL_PINNED;
    271 	bp->flags |= flags & MPOOL_DIRTY;
    272 	return (RET_SUCCESS);
    273 }
    274 
    275 /*
    276  * mpool_close
    277  *	Close the buffer pool.
    278  */
    279 int
    280 mpool_close(mp)
    281 	MPOOL *mp;
    282 {
    283 	BKT *bp;
    284 
    285 	/* Free up any space allocated to the lru pages. */
    286 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
    287 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
    288 		free(bp);
    289 	}
    290 
    291 	/* Free the MPOOL cookie. */
    292 	free(mp);
    293 	return (RET_SUCCESS);
    294 }
    295 
    296 /*
    297  * mpool_sync
    298  *	Sync the pool to disk.
    299  */
    300 int
    301 mpool_sync(mp)
    302 	MPOOL *mp;
    303 {
    304 	BKT *bp;
    305 
    306 	/* Walk the lru chain, flushing any dirty pages to disk. */
    307 	for (bp = mp->lqh.cqh_first;
    308 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    309 		if (bp->flags & MPOOL_DIRTY &&
    310 		    mpool_write(mp, bp) == RET_ERROR)
    311 			return (RET_ERROR);
    312 
    313 	/* Sync the file descriptor. */
    314 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    315 }
    316 
    317 /*
    318  * mpool_bkt
    319  *	Get a page from the cache (or create one).
    320  */
    321 static BKT *
    322 mpool_bkt(mp)
    323 	MPOOL *mp;
    324 {
    325 	struct _hqh *head;
    326 	BKT *bp;
    327 
    328 	/* If under the max cached, always create a new page. */
    329 	if (mp->curcache < mp->maxcache)
    330 		goto new;
    331 
    332 	/*
    333 	 * If the cache is max'd out, walk the lru list for a buffer we
    334 	 * can flush.  If we find one, write it (if necessary) and take it
    335 	 * off any lists.  If we don't find anything we grow the cache anyway.
    336 	 * The cache never shrinks.
    337 	 */
    338 	for (bp = mp->lqh.cqh_first;
    339 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    340 		if (!(bp->flags & MPOOL_PINNED)) {
    341 			/* Flush if dirty. */
    342 			if (bp->flags & MPOOL_DIRTY &&
    343 			    mpool_write(mp, bp) == RET_ERROR)
    344 				return (NULL);
    345 #ifdef STATISTICS
    346 			++mp->pageflush;
    347 #endif
    348 			/* Remove from the hash and lru queues. */
    349 			head = &mp->hqh[HASHKEY(bp->pgno)];
    350 			CIRCLEQ_REMOVE(head, bp, hq);
    351 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    352 #ifdef DEBUG
    353 			{ void *spage;
    354 				spage = bp->page;
    355 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    356 				bp->page = spage;
    357 			}
    358 #endif
    359 			return (bp);
    360 		}
    361 
    362 new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
    363 		return (NULL);
    364 #ifdef STATISTICS
    365 	++mp->pagealloc;
    366 #endif
    367 #if defined(DEBUG) || defined(PURIFY)
    368 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    369 #endif
    370 	bp->page = (char *)(void *)bp + sizeof(BKT);
    371 	++mp->curcache;
    372 	return (bp);
    373 }
    374 
    375 /*
    376  * mpool_write
    377  *	Write a page to disk.
    378  */
    379 static int
    380 mpool_write(mp, bp)
    381 	MPOOL *mp;
    382 	BKT *bp;
    383 {
    384 	off_t off;
    385 
    386 #ifdef STATISTICS
    387 	++mp->pagewrite;
    388 #endif
    389 
    390 	/* Run through the user's filter. */
    391 	if (mp->pgout)
    392 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
    393 
    394 	off = mp->pagesize * bp->pgno;
    395 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
    396 		return (RET_ERROR);
    397 
    398 	/*
    399 	 * Re-run through the input filter since this page may soon be
    400 	 * accessed via the cache, and whatever the user's output filter
    401 	 * did may screw things up if we don't let the input filter
    402 	 * restore the in-core copy.
    403 	 */
    404 	if (mp->pgin)
    405 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    406 
    407 	bp->flags &= ~MPOOL_DIRTY;
    408 	return (RET_SUCCESS);
    409 }
    410 
    411 /*
    412  * mpool_look
    413  *	Lookup a page in the cache.
    414  */
    415 static BKT *
    416 mpool_look(mp, pgno)
    417 	MPOOL *mp;
    418 	pgno_t pgno;
    419 {
    420 	struct _hqh *head;
    421 	BKT *bp;
    422 
    423 	head = &mp->hqh[HASHKEY(pgno)];
    424 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
    425 		if (bp->pgno == pgno) {
    426 #ifdef STATISTICS
    427 			++mp->cachehit;
    428 #endif
    429 			return (bp);
    430 		}
    431 #ifdef STATISTICS
    432 	++mp->cachemiss;
    433 #endif
    434 	return (NULL);
    435 }
    436 
    437 #ifdef STATISTICS
    438 /*
    439  * mpool_stat
    440  *	Print out cache statistics.
    441  */
    442 void
    443 mpool_stat(mp)
    444 	MPOOL *mp;
    445 {
    446 	BKT *bp;
    447 	int cnt;
    448 	char *sep;
    449 
    450 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
    451 	(void)fprintf(stderr,
    452 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
    453 	    mp->pagesize, mp->curcache, mp->maxcache);
    454 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
    455 	    mp->pageput, mp->pageget, mp->pagenew);
    456 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
    457 	    mp->pagealloc, mp->pageflush);
    458 	if (mp->cachehit + mp->cachemiss)
    459 		(void)fprintf(stderr,
    460 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
    461 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
    462 		    * 100, mp->cachehit, mp->cachemiss);
    463 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
    464 	    mp->pageread, mp->pagewrite);
    465 
    466 	sep = "";
    467 	cnt = 0;
    468 	for (bp = mp->lqh.cqh_first;
    469 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
    470 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
    471 		if (bp->flags & MPOOL_DIRTY)
    472 			(void)fprintf(stderr, "d");
    473 		if (bp->flags & MPOOL_PINNED)
    474 			(void)fprintf(stderr, "P");
    475 		if (++cnt == 10) {
    476 			sep = "\n";
    477 			cnt = 0;
    478 		} else
    479 			sep = ", ";
    480 
    481 	}
    482 	(void)fprintf(stderr, "\n");
    483 }
    484 #endif
    485