Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.15
      1 /*	$NetBSD: mpool.c,v 1.15 2006/01/24 17:37:05 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 #if 0
     35 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
     36 #else
     37 __RCSID("$NetBSD: mpool.c,v 1.15 2006/01/24 17:37:05 christos Exp $");
     38 #endif
     39 #endif /* LIBC_SCCS and not lint */
     40 
     41 #include "namespace.h"
     42 #include <sys/queue.h>
     43 #include <sys/stat.h>
     44 
     45 #include <errno.h>
     46 #include <stdio.h>
     47 #include <stdlib.h>
     48 #include <string.h>
     49 #include <unistd.h>
     50 
     51 #include <db.h>
     52 
     53 #define	__MPOOLINTERFACE_PRIVATE
     54 #include <mpool.h>
     55 
     56 #ifdef __weak_alias
     57 __weak_alias(mpool_close,_mpool_close)
     58 __weak_alias(mpool_filter,_mpool_filter)
     59 __weak_alias(mpool_get,_mpool_get)
     60 __weak_alias(mpool_new,_mpool_new)
     61 __weak_alias(mpool_open,_mpool_open)
     62 __weak_alias(mpool_put,_mpool_put)
     63 __weak_alias(mpool_sync,_mpool_sync)
     64 #endif
     65 
     66 static BKT *mpool_bkt __P((MPOOL *));
     67 static BKT *mpool_look __P((MPOOL *, pgno_t));
     68 static int  mpool_write __P((MPOOL *, BKT *));
     69 
     70 /*
     71  * mpool_open --
     72  *	Initialize a memory pool.
     73  */
     74 /*ARGSUSED*/
     75 MPOOL *
     76 mpool_open(key, fd, pagesize, maxcache)
     77 	void *key;
     78 	int fd;
     79 	pgno_t pagesize, maxcache;
     80 {
     81 	struct stat sb;
     82 	MPOOL *mp;
     83 	int entry;
     84 
     85 	/*
     86 	 * Get information about the file.
     87 	 *
     88 	 * XXX
     89 	 * We don't currently handle pipes, although we should.
     90 	 */
     91 	if (fstat(fd, &sb))
     92 		return (NULL);
     93 	if (!S_ISREG(sb.st_mode)) {
     94 		errno = ESPIPE;
     95 		return (NULL);
     96 	}
     97 
     98 	/* Allocate and initialize the MPOOL cookie. */
     99 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
    100 		return (NULL);
    101 	CIRCLEQ_INIT(&mp->lqh);
    102 	for (entry = 0; entry < HASHSIZE; ++entry)
    103 		CIRCLEQ_INIT(&mp->hqh[entry]);
    104 	mp->maxcache = maxcache;
    105 	mp->npages = (pgno_t)(sb.st_size / pagesize);
    106 	mp->pagesize = pagesize;
    107 	mp->fd = fd;
    108 	return (mp);
    109 }
    110 
    111 /*
    112  * mpool_filter --
    113  *	Initialize input/output filters.
    114  */
    115 void
    116 mpool_filter(mp, pgin, pgout, pgcookie)
    117 	MPOOL *mp;
    118 	void (*pgin) __P((void *, pgno_t, void *));
    119 	void (*pgout) __P((void *, pgno_t, void *));
    120 	void *pgcookie;
    121 {
    122 	mp->pgin = pgin;
    123 	mp->pgout = pgout;
    124 	mp->pgcookie = pgcookie;
    125 }
    126 
    127 /*
    128  * mpool_new --
    129  *	Get a new page of memory.
    130  */
    131 void *
    132 mpool_new(mp, pgnoaddr)
    133 	MPOOL *mp;
    134 	pgno_t *pgnoaddr;
    135 {
    136 	struct _hqh *head;
    137 	BKT *bp;
    138 
    139 	if (mp->npages == MAX_PAGE_NUMBER) {
    140 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
    141 		abort();
    142 	}
    143 #ifdef STATISTICS
    144 	++mp->pagenew;
    145 #endif
    146 	/*
    147 	 * Get a BKT from the cache.  Assign a new page number, attach
    148 	 * it to the head of the hash chain, the tail of the lru chain,
    149 	 * and return.
    150 	 */
    151 	if ((bp = mpool_bkt(mp)) == NULL)
    152 		return (NULL);
    153 	*pgnoaddr = bp->pgno = mp->npages++;
    154 	bp->flags = MPOOL_PINNED;
    155 
    156 	head = &mp->hqh[HASHKEY(bp->pgno)];
    157 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    158 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    159 	return (bp->page);
    160 }
    161 
    162 /*
    163  * mpool_get
    164  *	Get a page.
    165  */
    166 /*ARGSUSED*/
    167 void *
    168 mpool_get(mp, pgno, flags)
    169 	MPOOL *mp;
    170 	pgno_t pgno;
    171 	u_int flags;				/* XXX not used? */
    172 {
    173 	struct _hqh *head;
    174 	BKT *bp;
    175 	off_t off;
    176 	int nr;
    177 
    178 	/* Check for attempt to retrieve a non-existent page. */
    179 	if (pgno >= mp->npages) {
    180 		errno = EINVAL;
    181 		return (NULL);
    182 	}
    183 
    184 #ifdef STATISTICS
    185 	++mp->pageget;
    186 #endif
    187 
    188 	/* Check for a page that is cached. */
    189 	if ((bp = mpool_look(mp, pgno)) != NULL) {
    190 #ifdef DEBUG
    191 		if (bp->flags & MPOOL_PINNED) {
    192 			(void)fprintf(stderr,
    193 			    "mpool_get: page %d already pinned\n", bp->pgno);
    194 			abort();
    195 		}
    196 #endif
    197 		/*
    198 		 * Move the page to the head of the hash chain and the tail
    199 		 * of the lru chain.
    200 		 */
    201 		head = &mp->hqh[HASHKEY(bp->pgno)];
    202 		CIRCLEQ_REMOVE(head, bp, hq);
    203 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
    204 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    205 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    206 
    207 		/* Return a pinned page. */
    208 		bp->flags |= MPOOL_PINNED;
    209 		return (bp->page);
    210 	}
    211 
    212 	/* Get a page from the cache. */
    213 	if ((bp = mpool_bkt(mp)) == NULL)
    214 		return (NULL);
    215 
    216 	/* Read in the contents. */
    217 #ifdef STATISTICS
    218 	++mp->pageread;
    219 #endif
    220 	off = mp->pagesize * pgno;
    221 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
    222 		if (nr >= 0)
    223 			errno = EFTYPE;
    224 		return (NULL);
    225 	}
    226 
    227 	/* Set the page number, pin the page. */
    228 	bp->pgno = pgno;
    229 	bp->flags = MPOOL_PINNED;
    230 
    231 	/*
    232 	 * Add the page to the head of the hash chain and the tail
    233 	 * of the lru chain.
    234 	 */
    235 	head = &mp->hqh[HASHKEY(bp->pgno)];
    236 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    237 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    238 
    239 	/* Run through the user's filter. */
    240 	if (mp->pgin != NULL)
    241 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    242 
    243 	return (bp->page);
    244 }
    245 
    246 /*
    247  * mpool_put
    248  *	Return a page.
    249  */
    250 /*ARGSUSED*/
    251 int
    252 mpool_put(mp, page, flags)
    253 	MPOOL *mp;
    254 	void *page;
    255 	u_int flags;
    256 {
    257 	BKT *bp;
    258 
    259 #ifdef STATISTICS
    260 	++mp->pageput;
    261 #endif
    262 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
    263 #ifdef DEBUG
    264 	if (!(bp->flags & MPOOL_PINNED)) {
    265 		(void)fprintf(stderr,
    266 		    "mpool_put: page %d not pinned\n", bp->pgno);
    267 		abort();
    268 	}
    269 #endif
    270 	bp->flags &= ~MPOOL_PINNED;
    271 	bp->flags |= flags & MPOOL_DIRTY;
    272 	return (RET_SUCCESS);
    273 }
    274 
    275 /*
    276  * mpool_close
    277  *	Close the buffer pool.
    278  */
    279 int
    280 mpool_close(mp)
    281 	MPOOL *mp;
    282 {
    283 	BKT *bp;
    284 
    285 	/* Free up any space allocated to the lru pages. */
    286 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
    287 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
    288 		free(bp);
    289 	}
    290 
    291 	/* Free the MPOOL cookie. */
    292 	free(mp);
    293 	return (RET_SUCCESS);
    294 }
    295 
    296 /*
    297  * mpool_sync
    298  *	Sync the pool to disk.
    299  */
    300 int
    301 mpool_sync(mp)
    302 	MPOOL *mp;
    303 {
    304 	BKT *bp;
    305 
    306 	/* Walk the lru chain, flushing any dirty pages to disk. */
    307 	for (bp = mp->lqh.cqh_first;
    308 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    309 		if (bp->flags & MPOOL_DIRTY &&
    310 		    mpool_write(mp, bp) == RET_ERROR)
    311 			return (RET_ERROR);
    312 
    313 	/* Sync the file descriptor. */
    314 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    315 }
    316 
    317 /*
    318  * mpool_bkt
    319  *	Get a page from the cache (or create one).
    320  */
    321 static BKT *
    322 mpool_bkt(mp)
    323 	MPOOL *mp;
    324 {
    325 	struct _hqh *head;
    326 	BKT *bp;
    327 
    328 	/* If under the max cached, always create a new page. */
    329 	if (mp->curcache < mp->maxcache)
    330 		goto new;
    331 
    332 	/*
    333 	 * If the cache is max'd out, walk the lru list for a buffer we
    334 	 * can flush.  If we find one, write it (if necessary) and take it
    335 	 * off any lists.  If we don't find anything we grow the cache anyway.
    336 	 * The cache never shrinks.
    337 	 */
    338 	for (bp = mp->lqh.cqh_first;
    339 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    340 		if (!(bp->flags & MPOOL_PINNED)) {
    341 			/* Flush if dirty. */
    342 			if (bp->flags & MPOOL_DIRTY &&
    343 			    mpool_write(mp, bp) == RET_ERROR)
    344 				return (NULL);
    345 #ifdef STATISTICS
    346 			++mp->pageflush;
    347 #endif
    348 			/* Remove from the hash and lru queues. */
    349 			head = &mp->hqh[HASHKEY(bp->pgno)];
    350 			CIRCLEQ_REMOVE(head, bp, hq);
    351 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    352 #ifdef DEBUG
    353 			{
    354 				void *spage = bp->page;
    355 				(void)memset(bp, 0xff,
    356 				    (size_t)(sizeof(BKT) + mp->pagesize));
    357 				bp->page = spage;
    358 			}
    359 #endif
    360 			return (bp);
    361 		}
    362 
    363 new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
    364 		return (NULL);
    365 #ifdef STATISTICS
    366 	++mp->pagealloc;
    367 #endif
    368 #if defined(DEBUG) || defined(PURIFY)
    369 	(void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
    370 #endif
    371 	bp->page = (char *)(void *)bp + sizeof(BKT);
    372 	++mp->curcache;
    373 	return (bp);
    374 }
    375 
    376 /*
    377  * mpool_write
    378  *	Write a page to disk.
    379  */
    380 static int
    381 mpool_write(mp, bp)
    382 	MPOOL *mp;
    383 	BKT *bp;
    384 {
    385 	off_t off;
    386 
    387 #ifdef STATISTICS
    388 	++mp->pagewrite;
    389 #endif
    390 
    391 	/* Run through the user's filter. */
    392 	if (mp->pgout)
    393 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
    394 
    395 	off = mp->pagesize * bp->pgno;
    396 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
    397 		return (RET_ERROR);
    398 
    399 	/*
    400 	 * Re-run through the input filter since this page may soon be
    401 	 * accessed via the cache, and whatever the user's output filter
    402 	 * did may screw things up if we don't let the input filter
    403 	 * restore the in-core copy.
    404 	 */
    405 	if (mp->pgin)
    406 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    407 
    408 	bp->flags &= ~MPOOL_DIRTY;
    409 	return (RET_SUCCESS);
    410 }
    411 
    412 /*
    413  * mpool_look
    414  *	Lookup a page in the cache.
    415  */
    416 static BKT *
    417 mpool_look(mp, pgno)
    418 	MPOOL *mp;
    419 	pgno_t pgno;
    420 {
    421 	struct _hqh *head;
    422 	BKT *bp;
    423 
    424 	head = &mp->hqh[HASHKEY(pgno)];
    425 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
    426 		if (bp->pgno == pgno) {
    427 #ifdef STATISTICS
    428 			++mp->cachehit;
    429 #endif
    430 			return (bp);
    431 		}
    432 #ifdef STATISTICS
    433 	++mp->cachemiss;
    434 #endif
    435 	return (NULL);
    436 }
    437 
    438 #ifdef STATISTICS
    439 /*
    440  * mpool_stat
    441  *	Print out cache statistics.
    442  */
    443 void
    444 mpool_stat(mp)
    445 	MPOOL *mp;
    446 {
    447 	BKT *bp;
    448 	int cnt;
    449 	char *sep;
    450 
    451 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
    452 	(void)fprintf(stderr,
    453 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
    454 	    mp->pagesize, mp->curcache, mp->maxcache);
    455 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
    456 	    mp->pageput, mp->pageget, mp->pagenew);
    457 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
    458 	    mp->pagealloc, mp->pageflush);
    459 	if (mp->cachehit + mp->cachemiss)
    460 		(void)fprintf(stderr,
    461 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
    462 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
    463 		    * 100, mp->cachehit, mp->cachemiss);
    464 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
    465 	    mp->pageread, mp->pagewrite);
    466 
    467 	sep = "";
    468 	cnt = 0;
    469 	for (bp = mp->lqh.cqh_first;
    470 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
    471 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
    472 		if (bp->flags & MPOOL_DIRTY)
    473 			(void)fprintf(stderr, "d");
    474 		if (bp->flags & MPOOL_PINNED)
    475 			(void)fprintf(stderr, "P");
    476 		if (++cnt == 10) {
    477 			sep = "\n";
    478 			cnt = 0;
    479 		} else
    480 			sep = ", ";
    481 
    482 	}
    483 	(void)fprintf(stderr, "\n");
    484 }
    485 #endif
    486