Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.12
      1 /*	$NetBSD: mpool.c,v 1.12 2000/01/22 22:19:08 mycroft Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #if defined(LIBC_SCCS) && !defined(lint)
     38 #if 0
     39 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
     40 #else
     41 __RCSID("$NetBSD: mpool.c,v 1.12 2000/01/22 22:19:08 mycroft Exp $");
     42 #endif
     43 #endif /* LIBC_SCCS and not lint */
     44 
     45 #include "namespace.h"
     46 #include <sys/param.h>
     47 #include <sys/queue.h>
     48 #include <sys/stat.h>
     49 
     50 #include <errno.h>
     51 #include <stdio.h>
     52 #include <stdlib.h>
     53 #include <string.h>
     54 #include <unistd.h>
     55 
     56 #include <db.h>
     57 
     58 #define	__MPOOLINTERFACE_PRIVATE
     59 #include <mpool.h>
     60 
     61 #ifdef __weak_alias
     62 __weak_alias(mpool_close,_mpool_close)
     63 __weak_alias(mpool_filter,_mpool_filter)
     64 __weak_alias(mpool_get,_mpool_get)
     65 __weak_alias(mpool_new,_mpool_new)
     66 __weak_alias(mpool_open,_mpool_open)
     67 __weak_alias(mpool_put,_mpool_put)
     68 __weak_alias(mpool_sync,_mpool_sync)
     69 #endif
     70 
     71 static BKT *mpool_bkt __P((MPOOL *));
     72 static BKT *mpool_look __P((MPOOL *, pgno_t));
     73 static int  mpool_write __P((MPOOL *, BKT *));
     74 
     75 /*
     76  * mpool_open --
     77  *	Initialize a memory pool.
     78  */
     79 /*ARGSUSED*/
     80 MPOOL *
     81 mpool_open(key, fd, pagesize, maxcache)
     82 	void *key;
     83 	int fd;
     84 	pgno_t pagesize, maxcache;
     85 {
     86 	struct stat sb;
     87 	MPOOL *mp;
     88 	int entry;
     89 
     90 	/*
     91 	 * Get information about the file.
     92 	 *
     93 	 * XXX
     94 	 * We don't currently handle pipes, although we should.
     95 	 */
     96 	if (fstat(fd, &sb))
     97 		return (NULL);
     98 	if (!S_ISREG(sb.st_mode)) {
     99 		errno = ESPIPE;
    100 		return (NULL);
    101 	}
    102 
    103 	/* Allocate and initialize the MPOOL cookie. */
    104 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
    105 		return (NULL);
    106 	CIRCLEQ_INIT(&mp->lqh);
    107 	for (entry = 0; entry < HASHSIZE; ++entry)
    108 		CIRCLEQ_INIT(&mp->hqh[entry]);
    109 	mp->maxcache = maxcache;
    110 	mp->npages = (pgno_t)(sb.st_size / pagesize);
    111 	mp->pagesize = pagesize;
    112 	mp->fd = fd;
    113 	return (mp);
    114 }
    115 
    116 /*
    117  * mpool_filter --
    118  *	Initialize input/output filters.
    119  */
    120 void
    121 mpool_filter(mp, pgin, pgout, pgcookie)
    122 	MPOOL *mp;
    123 	void (*pgin) __P((void *, pgno_t, void *));
    124 	void (*pgout) __P((void *, pgno_t, void *));
    125 	void *pgcookie;
    126 {
    127 	mp->pgin = pgin;
    128 	mp->pgout = pgout;
    129 	mp->pgcookie = pgcookie;
    130 }
    131 
    132 /*
    133  * mpool_new --
    134  *	Get a new page of memory.
    135  */
    136 void *
    137 mpool_new(mp, pgnoaddr)
    138 	MPOOL *mp;
    139 	pgno_t *pgnoaddr;
    140 {
    141 	struct _hqh *head;
    142 	BKT *bp;
    143 
    144 	if (mp->npages == MAX_PAGE_NUMBER) {
    145 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
    146 		abort();
    147 	}
    148 #ifdef STATISTICS
    149 	++mp->pagenew;
    150 #endif
    151 	/*
    152 	 * Get a BKT from the cache.  Assign a new page number, attach
    153 	 * it to the head of the hash chain, the tail of the lru chain,
    154 	 * and return.
    155 	 */
    156 	if ((bp = mpool_bkt(mp)) == NULL)
    157 		return (NULL);
    158 	*pgnoaddr = bp->pgno = mp->npages++;
    159 	bp->flags = MPOOL_PINNED;
    160 
    161 	head = &mp->hqh[HASHKEY(bp->pgno)];
    162 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    163 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    164 	return (bp->page);
    165 }
    166 
    167 /*
    168  * mpool_get
    169  *	Get a page.
    170  */
    171 /*ARGSUSED*/
    172 void *
    173 mpool_get(mp, pgno, flags)
    174 	MPOOL *mp;
    175 	pgno_t pgno;
    176 	u_int flags;				/* XXX not used? */
    177 {
    178 	struct _hqh *head;
    179 	BKT *bp;
    180 	off_t off;
    181 	int nr;
    182 
    183 	/* Check for attempt to retrieve a non-existent page. */
    184 	if (pgno >= mp->npages) {
    185 		errno = EINVAL;
    186 		return (NULL);
    187 	}
    188 
    189 #ifdef STATISTICS
    190 	++mp->pageget;
    191 #endif
    192 
    193 	/* Check for a page that is cached. */
    194 	if ((bp = mpool_look(mp, pgno)) != NULL) {
    195 #ifdef DEBUG
    196 		if (bp->flags & MPOOL_PINNED) {
    197 			(void)fprintf(stderr,
    198 			    "mpool_get: page %d already pinned\n", bp->pgno);
    199 			abort();
    200 		}
    201 #endif
    202 		/*
    203 		 * Move the page to the head of the hash chain and the tail
    204 		 * of the lru chain.
    205 		 */
    206 		head = &mp->hqh[HASHKEY(bp->pgno)];
    207 		CIRCLEQ_REMOVE(head, bp, hq);
    208 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
    209 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    210 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    211 
    212 		/* Return a pinned page. */
    213 		bp->flags |= MPOOL_PINNED;
    214 		return (bp->page);
    215 	}
    216 
    217 	/* Get a page from the cache. */
    218 	if ((bp = mpool_bkt(mp)) == NULL)
    219 		return (NULL);
    220 
    221 	/* Read in the contents. */
    222 #ifdef STATISTICS
    223 	++mp->pageread;
    224 #endif
    225 	off = mp->pagesize * pgno;
    226 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
    227 		if (nr >= 0)
    228 			errno = EFTYPE;
    229 		return (NULL);
    230 	}
    231 
    232 	/* Set the page number, pin the page. */
    233 	bp->pgno = pgno;
    234 	bp->flags = MPOOL_PINNED;
    235 
    236 	/*
    237 	 * Add the page to the head of the hash chain and the tail
    238 	 * of the lru chain.
    239 	 */
    240 	head = &mp->hqh[HASHKEY(bp->pgno)];
    241 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    242 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    243 
    244 	/* Run through the user's filter. */
    245 	if (mp->pgin != NULL)
    246 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    247 
    248 	return (bp->page);
    249 }
    250 
    251 /*
    252  * mpool_put
    253  *	Return a page.
    254  */
    255 /*ARGSUSED*/
    256 int
    257 mpool_put(mp, page, flags)
    258 	MPOOL *mp;
    259 	void *page;
    260 	u_int flags;
    261 {
    262 	BKT *bp;
    263 
    264 #ifdef STATISTICS
    265 	++mp->pageput;
    266 #endif
    267 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
    268 #ifdef DEBUG
    269 	if (!(bp->flags & MPOOL_PINNED)) {
    270 		(void)fprintf(stderr,
    271 		    "mpool_put: page %d not pinned\n", bp->pgno);
    272 		abort();
    273 	}
    274 #endif
    275 	bp->flags &= ~MPOOL_PINNED;
    276 	bp->flags |= flags & MPOOL_DIRTY;
    277 	return (RET_SUCCESS);
    278 }
    279 
    280 /*
    281  * mpool_close
    282  *	Close the buffer pool.
    283  */
    284 int
    285 mpool_close(mp)
    286 	MPOOL *mp;
    287 {
    288 	BKT *bp;
    289 
    290 	/* Free up any space allocated to the lru pages. */
    291 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
    292 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
    293 		free(bp);
    294 	}
    295 
    296 	/* Free the MPOOL cookie. */
    297 	free(mp);
    298 	return (RET_SUCCESS);
    299 }
    300 
    301 /*
    302  * mpool_sync
    303  *	Sync the pool to disk.
    304  */
    305 int
    306 mpool_sync(mp)
    307 	MPOOL *mp;
    308 {
    309 	BKT *bp;
    310 
    311 	/* Walk the lru chain, flushing any dirty pages to disk. */
    312 	for (bp = mp->lqh.cqh_first;
    313 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    314 		if (bp->flags & MPOOL_DIRTY &&
    315 		    mpool_write(mp, bp) == RET_ERROR)
    316 			return (RET_ERROR);
    317 
    318 	/* Sync the file descriptor. */
    319 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    320 }
    321 
    322 /*
    323  * mpool_bkt
    324  *	Get a page from the cache (or create one).
    325  */
    326 static BKT *
    327 mpool_bkt(mp)
    328 	MPOOL *mp;
    329 {
    330 	struct _hqh *head;
    331 	BKT *bp;
    332 
    333 	/* If under the max cached, always create a new page. */
    334 	if (mp->curcache < mp->maxcache)
    335 		goto new;
    336 
    337 	/*
    338 	 * If the cache is max'd out, walk the lru list for a buffer we
    339 	 * can flush.  If we find one, write it (if necessary) and take it
    340 	 * off any lists.  If we don't find anything we grow the cache anyway.
    341 	 * The cache never shrinks.
    342 	 */
    343 	for (bp = mp->lqh.cqh_first;
    344 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    345 		if (!(bp->flags & MPOOL_PINNED)) {
    346 			/* Flush if dirty. */
    347 			if (bp->flags & MPOOL_DIRTY &&
    348 			    mpool_write(mp, bp) == RET_ERROR)
    349 				return (NULL);
    350 #ifdef STATISTICS
    351 			++mp->pageflush;
    352 #endif
    353 			/* Remove from the hash and lru queues. */
    354 			head = &mp->hqh[HASHKEY(bp->pgno)];
    355 			CIRCLEQ_REMOVE(head, bp, hq);
    356 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    357 #ifdef DEBUG
    358 			{ void *spage;
    359 				spage = bp->page;
    360 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    361 				bp->page = spage;
    362 			}
    363 #endif
    364 			return (bp);
    365 		}
    366 
    367 new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
    368 		return (NULL);
    369 #ifdef STATISTICS
    370 	++mp->pagealloc;
    371 #endif
    372 #if defined(DEBUG) || defined(PURIFY)
    373 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    374 #endif
    375 	bp->page = (char *)(void *)bp + sizeof(BKT);
    376 	++mp->curcache;
    377 	return (bp);
    378 }
    379 
    380 /*
    381  * mpool_write
    382  *	Write a page to disk.
    383  */
    384 static int
    385 mpool_write(mp, bp)
    386 	MPOOL *mp;
    387 	BKT *bp;
    388 {
    389 	off_t off;
    390 
    391 #ifdef STATISTICS
    392 	++mp->pagewrite;
    393 #endif
    394 
    395 	/* Run through the user's filter. */
    396 	if (mp->pgout)
    397 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
    398 
    399 	off = mp->pagesize * bp->pgno;
    400 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
    401 		return (RET_ERROR);
    402 
    403 	/*
    404 	 * Re-run through the input filter since this page may soon be
    405 	 * accessed via the cache, and whatever the user's output filter
    406 	 * did may screw things up if we don't let the input filter
    407 	 * restore the in-core copy.
    408 	 */
    409 	if (mp->pgin)
    410 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    411 
    412 	bp->flags &= ~MPOOL_DIRTY;
    413 	return (RET_SUCCESS);
    414 }
    415 
    416 /*
    417  * mpool_look
    418  *	Lookup a page in the cache.
    419  */
    420 static BKT *
    421 mpool_look(mp, pgno)
    422 	MPOOL *mp;
    423 	pgno_t pgno;
    424 {
    425 	struct _hqh *head;
    426 	BKT *bp;
    427 
    428 	head = &mp->hqh[HASHKEY(pgno)];
    429 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
    430 		if (bp->pgno == pgno) {
    431 #ifdef STATISTICS
    432 			++mp->cachehit;
    433 #endif
    434 			return (bp);
    435 		}
    436 #ifdef STATISTICS
    437 	++mp->cachemiss;
    438 #endif
    439 	return (NULL);
    440 }
    441 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print the pool's counters to stderr, followed by the page number
 *	of every buffer on the lru chain, ten per line.  A buffer's number
 *	is suffixed with "d" if it is dirty and "P" if it is pinned.
 *
 *	Every numeric argument is cast to the exact type its conversion
 *	expects, so the output does not depend on how wide the MPOOL and
 *	BKT fields happen to be.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there have been no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	/* sep is printed before each entry, so the first gets nothing. */
	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		(void)fprintf(stderr, "%s%d", sep, (int)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* After ten entries the next separator is a newline. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
    490