Home | History | Annotate | Line # | Download | only in mpool
mpool.c revision 1.1.1.2
      1 /*-
      2  * Copyright (c) 1990, 1993, 1994
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
     34 #if defined(LIBC_SCCS) && !defined(lint)
     35 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
     36 #endif /* LIBC_SCCS and not lint */
     37 
     38 #include <sys/param.h>
     39 #include <sys/queue.h>
     40 #include <sys/stat.h>
     41 
     42 #include <errno.h>
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 #include <unistd.h>
     47 
     48 #include <db.h>
     49 
     50 #define	__MPOOLINTERFACE_PRIVATE
     51 #include <mpool.h>
     52 
     53 static BKT *mpool_bkt __P((MPOOL *));
     54 static BKT *mpool_look __P((MPOOL *, pgno_t));
     55 static int  mpool_write __P((MPOOL *, BKT *));
     56 
     57 /*
     58  * mpool_open --
     59  *	Initialize a memory pool.
     60  */
     61 MPOOL *
     62 mpool_open(key, fd, pagesize, maxcache)
     63 	void *key;
     64 	int fd;
     65 	pgno_t pagesize, maxcache;
     66 {
     67 	struct stat sb;
     68 	MPOOL *mp;
     69 	int entry;
     70 
     71 	/*
     72 	 * Get information about the file.
     73 	 *
     74 	 * XXX
     75 	 * We don't currently handle pipes, although we should.
     76 	 */
     77 	if (fstat(fd, &sb))
     78 		return (NULL);
     79 	if (!S_ISREG(sb.st_mode)) {
     80 		errno = ESPIPE;
     81 		return (NULL);
     82 	}
     83 
     84 	/* Allocate and initialize the MPOOL cookie. */
     85 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
     86 		return (NULL);
     87 	CIRCLEQ_INIT(&mp->lqh);
     88 	for (entry = 0; entry < HASHSIZE; ++entry)
     89 		CIRCLEQ_INIT(&mp->hqh[entry]);
     90 	mp->maxcache = maxcache;
     91 	mp->npages = sb.st_size / pagesize;
     92 	mp->pagesize = pagesize;
     93 	mp->fd = fd;
     94 	return (mp);
     95 }
     96 
     97 /*
     98  * mpool_filter --
     99  *	Initialize input/output filters.
    100  */
    101 void
    102 mpool_filter(mp, pgin, pgout, pgcookie)
    103 	MPOOL *mp;
    104 	void (*pgin) __P((void *, pgno_t, void *));
    105 	void (*pgout) __P((void *, pgno_t, void *));
    106 	void *pgcookie;
    107 {
    108 	mp->pgin = pgin;
    109 	mp->pgout = pgout;
    110 	mp->pgcookie = pgcookie;
    111 }
    112 
    113 /*
    114  * mpool_new --
    115  *	Get a new page of memory.
    116  */
    117 void *
    118 mpool_new(mp, pgnoaddr)
    119 	MPOOL *mp;
    120 	pgno_t *pgnoaddr;
    121 {
    122 	struct _hqh *head;
    123 	BKT *bp;
    124 
    125 	if (mp->npages == MAX_PAGE_NUMBER) {
    126 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
    127 		abort();
    128 	}
    129 #ifdef STATISTICS
    130 	++mp->pagenew;
    131 #endif
    132 	/*
    133 	 * Get a BKT from the cache.  Assign a new page number, attach
    134 	 * it to the head of the hash chain, the tail of the lru chain,
    135 	 * and return.
    136 	 */
    137 	if ((bp = mpool_bkt(mp)) == NULL)
    138 		return (NULL);
    139 	*pgnoaddr = bp->pgno = mp->npages++;
    140 	bp->flags = MPOOL_PINNED;
    141 
    142 	head = &mp->hqh[HASHKEY(bp->pgno)];
    143 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    144 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    145 	return (bp->page);
    146 }
    147 
    148 /*
    149  * mpool_get
    150  *	Get a page.
    151  */
    152 void *
    153 mpool_get(mp, pgno, flags)
    154 	MPOOL *mp;
    155 	pgno_t pgno;
    156 	u_int flags;				/* XXX not used? */
    157 {
    158 	struct _hqh *head;
    159 	BKT *bp;
    160 	off_t off;
    161 	int nr;
    162 
    163 	/* Check for attempt to retrieve a non-existent page. */
    164 	if (pgno >= mp->npages) {
    165 		errno = EINVAL;
    166 		return (NULL);
    167 	}
    168 
    169 #ifdef STATISTICS
    170 	++mp->pageget;
    171 #endif
    172 
    173 	/* Check for a page that is cached. */
    174 	if ((bp = mpool_look(mp, pgno)) != NULL) {
    175 #ifdef DEBUG
    176 		if (bp->flags & MPOOL_PINNED) {
    177 			(void)fprintf(stderr,
    178 			    "mpool_get: page %d already pinned\n", bp->pgno);
    179 			abort();
    180 		}
    181 #endif
    182 		/*
    183 		 * Move the page to the head of the hash chain and the tail
    184 		 * of the lru chain.
    185 		 */
    186 		head = &mp->hqh[HASHKEY(bp->pgno)];
    187 		CIRCLEQ_REMOVE(head, bp, hq);
    188 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
    189 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    190 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    191 
    192 		/* Return a pinned page. */
    193 		bp->flags |= MPOOL_PINNED;
    194 		return (bp->page);
    195 	}
    196 
    197 	/* Get a page from the cache. */
    198 	if ((bp = mpool_bkt(mp)) == NULL)
    199 		return (NULL);
    200 
    201 	/* Read in the contents. */
    202 #ifdef STATISTICS
    203 	++mp->pageread;
    204 #endif
    205 	off = mp->pagesize * pgno;
    206 	if (lseek(mp->fd, off, SEEK_SET) != off)
    207 		return (NULL);
    208 	if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
    209 		if (nr >= 0)
    210 			errno = EFTYPE;
    211 		return (NULL);
    212 	}
    213 
    214 	/* Set the page number, pin the page. */
    215 	bp->pgno = pgno;
    216 	bp->flags = MPOOL_PINNED;
    217 
    218 	/*
    219 	 * Add the page to the head of the hash chain and the tail
    220 	 * of the lru chain.
    221 	 */
    222 	head = &mp->hqh[HASHKEY(bp->pgno)];
    223 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
    224 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
    225 
    226 	/* Run through the user's filter. */
    227 	if (mp->pgin != NULL)
    228 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
    229 
    230 	return (bp->page);
    231 }
    232 
    233 /*
    234  * mpool_put
    235  *	Return a page.
    236  */
    237 int
    238 mpool_put(mp, page, flags)
    239 	MPOOL *mp;
    240 	void *page;
    241 	u_int flags;
    242 {
    243 	BKT *bp;
    244 
    245 #ifdef STATISTICS
    246 	++mp->pageput;
    247 #endif
    248 	bp = (BKT *)((char *)page - sizeof(BKT));
    249 #ifdef DEBUG
    250 	if (!(bp->flags & MPOOL_PINNED)) {
    251 		(void)fprintf(stderr,
    252 		    "mpool_put: page %d not pinned\n", bp->pgno);
    253 		abort();
    254 	}
    255 #endif
    256 	bp->flags &= ~MPOOL_PINNED;
    257 	bp->flags |= flags & MPOOL_DIRTY;
    258 	return (RET_SUCCESS);
    259 }
    260 
    261 /*
    262  * mpool_close
    263  *	Close the buffer pool.
    264  */
    265 int
    266 mpool_close(mp)
    267 	MPOOL *mp;
    268 {
    269 	BKT *bp;
    270 
    271 	/* Free up any space allocated to the lru pages. */
    272 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
    273 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
    274 		free(bp);
    275 	}
    276 
    277 	/* Free the MPOOL cookie. */
    278 	free(mp);
    279 	return (RET_SUCCESS);
    280 }
    281 
    282 /*
    283  * mpool_sync
    284  *	Sync the pool to disk.
    285  */
    286 int
    287 mpool_sync(mp)
    288 	MPOOL *mp;
    289 {
    290 	BKT *bp;
    291 
    292 	/* Walk the lru chain, flushing any dirty pages to disk. */
    293 	for (bp = mp->lqh.cqh_first;
    294 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    295 		if (bp->flags & MPOOL_DIRTY &&
    296 		    mpool_write(mp, bp) == RET_ERROR)
    297 			return (RET_ERROR);
    298 
    299 	/* Sync the file descriptor. */
    300 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
    301 }
    302 
    303 /*
    304  * mpool_bkt
    305  *	Get a page from the cache (or create one).
    306  */
    307 static BKT *
    308 mpool_bkt(mp)
    309 	MPOOL *mp;
    310 {
    311 	struct _hqh *head;
    312 	BKT *bp;
    313 
    314 	/* If under the max cached, always create a new page. */
    315 	if (mp->curcache < mp->maxcache)
    316 		goto new;
    317 
    318 	/*
    319 	 * If the cache is max'd out, walk the lru list for a buffer we
    320 	 * can flush.  If we find one, write it (if necessary) and take it
    321 	 * off any lists.  If we don't find anything we grow the cache anyway.
    322 	 * The cache never shrinks.
    323 	 */
    324 	for (bp = mp->lqh.cqh_first;
    325 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
    326 		if (!(bp->flags & MPOOL_PINNED)) {
    327 			/* Flush if dirty. */
    328 			if (bp->flags & MPOOL_DIRTY &&
    329 			    mpool_write(mp, bp) == RET_ERROR)
    330 				return (NULL);
    331 #ifdef STATISTICS
    332 			++mp->pageflush;
    333 #endif
    334 			/* Remove from the hash and lru queues. */
    335 			head = &mp->hqh[HASHKEY(bp->pgno)];
    336 			CIRCLEQ_REMOVE(head, bp, hq);
    337 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
    338 #ifdef DEBUG
    339 			{ void *spage;
    340 				spage = bp->page;
    341 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    342 				bp->page = spage;
    343 			}
    344 #endif
    345 			return (bp);
    346 		}
    347 
    348 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
    349 		return (NULL);
    350 #ifdef STATISTICS
    351 	++mp->pagealloc;
    352 #endif
    353 #if defined(DEBUG) || defined(PURIFY)
    354 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
    355 #endif
    356 	bp->page = (char *)bp + sizeof(BKT);
    357 	++mp->curcache;
    358 	return (bp);
    359 }
    360 
    361 /*
    362  * mpool_write
    363  *	Write a page to disk.
    364  */
    365 static int
    366 mpool_write(mp, bp)
    367 	MPOOL *mp;
    368 	BKT *bp;
    369 {
    370 	off_t off;
    371 
    372 #ifdef STATISTICS
    373 	++mp->pagewrite;
    374 #endif
    375 
    376 	/* Run through the user's filter. */
    377 	if (mp->pgout)
    378 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
    379 
    380 	off = mp->pagesize * bp->pgno;
    381 	if (lseek(mp->fd, off, SEEK_SET) != off)
    382 		return (RET_ERROR);
    383 	if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
    384 		return (RET_ERROR);
    385 
    386 	bp->flags &= ~MPOOL_DIRTY;
    387 	return (RET_SUCCESS);
    388 }
    389 
    390 /*
    391  * mpool_look
    392  *	Lookup a page in the cache.
    393  */
    394 static BKT *
    395 mpool_look(mp, pgno)
    396 	MPOOL *mp;
    397 	pgno_t pgno;
    398 {
    399 	struct _hqh *head;
    400 	BKT *bp;
    401 
    402 	head = &mp->hqh[HASHKEY(pgno)];
    403 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
    404 		if (bp->pgno == pgno) {
    405 #ifdef STATISTICS
    406 			++mp->cachehit;
    407 #endif
    408 			return (bp);
    409 		}
    410 #ifdef STATISTICS
    411 	++mp->cachemiss;
    412 #endif
    413 	return (NULL);
    414 }
    415 
    416 #ifdef STATISTICS
    417 /*
    418  * mpool_stat
    419  *	Print out cache statistics.
    420  */
    421 void
    422 mpool_stat(mp)
    423 	MPOOL *mp;
    424 {
    425 	BKT *bp;
    426 	int cnt;
    427 	char *sep;
    428 
    429 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
    430 	(void)fprintf(stderr,
    431 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
    432 	    mp->pagesize, mp->curcache, mp->maxcache);
    433 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
    434 	    mp->pageput, mp->pageget, mp->pagenew);
    435 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
    436 	    mp->pagealloc, mp->pageflush);
    437 	if (mp->cachehit + mp->cachemiss)
    438 		(void)fprintf(stderr,
    439 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
    440 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
    441 		    * 100, mp->cachehit, mp->cachemiss);
    442 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
    443 	    mp->pageread, mp->pagewrite);
    444 
    445 	sep = "";
    446 	cnt = 0;
    447 	for (bp = mp->lqh.cqh_first;
    448 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
    449 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
    450 		if (bp->flags & MPOOL_DIRTY)
    451 			(void)fprintf(stderr, "d");
    452 		if (bp->flags & MPOOL_PINNED)
    453 			(void)fprintf(stderr, "P");
    454 		if (++cnt == 10) {
    455 			sep = "\n";
    456 			cnt = 0;
    457 		} else
    458 			sep = ", ";
    459 
    460 	}
    461 	(void)fprintf(stderr, "\n");
    462 }
    463 #endif
    464