mpool.c revision 1.14 1 /* $NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 #if 0
35 static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
36 #else
37 __RCSID("$NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $");
38 #endif
39 #endif /* LIBC_SCCS and not lint */
40
41 #include "namespace.h"
42 #include <sys/queue.h>
43 #include <sys/stat.h>
44
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #include <db.h>
52
53 #define __MPOOLINTERFACE_PRIVATE
54 #include <mpool.h>
55
56 #ifdef __weak_alias
57 __weak_alias(mpool_close,_mpool_close)
58 __weak_alias(mpool_filter,_mpool_filter)
59 __weak_alias(mpool_get,_mpool_get)
60 __weak_alias(mpool_new,_mpool_new)
61 __weak_alias(mpool_open,_mpool_open)
62 __weak_alias(mpool_put,_mpool_put)
63 __weak_alias(mpool_sync,_mpool_sync)
64 #endif
65
66 static BKT *mpool_bkt __P((MPOOL *));
67 static BKT *mpool_look __P((MPOOL *, pgno_t));
68 static int mpool_write __P((MPOOL *, BKT *));
69
70 /*
71 * mpool_open --
72 * Initialize a memory pool.
73 */
74 /*ARGSUSED*/
75 MPOOL *
76 mpool_open(key, fd, pagesize, maxcache)
77 void *key;
78 int fd;
79 pgno_t pagesize, maxcache;
80 {
81 struct stat sb;
82 MPOOL *mp;
83 int entry;
84
85 /*
86 * Get information about the file.
87 *
88 * XXX
89 * We don't currently handle pipes, although we should.
90 */
91 if (fstat(fd, &sb))
92 return (NULL);
93 if (!S_ISREG(sb.st_mode)) {
94 errno = ESPIPE;
95 return (NULL);
96 }
97
98 /* Allocate and initialize the MPOOL cookie. */
99 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
100 return (NULL);
101 CIRCLEQ_INIT(&mp->lqh);
102 for (entry = 0; entry < HASHSIZE; ++entry)
103 CIRCLEQ_INIT(&mp->hqh[entry]);
104 mp->maxcache = maxcache;
105 mp->npages = (pgno_t)(sb.st_size / pagesize);
106 mp->pagesize = pagesize;
107 mp->fd = fd;
108 return (mp);
109 }
110
111 /*
112 * mpool_filter --
113 * Initialize input/output filters.
114 */
115 void
116 mpool_filter(mp, pgin, pgout, pgcookie)
117 MPOOL *mp;
118 void (*pgin) __P((void *, pgno_t, void *));
119 void (*pgout) __P((void *, pgno_t, void *));
120 void *pgcookie;
121 {
122 mp->pgin = pgin;
123 mp->pgout = pgout;
124 mp->pgcookie = pgcookie;
125 }
126
127 /*
128 * mpool_new --
129 * Get a new page of memory.
130 */
131 void *
132 mpool_new(mp, pgnoaddr)
133 MPOOL *mp;
134 pgno_t *pgnoaddr;
135 {
136 struct _hqh *head;
137 BKT *bp;
138
139 if (mp->npages == MAX_PAGE_NUMBER) {
140 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
141 abort();
142 }
143 #ifdef STATISTICS
144 ++mp->pagenew;
145 #endif
146 /*
147 * Get a BKT from the cache. Assign a new page number, attach
148 * it to the head of the hash chain, the tail of the lru chain,
149 * and return.
150 */
151 if ((bp = mpool_bkt(mp)) == NULL)
152 return (NULL);
153 *pgnoaddr = bp->pgno = mp->npages++;
154 bp->flags = MPOOL_PINNED;
155
156 head = &mp->hqh[HASHKEY(bp->pgno)];
157 CIRCLEQ_INSERT_HEAD(head, bp, hq);
158 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
159 return (bp->page);
160 }
161
162 /*
163 * mpool_get
164 * Get a page.
165 */
166 /*ARGSUSED*/
167 void *
168 mpool_get(mp, pgno, flags)
169 MPOOL *mp;
170 pgno_t pgno;
171 u_int flags; /* XXX not used? */
172 {
173 struct _hqh *head;
174 BKT *bp;
175 off_t off;
176 int nr;
177
178 /* Check for attempt to retrieve a non-existent page. */
179 if (pgno >= mp->npages) {
180 errno = EINVAL;
181 return (NULL);
182 }
183
184 #ifdef STATISTICS
185 ++mp->pageget;
186 #endif
187
188 /* Check for a page that is cached. */
189 if ((bp = mpool_look(mp, pgno)) != NULL) {
190 #ifdef DEBUG
191 if (bp->flags & MPOOL_PINNED) {
192 (void)fprintf(stderr,
193 "mpool_get: page %d already pinned\n", bp->pgno);
194 abort();
195 }
196 #endif
197 /*
198 * Move the page to the head of the hash chain and the tail
199 * of the lru chain.
200 */
201 head = &mp->hqh[HASHKEY(bp->pgno)];
202 CIRCLEQ_REMOVE(head, bp, hq);
203 CIRCLEQ_INSERT_HEAD(head, bp, hq);
204 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
205 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
206
207 /* Return a pinned page. */
208 bp->flags |= MPOOL_PINNED;
209 return (bp->page);
210 }
211
212 /* Get a page from the cache. */
213 if ((bp = mpool_bkt(mp)) == NULL)
214 return (NULL);
215
216 /* Read in the contents. */
217 #ifdef STATISTICS
218 ++mp->pageread;
219 #endif
220 off = mp->pagesize * pgno;
221 if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
222 if (nr >= 0)
223 errno = EFTYPE;
224 return (NULL);
225 }
226
227 /* Set the page number, pin the page. */
228 bp->pgno = pgno;
229 bp->flags = MPOOL_PINNED;
230
231 /*
232 * Add the page to the head of the hash chain and the tail
233 * of the lru chain.
234 */
235 head = &mp->hqh[HASHKEY(bp->pgno)];
236 CIRCLEQ_INSERT_HEAD(head, bp, hq);
237 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
238
239 /* Run through the user's filter. */
240 if (mp->pgin != NULL)
241 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
242
243 return (bp->page);
244 }
245
246 /*
247 * mpool_put
248 * Return a page.
249 */
250 /*ARGSUSED*/
251 int
252 mpool_put(mp, page, flags)
253 MPOOL *mp;
254 void *page;
255 u_int flags;
256 {
257 BKT *bp;
258
259 #ifdef STATISTICS
260 ++mp->pageput;
261 #endif
262 bp = (BKT *)(void *)((char *)page - sizeof(BKT));
263 #ifdef DEBUG
264 if (!(bp->flags & MPOOL_PINNED)) {
265 (void)fprintf(stderr,
266 "mpool_put: page %d not pinned\n", bp->pgno);
267 abort();
268 }
269 #endif
270 bp->flags &= ~MPOOL_PINNED;
271 bp->flags |= flags & MPOOL_DIRTY;
272 return (RET_SUCCESS);
273 }
274
275 /*
276 * mpool_close
277 * Close the buffer pool.
278 */
279 int
280 mpool_close(mp)
281 MPOOL *mp;
282 {
283 BKT *bp;
284
285 /* Free up any space allocated to the lru pages. */
286 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
287 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
288 free(bp);
289 }
290
291 /* Free the MPOOL cookie. */
292 free(mp);
293 return (RET_SUCCESS);
294 }
295
296 /*
297 * mpool_sync
298 * Sync the pool to disk.
299 */
300 int
301 mpool_sync(mp)
302 MPOOL *mp;
303 {
304 BKT *bp;
305
306 /* Walk the lru chain, flushing any dirty pages to disk. */
307 for (bp = mp->lqh.cqh_first;
308 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
309 if (bp->flags & MPOOL_DIRTY &&
310 mpool_write(mp, bp) == RET_ERROR)
311 return (RET_ERROR);
312
313 /* Sync the file descriptor. */
314 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
315 }
316
317 /*
318 * mpool_bkt
319 * Get a page from the cache (or create one).
320 */
321 static BKT *
322 mpool_bkt(mp)
323 MPOOL *mp;
324 {
325 struct _hqh *head;
326 BKT *bp;
327
328 /* If under the max cached, always create a new page. */
329 if (mp->curcache < mp->maxcache)
330 goto new;
331
332 /*
333 * If the cache is max'd out, walk the lru list for a buffer we
334 * can flush. If we find one, write it (if necessary) and take it
335 * off any lists. If we don't find anything we grow the cache anyway.
336 * The cache never shrinks.
337 */
338 for (bp = mp->lqh.cqh_first;
339 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
340 if (!(bp->flags & MPOOL_PINNED)) {
341 /* Flush if dirty. */
342 if (bp->flags & MPOOL_DIRTY &&
343 mpool_write(mp, bp) == RET_ERROR)
344 return (NULL);
345 #ifdef STATISTICS
346 ++mp->pageflush;
347 #endif
348 /* Remove from the hash and lru queues. */
349 head = &mp->hqh[HASHKEY(bp->pgno)];
350 CIRCLEQ_REMOVE(head, bp, hq);
351 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
352 #ifdef DEBUG
353 { void *spage;
354 spage = bp->page;
355 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
356 bp->page = spage;
357 }
358 #endif
359 return (bp);
360 }
361
362 new: if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
363 return (NULL);
364 #ifdef STATISTICS
365 ++mp->pagealloc;
366 #endif
367 #if defined(DEBUG) || defined(PURIFY)
368 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
369 #endif
370 bp->page = (char *)(void *)bp + sizeof(BKT);
371 ++mp->curcache;
372 return (bp);
373 }
374
375 /*
376 * mpool_write
377 * Write a page to disk.
378 */
379 static int
380 mpool_write(mp, bp)
381 MPOOL *mp;
382 BKT *bp;
383 {
384 off_t off;
385
386 #ifdef STATISTICS
387 ++mp->pagewrite;
388 #endif
389
390 /* Run through the user's filter. */
391 if (mp->pgout)
392 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
393
394 off = mp->pagesize * bp->pgno;
395 if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
396 return (RET_ERROR);
397
398 /*
399 * Re-run through the input filter since this page may soon be
400 * accessed via the cache, and whatever the user's output filter
401 * did may screw things up if we don't let the input filter
402 * restore the in-core copy.
403 */
404 if (mp->pgin)
405 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
406
407 bp->flags &= ~MPOOL_DIRTY;
408 return (RET_SUCCESS);
409 }
410
411 /*
412 * mpool_look
413 * Lookup a page in the cache.
414 */
415 static BKT *
416 mpool_look(mp, pgno)
417 MPOOL *mp;
418 pgno_t pgno;
419 {
420 struct _hqh *head;
421 BKT *bp;
422
423 head = &mp->hqh[HASHKEY(pgno)];
424 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
425 if (bp->pgno == pgno) {
426 #ifdef STATISTICS
427 ++mp->cachehit;
428 #endif
429 return (bp);
430 }
431 #ifdef STATISTICS
432 ++mp->cachemiss;
433 #endif
434 return (NULL);
435 }
436
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Dump the pool's counters to stderr, followed by the page number of
 *	every cached page in lru order, ten per line.  A page is suffixed
 *	with "d" when dirty and "P" when pinned.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *cur;
	int col;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);

	/* The hit rate is only meaningful once a lookup has happened. */
	if ((mp->cachehit + mp->cachemiss) != 0) {
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	}
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	/* The separator is printed before each entry, so it starts empty. */
	sep = "";
	col = 0;
	cur = mp->lqh.cqh_first;
	while (cur != (void *)&mp->lqh) {
		(void)fprintf(stderr, "%s%d", sep, cur->pgno);
		if (cur->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (cur->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");

		/* Every tenth entry the next separator is a line break. */
		++col;
		if (col == 10) {
			col = 0;
			sep = "\n";
		} else {
			sep = ", ";
		}
		cur = cur->q.cqe_next;
	}
	(void)fprintf(stderr, "\n");
}
#endif
485