/* $NetBSD: mpool.c,v 1.15 2006/01/24 17:37:05 christos Exp $ */

/*-
 * Copyright (c) 1990, 1993, 1994
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 #if 0
35 static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94";
36 #else
37 __RCSID("$NetBSD: mpool.c,v 1.15 2006/01/24 17:37:05 christos Exp $");
38 #endif
39 #endif /* LIBC_SCCS and not lint */
40
41 #include "namespace.h"
42 #include <sys/queue.h>
43 #include <sys/stat.h>
44
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50
51 #include <db.h>
52
53 #define __MPOOLINTERFACE_PRIVATE
54 #include <mpool.h>
55
56 #ifdef __weak_alias
57 __weak_alias(mpool_close,_mpool_close)
58 __weak_alias(mpool_filter,_mpool_filter)
59 __weak_alias(mpool_get,_mpool_get)
60 __weak_alias(mpool_new,_mpool_new)
61 __weak_alias(mpool_open,_mpool_open)
62 __weak_alias(mpool_put,_mpool_put)
63 __weak_alias(mpool_sync,_mpool_sync)
64 #endif
65
66 static BKT *mpool_bkt __P((MPOOL *));
67 static BKT *mpool_look __P((MPOOL *, pgno_t));
68 static int mpool_write __P((MPOOL *, BKT *));
69
70 /*
71 * mpool_open --
72 * Initialize a memory pool.
73 */
74 /*ARGSUSED*/
75 MPOOL *
76 mpool_open(key, fd, pagesize, maxcache)
77 void *key;
78 int fd;
79 pgno_t pagesize, maxcache;
80 {
81 struct stat sb;
82 MPOOL *mp;
83 int entry;
84
85 /*
86 * Get information about the file.
87 *
88 * XXX
89 * We don't currently handle pipes, although we should.
90 */
91 if (fstat(fd, &sb))
92 return (NULL);
93 if (!S_ISREG(sb.st_mode)) {
94 errno = ESPIPE;
95 return (NULL);
96 }
97
98 /* Allocate and initialize the MPOOL cookie. */
99 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
100 return (NULL);
101 CIRCLEQ_INIT(&mp->lqh);
102 for (entry = 0; entry < HASHSIZE; ++entry)
103 CIRCLEQ_INIT(&mp->hqh[entry]);
104 mp->maxcache = maxcache;
105 mp->npages = (pgno_t)(sb.st_size / pagesize);
106 mp->pagesize = pagesize;
107 mp->fd = fd;
108 return (mp);
109 }
110
111 /*
112 * mpool_filter --
113 * Initialize input/output filters.
114 */
115 void
116 mpool_filter(mp, pgin, pgout, pgcookie)
117 MPOOL *mp;
118 void (*pgin) __P((void *, pgno_t, void *));
119 void (*pgout) __P((void *, pgno_t, void *));
120 void *pgcookie;
121 {
122 mp->pgin = pgin;
123 mp->pgout = pgout;
124 mp->pgcookie = pgcookie;
125 }
126
127 /*
128 * mpool_new --
129 * Get a new page of memory.
130 */
131 void *
132 mpool_new(mp, pgnoaddr)
133 MPOOL *mp;
134 pgno_t *pgnoaddr;
135 {
136 struct _hqh *head;
137 BKT *bp;
138
139 if (mp->npages == MAX_PAGE_NUMBER) {
140 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
141 abort();
142 }
143 #ifdef STATISTICS
144 ++mp->pagenew;
145 #endif
146 /*
147 * Get a BKT from the cache. Assign a new page number, attach
148 * it to the head of the hash chain, the tail of the lru chain,
149 * and return.
150 */
151 if ((bp = mpool_bkt(mp)) == NULL)
152 return (NULL);
153 *pgnoaddr = bp->pgno = mp->npages++;
154 bp->flags = MPOOL_PINNED;
155
156 head = &mp->hqh[HASHKEY(bp->pgno)];
157 CIRCLEQ_INSERT_HEAD(head, bp, hq);
158 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
159 return (bp->page);
160 }
161
162 /*
163 * mpool_get
164 * Get a page.
165 */
166 /*ARGSUSED*/
167 void *
168 mpool_get(mp, pgno, flags)
169 MPOOL *mp;
170 pgno_t pgno;
171 u_int flags; /* XXX not used? */
172 {
173 struct _hqh *head;
174 BKT *bp;
175 off_t off;
176 int nr;
177
178 /* Check for attempt to retrieve a non-existent page. */
179 if (pgno >= mp->npages) {
180 errno = EINVAL;
181 return (NULL);
182 }
183
184 #ifdef STATISTICS
185 ++mp->pageget;
186 #endif
187
188 /* Check for a page that is cached. */
189 if ((bp = mpool_look(mp, pgno)) != NULL) {
190 #ifdef DEBUG
191 if (bp->flags & MPOOL_PINNED) {
192 (void)fprintf(stderr,
193 "mpool_get: page %d already pinned\n", bp->pgno);
194 abort();
195 }
196 #endif
197 /*
198 * Move the page to the head of the hash chain and the tail
199 * of the lru chain.
200 */
201 head = &mp->hqh[HASHKEY(bp->pgno)];
202 CIRCLEQ_REMOVE(head, bp, hq);
203 CIRCLEQ_INSERT_HEAD(head, bp, hq);
204 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
205 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
206
207 /* Return a pinned page. */
208 bp->flags |= MPOOL_PINNED;
209 return (bp->page);
210 }
211
212 /* Get a page from the cache. */
213 if ((bp = mpool_bkt(mp)) == NULL)
214 return (NULL);
215
216 /* Read in the contents. */
217 #ifdef STATISTICS
218 ++mp->pageread;
219 #endif
220 off = mp->pagesize * pgno;
221 if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
222 if (nr >= 0)
223 errno = EFTYPE;
224 return (NULL);
225 }
226
227 /* Set the page number, pin the page. */
228 bp->pgno = pgno;
229 bp->flags = MPOOL_PINNED;
230
231 /*
232 * Add the page to the head of the hash chain and the tail
233 * of the lru chain.
234 */
235 head = &mp->hqh[HASHKEY(bp->pgno)];
236 CIRCLEQ_INSERT_HEAD(head, bp, hq);
237 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
238
239 /* Run through the user's filter. */
240 if (mp->pgin != NULL)
241 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
242
243 return (bp->page);
244 }
245
246 /*
247 * mpool_put
248 * Return a page.
249 */
250 /*ARGSUSED*/
251 int
252 mpool_put(mp, page, flags)
253 MPOOL *mp;
254 void *page;
255 u_int flags;
256 {
257 BKT *bp;
258
259 #ifdef STATISTICS
260 ++mp->pageput;
261 #endif
262 bp = (BKT *)(void *)((char *)page - sizeof(BKT));
263 #ifdef DEBUG
264 if (!(bp->flags & MPOOL_PINNED)) {
265 (void)fprintf(stderr,
266 "mpool_put: page %d not pinned\n", bp->pgno);
267 abort();
268 }
269 #endif
270 bp->flags &= ~MPOOL_PINNED;
271 bp->flags |= flags & MPOOL_DIRTY;
272 return (RET_SUCCESS);
273 }
274
275 /*
276 * mpool_close
277 * Close the buffer pool.
278 */
279 int
280 mpool_close(mp)
281 MPOOL *mp;
282 {
283 BKT *bp;
284
285 /* Free up any space allocated to the lru pages. */
286 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
287 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
288 free(bp);
289 }
290
291 /* Free the MPOOL cookie. */
292 free(mp);
293 return (RET_SUCCESS);
294 }
295
296 /*
297 * mpool_sync
298 * Sync the pool to disk.
299 */
300 int
301 mpool_sync(mp)
302 MPOOL *mp;
303 {
304 BKT *bp;
305
306 /* Walk the lru chain, flushing any dirty pages to disk. */
307 for (bp = mp->lqh.cqh_first;
308 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
309 if (bp->flags & MPOOL_DIRTY &&
310 mpool_write(mp, bp) == RET_ERROR)
311 return (RET_ERROR);
312
313 /* Sync the file descriptor. */
314 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
315 }
316
317 /*
318 * mpool_bkt
319 * Get a page from the cache (or create one).
320 */
321 static BKT *
322 mpool_bkt(mp)
323 MPOOL *mp;
324 {
325 struct _hqh *head;
326 BKT *bp;
327
328 /* If under the max cached, always create a new page. */
329 if (mp->curcache < mp->maxcache)
330 goto new;
331
332 /*
333 * If the cache is max'd out, walk the lru list for a buffer we
334 * can flush. If we find one, write it (if necessary) and take it
335 * off any lists. If we don't find anything we grow the cache anyway.
336 * The cache never shrinks.
337 */
338 for (bp = mp->lqh.cqh_first;
339 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
340 if (!(bp->flags & MPOOL_PINNED)) {
341 /* Flush if dirty. */
342 if (bp->flags & MPOOL_DIRTY &&
343 mpool_write(mp, bp) == RET_ERROR)
344 return (NULL);
345 #ifdef STATISTICS
346 ++mp->pageflush;
347 #endif
348 /* Remove from the hash and lru queues. */
349 head = &mp->hqh[HASHKEY(bp->pgno)];
350 CIRCLEQ_REMOVE(head, bp, hq);
351 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
352 #ifdef DEBUG
353 {
354 void *spage = bp->page;
355 (void)memset(bp, 0xff,
356 (size_t)(sizeof(BKT) + mp->pagesize));
357 bp->page = spage;
358 }
359 #endif
360 return (bp);
361 }
362
363 new: if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
364 return (NULL);
365 #ifdef STATISTICS
366 ++mp->pagealloc;
367 #endif
368 #if defined(DEBUG) || defined(PURIFY)
369 (void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
370 #endif
371 bp->page = (char *)(void *)bp + sizeof(BKT);
372 ++mp->curcache;
373 return (bp);
374 }
375
376 /*
377 * mpool_write
378 * Write a page to disk.
379 */
380 static int
381 mpool_write(mp, bp)
382 MPOOL *mp;
383 BKT *bp;
384 {
385 off_t off;
386
387 #ifdef STATISTICS
388 ++mp->pagewrite;
389 #endif
390
391 /* Run through the user's filter. */
392 if (mp->pgout)
393 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
394
395 off = mp->pagesize * bp->pgno;
396 if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
397 return (RET_ERROR);
398
399 /*
400 * Re-run through the input filter since this page may soon be
401 * accessed via the cache, and whatever the user's output filter
402 * did may screw things up if we don't let the input filter
403 * restore the in-core copy.
404 */
405 if (mp->pgin)
406 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
407
408 bp->flags &= ~MPOOL_DIRTY;
409 return (RET_SUCCESS);
410 }
411
412 /*
413 * mpool_look
414 * Lookup a page in the cache.
415 */
416 static BKT *
417 mpool_look(mp, pgno)
418 MPOOL *mp;
419 pgno_t pgno;
420 {
421 struct _hqh *head;
422 BKT *bp;
423
424 head = &mp->hqh[HASHKEY(pgno)];
425 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
426 if (bp->pgno == pgno) {
427 #ifdef STATISTICS
428 ++mp->cachehit;
429 #endif
430 return (bp);
431 }
432 #ifdef STATISTICS
433 ++mp->cachemiss;
434 #endif
435 return (NULL);
436 }
437
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 * Writes the pool's counters to stderr, followed by the page number of
 * every buffer on the lru chain, ten per line, with a "d" suffix for
 * dirty and a "P" suffix for pinned buffers.
 *
 * Every integer is cast to unsigned long so that the argument matches
 * its %lu conversion whatever width the field is declared with.
 * NOTE(review): the field types live in <mpool.h>, which is not visible
 * here; npages at least is assigned a pgno_t in mpool_open.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the rate when there were no lookups: it would divide by 0. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif
486