mpool.c revision 1.22 1 /* $NetBSD: mpool.c,v 1.22 2016/09/24 20:11:12 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: mpool.c,v 1.22 2016/09/24 20:11:12 christos Exp $");
38
39 #include "namespace.h"
40 #include <sys/queue.h>
41 #include <sys/stat.h>
42
43 #include <errno.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48
49 #include <db.h>
50
51 #define __MPOOLINTERFACE_PRIVATE
52 #include <mpool.h>
53
54 #ifdef __weak_alias
55 __weak_alias(mpool_close,_mpool_close)
56 __weak_alias(mpool_filter,_mpool_filter)
57 __weak_alias(mpool_get,_mpool_get)
58 __weak_alias(mpool_getf,_mpool_getf)
59 __weak_alias(mpool_new,_mpool_new)
60 __weak_alias(mpool_newf,_mpool_newf)
61 __weak_alias(mpool_open,_mpool_open)
62 __weak_alias(mpool_put,_mpool_put)
63 __weak_alias(mpool_sync,_mpool_sync)
64 #endif
65
66 static BKT *mpool_bkt(MPOOL *);
67 static BKT *mpool_look(MPOOL *, pgno_t);
68 static int mpool_write(MPOOL *, BKT *);
69
70 /*
71 * mpool_open --
72 * Initialize a memory pool.
73 */
74 /*ARGSUSED*/
75 MPOOL *
76 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
77 {
78 struct stat sb;
79 MPOOL *mp;
80 int entry;
81
82 /*
83 * Get information about the file.
84 *
85 * XXX
86 * We don't currently handle pipes, although we should.
87 */
88 if (fstat(fd, &sb))
89 return NULL;
90 if (!S_ISREG(sb.st_mode)) {
91 errno = ESPIPE;
92 return NULL;
93 }
94
95 /* Allocate and initialize the MPOOL cookie. */
96 if ((mp = calloc(1, sizeof(*mp))) == NULL)
97 return (NULL);
98 TAILQ_INIT(&mp->lqh);
99 for (entry = 0; entry < HASHSIZE; ++entry)
100 TAILQ_INIT(&mp->hqh[entry]);
101 mp->maxcache = maxcache;
102 mp->npages = (pgno_t)(sb.st_size / pagesize);
103 mp->pagesize = pagesize;
104 mp->fd = fd;
105 return mp;
106 }
107
108 /*
109 * mpool_filter --
110 * Initialize input/output filters.
111 */
112 void
113 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
114 void (*pgout)(void *, pgno_t, void *), void *pgcookie)
115 {
116 mp->pgin = pgin;
117 mp->pgout = pgout;
118 mp->pgcookie = pgcookie;
119 }
120
121 /*
122 * mpool_new --
123 * Get a new page of memory.
124 */
125 void *
126 mpool_newf(MPOOL *mp, pgno_t *pgnoaddr, unsigned int flags)
127 {
128 struct _hqh *head;
129 BKT *bp;
130
131 if (mp->npages == MAX_PAGE_NUMBER) {
132 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
133 abort();
134 }
135 #ifdef STATISTICS
136 ++mp->pagenew;
137 #endif
138 /*
139 * Get a BKT from the cache. Assign a new page number, attach
140 * it to the head of the hash chain, the tail of the lru chain,
141 * and return.
142 */
143 if ((bp = mpool_bkt(mp)) == NULL)
144 return NULL;
145
146 if (flags == MPOOL_PAGE_REQUEST) {
147 mp->npages++;
148 bp->pgno = *pgnoaddr;
149 } else
150 bp->pgno = *pgnoaddr = mp->npages++;
151
152 bp->flags = MPOOL_PINNED | MPOOL_INUSE;
153
154 head = &mp->hqh[HASHKEY(bp->pgno)];
155 TAILQ_INSERT_HEAD(head, bp, hq);
156 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
157 return bp->page;
158 }
159
160 void *
161 mpool_new(MPOOL *mp, pgno_t *pgnoaddr)
162 {
163 return mpool_newf(mp, pgnoaddr, 0);
164 }
165
166 int
167 mpool_delete(MPOOL *mp, void *page)
168 {
169 struct _hqh *head;
170 BKT *bp;
171
172 bp = (void *)((char *)page - sizeof(BKT));
173
174 #ifdef DEBUG
175 if (!(bp->flags & MPOOL_PINNED)) {
176 (void)fprintf(stderr,
177 "%s: page %d not pinned\n", __func__, bp->pgno);
178 abort();
179 }
180 #endif
181
182 /* Remove from the hash and lru queues. */
183 head = &mp->hqh[HASHKEY(bp->pgno)];
184 TAILQ_REMOVE(head, bp, hq);
185 TAILQ_REMOVE(&mp->lqh, bp, q);
186
187 free(bp);
188 return RET_SUCCESS;
189 }
190
191 /*
192 * mpool_get
193 * Get a page.
194 */
195 /*ARGSUSED*/
196 void *
197 mpool_getf(MPOOL *mp, pgno_t pgno, unsigned int flags)
198 {
199 struct _hqh *head;
200 BKT *bp;
201 off_t off;
202 ssize_t nr;
203
204 /* Check for attempt to retrieve a non-existent page. */
205 if (pgno >= mp->npages) {
206 errno = EINVAL;
207 return NULL;
208 }
209
210 #ifdef STATISTICS
211 ++mp->pageget;
212 #endif
213
214 /* Check for a page that is cached. */
215 if ((bp = mpool_look(mp, pgno)) != NULL) {
216 #ifdef DEBUG
217 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
218 (void)fprintf(stderr,
219 "mpool_get: page %d already pinned\n", bp->pgno);
220 abort();
221 }
222 #endif
223 /*
224 * Move the page to the head of the hash chain and the tail
225 * of the lru chain.
226 */
227 head = &mp->hqh[HASHKEY(bp->pgno)];
228 TAILQ_REMOVE(head, bp, hq);
229 TAILQ_INSERT_HEAD(head, bp, hq);
230 TAILQ_REMOVE(&mp->lqh, bp, q);
231 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
232
233 /* Return a pinned page. */
234 if (!(flags & MPOOL_IGNOREPIN))
235 bp->flags |= MPOOL_PINNED;
236 return bp->page;
237 }
238
239 /* Get a page from the cache. */
240 if ((bp = mpool_bkt(mp)) == NULL)
241 return NULL;
242
243 /* Read in the contents. */
244 #ifdef STATISTICS
245 ++mp->pageread;
246 #endif
247 off = mp->pagesize * pgno;
248 if (off / mp->pagesize != pgno) {
249 /* Run past the end of the file, or at least the part we
250 can address without large-file support? */
251 errno = E2BIG;
252 return NULL;
253 }
254
255 if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
256 if (nr > 0) {
257 errno = EFTYPE;
258 return NULL;
259 } else if (nr == 0) {
260 /*
261 * A zero-length reads, means you need to create a
262 * new page.
263 */
264 memset(bp->page, 0, mp->pagesize);
265 } else
266 return NULL;
267 }
268
269 /* Set the page number, pin the page. */
270 bp->pgno = pgno;
271 if (!(flags & MPOOL_IGNOREPIN))
272 bp->flags = MPOOL_PINNED;
273 bp->flags |= MPOOL_INUSE;
274
275 /*
276 * Add the page to the head of the hash chain and the tail
277 * of the lru chain.
278 */
279 head = &mp->hqh[HASHKEY(bp->pgno)];
280 TAILQ_INSERT_HEAD(head, bp, hq);
281 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
282
283 /* Run through the user's filter. */
284 if (mp->pgin != NULL)
285 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
286
287 return bp->page;
288 }
289
290 void *
291 mpool_get(MPOOL *mp, pgno_t pgno)
292 {
293 return mpool_getf(mp, pgno, 0);
294 }
295
296 /*
297 * mpool_put
298 * Return a page.
299 */
300 /*ARGSUSED*/
301 int
302 mpool_put(MPOOL *mp, void *page, u_int flags)
303 {
304 BKT *bp;
305
306 #ifdef STATISTICS
307 ++mp->pageput;
308 #endif
309 bp = (void *)((intptr_t)page - sizeof(BKT));
310 #ifdef DEBUG
311 if (!(bp->flags & MPOOL_PINNED)) {
312 (void)fprintf(stderr,
313 "mpool_put: page %d not pinned\n", bp->pgno);
314 abort();
315 }
316 #endif
317 bp->flags &= ~MPOOL_PINNED;
318 if (flags & MPOOL_DIRTY)
319 bp->flags |= flags & MPOOL_DIRTY;
320 return (RET_SUCCESS);
321 }
322
323 /*
324 * mpool_close
325 * Close the buffer pool.
326 */
327 int
328 mpool_close(MPOOL *mp)
329 {
330 BKT *bp;
331
332 /* Free up any space allocated to the lru pages. */
333 while (!TAILQ_EMPTY(&mp->lqh)) {
334 bp = TAILQ_FIRST(&mp->lqh);
335 TAILQ_REMOVE(&mp->lqh, bp, q);
336 free(bp);
337 }
338
339 /* Free the MPOOL cookie. */
340 free(mp);
341 return RET_SUCCESS;
342 }
343
344 /*
345 * mpool_sync
346 * Sync the pool to disk.
347 */
348 int
349 mpool_sync(MPOOL *mp)
350 {
351 BKT *bp;
352
353 /* Walk the lru chain, flushing any dirty pages to disk. */
354 TAILQ_FOREACH(bp, &mp->lqh, q)
355 if (bp->flags & MPOOL_DIRTY &&
356 mpool_write(mp, bp) == RET_ERROR)
357 return RET_ERROR;
358
359 /* Sync the file descriptor. */
360 return fsync(mp->fd) ? RET_ERROR : RET_SUCCESS;
361 }
362
363 /*
364 * mpool_bkt
365 * Get a page from the cache (or create one).
366 */
367 static BKT *
368 mpool_bkt(MPOOL *mp)
369 {
370 struct _hqh *head;
371 BKT *bp;
372
373 /* If under the max cached, always create a new page. */
374 if (mp->curcache < mp->maxcache)
375 goto new;
376
377 /*
378 * If the cache is max'd out, walk the lru list for a buffer we
379 * can flush. If we find one, write it (if necessary) and take it
380 * off any lists. If we don't find anything we grow the cache anyway.
381 * The cache never shrinks.
382 */
383 TAILQ_FOREACH(bp, &mp->lqh, q)
384 if (!(bp->flags & MPOOL_PINNED)) {
385 /* Flush if dirty. */
386 if (bp->flags & MPOOL_DIRTY &&
387 mpool_write(mp, bp) == RET_ERROR)
388 return NULL;
389 #ifdef STATISTICS
390 ++mp->pageflush;
391 #endif
392 /* Remove from the hash and lru queues. */
393 head = &mp->hqh[HASHKEY(bp->pgno)];
394 TAILQ_REMOVE(head, bp, hq);
395 TAILQ_REMOVE(&mp->lqh, bp, q);
396 #ifdef DEBUG
397 {
398 void *spage = bp->page;
399 (void)memset(bp, 0xff,
400 (size_t)(sizeof(BKT) + mp->pagesize));
401 bp->page = spage;
402 }
403 #endif
404 return bp;
405 }
406
407 new: if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
408 return NULL;
409 #ifdef STATISTICS
410 ++mp->pagealloc;
411 #endif
412 #if defined(DEBUG) || defined(PURIFY)
413 (void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
414 #endif
415 bp->page = (void *)((intptr_t)bp + sizeof(BKT));
416 ++mp->curcache;
417 return bp;
418 }
419
420 /*
421 * mpool_write
422 * Write a page to disk.
423 */
424 static int
425 mpool_write(MPOOL *mp, BKT *bp)
426 {
427 off_t off;
428
429 #ifdef STATISTICS
430 ++mp->pagewrite;
431 #endif
432
433 /* Run through the user's filter. */
434 if (mp->pgout)
435 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
436
437 off = mp->pagesize * bp->pgno;
438 if (off / mp->pagesize != bp->pgno) {
439 /* Run past the end of the file, or at least the part we
440 can address without large-file support? */
441 errno = E2BIG;
442 return RET_ERROR;
443 }
444
445 if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) !=
446 (ssize_t)mp->pagesize)
447 return RET_ERROR;
448
449 /*
450 * Re-run through the input filter since this page may soon be
451 * accessed via the cache, and whatever the user's output filter
452 * did may screw things up if we don't let the input filter
453 * restore the in-core copy.
454 */
455 if (mp->pgin)
456 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
457
458 bp->flags &= ~MPOOL_DIRTY;
459 return RET_SUCCESS;
460 }
461
462 /*
463 * mpool_look
464 * Lookup a page in the cache.
465 */
466 static BKT *
467 mpool_look(MPOOL *mp, pgno_t pgno)
468 {
469 struct _hqh *head;
470 BKT *bp;
471
472 head = &mp->hqh[HASHKEY(pgno)];
473 TAILQ_FOREACH(bp, head, hq)
474 if (bp->pgno == pgno) {
475 #ifdef STATISTICS
476 ++mp->cachehit;
477 #endif
478 return bp;
479 }
480 #ifdef STATISTICS
481 ++mp->cachemiss;
482 #endif
483 return NULL;
484 }
485
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number
 *	of every bucket on the lru chain, ten per line.  A trailing "d"
 *	marks a dirty page and "P" a pinned one.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	/* Counters are cast so the arguments always match %lu. */
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (u_long)mp->pageput, (u_long)mp->pageget, (u_long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (u_long)mp->pagealloc, (u_long)mp->pageflush);
	/* Skip the hit rate when there were no lookups (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (u_long)mp->cachehit, (u_long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (u_long)mp->pageread, (u_long)mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif
533