/*	$NetBSD: mpool.c,v 1.17 2008/09/10 17:52:36 joerg Exp $	*/

/*-
 * Copyright (c) 1990, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: mpool.c,v 1.17 2008/09/10 17:52:36 joerg Exp $");

#include "namespace.h"
#include <sys/queue.h>
#include <sys/stat.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <db.h>

#define	__MPOOLINTERFACE_PRIVATE
#include <mpool.h>

#ifdef __weak_alias
__weak_alias(mpool_close,_mpool_close)
__weak_alias(mpool_filter,_mpool_filter)
__weak_alias(mpool_get,_mpool_get)
__weak_alias(mpool_new,_mpool_new)
__weak_alias(mpool_open,_mpool_open)
__weak_alias(mpool_put,_mpool_put)
__weak_alias(mpool_sync,_mpool_sync)
#endif

static BKT *mpool_bkt(MPOOL *);
static BKT *mpool_look(MPOOL *, pgno_t);
static int mpool_write(MPOOL *, BKT *);

/*
 * mpool_open --
 *	Initialize a memory pool.
 */
/*ARGSUSED*/
MPOOL *
mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
{
	struct stat sb;
	MPOOL *mp;
	int entry;

	/*
	 * Get information about the file.
	 *
	 * XXX
	 * We don't currently handle pipes, although we should.
	 */
	if (fstat(fd, &sb))
		return (NULL);
	if (!S_ISREG(sb.st_mode)) {
		errno = ESPIPE;
		return (NULL);
	}

	/* Allocate and initialize the MPOOL cookie. */
	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
		return (NULL);
	CIRCLEQ_INIT(&mp->lqh);
	for (entry = 0; entry < HASHSIZE; ++entry)
		CIRCLEQ_INIT(&mp->hqh[entry]);
	mp->maxcache = maxcache;
	mp->npages = (pgno_t)(sb.st_size / pagesize);
	mp->pagesize = pagesize;
	mp->fd = fd;
	return (mp);
}
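
/*
 * Typical calling sequence (an illustrative sketch only, not part of the
 * original source; the file name, open flags, page size, and cache size
 * below are assumptions):
 *
 *	int fd;
 *	MPOOL *mp;
 *	pgno_t pgno;
 *	void *page;
 *
 *	fd = open("example.db", O_RDWR);
 *	mp = mpool_open(NULL, fd, 4096, 128);
 *	page = mpool_new(mp, &pgno);		(returned pinned)
 *	memset(page, 0, 4096);
 *	(void)mpool_put(mp, page, MPOOL_DIRTY);	(unpin, mark dirty)
 *	(void)mpool_sync(mp);			(write dirty pages, fsync)
 *	(void)mpool_close(mp);
 */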

/*
 * mpool_filter --
 *	Initialize input/output filters.
 */
void
mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
    void (*pgout)(void *, pgno_t, void *), void *pgcookie)
{
	mp->pgin = pgin;
	mp->pgout = pgout;
	mp->pgcookie = pgcookie;
}
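
/*
 * Filter sketch (a hypothetical example, not part of the original source):
 * a pgin/pgout pair that keeps a 32-bit counter at the start of every page
 * big-endian on disk while the in-core copy stays in host byte order.  The
 * function names and page layout are assumptions; be32toh() and htobe32()
 * come from <sys/endian.h>.
 *
 *	static void
 *	example_pgin(void *cookie, pgno_t pgno, void *page)
 *	{
 *		uint32_t *counter = page;
 *
 *		*counter = be32toh(*counter);
 *	}
 *
 *	static void
 *	example_pgout(void *cookie, pgno_t pgno, void *page)
 *	{
 *		uint32_t *counter = page;
 *
 *		*counter = htobe32(*counter);
 *	}
 *
 *	mpool_filter(mp, example_pgin, example_pgout, NULL);
 */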

/*
 * mpool_new --
 *	Get a new page of memory.
 */
void *
mpool_new(MPOOL *mp, pgno_t *pgnoaddr)
{
	struct _hqh *head;
	BKT *bp;

	if (mp->npages == MAX_PAGE_NUMBER) {
		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
		abort();
	}
#ifdef STATISTICS
	++mp->pagenew;
#endif
	/*
	 * Get a BKT from the cache.  Assign a new page number, attach
	 * it to the head of the hash chain, the tail of the lru chain,
	 * and return.
	 */
	if ((bp = mpool_bkt(mp)) == NULL)
		return (NULL);
	*pgnoaddr = bp->pgno = mp->npages++;
	bp->flags = MPOOL_PINNED;

	head = &mp->hqh[HASHKEY(bp->pgno)];
	CIRCLEQ_INSERT_HEAD(head, bp, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
	return (bp->page);
}

/*
 * mpool_get
 *	Get a page.
 */
/*ARGSUSED*/
void *
mpool_get(MPOOL *mp, pgno_t pgno, u_int flags)
{
	struct _hqh *head;
	BKT *bp;
	off_t off;
	ssize_t nr;

	/* Check for attempt to retrieve a non-existent page. */
	if (pgno >= mp->npages) {
		errno = EINVAL;
		return (NULL);
	}

#ifdef STATISTICS
	++mp->pageget;
#endif

	/* Check for a page that is cached. */
	if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
		if (bp->flags & MPOOL_PINNED) {
			(void)fprintf(stderr,
			    "mpool_get: page %d already pinned\n", bp->pgno);
			abort();
		}
#endif
		/*
		 * Move the page to the head of the hash chain and the tail
		 * of the lru chain.
		 */
		head = &mp->hqh[HASHKEY(bp->pgno)];
		CIRCLEQ_REMOVE(head, bp, hq);
		CIRCLEQ_INSERT_HEAD(head, bp, hq);
		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

		/* Return a pinned page. */
		bp->flags |= MPOOL_PINNED;
		return (bp->page);
	}

	/* Get a page from the cache. */
	if ((bp = mpool_bkt(mp)) == NULL)
		return (NULL);

	/* Read in the contents. */
#ifdef STATISTICS
	++mp->pageread;
#endif
	off = mp->pagesize * pgno;
	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) !=
	    (int)mp->pagesize) {
		if (nr >= 0)
			errno = EFTYPE;
		return (NULL);
	}

	/* Set the page number, pin the page. */
	bp->pgno = pgno;
	bp->flags = MPOOL_PINNED;

	/*
	 * Add the page to the head of the hash chain and the tail
	 * of the lru chain.
	 */
	head = &mp->hqh[HASHKEY(bp->pgno)];
	CIRCLEQ_INSERT_HEAD(head, bp, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

	/* Run through the user's filter. */
	if (mp->pgin != NULL)
		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);

	return (bp->page);
}

/*
 * mpool_put
 *	Return a page.
 */
/*ARGSUSED*/
int
mpool_put(MPOOL *mp, void *page, u_int flags)
{
	BKT *bp;

#ifdef STATISTICS
	++mp->pageput;
#endif
	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
#ifdef DEBUG
	if (!(bp->flags & MPOOL_PINNED)) {
		(void)fprintf(stderr,
		    "mpool_put: page %d not pinned\n", bp->pgno);
		abort();
	}
#endif
	bp->flags &= ~MPOOL_PINNED;
	bp->flags |= flags & MPOOL_DIRTY;
	return (RET_SUCCESS);
}
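
/*
 * Pin/unpin discipline (an illustrative sketch, not part of the original
 * source): every page handed out by mpool_get() or mpool_new() comes back
 * pinned and cannot be reclaimed by mpool_bkt() until it is returned with
 * mpool_put(); callers that modified the page pass MPOOL_DIRTY so the page
 * is written back by mpool_sync() or when the buffer is reused.  The page
 * number and error handling below are assumptions:
 *
 *	if ((page = mpool_get(mp, pgno, 0)) == NULL)
 *		return (RET_ERROR);
 *	... modify the page in place ...
 *	(void)mpool_put(mp, page, MPOOL_DIRTY);
 */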

/*
 * mpool_close
 *	Close the buffer pool.
 */
int
mpool_close(MPOOL *mp)
{
	BKT *bp;

	/* Free up any space allocated to the lru pages. */
	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
		free(bp);
	}

	/* Free the MPOOL cookie. */
	free(mp);
	return (RET_SUCCESS);
}

/*
 * mpool_sync
 *	Sync the pool to disk.
 */
int
mpool_sync(MPOOL *mp)
{
	BKT *bp;

	/* Walk the lru chain, flushing any dirty pages to disk. */
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
		if (bp->flags & MPOOL_DIRTY &&
		    mpool_write(mp, bp) == RET_ERROR)
			return (RET_ERROR);

	/* Sync the file descriptor. */
	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
}

/*
 * mpool_bkt
 *	Get a page from the cache (or create one).
 */
static BKT *
mpool_bkt(MPOOL *mp)
{
	struct _hqh *head;
	BKT *bp;

	/* If under the max cached, always create a new page. */
	if (mp->curcache < mp->maxcache)
		goto new;

	/*
	 * If the cache is max'd out, walk the lru list for a buffer we
	 * can flush.  If we find one, write it (if necessary) and take it
	 * off any lists.  If we don't find anything we grow the cache anyway.
	 * The cache never shrinks.
	 */
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
		if (!(bp->flags & MPOOL_PINNED)) {
			/* Flush if dirty. */
			if (bp->flags & MPOOL_DIRTY &&
			    mpool_write(mp, bp) == RET_ERROR)
				return (NULL);
#ifdef STATISTICS
			++mp->pageflush;
#endif
			/* Remove from the hash and lru queues. */
			head = &mp->hqh[HASHKEY(bp->pgno)];
			CIRCLEQ_REMOVE(head, bp, hq);
			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
#ifdef DEBUG
			{
				void *spage = bp->page;
				(void)memset(bp, 0xff,
				    (size_t)(sizeof(BKT) + mp->pagesize));
				bp->page = spage;
			}
#endif
			return (bp);
		}

new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
		return (NULL);
#ifdef STATISTICS
	++mp->pagealloc;
#endif
#if defined(DEBUG) || defined(PURIFY)
	(void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
#endif
	bp->page = (char *)(void *)bp + sizeof(BKT);
	++mp->curcache;
	return (bp);
}

/*
 * mpool_write
 *	Write a page to disk.
 */
static int
mpool_write(MPOOL *mp, BKT *bp)
{
	off_t off;

#ifdef STATISTICS
	++mp->pagewrite;
#endif

	/* Run through the user's filter. */
	if (mp->pgout)
		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);

	off = mp->pagesize * bp->pgno;
	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) !=
	    (int)mp->pagesize)
		return (RET_ERROR);

	/*
	 * Re-run through the input filter since this page may soon be
	 * accessed via the cache, and whatever the user's output filter
	 * did may screw things up if we don't let the input filter
	 * restore the in-core copy.
	 */
	if (mp->pgin)
		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);

	bp->flags &= ~MPOOL_DIRTY;
	return (RET_SUCCESS);
}

/*
 * mpool_look
 *	Lookup a page in the cache.
 */
static BKT *
mpool_look(MPOOL *mp, pgno_t pgno)
{
	struct _hqh *head;
	BKT *bp;

	head = &mp->hqh[HASHKEY(pgno)];
	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
		if (bp->pgno == pgno) {
#ifdef STATISTICS
			++mp->cachehit;
#endif
			return (bp);
		}
#ifdef STATISTICS
	++mp->cachemiss;
#endif
	return (NULL);
}

#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, caching %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif