mpool.c revision 1.17 1 1.17 joerg /* $NetBSD: mpool.c,v 1.17 2008/09/10 17:52:36 joerg Exp $ */
2 1.5 cgd
3 1.1 cgd /*-
4 1.6 cgd * Copyright (c) 1990, 1993, 1994
5 1.1 cgd * The Regents of the University of California. All rights reserved.
6 1.1 cgd *
7 1.1 cgd * Redistribution and use in source and binary forms, with or without
8 1.1 cgd * modification, are permitted provided that the following conditions
9 1.1 cgd * are met:
10 1.1 cgd * 1. Redistributions of source code must retain the above copyright
11 1.1 cgd * notice, this list of conditions and the following disclaimer.
12 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 cgd * notice, this list of conditions and the following disclaimer in the
14 1.1 cgd * documentation and/or other materials provided with the distribution.
15 1.14 agc * 3. Neither the name of the University nor the names of its contributors
16 1.1 cgd * may be used to endorse or promote products derived from this software
17 1.1 cgd * without specific prior written permission.
18 1.1 cgd *
19 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 cgd * SUCH DAMAGE.
30 1.1 cgd */
31 1.1 cgd
32 1.7 christos #include <sys/cdefs.h>
33 1.17 joerg __RCSID("$NetBSD: mpool.c,v 1.17 2008/09/10 17:52:36 joerg Exp $");
34 1.1 cgd
35 1.8 jtc #include "namespace.h"
36 1.6 cgd #include <sys/queue.h>
37 1.1 cgd #include <sys/stat.h>
38 1.1 cgd
39 1.1 cgd #include <errno.h>
40 1.1 cgd #include <stdio.h>
41 1.1 cgd #include <stdlib.h>
42 1.1 cgd #include <string.h>
43 1.1 cgd #include <unistd.h>
44 1.1 cgd
45 1.1 cgd #include <db.h>
46 1.6 cgd
47 1.1 cgd #define __MPOOLINTERFACE_PRIVATE
48 1.6 cgd #include <mpool.h>
49 1.8 jtc
50 1.8 jtc #ifdef __weak_alias
51 1.12 mycroft __weak_alias(mpool_close,_mpool_close)
52 1.12 mycroft __weak_alias(mpool_filter,_mpool_filter)
53 1.12 mycroft __weak_alias(mpool_get,_mpool_get)
54 1.12 mycroft __weak_alias(mpool_new,_mpool_new)
55 1.12 mycroft __weak_alias(mpool_open,_mpool_open)
56 1.12 mycroft __weak_alias(mpool_put,_mpool_put)
57 1.12 mycroft __weak_alias(mpool_sync,_mpool_sync)
58 1.8 jtc #endif
59 1.1 cgd
60 1.16 christos static BKT *mpool_bkt(MPOOL *);
61 1.16 christos static BKT *mpool_look(MPOOL *, pgno_t);
62 1.16 christos static int mpool_write(MPOOL *, BKT *);
63 1.1 cgd
64 1.1 cgd /*
65 1.6 cgd * mpool_open --
66 1.6 cgd * Initialize a memory pool.
67 1.1 cgd */
68 1.10 christos /*ARGSUSED*/
69 1.1 cgd MPOOL *
70 1.16 christos mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
71 1.1 cgd {
72 1.1 cgd struct stat sb;
73 1.1 cgd MPOOL *mp;
74 1.1 cgd int entry;
75 1.1 cgd
76 1.6 cgd /*
77 1.6 cgd * Get information about the file.
78 1.6 cgd *
79 1.6 cgd * XXX
80 1.6 cgd * We don't currently handle pipes, although we should.
81 1.6 cgd */
82 1.1 cgd if (fstat(fd, &sb))
83 1.1 cgd return (NULL);
84 1.1 cgd if (!S_ISREG(sb.st_mode)) {
85 1.1 cgd errno = ESPIPE;
86 1.1 cgd return (NULL);
87 1.1 cgd }
88 1.1 cgd
89 1.6 cgd /* Allocate and initialize the MPOOL cookie. */
90 1.6 cgd if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
91 1.1 cgd return (NULL);
92 1.6 cgd CIRCLEQ_INIT(&mp->lqh);
93 1.1 cgd for (entry = 0; entry < HASHSIZE; ++entry)
94 1.6 cgd CIRCLEQ_INIT(&mp->hqh[entry]);
95 1.1 cgd mp->maxcache = maxcache;
96 1.10 christos mp->npages = (pgno_t)(sb.st_size / pagesize);
97 1.1 cgd mp->pagesize = pagesize;
98 1.1 cgd mp->fd = fd;
99 1.1 cgd return (mp);
100 1.1 cgd }
101 1.1 cgd
102 1.1 cgd /*
103 1.6 cgd * mpool_filter --
104 1.6 cgd * Initialize input/output filters.
105 1.1 cgd */
106 1.1 cgd void
107 1.16 christos mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
108 1.16 christos void (*pgout)(void *, pgno_t, void *), void *pgcookie)
109 1.1 cgd {
110 1.1 cgd mp->pgin = pgin;
111 1.1 cgd mp->pgout = pgout;
112 1.1 cgd mp->pgcookie = pgcookie;
113 1.1 cgd }
114 1.1 cgd
115 1.1 cgd /*
116 1.6 cgd * mpool_new --
117 1.6 cgd * Get a new page of memory.
118 1.1 cgd */
119 1.1 cgd void *
120 1.16 christos mpool_new( MPOOL *mp, pgno_t *pgnoaddr)
121 1.1 cgd {
122 1.6 cgd struct _hqh *head;
123 1.6 cgd BKT *bp;
124 1.1 cgd
125 1.6 cgd if (mp->npages == MAX_PAGE_NUMBER) {
126 1.6 cgd (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
127 1.6 cgd abort();
128 1.6 cgd }
129 1.1 cgd #ifdef STATISTICS
130 1.1 cgd ++mp->pagenew;
131 1.1 cgd #endif
132 1.1 cgd /*
133 1.6 cgd * Get a BKT from the cache. Assign a new page number, attach
134 1.6 cgd * it to the head of the hash chain, the tail of the lru chain,
135 1.6 cgd * and return.
136 1.1 cgd */
137 1.6 cgd if ((bp = mpool_bkt(mp)) == NULL)
138 1.1 cgd return (NULL);
139 1.6 cgd *pgnoaddr = bp->pgno = mp->npages++;
140 1.6 cgd bp->flags = MPOOL_PINNED;
141 1.6 cgd
142 1.6 cgd head = &mp->hqh[HASHKEY(bp->pgno)];
143 1.6 cgd CIRCLEQ_INSERT_HEAD(head, bp, hq);
144 1.6 cgd CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
145 1.6 cgd return (bp->page);
146 1.1 cgd }
147 1.1 cgd
148 1.1 cgd /*
149 1.6 cgd * mpool_get
150 1.6 cgd * Get a page.
151 1.1 cgd */
152 1.10 christos /*ARGSUSED*/
153 1.1 cgd void *
154 1.16 christos mpool_get(MPOOL *mp, pgno_t pgno, u_int flags)
155 1.1 cgd {
156 1.6 cgd struct _hqh *head;
157 1.6 cgd BKT *bp;
158 1.1 cgd off_t off;
159 1.16 christos ssize_t nr;
160 1.1 cgd
161 1.6 cgd /* Check for attempt to retrieve a non-existent page. */
162 1.6 cgd if (pgno >= mp->npages) {
163 1.6 cgd errno = EINVAL;
164 1.6 cgd return (NULL);
165 1.6 cgd }
166 1.6 cgd
167 1.1 cgd #ifdef STATISTICS
168 1.6 cgd ++mp->pageget;
169 1.1 cgd #endif
170 1.6 cgd
171 1.6 cgd /* Check for a page that is cached. */
172 1.6 cgd if ((bp = mpool_look(mp, pgno)) != NULL) {
173 1.1 cgd #ifdef DEBUG
174 1.6 cgd if (bp->flags & MPOOL_PINNED) {
175 1.6 cgd (void)fprintf(stderr,
176 1.6 cgd "mpool_get: page %d already pinned\n", bp->pgno);
177 1.6 cgd abort();
178 1.6 cgd }
179 1.6 cgd #endif
180 1.6 cgd /*
181 1.6 cgd * Move the page to the head of the hash chain and the tail
182 1.6 cgd * of the lru chain.
183 1.6 cgd */
184 1.6 cgd head = &mp->hqh[HASHKEY(bp->pgno)];
185 1.6 cgd CIRCLEQ_REMOVE(head, bp, hq);
186 1.6 cgd CIRCLEQ_INSERT_HEAD(head, bp, hq);
187 1.6 cgd CIRCLEQ_REMOVE(&mp->lqh, bp, q);
188 1.6 cgd CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
189 1.6 cgd
190 1.6 cgd /* Return a pinned page. */
191 1.6 cgd bp->flags |= MPOOL_PINNED;
192 1.6 cgd return (bp->page);
193 1.1 cgd }
194 1.1 cgd
195 1.1 cgd /* Get a page from the cache. */
196 1.6 cgd if ((bp = mpool_bkt(mp)) == NULL)
197 1.1 cgd return (NULL);
198 1.1 cgd
199 1.6 cgd /* Read in the contents. */
200 1.1 cgd #ifdef STATISTICS
201 1.1 cgd ++mp->pageread;
202 1.1 cgd #endif
203 1.1 cgd off = mp->pagesize * pgno;
204 1.10 christos if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
205 1.1 cgd if (nr >= 0)
206 1.1 cgd errno = EFTYPE;
207 1.1 cgd return (NULL);
208 1.1 cgd }
209 1.1 cgd
210 1.6 cgd /* Set the page number, pin the page. */
211 1.6 cgd bp->pgno = pgno;
212 1.6 cgd bp->flags = MPOOL_PINNED;
213 1.6 cgd
214 1.6 cgd /*
215 1.6 cgd * Add the page to the head of the hash chain and the tail
216 1.6 cgd * of the lru chain.
217 1.6 cgd */
218 1.6 cgd head = &mp->hqh[HASHKEY(bp->pgno)];
219 1.6 cgd CIRCLEQ_INSERT_HEAD(head, bp, hq);
220 1.6 cgd CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
221 1.6 cgd
222 1.6 cgd /* Run through the user's filter. */
223 1.6 cgd if (mp->pgin != NULL)
224 1.6 cgd (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
225 1.6 cgd
226 1.6 cgd return (bp->page);
227 1.1 cgd }
228 1.1 cgd
229 1.1 cgd /*
230 1.6 cgd * mpool_put
231 1.6 cgd * Return a page.
232 1.1 cgd */
233 1.10 christos /*ARGSUSED*/
234 1.1 cgd int
235 1.16 christos mpool_put(MPOOL *mp, void *page, u_int flags)
236 1.1 cgd {
237 1.6 cgd BKT *bp;
238 1.1 cgd
239 1.1 cgd #ifdef STATISTICS
240 1.1 cgd ++mp->pageput;
241 1.1 cgd #endif
242 1.10 christos bp = (BKT *)(void *)((char *)page - sizeof(BKT));
243 1.1 cgd #ifdef DEBUG
244 1.6 cgd if (!(bp->flags & MPOOL_PINNED)) {
245 1.6 cgd (void)fprintf(stderr,
246 1.6 cgd "mpool_put: page %d not pinned\n", bp->pgno);
247 1.6 cgd abort();
248 1.1 cgd }
249 1.1 cgd #endif
250 1.6 cgd bp->flags &= ~MPOOL_PINNED;
251 1.6 cgd bp->flags |= flags & MPOOL_DIRTY;
252 1.1 cgd return (RET_SUCCESS);
253 1.1 cgd }
254 1.1 cgd
255 1.1 cgd /*
256 1.6 cgd * mpool_close
257 1.6 cgd * Close the buffer pool.
258 1.1 cgd */
259 1.1 cgd int
260 1.16 christos mpool_close(MPOOL *mp)
261 1.1 cgd {
262 1.6 cgd BKT *bp;
263 1.1 cgd
264 1.1 cgd /* Free up any space allocated to the lru pages. */
265 1.6 cgd while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
266 1.6 cgd CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
267 1.6 cgd free(bp);
268 1.1 cgd }
269 1.6 cgd
270 1.6 cgd /* Free the MPOOL cookie. */
271 1.1 cgd free(mp);
272 1.1 cgd return (RET_SUCCESS);
273 1.1 cgd }
274 1.1 cgd
275 1.1 cgd /*
276 1.6 cgd * mpool_sync
277 1.6 cgd * Sync the pool to disk.
278 1.1 cgd */
279 1.1 cgd int
280 1.16 christos mpool_sync(MPOOL *mp)
281 1.1 cgd {
282 1.6 cgd BKT *bp;
283 1.1 cgd
284 1.6 cgd /* Walk the lru chain, flushing any dirty pages to disk. */
285 1.6 cgd for (bp = mp->lqh.cqh_first;
286 1.6 cgd bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
287 1.6 cgd if (bp->flags & MPOOL_DIRTY &&
288 1.6 cgd mpool_write(mp, bp) == RET_ERROR)
289 1.1 cgd return (RET_ERROR);
290 1.6 cgd
291 1.6 cgd /* Sync the file descriptor. */
292 1.1 cgd return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
293 1.1 cgd }
294 1.1 cgd
295 1.1 cgd /*
296 1.6 cgd * mpool_bkt
297 1.6 cgd * Get a page from the cache (or create one).
298 1.1 cgd */
299 1.1 cgd static BKT *
300 1.16 christos mpool_bkt(MPOOL *mp)
301 1.1 cgd {
302 1.6 cgd struct _hqh *head;
303 1.6 cgd BKT *bp;
304 1.1 cgd
305 1.6 cgd /* If under the max cached, always create a new page. */
306 1.1 cgd if (mp->curcache < mp->maxcache)
307 1.1 cgd goto new;
308 1.1 cgd
309 1.1 cgd /*
310 1.6 cgd * If the cache is max'd out, walk the lru list for a buffer we
311 1.6 cgd * can flush. If we find one, write it (if necessary) and take it
312 1.6 cgd * off any lists. If we don't find anything we grow the cache anyway.
313 1.1 cgd * The cache never shrinks.
314 1.1 cgd */
315 1.6 cgd for (bp = mp->lqh.cqh_first;
316 1.6 cgd bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
317 1.6 cgd if (!(bp->flags & MPOOL_PINNED)) {
318 1.6 cgd /* Flush if dirty. */
319 1.6 cgd if (bp->flags & MPOOL_DIRTY &&
320 1.6 cgd mpool_write(mp, bp) == RET_ERROR)
321 1.1 cgd return (NULL);
322 1.1 cgd #ifdef STATISTICS
323 1.1 cgd ++mp->pageflush;
324 1.1 cgd #endif
325 1.6 cgd /* Remove from the hash and lru queues. */
326 1.6 cgd head = &mp->hqh[HASHKEY(bp->pgno)];
327 1.6 cgd CIRCLEQ_REMOVE(head, bp, hq);
328 1.6 cgd CIRCLEQ_REMOVE(&mp->lqh, bp, q);
329 1.1 cgd #ifdef DEBUG
330 1.15 christos {
331 1.15 christos void *spage = bp->page;
332 1.15 christos (void)memset(bp, 0xff,
333 1.15 christos (size_t)(sizeof(BKT) + mp->pagesize));
334 1.6 cgd bp->page = spage;
335 1.1 cgd }
336 1.1 cgd #endif
337 1.6 cgd return (bp);
338 1.1 cgd }
339 1.1 cgd
340 1.10 christos new: if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
341 1.1 cgd return (NULL);
342 1.1 cgd #ifdef STATISTICS
343 1.1 cgd ++mp->pagealloc;
344 1.1 cgd #endif
345 1.6 cgd #if defined(DEBUG) || defined(PURIFY)
346 1.15 christos (void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
347 1.1 cgd #endif
348 1.10 christos bp->page = (char *)(void *)bp + sizeof(BKT);
349 1.1 cgd ++mp->curcache;
350 1.6 cgd return (bp);
351 1.1 cgd }
352 1.1 cgd
353 1.1 cgd /*
354 1.6 cgd * mpool_write
355 1.6 cgd * Write a page to disk.
356 1.1 cgd */
357 1.1 cgd static int
358 1.16 christos mpool_write(MPOOL *mp, BKT *bp)
359 1.1 cgd {
360 1.1 cgd off_t off;
361 1.1 cgd
362 1.1 cgd #ifdef STATISTICS
363 1.1 cgd ++mp->pagewrite;
364 1.1 cgd #endif
365 1.6 cgd
366 1.6 cgd /* Run through the user's filter. */
367 1.6 cgd if (mp->pgout)
368 1.6 cgd (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
369 1.6 cgd
370 1.6 cgd off = mp->pagesize * bp->pgno;
371 1.10 christos if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
372 1.1 cgd return (RET_ERROR);
373 1.11 scw
374 1.11 scw /*
375 1.11 scw * Re-run through the input filter since this page may soon be
376 1.11 scw * accessed via the cache, and whatever the user's output filter
377 1.11 scw * did may screw things up if we don't let the input filter
378 1.11 scw * restore the in-core copy.
379 1.11 scw */
380 1.11 scw if (mp->pgin)
381 1.11 scw (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
382 1.6 cgd
383 1.6 cgd bp->flags &= ~MPOOL_DIRTY;
384 1.1 cgd return (RET_SUCCESS);
385 1.1 cgd }
386 1.1 cgd
387 1.1 cgd /*
388 1.6 cgd * mpool_look
389 1.6 cgd * Lookup a page in the cache.
390 1.1 cgd */
391 1.1 cgd static BKT *
392 1.16 christos mpool_look(MPOOL *mp, pgno_t pgno)
393 1.1 cgd {
394 1.6 cgd struct _hqh *head;
395 1.6 cgd BKT *bp;
396 1.1 cgd
397 1.6 cgd head = &mp->hqh[HASHKEY(pgno)];
398 1.6 cgd for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
399 1.6 cgd if (bp->pgno == pgno) {
400 1.1 cgd #ifdef STATISTICS
401 1.1 cgd ++mp->cachehit;
402 1.1 cgd #endif
403 1.6 cgd return (bp);
404 1.1 cgd }
405 1.1 cgd #ifdef STATISTICS
406 1.1 cgd ++mp->cachemiss;
407 1.1 cgd #endif
408 1.1 cgd return (NULL);
409 1.1 cgd }
410 1.1 cgd
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 *
 *	Writes the pool geometry and the event counters to stderr, followed
 *	by the page numbers currently on the lru chain in lru order, ten
 *	per line.  A page number is suffixed with "d" when the page is
 *	dirty and "P" when it is pinned.  Only compiled when STATISTICS is
 *	defined.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when no lookup has happened (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/*
		 * Page numbers are printed the same way as the page count
		 * above: cast to u_long and formatted with %lu.
		 */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
459