/*	$NetBSD: mpool.c,v 1.9 1998/06/30 21:30:52 thorpej Exp $	*/

/*-
 * Copyright (c) 1990, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
#else
__RCSID("$NetBSD: mpool.c,v 1.9 1998/06/30 21:30:52 thorpej Exp $");
#endif
#endif /* LIBC_SCCS and not lint */

#include "namespace.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/stat.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <db.h>

#define	__MPOOLINTERFACE_PRIVATE
#include <mpool.h>

#ifdef __weak_alias
__weak_alias(mpool_close,_mpool_close);
__weak_alias(mpool_filter,_mpool_filter);
__weak_alias(mpool_get,_mpool_get);
__weak_alias(mpool_new,_mpool_new);
__weak_alias(mpool_open,_mpool_open);
__weak_alias(mpool_put,_mpool_put);
__weak_alias(mpool_sync,_mpool_sync);
#endif

static BKT *mpool_bkt __P((MPOOL *));
static BKT *mpool_look __P((MPOOL *, pgno_t));
static int  mpool_write __P((MPOOL *, BKT *));

/*
 * mpool_open --
 *	Initialize a memory pool.
 */
MPOOL *
mpool_open(key, fd, pagesize, maxcache)
	void *key;
	int fd;
	pgno_t pagesize, maxcache;
{
	struct stat sb;
	MPOOL *mp;
	int entry;

	/*
	 * Get information about the file.
	 *
	 * XXX
	 * We don't currently handle pipes, although we should.
	 */
	if (fstat(fd, &sb))
		return (NULL);
	if (!S_ISREG(sb.st_mode)) {
		errno = ESPIPE;
		return (NULL);
	}

	/* Allocate and initialize the MPOOL cookie. */
	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
		return (NULL);
	CIRCLEQ_INIT(&mp->lqh);
	for (entry = 0; entry < HASHSIZE; ++entry)
		CIRCLEQ_INIT(&mp->hqh[entry]);
	mp->maxcache = maxcache;
	mp->npages = sb.st_size / pagesize;
	mp->pagesize = pagesize;
	mp->fd = fd;
	return (mp);
}
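
/*
 * Usage sketch (illustrative, not part of the library): open a pool over
 * an existing regular file.  The file name "datafile", the 4K page size,
 * and the 128-page cache limit are arbitrary values chosen for the example;
 * the key argument is unused by this implementation, so NULL is passed.
 * Error handling via err(3) assumes <err.h> and <fcntl.h> are included.
 *
 *	int fd;
 *	MPOOL *mp;
 *
 *	if ((fd = open("datafile", O_RDWR)) == -1)
 *		err(1, "open");
 *	if ((mp = mpool_open(NULL, fd, 4096, 128)) == NULL)
 *		err(1, "mpool_open");
 */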

/*
 * mpool_filter --
 *	Initialize input/output filters.
 */
void
mpool_filter(mp, pgin, pgout, pgcookie)
	MPOOL *mp;
	void (*pgin) __P((void *, pgno_t, void *));
	void (*pgout) __P((void *, pgno_t, void *));
	void *pgcookie;
{
	mp->pgin = pgin;
	mp->pgout = pgout;
	mp->pgcookie = pgcookie;
}
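
/*
 * Usage sketch (illustrative, not part of the library): install conversion
 * filters so pages are translated as they move between disk and memory.
 * The callback names "my_pgin" and "my_pgout" are hypothetical; pgin is
 * called after a page is read in by mpool_get(), pgout is called before a
 * dirty page is written by mpool_write().
 *
 *	static void my_pgin __P((void *, pgno_t, void *));
 *	static void my_pgout __P((void *, pgno_t, void *));
 *
 *	mpool_filter(mp, my_pgin, my_pgout, NULL);
 */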

/*
 * mpool_new --
 *	Get a new page of memory.
 */
void *
mpool_new(mp, pgnoaddr)
	MPOOL *mp;
	pgno_t *pgnoaddr;
{
	struct _hqh *head;
	BKT *bp;

	if (mp->npages == MAX_PAGE_NUMBER) {
		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
		abort();
	}
#ifdef STATISTICS
	++mp->pagenew;
#endif
	/*
	 * Get a BKT from the cache.  Assign a new page number, attach
	 * it to the head of the hash chain, the tail of the lru chain,
	 * and return.
	 */
	if ((bp = mpool_bkt(mp)) == NULL)
		return (NULL);
	*pgnoaddr = bp->pgno = mp->npages++;
	bp->flags = MPOOL_PINNED;

	head = &mp->hqh[HASHKEY(bp->pgno)];
	CIRCLEQ_INSERT_HEAD(head, bp, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
	return (bp->page);
}
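
/*
 * Usage sketch (illustrative, not part of the library): allocate a fresh
 * page at the end of the file.  The returned page is pinned and must
 * eventually be handed back with mpool_put(); passing MPOOL_DIRTY there
 * marks it for writing.  mp->pagesize is the page size recorded at
 * mpool_open() time.
 *
 *	pgno_t pgno;
 *	void *page;
 *
 *	if ((page = mpool_new(mp, &pgno)) == NULL)
 *		err(1, "mpool_new");
 *	memset(page, 0, mp->pagesize);
 *	if (mpool_put(mp, page, MPOOL_DIRTY) == RET_ERROR)
 *		err(1, "mpool_put");
 */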

/*
 * mpool_get --
 *	Get a page.
 */
void *
mpool_get(mp, pgno, flags)
	MPOOL *mp;
	pgno_t pgno;
	u_int flags;				/* XXX not used? */
{
	struct _hqh *head;
	BKT *bp;
	off_t off;
	int nr;

	/* Check for attempt to retrieve a non-existent page. */
	if (pgno >= mp->npages) {
		errno = EINVAL;
		return (NULL);
	}

#ifdef STATISTICS
	++mp->pageget;
#endif

	/* Check for a page that is cached. */
	if ((bp = mpool_look(mp, pgno)) != NULL) {
#ifdef DEBUG
		if (bp->flags & MPOOL_PINNED) {
			(void)fprintf(stderr,
			    "mpool_get: page %d already pinned\n", bp->pgno);
			abort();
		}
#endif
		/*
		 * Move the page to the head of the hash chain and the tail
		 * of the lru chain.
		 */
		head = &mp->hqh[HASHKEY(bp->pgno)];
		CIRCLEQ_REMOVE(head, bp, hq);
		CIRCLEQ_INSERT_HEAD(head, bp, hq);
		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

		/* Return a pinned page. */
		bp->flags |= MPOOL_PINNED;
		return (bp->page);
	}

	/* Get a page from the cache. */
	if ((bp = mpool_bkt(mp)) == NULL)
		return (NULL);

	/* Read in the contents. */
#ifdef STATISTICS
	++mp->pageread;
#endif
	off = mp->pagesize * pgno;
	if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) {
		if (nr >= 0)
			errno = EFTYPE;
		return (NULL);
	}

	/* Set the page number, pin the page. */
	bp->pgno = pgno;
	bp->flags = MPOOL_PINNED;

	/*
	 * Add the page to the head of the hash chain and the tail
	 * of the lru chain.
	 */
	head = &mp->hqh[HASHKEY(bp->pgno)];
	CIRCLEQ_INSERT_HEAD(head, bp, hq);
	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);

	/* Run through the user's filter. */
	if (mp->pgin != NULL)
		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);

	return (bp->page);
}
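
/*
 * Usage sketch (illustrative, not part of the library): fetch page 0 for
 * read-only use.  The page comes back pinned, so it must be released with
 * mpool_put() even when it has not been modified (flags 0).
 *
 *	void *page;
 *
 *	if ((page = mpool_get(mp, 0, 0)) == NULL)
 *		err(1, "mpool_get");
 *	... examine the page contents ...
 *	(void)mpool_put(mp, page, 0);
 */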

/*
 * mpool_put --
 *	Return a page.
 */
int
mpool_put(mp, page, flags)
	MPOOL *mp;
	void *page;
	u_int flags;
{
	BKT *bp;

#ifdef STATISTICS
	++mp->pageput;
#endif
	bp = (BKT *)((char *)page - sizeof(BKT));
#ifdef DEBUG
	if (!(bp->flags & MPOOL_PINNED)) {
		(void)fprintf(stderr,
		    "mpool_put: page %d not pinned\n", bp->pgno);
		abort();
	}
#endif
	bp->flags &= ~MPOOL_PINNED;
	bp->flags |= flags & MPOOL_DIRTY;
	return (RET_SUCCESS);
}
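
/*
 * Usage sketch (illustrative, not part of the library): a read-modify-write
 * cycle.  The page is pinned by mpool_get(); passing MPOOL_DIRTY to
 * mpool_put() schedules it to be flushed by mpool_sync() or by a later
 * cache eviction in mpool_bkt().
 *
 *	void *page;
 *
 *	if ((page = mpool_get(mp, pgno, 0)) == NULL)
 *		err(1, "mpool_get");
 *	... modify the page contents ...
 *	if (mpool_put(mp, page, MPOOL_DIRTY) == RET_ERROR)
 *		err(1, "mpool_put");
 */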

/*
 * mpool_close --
 *	Close the buffer pool.
 */
int
mpool_close(mp)
	MPOOL *mp;
{
	BKT *bp;

	/* Free up any space allocated to the lru pages. */
	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
		free(bp);
	}

	/* Free the MPOOL cookie. */
	free(mp);
	return (RET_SUCCESS);
}

/*
 * mpool_sync --
 *	Sync the pool to disk.
 */
int
mpool_sync(mp)
	MPOOL *mp;
{
	BKT *bp;

	/* Walk the lru chain, flushing any dirty pages to disk. */
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
		if (bp->flags & MPOOL_DIRTY &&
		    mpool_write(mp, bp) == RET_ERROR)
			return (RET_ERROR);

	/* Sync the file descriptor. */
	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
}
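
/*
 * Usage sketch (illustrative, not part of the library): flush and tear
 * down.  mpool_close() frees the cache but does not write dirty pages or
 * close the underlying descriptor, so sync first and close the file
 * descriptor afterwards.
 *
 *	if (mpool_sync(mp) == RET_ERROR)
 *		err(1, "mpool_sync");
 *	(void)mpool_close(mp);
 *	(void)close(fd);
 */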

/*
 * mpool_bkt --
 *	Get a page from the cache (or create one).
 */
static BKT *
mpool_bkt(mp)
	MPOOL *mp;
{
	struct _hqh *head;
	BKT *bp;

	/* If under the max cached, always create a new page. */
	if (mp->curcache < mp->maxcache)
		goto new;

	/*
	 * If the cache is max'd out, walk the lru list for a buffer we
	 * can flush.  If we find one, write it (if necessary) and take it
	 * off any lists.  If we don't find anything we grow the cache anyway.
	 * The cache never shrinks.
	 */
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
		if (!(bp->flags & MPOOL_PINNED)) {
			/* Flush if dirty. */
			if (bp->flags & MPOOL_DIRTY &&
			    mpool_write(mp, bp) == RET_ERROR)
				return (NULL);
#ifdef STATISTICS
			++mp->pageflush;
#endif
			/* Remove from the hash and lru queues. */
			head = &mp->hqh[HASHKEY(bp->pgno)];
			CIRCLEQ_REMOVE(head, bp, hq);
			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
#ifdef DEBUG
			{ void *spage;
				spage = bp->page;
				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
				bp->page = spage;
			}
#endif
			return (bp);
		}

new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
		return (NULL);
#ifdef STATISTICS
	++mp->pagealloc;
#endif
#if defined(DEBUG) || defined(PURIFY)
	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
#endif
	bp->page = (char *)bp + sizeof(BKT);
	++mp->curcache;
	return (bp);
}

/*
 * mpool_write --
 *	Write a page to disk.
 */
static int
mpool_write(mp, bp)
	MPOOL *mp;
	BKT *bp;
{
	off_t off;

#ifdef STATISTICS
	++mp->pagewrite;
#endif

	/* Run through the user's filter. */
	if (mp->pgout)
		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);

	off = mp->pagesize * bp->pgno;
	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
		return (RET_ERROR);

	bp->flags &= ~MPOOL_DIRTY;
	return (RET_SUCCESS);
}

/*
 * mpool_look --
 *	Lookup a page in the cache.
 */
static BKT *
mpool_look(mp, pgno)
	MPOOL *mp;
	pgno_t pgno;
{
	struct _hqh *head;
	BKT *bp;

	head = &mp->hqh[HASHKEY(pgno)];
	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
		if (bp->pgno == pgno) {
#ifdef STATISTICS
			++mp->cachehit;
#endif
			return (bp);
		}
#ifdef STATISTICS
	++mp->cachemiss;
#endif
	return (NULL);
}

#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif