/*	$NetBSD: subr_pool.c,v 1.20 1999/03/31 01:14:06 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size.  Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `ph_itemlist' in each page header.  The memory for building
 * the page list is either taken from the allocated pages themselves (for
 * small pool items) or taken from an internal pool of page headers (`phpool').
 */
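
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): the basic lifecycle of a dynamically created pool.  The item
 * type, pool name and malloc type below are hypothetical; NULL alloc and
 * release pointers select the default page allocator defined below.
 */
#if 0
struct frobitem {			/* hypothetical client structure */
	int	f_state;
};

static struct pool *frobpool;

void
frob_init()
{

	/* Prime with 16 items; default page size and page allocator. */
	frobpool = pool_create(sizeof(struct frobitem), 0, 0, 16,
	    "frobpl", 0, NULL, NULL, M_TEMP);
}

void
frob_use()
{
	struct frobitem *fi;

	fi = pool_get(frobpool, PR_WAITOK);	/* may sleep for memory */
	fi->f_state = 0;
	/* ... use the item, then hand it back ... */
	pool_put(frobpool, fi);
}

void
frob_done()
{

	pool_destroy(frobpool);		/* all items must be back first */
}
#endif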

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
static struct pool phpool;

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp = NULL;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
				ph_hashlist;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
};

struct pool_item {
#ifdef DIAGNOSTIC
	int pi_magic;
#define PI_MAGIC 0xdeadbeef
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};


#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
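
/*
 * Worked example (editor's addition, hypothetical numbers): with 4KB
 * pages (pr_pageshift == 12) and PR_HASHTABSIZE == 32, an item at
 * 0xdeadb123 lies in the page at 0xdeadb000 (address & pr_pagemask),
 * and PR_HASH_INDEX yields (0xdeadb000 >> 12) & 31 == 0xdeadb & 0x1f
 * == 27, so that page's off-page header hangs off hash bucket 27.
 */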


static struct pool_item_header
		*pr_find_pagehead __P((struct pool *, caddr_t));
static void	pr_rmpage __P((struct pool *, struct pool_item_header *));
static int	pool_catchup __P((struct pool *));
static int	pool_prime_page __P((struct pool *, caddr_t));
static void	*pool_page_alloc __P((unsigned long, int, int));
static void	pool_page_free __P((void *, unsigned long, int));


#ifdef POOL_DIAGNOSTIC
/*
 * Pool log entry.  An array of these is allocated in pool_create().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static void	pr_log __P((struct pool *, void *, int, const char *, long));
static void	pr_printlog __P((struct pool *));

static __inline__ void
pr_log(pp, v, action, file, line)
	struct pool	*pp;
	void		*v;
	int		action;
	const char	*file;
	long		line;
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry.  Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(pp)
	struct pool *pp;
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	pool_print(pp, "printlog");

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			printf("log entry %d:\n", i);
			printf("\taction = %s, addr = %p\n",
			    pl->pl_action == PRLOG_GET ? "get" : "put",
			    pl->pl_addr);
			printf("\tfile: %s at line %ld\n",
			    pl->pl_file, pl->pl_line);
		}
		if (++n >= pp->pr_logsize)
			n = 0;
	}
}
#else
#define	pr_log(pp, v, action, file, line)
#define	pr_printlog(pp)
#endif


/*
 * Return the pool page header based on page address.
 */
static __inline__ struct pool_item_header *
pr_find_pagehead(pp, page)
	struct pool *pp;
	caddr_t page;
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
	     ph != NULL;
	     ph = LIST_NEXT(ph, ph_hashlist)) {
		if (ph->ph_page == page)
			return (ph);
	}
	return (NULL);
}

/*
 * Remove a page from the pool.
 */
static __inline__ void
pr_rmpage(pp, ph)
	struct pool *pp;
	struct pool_item_header *ph;
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it.
	 */
	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
	(*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
	pp->pr_npages--;
	pp->pr_npagefree++;

	if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		LIST_REMOVE(ph, ph_hashlist);
		pool_put(&phpool, ph);
	}

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
		     ph = TAILQ_NEXT(ph, ph_pagelist))
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}
}

/*
 * Allocate and initialize a pool.
 */
struct pool *
pool_create(size, align, ioff, nitems, wchan, pagesz, alloc, release, mtype)
	size_t	size;
	u_int	align;
	u_int	ioff;
	int	nitems;
	char	*wchan;
	size_t	pagesz;
	void	*(*alloc) __P((unsigned long, int, int));
	void	(*release) __P((void *, unsigned long, int));
	int	mtype;
{
	struct pool *pp;
	int flags;

	pp = (struct pool *)malloc(sizeof(*pp), M_POOL, M_NOWAIT);
	if (pp == NULL)
		return (NULL);

	flags = PR_FREEHEADER;
#ifdef POOL_DIAGNOSTIC
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

	pool_init(pp, size, align, ioff, flags, wchan, pagesz,
	    alloc, release, mtype);

	if (nitems != 0) {
		if (pool_prime(pp, nitems, NULL) != 0) {
			pool_destroy(pp);
			return (NULL);
		}
	}

	return (pp);
}
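
/*
 * Example call (editor's addition, hypothetical values): a pool whose
 * items must start on a 64-byte boundary could be created with
 *
 *	pool_create(sizeof(struct xframe), 64, 0, 0, "xfrpl",
 *	    0, NULL, NULL, M_TEMP);
 *
 * i.e. `align' 64 and `ioff' 0, so offset 0 of each item falls on a
 * 64-byte boundary within its page.
 */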

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(pp, size, align, ioff, flags, wchan, pagesz, alloc, release, mtype)
	struct pool	*pp;
	size_t		size;
	u_int		align;
	u_int		ioff;
	int		flags;
	char		*wchan;
	size_t		pagesz;
	void		*(*alloc) __P((unsigned long, int, int));
	void		(*release) __P((void *, unsigned long, int));
	int		mtype;
{
	int off, slack, i;

	/*
	 * Check arguments and construct default values.
	 */
	if (!powerof2(pagesz) || pagesz > PAGE_SIZE)
		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);

	if (alloc == NULL && release == NULL) {
		alloc = pool_page_alloc;
		release = pool_page_free;
		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
	} else if ((alloc != NULL && release != NULL) == 0) {
		/* If you specify one, you must specify both. */
		panic("pool_init: must specify alloc and release together");
	}

	if (pagesz == 0)
		pagesz = PAGE_SIZE;

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	TAILQ_INIT(&pp->pr_pagelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = ALIGN(size);
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_mtype = mtype;
	pp->pr_alloc = alloc;
	pp->pr_free = release;
	pp->pr_pagesz = pagesz;
	pp->pr_pagemask = ~(pagesz - 1);
	pp->pr_pageshift = ffs(pagesz) - 1;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap = 0;
	memset(&pp->pr_hardlimit_warning_last, 0,
	    sizeof(pp->pr_hardlimit_warning_last));

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page.  Off-page page headers
	 * go on a hash table, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < pagesz/16) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off =
		    pagesz - ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = pagesz;
		for (i = 0; i < PR_HASHTABSIZE; i++) {
			LIST_INIT(&pp->pr_hashtab[i]);
		}
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if ((flags & PR_LOGGING) != 0) {
		pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		    M_TEMP, M_NOWAIT);
		if (pp->pr_log == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	simple_lock_init(&pp->pr_lock);
	lockinit(&pp->pr_resourcelock, PSWP, wchan, 0, 0);

	/*
	 * Initialize private page header pool if we haven't done so yet.
	 */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", 0, 0, 0, 0);
	}

	return;
}
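
/*
 * Illustrative sketch (editor's addition): because pool_init() does not
 * allocate the pool structure itself, a subsystem can set up a pool
 * before malloc() is usable.  The item type and names are hypothetical.
 */
#if 0
struct widget {				/* hypothetical item type */
	int	w_id;
};

static struct pool widgetpool;		/* static; no malloc() needed */

void
widget_bootstrap()
{

	/* Default alignment, item offset 0, default page allocator. */
	pool_init(&widgetpool, sizeof(struct widget), 0, 0,
	    0, "wdgtpl", 0, NULL, NULL, M_TEMP);
}
#endif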

/*
 * De-commission a pool resource.
 */
void
pool_destroy(pp)
	struct pool *pp;
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp);
		panic("pool_destroy: pool busy: still out: %u\n",
		    pp->pr_nout);
	}
#endif

	/* Remove all pages */
	if ((pp->pr_roflags & PR_STATIC) == 0)
		while ((ph = pp->pr_pagelist.tqh_first) != NULL)
			pr_rmpage(pp, ph);

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	drainpp = NULL;

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif

	if (pp->pr_roflags & PR_FREEHEADER)
		free(pp, M_POOL);
}


/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void *
_pool_get(pp, flags, file, line)
	struct pool *pp;
	int flags;
	const char *file;
	long line;
#else
void *
pool_get(pp, flags)
	struct pool *pp;
	int flags;
#endif
{
	void *v;
	struct pool_item *pi;
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if ((pp->pr_roflags & PR_STATIC) && (flags & PR_MALLOCOK)) {
		pr_printlog(pp);
		panic("pool_get: static");
	}
#endif

	if (curproc == NULL && (flags & PR_WAITOK) != 0)
		panic("pool_get: must have NOWAIT");

	simple_lock(&pp->pr_lock);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit) {
		simple_unlock(&pp->pr_lock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if (flags & PR_WAITOK) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			simple_unlock(&pp->pr_lock);
			tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
			simple_lock(&pp->pr_lock);
			goto startover;
		}
		if (pp->pr_hardlimit_warning != NULL) {
			/*
			 * Log a message that the hard limit has been hit.
			 */
			struct timeval curtime, logdiff;
			int s = splclock();
			curtime = mono_time;
			splx(s);
			timersub(&curtime, &pp->pr_hardlimit_warning_last,
			    &logdiff);
			if (logdiff.tv_sec >= pp->pr_hardlimit_ratecap) {
				pp->pr_hardlimit_warning_last = curtime;
				log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
			}
		}
		simple_unlock(&pp->pr_lock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
		void *v;
		int lkflags = LK_EXCLUSIVE | LK_INTERLOCK |
		    ((flags & PR_WAITOK) == 0 ? LK_NOWAIT : 0);

#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_lock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent\n");
		}
#endif

		/* Get long-term lock on pool */
		if (lockmgr(&pp->pr_resourcelock, lkflags, &pp->pr_lock) != 0)
			return (NULL);

		/* Check if pool became non-empty while we slept */
		if ((ph = pp->pr_curpage) != NULL)
			goto again;

		/* Call the page back-end allocator for more memory */
		v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
		if (v == NULL) {
			if (flags & PR_URGENT)
				panic("pool_get: urgent");
			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 * XXX: we actually want to wait just until
			 * the page allocator has memory again. Depending
			 * on this pool's usage, we might get stuck here
			 * for a long time.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			simple_lock(&pp->pr_lock);
			(void) lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
			pp->pr_flags |= PR_WANTED;
			simple_unlock(&pp->pr_lock);
			tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
			simple_lock(&pp->pr_lock);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pp->pr_npagealloc++;
		pool_prime_page(pp, v);

 again:
		/* Re-acquire pool interlock */
		simple_lock(&pp->pr_lock);
		lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);

		/* Start the allocation process over. */
		goto startover;
	}

	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)
		panic("pool_get: %s: page empty", pp->pr_wchan);
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		simple_unlock(&pp->pr_lock);
		printf("pool_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_get: nitems inconsistent\n");
	}
#endif
	pr_log(pp, v, PRLOG_GET, file, line);

#ifdef DIAGNOSTIC
	if (pi->pi_magic != PI_MAGIC) {
		pr_printlog(pp);
		panic("pool_get(%s): free list modified: magic=%x; page %p;"
		    " item addr %p\n",
		    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
	}
#endif

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;
	}
	ph->ph_nmissing++;
	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase
		 * the chance for "high water" pages to be freed.
		 *
		 * First, move the now empty page to the head of
		 * the page list.
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
		while ((ph = TAILQ_NEXT(ph, ph_pagelist)) != NULL)
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (pp->pr_nitems < pp->pr_minitems && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_lock);
	return (v);
}
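
/*
 * Illustrative sketch (editor's addition): calling pool_get() from a
 * context that must not sleep.  The pool and item type are hypothetical;
 * the caller raises the spl the pool is used at and must tolerate a
 * NULL return (no PR_WAITOK, so the allocator never blocks).
 */
#if 0
struct pktmeta {			/* hypothetical item type */
	int	pm_len;
};

static struct pool *pktpool;		/* created elsewhere */

static struct pktmeta *
pkt_intr_alloc()
{
	struct pktmeta *m;
	int s;

	s = splimp();			/* pool is shared with interrupt code */
	m = pool_get(pktpool, 0);	/* no PR_WAITOK: never sleeps */
	splx(s);
	return (m);			/* may be NULL; caller must check */
}
#endif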

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(pp, v, file, line)
	struct pool *pp;
	void *v;
	const char *file;
	long line;
#else
void
pool_put(pp, v)
	struct pool *pp;
	void *v;
#endif
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;

	page = (caddr_t)((u_long)v & pp->pr_pagemask);

	simple_lock(&pp->pr_lock);

	pr_log(pp, v, PRLOG_PUT, file, line);

	if ((ph = pr_find_pagehead(pp, page)) == NULL) {
		pr_printlog(pp);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#endif
	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		simple_unlock(&pp->pr_lock);
		return;
	}

	/*
	 * If this page is now complete, move it to the end of the pagelist.
	 * If this page has just become un-empty, move it to the head.
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_maxpages) {
#if 0
			timeout(pool_drain, 0, pool_inactive_time*hz);
#else
			pr_rmpage(pp, ph);
#endif
		} else {
			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
			ph->ph_time = time;

			/* XXX - update curpage */
			for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
			     ph = TAILQ_NEXT(ph, ph_pagelist))
				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
					break;

			pp->pr_curpage = ph;
		}
	}

	simple_unlock(&pp->pr_lock);
}

/*
 * Add N items to the pool.
 */
int
pool_prime(pp, n, storage)
	struct pool *pp;
	int n;
	caddr_t storage;
{
	caddr_t cp;
	int newnitems, newpages;

#ifdef DIAGNOSTIC
	if (storage && !(pp->pr_roflags & PR_STATIC))
		panic("pool_prime: static");
	/* !storage && static caught below */
#endif

	(void)lockmgr(&pp->pr_resourcelock, LK_EXCLUSIVE, NULL);
	newnitems = pp->pr_minitems + n;
	newpages =
	    roundup(newnitems, pp->pr_itemsperpage) / pp->pr_itemsperpage
	    - pp->pr_minpages;

	while (newpages-- > 0) {

		if (pp->pr_roflags & PR_STATIC) {
			cp = storage;
			storage += pp->pr_pagesz;
		} else {
			cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
		}

		if (cp == NULL) {
			(void)lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
			return (ENOMEM);
		}

		pool_prime_page(pp, cp);
		pp->pr_minpages++;
	}

	pp->pr_minitems = newnitems;

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	(void)lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
	return (0);
}
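
/*
 * Illustrative sketch (editor's addition): a PR_STATIC pool draws its
 * pages from caller-supplied, page-aligned storage, and pool_prime()
 * is handed that storage directly.  The item type, sizes and the means
 * of page-aligning `staticstore' are hypothetical.
 */
#if 0
struct statitem {			/* hypothetical item type */
	int	st_val;
};

static struct pool staticpool;
static char staticstore[4 * NBPG];	/* assumed page-aligned */

void
static_pool_setup()
{

	pool_init(&staticpool, sizeof(struct statitem), 0, 0,
	    PR_STATIC, "statpl", 0, NULL, NULL, 0);
	(void) pool_prime(&staticpool, 4 * staticpool.pr_itemsperpage,
	    staticstore);
}
#endif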

/*
 * Add a page worth of items to the pool.
 */
static int
pool_prime_page(pp, storage)
	struct pool *pp;
	caddr_t storage;
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

	simple_lock(&pp->pr_lock);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		ph = (struct pool_item_header *)(cp + pp->pr_phoffset);
	} else {
		ph = pool_get(&phpool, PR_URGENT);
		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
		    ph, ph_hashlist);
	}

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	ph->ph_time.tv_sec = ph->ph_time.tv_usec = 0;

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;

	simple_unlock(&pp->pr_lock);
	return (0);
}

/*
 * Like pool_prime(), except this is used by pool_get() when nitems
 * drops below the low water mark.  This is used to catch up nitems
 * with the low water mark.
 *
 * Note 1, we never wait for memory or locks here, we let the caller
 * decide what to do.
 *
 * Note 2, this doesn't work with static pools.
 *
 * Note 3, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(pp)
	struct pool *pp;
{
	caddr_t cp;
	int error = 0;
	u_long nitems;

	if (pp->pr_roflags & PR_STATIC) {
		/*
		 * We dropped below the low water mark, and this is not a
		 * good thing.  Log a warning.
		 */
		printf("WARNING: static pool `%s' dropped below low water "
		    "mark\n", pp->pr_wchan);
		return (0);
	}

	for (;;) {
		/*
		 * Pool is locked; get the current number of items
		 * available.
		 */
		nitems = pp->pr_nitems;

		/* Acquire the resource lock and release the interlock. */
		error = lockmgr(&pp->pr_resourcelock,
		    LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, &pp->pr_lock);
		if (error)
			break;

		if (nitems >= pp->pr_minitems) {
			simple_lock(&pp->pr_lock);
			(void) lockmgr(&pp->pr_resourcelock, LK_RELEASE,
			    NULL);
			break;
		}

		/* Call the page back-end allocator for more memory. */
		cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
		if (cp == NULL) {
			simple_lock(&pp->pr_lock);
			(void) lockmgr(&pp->pr_resourcelock, LK_RELEASE,
			    NULL);
			error = ENOMEM;
			break;
		}

		pool_prime_page(pp, cp);

		simple_lock(&pp->pr_lock);
		(void) lockmgr(&pp->pr_resourcelock, LK_RELEASE,
		    NULL);
	}

	return (error);
}

void
pool_setlowat(pp, n)
	pool_handle_t	pp;
	int n;
{
	int error;

	(void)lockmgr(&pp->pr_resourcelock, LK_EXCLUSIVE, NULL);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
	(void)lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);

	/* Make sure we're caught up with the newly-set low water mark. */
	simple_lock(&pp->pr_lock);
	error = pool_catchup(pp);
	simple_unlock(&pp->pr_lock);

	if (error) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
}

void
pool_sethiwat(pp, n)
	pool_handle_t	pp;
	int n;
{

	(void)lockmgr(&pp->pr_resourcelock, LK_EXCLUSIVE, NULL);
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
	(void)lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
}

void
pool_sethardlimit(pp, n, warnmess, ratecap)
	pool_handle_t pp;
	int n;
	const char *warnmess;
	int ratecap;
{

	simple_lock(&pp->pr_lock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap = ratecap;
	memset(&pp->pr_hardlimit_warning_last, 0,
	    sizeof(pp->pr_hardlimit_warning_last));

	/*
	 * In-line version of pool_sethiwat(), because we need to release
	 * the interlock.
	 */
	(void)lockmgr(&pp->pr_resourcelock, LK_EXCLUSIVE | LK_INTERLOCK,
	    &pp->pr_lock);
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
	(void)lockmgr(&pp->pr_resourcelock, LK_RELEASE, NULL);
}
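
/*
 * Illustrative sketch (editor's addition): tuning a pool after creation.
 * The pool, counts and warning text are hypothetical; the last argument
 * to pool_sethardlimit() is the minimum number of seconds between
 * repeated warnings, as enforced against `pr_hardlimit_ratecap' above.
 */
#if 0
void
frob_tune(pp)
	struct pool *pp;
{

	pool_setlowat(pp, 32);		/* keep at least 32 items primed */
	pool_sethiwat(pp, 512);		/* free idle pages beyond 512 items */
	pool_sethardlimit(pp, 1024,
	    "WARNING: frob pool limit reached", 60);
}
#endif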

/*
 * Default page allocator.
 */
static void *
pool_page_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage(waitok));
}

static void
pool_page_free(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

	uvm_km_free_poolpage((vaddr_t)v);
}

/*
 * Alternate pool page allocator for pools that know they will
 * never be accessed in interrupt context.
 */
void *
pool_page_alloc_nointr(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	/*
	 * With UVM, we can use the kernel_map.
	 */
	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
	    waitok));
}

void
pool_page_free_nointr(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
}
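
/*
 * Illustrative sketch (editor's addition): a pool used only from process
 * context can pass the nointr back-end pair explicitly so its pages come
 * from kernel_map.  The item type, pool name and malloc type here are
 * hypothetical.
 */
#if 0
static struct pool *sesspool;

void
sess_pool_init()
{

	sesspool = pool_create(sizeof(struct session), 0, 0, 0,
	    "sesspl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
	    M_TEMP);
}
#endif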


/*
 * Release all complete pages that have not been used recently.
 */
void
pool_reclaim(pp)
	pool_handle_t pp;
{
	struct pool_item_header *ph, *phnext;
	struct timeval curtime = time;

	if (pp->pr_roflags & PR_STATIC)
		return;

	if (simple_lock_try(&pp->pr_lock) == 0)
		return;

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		if (ph->ph_nmissing == 0) {
			struct timeval diff;
			timersub(&curtime, &ph->ph_time, &diff);
			if (diff.tv_sec < pool_inactive_time)
				continue;
			pr_rmpage(pp, ph);
		}
	}

	simple_unlock(&pp->pr_lock);
}


/*
 * Drain pools, one at a time.
 */
void
pool_drain(arg)
	void *arg;
{
	struct pool *pp;
	int s = splimp();

	/* XXX:lock pool head */
	if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL) {
		splx(s);
		return;
	}

	pp = drainpp;
	drainpp = TAILQ_NEXT(pp, pr_poollist);
	/* XXX:unlock pool head */

	pool_reclaim(pp);
	splx(s);
}


#if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
/*
 * Diagnostic helpers.
 */
void
pool_print(pp, label)
	struct pool *pp;
	char *label;
{

	if (label != NULL)
		printf("%s: ", label);

	printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
	    "         npages %u minitems %u itemsperpage %u itemoffset %u\n"
	    "         nidle %lu\n",
	    pp->pr_wchan,
	    pp->pr_nget,
	    pp->pr_nput,
	    pp->pr_npagealloc,
	    pp->pr_npagefree,
	    pp->pr_npages,
	    pp->pr_minitems,
	    pp->pr_itemsperpage,
	    pp->pr_itemoffset,
	    pp->pr_nidle);
}

int
pool_chk(pp, label)
	struct pool *pp;
	char *label;
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_lock);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
	     ph = TAILQ_NEXT(ph, ph_pagelist)) {

		struct pool_item *pi;
		int n;
		caddr_t page;

		page = (caddr_t)((u_long)ph & pp->pr_pagemask);
		if (page != ph->ph_page &&
		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " at page head addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    ph, page);
			r++;
			goto out;
		}

		for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
		     pi != NULL;
		     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
			if (pi->pi_magic != PI_MAGIC) {
				if (label != NULL)
					printf("%s: ", label);
				printf("pool(%s): free list modified: magic=%x;"
				    " page %p; item ordinal %d;"
				    " addr %p (p %p)\n",
				    pp->pr_wchan, pi->pi_magic, ph->ph_page,
				    n, pi, page);
				panic("pool");
			}
#endif
			page = (caddr_t)((u_long)pi & pp->pr_pagemask);
			if (page == ph->ph_page)
				continue;

			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    n, pi, page);
			r++;
			goto out;
		}
	}
out:
	simple_unlock(&pp->pr_lock);
	return (r);
}
#endif /* POOL_DIAGNOSTIC || DEBUG */