/*	$NetBSD: subr_pool.c,v 1.63 2001/10/21 00:06:05 chs Exp $	*/
2
3 /*-
4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #include "opt_pool.h"
41 #include "opt_poollog.h"
42 #include "opt_lockdebug.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/errno.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/lock.h>
51 #include <sys/pool.h>
52 #include <sys/syslog.h>
53
54 #include <uvm/uvm.h>
55
56 /*
57 * Pool resource management utility.
58 *
59 * Memory is allocated in pages which are split into pieces according
60 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
61 * in the pool structure and the individual pool items are on a linked list
62 * headed by `ph_itemlist' in each page header. The memory for building
63 * the page list is either taken from the allocated pages themselves (for
64 * small pool items) or taken from an internal pool of page headers (`phpool').
65 */
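
/*
 * Hypothetical usage sketch (illustration only, not part of this file):
 * a subsystem with a "struct foo" would typically initialize a static
 * pool once and then get/put items from it.  The names foo, foo_pool,
 * "foopl" and the M_DEVBUF type are purely illustrative; struct foo is
 * assumed to be defined elsewhere.
 */
#if 0	/* example only */
static struct pool foo_pool;

void
foo_init(void)
{

	/* NULL alloc/release and pagesz 0 select the defaults. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
	    0, NULL, NULL, M_DEVBUF);
}

struct foo *
foo_alloc(void)
{

	return (pool_get(&foo_pool, PR_WAITOK));
}

void
foo_free(struct foo *f)
{

	pool_put(&foo_pool, f);
}
#endif	/* example only */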
66
67 /* List of all pools */
68 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
69
70 /* Private pool for page header structures */
71 static struct pool phpool;
72
73 #ifdef POOL_SUBPAGE
74 /* Pool of subpages for use by normal pools. */
75 static struct pool psppool;
76 #endif
77
78 /* # of seconds to retain page after last use */
79 int pool_inactive_time = 10;
80
81 /* Next candidate for drainage (see pool_drain()) */
82 static struct pool *drainpp;
83
84 /* This spin lock protects both pool_head and drainpp. */
85 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
86
87 struct pool_item_header {
88 /* Page headers */
89 TAILQ_ENTRY(pool_item_header)
90 ph_pagelist; /* pool page list */
91 TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
92 LIST_ENTRY(pool_item_header)
93 ph_hashlist; /* Off-page page headers */
94 int ph_nmissing; /* # of chunks in use */
95 caddr_t ph_page; /* this page's address */
96 struct timeval ph_time; /* last referenced */
97 };
98 TAILQ_HEAD(pool_pagelist,pool_item_header);
99
100 struct pool_item {
101 #ifdef DIAGNOSTIC
102 int pi_magic;
103 #endif
104 #define PI_MAGIC 0xdeadbeef
105 /* Other entries use only this list entry */
106 TAILQ_ENTRY(pool_item) pi_list;
107 };
108
109 #define PR_HASH_INDEX(pp,addr) \
110 (((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
111
112 #define POOL_NEEDS_CATCHUP(pp) \
113 ((pp)->pr_nitems < (pp)->pr_minitems)
114
/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can improve performance by avoiding needless
 * object construction and destruction; both are deferred until
 * absolutely necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */
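
/*
 * Hypothetical usage sketch (illustration only): layering a cache over
 * the foo_pool from the sketch above so objects keep their constructed
 * state between uses.  foo_cache, foo_ctor and foo_dtor are illustrative
 * names; the constructor returns 0 on success, non-zero to fail the
 * allocation, as pool_cache_get() below expects.
 */
#if 0	/* example only */
static struct pool_cache foo_cache;

static int
foo_ctor(void *arg, void *object, int flags)
{
	struct foo *f = object;

	/* Expensive one-time initialization goes here. */
	memset(f, 0, sizeof(*f));
	return (0);
}

static void
foo_dtor(void *arg, void *object)
{

	/* Undo whatever foo_ctor() did. */
}

void
foo_cache_attach(void)
{

	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
}

struct foo *
foo_cache_alloc(void)
{

	return (pool_cache_get(&foo_cache, PR_WAITOK));
}

void
foo_cache_free(struct foo *f)
{

	pool_cache_put(&foo_cache, f);
}
#endif	/* example only */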
140
141 /* The cache group pool. */
142 static struct pool pcgpool;
143
144 /* The pool cache group. */
145 #define PCG_NOBJECTS 16
146 struct pool_cache_group {
147 TAILQ_ENTRY(pool_cache_group)
148 pcg_list; /* link in the pool cache's group list */
149 u_int pcg_avail; /* # available objects */
150 /* pointers to the objects */
151 void *pcg_objects[PCG_NOBJECTS];
152 };
153
154 static void pool_cache_reclaim(struct pool_cache *);
155
156 static int pool_catchup(struct pool *);
157 static void pool_prime_page(struct pool *, caddr_t,
158 struct pool_item_header *);
159 static void *pool_page_alloc(unsigned long, int, int);
160 static void pool_page_free(void *, unsigned long, int);
161 #ifdef POOL_SUBPAGE
162 static void *pool_subpage_alloc(unsigned long, int, int);
163 static void pool_subpage_free(void *, unsigned long, int);
164 #endif
165
166 static void pool_print1(struct pool *, const char *,
167 void (*)(const char *, ...));
168
169 /*
170 * Pool log entry. An array of these is allocated in pool_init().
171 */
172 struct pool_log {
173 const char *pl_file;
174 long pl_line;
175 int pl_action;
176 #define PRLOG_GET 1
177 #define PRLOG_PUT 2
178 void *pl_addr;
179 };
180
181 /* Number of entries in pool log buffers */
182 #ifndef POOL_LOGSIZE
183 #define POOL_LOGSIZE 10
184 #endif
185
186 int pool_logsize = POOL_LOGSIZE;
187
188 #ifdef POOL_DIAGNOSTIC
189 static __inline void
190 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
191 {
192 int n = pp->pr_curlogentry;
193 struct pool_log *pl;
194
195 if ((pp->pr_roflags & PR_LOGGING) == 0)
196 return;
197
198 /*
199 * Fill in the current entry. Wrap around and overwrite
200 * the oldest entry if necessary.
201 */
202 pl = &pp->pr_log[n];
203 pl->pl_file = file;
204 pl->pl_line = line;
205 pl->pl_action = action;
206 pl->pl_addr = v;
207 if (++n >= pp->pr_logsize)
208 n = 0;
209 pp->pr_curlogentry = n;
210 }
211
212 static void
213 pr_printlog(struct pool *pp, struct pool_item *pi,
214 void (*pr)(const char *, ...))
215 {
216 int i = pp->pr_logsize;
217 int n = pp->pr_curlogentry;
218
219 if ((pp->pr_roflags & PR_LOGGING) == 0)
220 return;
221
222 /*
223 * Print all entries in this pool's log.
224 */
225 while (i-- > 0) {
226 struct pool_log *pl = &pp->pr_log[n];
227 if (pl->pl_action != 0) {
228 if (pi == NULL || pi == pl->pl_addr) {
229 (*pr)("\tlog entry %d:\n", i);
230 (*pr)("\t\taction = %s, addr = %p\n",
231 pl->pl_action == PRLOG_GET ? "get" : "put",
232 pl->pl_addr);
233 (*pr)("\t\tfile: %s at line %lu\n",
234 pl->pl_file, pl->pl_line);
235 }
236 }
237 if (++n >= pp->pr_logsize)
238 n = 0;
239 }
240 }
241
242 static __inline void
243 pr_enter(struct pool *pp, const char *file, long line)
244 {
245
246 if (__predict_false(pp->pr_entered_file != NULL)) {
247 printf("pool %s: reentrancy at file %s line %ld\n",
248 pp->pr_wchan, file, line);
249 printf(" previous entry at file %s line %ld\n",
250 pp->pr_entered_file, pp->pr_entered_line);
251 panic("pr_enter");
252 }
253
254 pp->pr_entered_file = file;
255 pp->pr_entered_line = line;
256 }
257
258 static __inline void
259 pr_leave(struct pool *pp)
260 {
261
262 if (__predict_false(pp->pr_entered_file == NULL)) {
263 printf("pool %s not entered?\n", pp->pr_wchan);
264 panic("pr_leave");
265 }
266
267 pp->pr_entered_file = NULL;
268 pp->pr_entered_line = 0;
269 }
270
271 static __inline void
272 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
273 {
274
275 if (pp->pr_entered_file != NULL)
276 (*pr)("\n\tcurrently entered from file %s line %ld\n",
277 pp->pr_entered_file, pp->pr_entered_line);
278 }
279 #else
280 #define pr_log(pp, v, action, file, line)
281 #define pr_printlog(pp, pi, pr)
282 #define pr_enter(pp, file, line)
283 #define pr_leave(pp)
284 #define pr_enter_check(pp, pr)
285 #endif /* POOL_DIAGNOSTIC */
286
287 /*
288 * Return the pool page header based on page address.
289 */
290 static __inline struct pool_item_header *
291 pr_find_pagehead(struct pool *pp, caddr_t page)
292 {
293 struct pool_item_header *ph;
294
295 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
296 return ((struct pool_item_header *)(page + pp->pr_phoffset));
297
298 for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
299 ph != NULL;
300 ph = LIST_NEXT(ph, ph_hashlist)) {
301 if (ph->ph_page == page)
302 return (ph);
303 }
304 return (NULL);
305 }
306
307 /*
308 * Remove a page from the pool.
309 */
310 static __inline void
311 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
312 struct pool_pagelist *pq)
313 {
314 int s;
315
316 /*
317 * If the page was idle, decrement the idle page count.
318 */
319 if (ph->ph_nmissing == 0) {
320 #ifdef DIAGNOSTIC
321 if (pp->pr_nidle == 0)
322 panic("pr_rmpage: nidle inconsistent");
323 if (pp->pr_nitems < pp->pr_itemsperpage)
324 panic("pr_rmpage: nitems inconsistent");
325 #endif
326 pp->pr_nidle--;
327 }
328
329 pp->pr_nitems -= pp->pr_itemsperpage;
330
331 /*
332 * Unlink a page from the pool and release it (or queue it for release).
333 */
334 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
335 if (pq) {
336 TAILQ_INSERT_HEAD(pq, ph, ph_pagelist);
337 } else {
338 (*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
339 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
340 LIST_REMOVE(ph, ph_hashlist);
341 s = splhigh();
342 pool_put(&phpool, ph);
343 splx(s);
344 }
345 }
346 pp->pr_npages--;
347 pp->pr_npagefree++;
348
349 if (pp->pr_curpage == ph) {
350 /*
351 * Find a new non-empty page header, if any.
352 * Start search from the page head, to increase the
353 * chance for "high water" pages to be freed.
354 */
355 TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
356 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
357 break;
358
359 pp->pr_curpage = ph;
360 }
361 }
362
363 /*
364 * Initialize the given pool resource structure.
365 *
366 * We export this routine to allow other kernel parts to declare
367 * static pools that must be initialized before malloc() is available.
368 */
369 void
370 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
371 const char *wchan, size_t pagesz,
372 void *(*alloc)(unsigned long, int, int),
373 void (*release)(void *, unsigned long, int),
374 int mtype)
375 {
376 int off, slack, i;
377
378 #ifdef POOL_DIAGNOSTIC
379 /*
380 * Always log if POOL_DIAGNOSTIC is defined.
381 */
382 if (pool_logsize != 0)
383 flags |= PR_LOGGING;
384 #endif
385
386 /*
387 * Check arguments and construct default values.
388 */
389 if (!powerof2(pagesz))
390 panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
391
392 if (alloc == NULL && release == NULL) {
393 #ifdef POOL_SUBPAGE
394 alloc = pool_subpage_alloc;
395 release = pool_subpage_free;
396 pagesz = POOL_SUBPAGE;
397 #else
398 alloc = pool_page_alloc;
399 release = pool_page_free;
400 pagesz = PAGE_SIZE; /* Rounds to PAGE_SIZE anyhow. */
401 #endif
402 } else if ((alloc != NULL && release != NULL) == 0) {
		/* If you specify one, must specify both. */
404 panic("pool_init: must specify alloc and release together");
405 }
406 #ifdef POOL_SUBPAGE
407 else if (alloc == pool_page_alloc_nointr &&
408 release == pool_page_free_nointr)
409 pagesz = POOL_SUBPAGE;
410 #endif
411
412 if (pagesz == 0)
413 pagesz = PAGE_SIZE;
414
415 if (align == 0)
416 align = ALIGN(1);
417
418 if (size < sizeof(struct pool_item))
419 size = sizeof(struct pool_item);
420
421 size = ALIGN(size);
422 if (size > pagesz)
423 panic("pool_init: pool item size (%lu) too large",
424 (u_long)size);
425
426 /*
427 * Initialize the pool structure.
428 */
429 TAILQ_INIT(&pp->pr_pagelist);
430 TAILQ_INIT(&pp->pr_cachelist);
431 pp->pr_curpage = NULL;
432 pp->pr_npages = 0;
433 pp->pr_minitems = 0;
434 pp->pr_minpages = 0;
435 pp->pr_maxpages = UINT_MAX;
436 pp->pr_roflags = flags;
437 pp->pr_flags = 0;
438 pp->pr_size = size;
439 pp->pr_align = align;
440 pp->pr_wchan = wchan;
441 pp->pr_mtype = mtype;
442 pp->pr_alloc = alloc;
443 pp->pr_free = release;
444 pp->pr_pagesz = pagesz;
445 pp->pr_pagemask = ~(pagesz - 1);
446 pp->pr_pageshift = ffs(pagesz) - 1;
447 pp->pr_nitems = 0;
448 pp->pr_nout = 0;
449 pp->pr_hardlimit = UINT_MAX;
450 pp->pr_hardlimit_warning = NULL;
451 pp->pr_hardlimit_ratecap.tv_sec = 0;
452 pp->pr_hardlimit_ratecap.tv_usec = 0;
453 pp->pr_hardlimit_warning_last.tv_sec = 0;
454 pp->pr_hardlimit_warning_last.tv_usec = 0;
455
456 /*
457 * Decide whether to put the page header off page to avoid
458 * wasting too large a part of the page. Off-page page headers
459 * go on a hash table, so we can match a returned item
460 * with its header based on the page address.
461 * We use 1/16 of the page size as the threshold (XXX: tune)
462 */
463 if (pp->pr_size < pagesz/16) {
464 /* Use the end of the page for the page header */
465 pp->pr_roflags |= PR_PHINPAGE;
466 pp->pr_phoffset = off =
467 pagesz - ALIGN(sizeof(struct pool_item_header));
468 } else {
469 /* The page header will be taken from our page header pool */
470 pp->pr_phoffset = 0;
471 off = pagesz;
472 for (i = 0; i < PR_HASHTABSIZE; i++) {
473 LIST_INIT(&pp->pr_hashtab[i]);
474 }
475 }
476
477 /*
478 * Alignment is to take place at `ioff' within the item. This means
479 * we must reserve up to `align - 1' bytes on the page to allow
480 * appropriate positioning of each item.
481 *
482 * Silently enforce `0 <= ioff < align'.
483 */
484 pp->pr_itemoffset = ioff = ioff % align;
485 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
486 KASSERT(pp->pr_itemsperpage != 0);
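
	/*
	 * Worked example (illustrative figures): with a 4096-byte page
	 * and a 512-byte item, ioff = 0, the header goes off-page
	 * (512 >= 4096/16), so off = 4096 and pr_itemsperpage =
	 * (4096 - 0) / 512 = 8; the slack computed below is then 0 and
	 * no cache coloring is possible.
	 */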
487
488 /*
489 * Use the slack between the chunks and the page header
490 * for "cache coloring".
491 */
492 slack = off - pp->pr_itemsperpage * pp->pr_size;
493 pp->pr_maxcolor = (slack / align) * align;
494 pp->pr_curcolor = 0;
495
496 pp->pr_nget = 0;
497 pp->pr_nfail = 0;
498 pp->pr_nput = 0;
499 pp->pr_npagealloc = 0;
500 pp->pr_npagefree = 0;
501 pp->pr_hiwat = 0;
502 pp->pr_nidle = 0;
503
504 #ifdef POOL_DIAGNOSTIC
505 if (flags & PR_LOGGING) {
506 if (kmem_map == NULL ||
507 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
508 M_TEMP, M_NOWAIT)) == NULL)
509 pp->pr_roflags &= ~PR_LOGGING;
510 pp->pr_curlogentry = 0;
511 pp->pr_logsize = pool_logsize;
512 }
513 #endif
514
515 pp->pr_entered_file = NULL;
516 pp->pr_entered_line = 0;
517
518 simple_lock_init(&pp->pr_slock);
519
520 /*
521 * Initialize private page header pool and cache magazine pool if we
522 * haven't done so yet.
523 * XXX LOCKING.
524 */
525 if (phpool.pr_size == 0) {
526 #ifdef POOL_SUBPAGE
527 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0,
528 "phpool", PAGE_SIZE, pool_page_alloc, pool_page_free, 0);
529 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
530 PR_RECURSIVE, "psppool", PAGE_SIZE,
531 pool_page_alloc, pool_page_free, 0);
532 #else
533 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
534 0, "phpool", 0, 0, 0, 0);
535 #endif
536 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
537 0, "pcgpool", 0, 0, 0, 0);
538 }
539
540 /* Insert into the list of all pools. */
541 simple_lock(&pool_head_slock);
542 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
543 simple_unlock(&pool_head_slock);
544 }
545
546 /*
 * De-commission a pool resource.
548 */
549 void
550 pool_destroy(struct pool *pp)
551 {
552 struct pool_item_header *ph;
553 struct pool_cache *pc;
554
555 /* Destroy all caches for this pool. */
556 while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
557 pool_cache_destroy(pc);
558
559 #ifdef DIAGNOSTIC
560 if (pp->pr_nout != 0) {
561 pr_printlog(pp, NULL, printf);
562 panic("pool_destroy: pool busy: still out: %u\n",
563 pp->pr_nout);
564 }
565 #endif
566
567 /* Remove all pages */
568 if ((pp->pr_roflags & PR_STATIC) == 0)
569 while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
570 pr_rmpage(pp, ph, NULL);
571
572 /* Remove from global pool list */
573 simple_lock(&pool_head_slock);
574 TAILQ_REMOVE(&pool_head, pp, pr_poollist);
575 if (drainpp == pp) {
576 drainpp = NULL;
577 }
578 simple_unlock(&pool_head_slock);
579
580 #ifdef POOL_DIAGNOSTIC
581 if ((pp->pr_roflags & PR_LOGGING) != 0)
582 free(pp->pr_log, M_TEMP);
583 #endif
584
585 if (pp->pr_roflags & PR_FREEHEADER)
586 free(pp, M_POOL);
587 }
588
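/*
 * Allocate a page header for a freshly allocated pool page: either carve
 * it out of the page itself (PR_PHINPAGE) or take one from phpool.
 * Called with the pool lock released, since pool_get() on phpool may sleep.
 */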
589 static __inline struct pool_item_header *
590 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
591 {
592 struct pool_item_header *ph;
593 int s;
594
595 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
596
597 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
598 ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
599 else {
600 s = splhigh();
601 ph = pool_get(&phpool, flags);
602 splx(s);
603 }
604
605 return (ph);
606 }
607
608 /*
609 * Grab an item from the pool; must be called at appropriate spl level
610 */
611 void *
612 #ifdef POOL_DIAGNOSTIC
613 _pool_get(struct pool *pp, int flags, const char *file, long line)
614 #else
615 pool_get(struct pool *pp, int flags)
616 #endif
617 {
618 struct pool_item *pi;
619 struct pool_item_header *ph;
620 void *v;
621
622 #ifdef DIAGNOSTIC
623 if (__predict_false((pp->pr_roflags & PR_STATIC) &&
624 (flags & PR_MALLOCOK))) {
625 pr_printlog(pp, NULL, printf);
626 panic("pool_get: static");
627 }
628
629 if (__predict_false(curproc == NULL && doing_shutdown == 0 &&
630 (flags & PR_WAITOK) != 0))
631 panic("pool_get: must have NOWAIT");
632
633 #ifdef LOCKDEBUG
634 if (flags & PR_WAITOK)
635 simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
636 #endif
637 #endif /* DIAGNOSTIC */
638
639 simple_lock(&pp->pr_slock);
640 pr_enter(pp, file, line);
641
642 startover:
643 /*
644 * Check to see if we've reached the hard limit. If we have,
645 * and we can wait, then wait until an item has been returned to
646 * the pool.
647 */
648 #ifdef DIAGNOSTIC
649 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
650 pr_leave(pp);
651 simple_unlock(&pp->pr_slock);
652 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
653 }
654 #endif
655 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
656 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
657 /*
658 * XXX: A warning isn't logged in this case. Should
659 * it be?
660 */
661 pp->pr_flags |= PR_WANTED;
662 pr_leave(pp);
663 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
664 pr_enter(pp, file, line);
665 goto startover;
666 }
667
668 /*
669 * Log a message that the hard limit has been hit.
670 */
671 if (pp->pr_hardlimit_warning != NULL &&
672 ratecheck(&pp->pr_hardlimit_warning_last,
673 &pp->pr_hardlimit_ratecap))
674 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
675
676 if (flags & PR_URGENT)
677 panic("pool_get: urgent");
678
679 pp->pr_nfail++;
680
681 pr_leave(pp);
682 simple_unlock(&pp->pr_slock);
683 return (NULL);
684 }
685
686 /*
687 * The convention we use is that if `curpage' is not NULL, then
688 * it points at a non-empty bucket. In particular, `curpage'
689 * never points at a page header which has PR_PHINPAGE set and
690 * has no items in its bucket.
691 */
692 if ((ph = pp->pr_curpage) == NULL) {
693 #ifdef DIAGNOSTIC
694 if (pp->pr_nitems != 0) {
695 simple_unlock(&pp->pr_slock);
696 printf("pool_get: %s: curpage NULL, nitems %u\n",
697 pp->pr_wchan, pp->pr_nitems);
698 panic("pool_get: nitems inconsistent\n");
699 }
700 #endif
701
702 /*
703 * Call the back-end page allocator for more memory.
704 * Release the pool lock, as the back-end page allocator
705 * may block.
706 */
707 pr_leave(pp);
708 simple_unlock(&pp->pr_slock);
709 v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
710 if (__predict_true(v != NULL))
711 ph = pool_alloc_item_header(pp, v, flags);
712 simple_lock(&pp->pr_slock);
713 pr_enter(pp, file, line);
714
715 if (__predict_false(v == NULL || ph == NULL)) {
716 if (v != NULL)
717 (*pp->pr_free)(v, pp->pr_pagesz, pp->pr_mtype);
718
719 /*
720 * We were unable to allocate a page or item
721 * header, but we released the lock during
722 * allocation, so perhaps items were freed
723 * back to the pool. Check for this case.
724 */
725 if (pp->pr_curpage != NULL)
726 goto startover;
727
728 if (flags & PR_URGENT)
729 panic("pool_get: urgent");
730
731 if ((flags & PR_WAITOK) == 0) {
732 pp->pr_nfail++;
733 pr_leave(pp);
734 simple_unlock(&pp->pr_slock);
735 return (NULL);
736 }
737
738 /*
739 * Wait for items to be returned to this pool.
740 *
741 * XXX: we actually want to wait just until
742 * the page allocator has memory again. Depending
743 * on this pool's usage, we might get stuck here
744 * for a long time.
745 *
746 * XXX: maybe we should wake up once a second and
747 * try again?
748 */
749 pp->pr_flags |= PR_WANTED;
750 pr_leave(pp);
751 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
752 pr_enter(pp, file, line);
753 goto startover;
754 }
755
756 /* We have more memory; add it to the pool */
757 pool_prime_page(pp, v, ph);
758 pp->pr_npagealloc++;
759
760 /* Start the allocation process over. */
761 goto startover;
762 }
763
764 if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
765 pr_leave(pp);
766 simple_unlock(&pp->pr_slock);
767 panic("pool_get: %s: page empty", pp->pr_wchan);
768 }
769 #ifdef DIAGNOSTIC
770 if (__predict_false(pp->pr_nitems == 0)) {
771 pr_leave(pp);
772 simple_unlock(&pp->pr_slock);
773 printf("pool_get: %s: items on itemlist, nitems %u\n",
774 pp->pr_wchan, pp->pr_nitems);
775 panic("pool_get: nitems inconsistent\n");
776 }
777
778 pr_log(pp, v, PRLOG_GET, file, line);
779
780 if (__predict_false(pi->pi_magic != PI_MAGIC)) {
781 pr_printlog(pp, pi, printf);
782 panic("pool_get(%s): free list modified: magic=%x; page %p;"
783 " item addr %p\n",
784 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
785 }
786 #endif
787
788 /*
789 * Remove from item list.
790 */
791 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
792 pp->pr_nitems--;
793 pp->pr_nout++;
794 if (ph->ph_nmissing == 0) {
795 #ifdef DIAGNOSTIC
796 if (__predict_false(pp->pr_nidle == 0))
797 panic("pool_get: nidle inconsistent");
798 #endif
799 pp->pr_nidle--;
800 }
801 ph->ph_nmissing++;
802 if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
803 #ifdef DIAGNOSTIC
804 if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
805 pr_leave(pp);
806 simple_unlock(&pp->pr_slock);
807 panic("pool_get: %s: nmissing inconsistent",
808 pp->pr_wchan);
809 }
810 #endif
811 /*
812 * Find a new non-empty page header, if any.
813 * Start search from the page head, to increase
814 * the chance for "high water" pages to be freed.
815 *
816 * Migrate empty pages to the end of the list. This
817 * will speed the update of curpage as pages become
		 * idle.  Empty pages intermingled with idle pages
		 * are no big deal.  As soon as a page becomes un-empty,
820 * it will move back to the head of the list.
821 */
822 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
823 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
824 TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
825 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
826 break;
827
828 pp->pr_curpage = ph;
829 }
830
831 pp->pr_nget++;
832
833 /*
834 * If we have a low water mark and we are now below that low
835 * water mark, add more items to the pool.
836 */
837 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
838 /*
839 * XXX: Should we log a warning? Should we set up a timeout
840 * to try again in a second or so? The latter could break
841 * a caller's assumptions about interrupt protection, etc.
842 */
843 }
844
845 pr_leave(pp);
846 simple_unlock(&pp->pr_slock);
847 return (v);
848 }
849
850 /*
851 * Internal version of pool_put(). Pool is already locked/entered.
852 */
853 static void
854 pool_do_put(struct pool *pp, void *v)
855 {
856 struct pool_item *pi = v;
857 struct pool_item_header *ph;
858 caddr_t page;
859 int s;
860
861 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
862
863 page = (caddr_t)((u_long)v & pp->pr_pagemask);
864
865 #ifdef DIAGNOSTIC
866 if (__predict_false(pp->pr_nout == 0)) {
867 printf("pool %s: putting with none out\n",
868 pp->pr_wchan);
869 panic("pool_put");
870 }
871 #endif
872
873 if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
874 pr_printlog(pp, NULL, printf);
875 panic("pool_put: %s: page header missing", pp->pr_wchan);
876 }
877
878 #ifdef LOCKDEBUG
879 /*
880 * Check if we're freeing a locked simple lock.
881 */
882 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
883 #endif
884
885 /*
886 * Return to item list.
887 */
888 #ifdef DIAGNOSTIC
889 pi->pi_magic = PI_MAGIC;
890 #endif
891 #ifdef DEBUG
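	/*
	 * Scribble PI_MAGIC over the entire freed item so that stale
	 * uses after pool_put() stand out.
	 */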
892 {
893 int i, *ip = v;
894
895 for (i = 0; i < pp->pr_size / sizeof(int); i++) {
896 *ip++ = PI_MAGIC;
897 }
898 }
899 #endif
900
901 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
902 ph->ph_nmissing--;
903 pp->pr_nput++;
904 pp->pr_nitems++;
905 pp->pr_nout--;
906
907 /* Cancel "pool empty" condition if it exists */
908 if (pp->pr_curpage == NULL)
909 pp->pr_curpage = ph;
910
911 if (pp->pr_flags & PR_WANTED) {
912 pp->pr_flags &= ~PR_WANTED;
913 if (ph->ph_nmissing == 0)
914 pp->pr_nidle++;
915 wakeup((caddr_t)pp);
916 return;
917 }
918
919 /*
920 * If this page is now complete, do one of two things:
921 *
922 * (1) If we have more pages than the page high water
923 * mark, free the page back to the system.
924 *
925 * (2) Move it to the end of the page list, so that
926 * we minimize our chances of fragmenting the
927 * pool. Idle pages migrate to the end (along with
928 * completely empty pages, so that we find un-empty
929 * pages more quickly when we update curpage) of the
930 * list so they can be more easily swept up by
931 * the pagedaemon when pages are scarce.
932 */
933 if (ph->ph_nmissing == 0) {
934 pp->pr_nidle++;
935 if (pp->pr_npages > pp->pr_maxpages) {
936 pr_rmpage(pp, ph, NULL);
937 } else {
938 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
939 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
940
941 /*
942 * Update the timestamp on the page. A page must
943 * be idle for some period of time before it can
944 * be reclaimed by the pagedaemon. This minimizes
945 * ping-pong'ing for memory.
946 */
947 s = splclock();
948 ph->ph_time = mono_time;
949 splx(s);
950
951 /*
952 * Update the current page pointer. Just look for
953 * the first page with any free items.
954 *
955 * XXX: Maybe we want an option to look for the
956 * page with the fewest available items, to minimize
957 * fragmentation?
958 */
959 TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
960 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
961 break;
962
963 pp->pr_curpage = ph;
964 }
965 }
966 /*
967 * If the page has just become un-empty, move it to the head of
968 * the list, and make it the current page. The next allocation
969 * will get the item from this page, instead of further fragmenting
970 * the pool.
971 */
972 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
973 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
974 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
975 pp->pr_curpage = ph;
976 }
977 }
978
979 /*
980 * Return resource to the pool; must be called at appropriate spl level
981 */
982 #ifdef POOL_DIAGNOSTIC
983 void
984 _pool_put(struct pool *pp, void *v, const char *file, long line)
985 {
986
987 simple_lock(&pp->pr_slock);
988 pr_enter(pp, file, line);
989
990 pr_log(pp, v, PRLOG_PUT, file, line);
991
992 pool_do_put(pp, v);
993
994 pr_leave(pp);
995 simple_unlock(&pp->pr_slock);
996 }
997 #undef pool_put
998 #endif /* POOL_DIAGNOSTIC */
999
1000 void
1001 pool_put(struct pool *pp, void *v)
1002 {
1003
1004 simple_lock(&pp->pr_slock);
1005
1006 pool_do_put(pp, v);
1007
1008 simple_unlock(&pp->pr_slock);
1009 }
1010
1011 #ifdef POOL_DIAGNOSTIC
1012 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__)
1013 #endif
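
/*
 * Hypothetical usage sketch (illustration only): a pool shared with
 * interrupt context must be protected by the caller's spl, per the
 * "appropriate spl level" comments on pool_get() and pool_put() above.
 * splvm() and foo_pool are assumptions made for the example.
 */
#if 0	/* example only */
	struct foo *f;
	int s;

	s = splvm();
	f = pool_get(&foo_pool, PR_NOWAIT);
	splx(s);
	if (f == NULL)
		return (ENOMEM);

	/* ... use f ... */

	s = splvm();
	pool_put(&foo_pool, f);
	splx(s);
#endif	/* example only */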
1014
1015 /*
1016 * Add N items to the pool.
1017 */
1018 int
1019 pool_prime(struct pool *pp, int n)
1020 {
1021 struct pool_item_header *ph;
1022 caddr_t cp;
1023 int newpages, error = 0;
1024
1025 simple_lock(&pp->pr_slock);
1026
1027 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1028
1029 while (newpages-- > 0) {
1030 simple_unlock(&pp->pr_slock);
1031 cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
1032 if (__predict_true(cp != NULL))
1033 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1034 simple_lock(&pp->pr_slock);
1035
1036 if (__predict_false(cp == NULL || ph == NULL)) {
1037 error = ENOMEM;
1038 if (cp != NULL)
1039 (*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1040 break;
1041 }
1042
1043 pool_prime_page(pp, cp, ph);
1044 pp->pr_npagealloc++;
1045 pp->pr_minpages++;
1046 }
1047
1048 if (pp->pr_minpages >= pp->pr_maxpages)
1049 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
1050
1051 simple_unlock(&pp->pr_slock);
	return (error);
1053 }
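
/*
 * Hypothetical usage sketch (illustration only): pre-allocating items at
 * attach time so that later PR_NOWAIT allocations are less likely to
 * fail.  The figure 16 and foo_pool are illustrative.
 */
#if 0	/* example only */
	if (pool_prime(&foo_pool, 16) != 0)
		printf("foo: unable to prime pool\n");
#endif	/* example only */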
1054
1055 /*
1056 * Add a page worth of items to the pool.
1057 *
1058 * Note, we must be called with the pool descriptor LOCKED.
1059 */
1060 static void
1061 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1062 {
1063 struct pool_item *pi;
1064 caddr_t cp = storage;
1065 unsigned int align = pp->pr_align;
1066 unsigned int ioff = pp->pr_itemoffset;
1067 int n;
1068
1069 if (((u_long)cp & (pp->pr_pagesz - 1)) != 0)
1070 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1071
1072 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1073 LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
1074 ph, ph_hashlist);
1075
1076 /*
1077 * Insert page header.
1078 */
1079 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
1080 TAILQ_INIT(&ph->ph_itemlist);
1081 ph->ph_page = storage;
1082 ph->ph_nmissing = 0;
1083 memset(&ph->ph_time, 0, sizeof(ph->ph_time));
1084
1085 pp->pr_nidle++;
1086
1087 /*
1088 * Color this page.
1089 */
1090 cp = (caddr_t)(cp + pp->pr_curcolor);
1091 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1092 pp->pr_curcolor = 0;
1093
1094 /*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1096 */
1097 if (ioff != 0)
1098 cp = (caddr_t)(cp + (align - ioff));
1099
1100 /*
1101 * Insert remaining chunks on the bucket list.
1102 */
1103 n = pp->pr_itemsperpage;
1104 pp->pr_nitems += n;
1105
1106 while (n--) {
1107 pi = (struct pool_item *)cp;
1108
1109 /* Insert on page list */
1110 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1111 #ifdef DIAGNOSTIC
1112 pi->pi_magic = PI_MAGIC;
1113 #endif
1114 cp = (caddr_t)(cp + pp->pr_size);
1115 }
1116
1117 /*
1118 * If the pool was depleted, point at the new page.
1119 */
1120 if (pp->pr_curpage == NULL)
1121 pp->pr_curpage = ph;
1122
1123 if (++pp->pr_npages > pp->pr_hiwat)
1124 pp->pr_hiwat = pp->pr_npages;
1125 }
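
/*
 * Illustrative layout of a pool page as set up above (PR_PHINPAGE case,
 * not to scale):
 *
 *	[ color (0..pr_maxcolor) ][ (align - ioff) % align fixup ]
 *	[ item ][ item ] ... [ item ][ slack ][ in-page header ]
 *
 * For larger items the header is instead allocated from phpool and
 * found again via the pr_hashtab hash on the page address.
 */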
1126
1127 /*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up nitems with the low water mark.
 *
 * Note 1, we never wait for memory here; we let the caller decide what to do.
1132 *
1133 * Note 2, this doesn't work with static pools.
1134 *
1135 * Note 3, we must be called with the pool already locked, and we return
1136 * with it locked.
1137 */
1138 static int
1139 pool_catchup(struct pool *pp)
1140 {
1141 struct pool_item_header *ph;
1142 caddr_t cp;
1143 int error = 0;
1144
1145 if (pp->pr_roflags & PR_STATIC) {
1146 /*
1147 * We dropped below the low water mark, and this is not a
1148 * good thing. Log a warning.
1149 *
1150 * XXX: rate-limit this?
1151 */
1152 printf("WARNING: static pool `%s' dropped below low water "
1153 "mark\n", pp->pr_wchan);
1154 return (0);
1155 }
1156
1157 while (POOL_NEEDS_CATCHUP(pp)) {
1158 /*
1159 * Call the page back-end allocator for more memory.
1160 *
1161 * XXX: We never wait, so should we bother unlocking
1162 * the pool descriptor?
1163 */
1164 simple_unlock(&pp->pr_slock);
1165 cp = (*pp->pr_alloc)(pp->pr_pagesz, PR_NOWAIT, pp->pr_mtype);
1166 if (__predict_true(cp != NULL))
1167 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1168 simple_lock(&pp->pr_slock);
1169 if (__predict_false(cp == NULL || ph == NULL)) {
1170 if (cp != NULL)
1171 (*pp->pr_free)(cp, pp->pr_pagesz, pp->pr_mtype);
1172 error = ENOMEM;
1173 break;
1174 }
1175 pool_prime_page(pp, cp, ph);
1176 pp->pr_npagealloc++;
1177 }
1178
1179 return (error);
1180 }
1181
1182 void
1183 pool_setlowat(struct pool *pp, int n)
1184 {
1185 int error;
1186
1187 simple_lock(&pp->pr_slock);
1188
1189 pp->pr_minitems = n;
1190 pp->pr_minpages = (n == 0)
1191 ? 0
1192 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1193
1194 /* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && (error = pool_catchup(pp)) != 0) {
1196 /*
1197 * XXX: Should we log a warning? Should we set up a timeout
1198 * to try again in a second or so? The latter could break
1199 * a caller's assumptions about interrupt protection, etc.
1200 */
1201 }
1202
1203 simple_unlock(&pp->pr_slock);
1204 }
1205
1206 void
1207 pool_sethiwat(struct pool *pp, int n)
1208 {
1209
1210 simple_lock(&pp->pr_slock);
1211
1212 pp->pr_maxpages = (n == 0)
1213 ? 0
1214 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1215
1216 simple_unlock(&pp->pr_slock);
1217 }
1218
1219 void
1220 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1221 {
1222
1223 simple_lock(&pp->pr_slock);
1224
1225 pp->pr_hardlimit = n;
1226 pp->pr_hardlimit_warning = warnmess;
1227 pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1228 pp->pr_hardlimit_warning_last.tv_sec = 0;
1229 pp->pr_hardlimit_warning_last.tv_usec = 0;
1230
1231 /*
1232 * In-line version of pool_sethiwat(), because we don't want to
1233 * release the lock.
1234 */
1235 pp->pr_maxpages = (n == 0)
1236 ? 0
1237 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1238
1239 simple_unlock(&pp->pr_slock);
1240 }
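
/*
 * Hypothetical usage sketch (illustration only): tuning a pool after
 * pool_init().  All figures and the warning text are illustrative.
 */
#if 0	/* example only */
	/* Keep at least 64 items on hand for interrupt-time consumers. */
	pool_setlowat(&foo_pool, 64);

	/* Release completely idle pages once the pool holds more than ~256 items. */
	pool_sethiwat(&foo_pool, 256);

	/* Refuse to hand out more than 1024 items; warn at most once a minute. */
	pool_sethardlimit(&foo_pool, 1024,
	    "WARNING: foo_pool hard limit reached", 60);
#endif	/* example only */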
1241
1242 /*
1243 * Default page allocator.
1244 */
1245 static void *
1246 pool_page_alloc(unsigned long sz, int flags, int mtype)
1247 {
1248 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1249
1250 return ((void *)uvm_km_alloc_poolpage(waitok));
1251 }
1252
1253 static void
1254 pool_page_free(void *v, unsigned long sz, int mtype)
1255 {
1256
1257 uvm_km_free_poolpage((vaddr_t)v);
1258 }
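
/*
 * Hypothetical usage sketch (illustration only): a pool that is never
 * used from interrupt context can pass the "nointr" allocator pair
 * defined below to pool_init(); note that pool_init() insists that
 * alloc and release be supplied together.  foo_pool, struct foo and
 * M_DEVBUF are illustrative.
 */
#if 0	/* example only */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_DEVBUF);
#endif	/* example only */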
1259
1260 #ifdef POOL_SUBPAGE
1261 /*
1262 * Sub-page allocator, for machines with large hardware pages.
1263 */
1264 static void *
1265 pool_subpage_alloc(unsigned long sz, int flags, int mtype)
1266 {
1267
1268 return pool_get(&psppool, flags);
1269 }
1270
1271 static void
1272 pool_subpage_free(void *v, unsigned long sz, int mtype)
1273 {
1274
1275 pool_put(&psppool, v);
1276 }
1277 #endif
1278
1279 #ifdef POOL_SUBPAGE
1280 /* We don't provide a real nointr allocator. Maybe later. */
1281 void *
1282 pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
1283 {
1284
1285 return pool_subpage_alloc(sz, flags, mtype);
1286 }
1287
1288 void
1289 pool_page_free_nointr(void *v, unsigned long sz, int mtype)
1290 {
1291
1292 pool_subpage_free(v, sz, mtype);
1293 }
1294 #else
1295 /*
1296 * Alternate pool page allocator for pools that know they will
1297 * never be accessed in interrupt context.
1298 */
1299 void *
1300 pool_page_alloc_nointr(unsigned long sz, int flags, int mtype)
1301 {
1302 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1303
1304 return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1305 waitok));
1306 }
1307
1308 void
1309 pool_page_free_nointr(void *v, unsigned long sz, int mtype)
1310 {
1311
1312 uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1313 }
1314 #endif
1315
1316
1317 /*
1318 * Release all complete pages that have not been used recently.
1319 */
1320 void
1321 #ifdef POOL_DIAGNOSTIC
1322 _pool_reclaim(struct pool *pp, const char *file, long line)
1323 #else
1324 pool_reclaim(struct pool *pp)
1325 #endif
1326 {
1327 struct pool_item_header *ph, *phnext;
1328 struct pool_cache *pc;
1329 struct timeval curtime;
1330 struct pool_pagelist pq;
1331 int s;
1332
1333 if (pp->pr_roflags & PR_STATIC)
1334 return;
1335
1336 if (simple_lock_try(&pp->pr_slock) == 0)
1337 return;
1338 pr_enter(pp, file, line);
1339 TAILQ_INIT(&pq);
1340
1341 /*
1342 * Reclaim items from the pool's caches.
1343 */
1344 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
1345 pool_cache_reclaim(pc);
1346
1347 s = splclock();
1348 curtime = mono_time;
1349 splx(s);
1350
1351 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
1352 phnext = TAILQ_NEXT(ph, ph_pagelist);
1353
1354 /* Check our minimum page claim */
1355 if (pp->pr_npages <= pp->pr_minpages)
1356 break;
1357
1358 if (ph->ph_nmissing == 0) {
1359 struct timeval diff;
1360 timersub(&curtime, &ph->ph_time, &diff);
1361 if (diff.tv_sec < pool_inactive_time)
1362 continue;
1363
1364 /*
1365 * If freeing this page would put us below
1366 * the low water mark, stop now.
1367 */
1368 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1369 pp->pr_minitems)
1370 break;
1371
1372 pr_rmpage(pp, ph, &pq);
1373 }
1374 }
1375
1376 pr_leave(pp);
1377 simple_unlock(&pp->pr_slock);
1378 if (TAILQ_EMPTY(&pq)) {
1379 return;
1380 }
1381 while ((ph = TAILQ_FIRST(&pq)) != NULL) {
1382 TAILQ_REMOVE(&pq, ph, ph_pagelist);
1383 (*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
1384 if (pp->pr_roflags & PR_PHINPAGE) {
1385 continue;
1386 }
1387 LIST_REMOVE(ph, ph_hashlist);
1388 s = splhigh();
1389 pool_put(&phpool, ph);
1390 splx(s);
1391 }
1392 }
1393
1394
1395 /*
1396 * Drain pools, one at a time.
1397 *
1398 * Note, we must never be called from an interrupt context.
1399 */
1400 void
1401 pool_drain(void *arg)
1402 {
1403 struct pool *pp;
1404 int s;
1405
1406 pp = NULL;
1407 s = splvm();
1408 simple_lock(&pool_head_slock);
1409 if (drainpp == NULL) {
1410 drainpp = TAILQ_FIRST(&pool_head);
1411 }
1412 if (drainpp) {
1413 pp = drainpp;
1414 drainpp = TAILQ_NEXT(pp, pr_poollist);
1415 }
1416 simple_unlock(&pool_head_slock);
	if (pp != NULL)
		pool_reclaim(pp);
1418 splx(s);
1419 }
1420
1421
1422 /*
1423 * Diagnostic helpers.
1424 */
1425 void
1426 pool_print(struct pool *pp, const char *modif)
1427 {
1428 int s;
1429
1430 s = splvm();
1431 if (simple_lock_try(&pp->pr_slock) == 0) {
1432 printf("pool %s is locked; try again later\n",
1433 pp->pr_wchan);
1434 splx(s);
1435 return;
1436 }
1437 pool_print1(pp, modif, printf);
1438 simple_unlock(&pp->pr_slock);
1439 splx(s);
1440 }
1441
1442 void
1443 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1444 {
1445 int didlock = 0;
1446
1447 if (pp == NULL) {
1448 (*pr)("Must specify a pool to print.\n");
1449 return;
1450 }
1451
1452 /*
1453 * Called from DDB; interrupts should be blocked, and all
1454 * other processors should be paused. We can skip locking
1455 * the pool in this case.
1456 *
1457 * We do a simple_lock_try() just to print the lock
1458 * status, however.
1459 */
1460
1461 if (simple_lock_try(&pp->pr_slock) == 0)
1462 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
1463 else
1464 didlock = 1;
1465
1466 pool_print1(pp, modif, pr);
1467
1468 if (didlock)
1469 simple_unlock(&pp->pr_slock);
1470 }
1471
1472 static void
1473 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1474 {
1475 struct pool_item_header *ph;
1476 struct pool_cache *pc;
1477 struct pool_cache_group *pcg;
1478 #ifdef DIAGNOSTIC
1479 struct pool_item *pi;
1480 #endif
1481 int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1482 char c;
1483
	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}
1493
1494 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1495 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1496 pp->pr_roflags);
1497 (*pr)("\tpagesz %u, mtype %d\n", pp->pr_pagesz, pp->pr_mtype);
1498 (*pr)("\talloc %p, release %p\n", pp->pr_alloc, pp->pr_free);
1499 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1500 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1501 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1502 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1503
1504 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1505 pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1506 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1507 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1508
1509 if (print_pagelist == 0)
1510 goto skip_pagelist;
1511
1512 if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
1513 (*pr)("\n\tpage list:\n");
1514 for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) {
1515 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1516 ph->ph_page, ph->ph_nmissing,
1517 (u_long)ph->ph_time.tv_sec,
1518 (u_long)ph->ph_time.tv_usec);
1519 #ifdef DIAGNOSTIC
1520 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1521 if (pi->pi_magic != PI_MAGIC) {
1522 (*pr)("\t\t\titem %p, magic 0x%x\n",
1523 pi, pi->pi_magic);
1524 }
1525 }
1526 #endif
1527 }
1528 if (pp->pr_curpage == NULL)
1529 (*pr)("\tno current page\n");
1530 else
1531 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1532
1533 skip_pagelist:
1534
1535 if (print_log == 0)
1536 goto skip_log;
1537
1538 (*pr)("\n");
1539 if ((pp->pr_roflags & PR_LOGGING) == 0)
1540 (*pr)("\tno log\n");
1541 else
1542 pr_printlog(pp, NULL, pr);
1543
1544 skip_log:
1545
1546 if (print_cache == 0)
1547 goto skip_cache;
1548
1549 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
1550 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1551 pc->pc_allocfrom, pc->pc_freeto);
1552 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
1553 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1554 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1555 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1556 for (i = 0; i < PCG_NOBJECTS; i++)
1557 (*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
1558 }
1559 }
1560
1561 skip_cache:
1562
1563 pr_enter_check(pp, pr);
1564 }
1565
1566 int
1567 pool_chk(struct pool *pp, const char *label)
1568 {
1569 struct pool_item_header *ph;
1570 int r = 0;
1571
1572 simple_lock(&pp->pr_slock);
1573
1574 TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist) {
1575 struct pool_item *pi;
1576 int n;
1577 caddr_t page;
1578
1579 page = (caddr_t)((u_long)ph & pp->pr_pagemask);
1580 if (page != ph->ph_page &&
1581 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1582 if (label != NULL)
1583 printf("%s: ", label);
1584 printf("pool(%p:%s): page inconsistency: page %p;"
1585 " at page head addr %p (p %p)\n", pp,
1586 pp->pr_wchan, ph->ph_page,
1587 ph, page);
1588 r++;
1589 goto out;
1590 }
1591
1592 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1593 pi != NULL;
1594 pi = TAILQ_NEXT(pi,pi_list), n++) {
1595
1596 #ifdef DIAGNOSTIC
1597 if (pi->pi_magic != PI_MAGIC) {
1598 if (label != NULL)
1599 printf("%s: ", label);
1600 printf("pool(%s): free list modified: magic=%x;"
1601 " page %p; item ordinal %d;"
1602 " addr %p (p %p)\n",
1603 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1604 n, pi, page);
1605 panic("pool");
1606 }
1607 #endif
1608 page = (caddr_t)((u_long)pi & pp->pr_pagemask);
1609 if (page == ph->ph_page)
1610 continue;
1611
1612 if (label != NULL)
1613 printf("%s: ", label);
1614 printf("pool(%p:%s): page inconsistency: page %p;"
1615 " item ordinal %d; addr %p (p %p)\n", pp,
1616 pp->pr_wchan, ph->ph_page,
1617 n, pi, page);
1618 r++;
1619 goto out;
1620 }
1621 }
1622 out:
1623 simple_unlock(&pp->pr_slock);
1624 return (r);
1625 }
1626
1627 /*
1628 * pool_cache_init:
1629 *
1630 * Initialize a pool cache.
1631 *
1632 * NOTE: If the pool must be protected from interrupts, we expect
1633 * to be called at the appropriate interrupt priority level.
1634 */
1635 void
1636 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1637 int (*ctor)(void *, void *, int),
1638 void (*dtor)(void *, void *),
1639 void *arg)
1640 {
1641
1642 TAILQ_INIT(&pc->pc_grouplist);
1643 simple_lock_init(&pc->pc_slock);
1644
1645 pc->pc_allocfrom = NULL;
1646 pc->pc_freeto = NULL;
1647 pc->pc_pool = pp;
1648
1649 pc->pc_ctor = ctor;
1650 pc->pc_dtor = dtor;
1651 pc->pc_arg = arg;
1652
1653 pc->pc_hits = 0;
1654 pc->pc_misses = 0;
1655
1656 pc->pc_ngroups = 0;
1657
1658 pc->pc_nitems = 0;
1659
1660 simple_lock(&pp->pr_slock);
1661 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1662 simple_unlock(&pp->pr_slock);
1663 }
1664
1665 /*
1666 * pool_cache_destroy:
1667 *
1668 * Destroy a pool cache.
1669 */
1670 void
1671 pool_cache_destroy(struct pool_cache *pc)
1672 {
1673 struct pool *pp = pc->pc_pool;
1674
1675 /* First, invalidate the entire cache. */
1676 pool_cache_invalidate(pc);
1677
1678 /* ...and remove it from the pool's cache list. */
1679 simple_lock(&pp->pr_slock);
1680 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1681 simple_unlock(&pp->pr_slock);
1682 }
1683
1684 static __inline void *
1685 pcg_get(struct pool_cache_group *pcg)
1686 {
1687 void *object;
1688 u_int idx;
1689
1690 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1691 KASSERT(pcg->pcg_avail != 0);
1692 idx = --pcg->pcg_avail;
1693
1694 KASSERT(pcg->pcg_objects[idx] != NULL);
1695 object = pcg->pcg_objects[idx];
1696 pcg->pcg_objects[idx] = NULL;
1697
1698 return (object);
1699 }
1700
1701 static __inline void
1702 pcg_put(struct pool_cache_group *pcg, void *object)
1703 {
1704 u_int idx;
1705
1706 KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1707 idx = pcg->pcg_avail++;
1708
1709 KASSERT(pcg->pcg_objects[idx] == NULL);
1710 pcg->pcg_objects[idx] = object;
1711 }
1712
1713 /*
1714 * pool_cache_get:
1715 *
1716 * Get an object from a pool cache.
1717 */
1718 void *
1719 pool_cache_get(struct pool_cache *pc, int flags)
1720 {
1721 struct pool_cache_group *pcg;
1722 void *object;
1723
1724 #ifdef LOCKDEBUG
1725 if (flags & PR_WAITOK)
1726 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1727 #endif
1728
1729 simple_lock(&pc->pc_slock);
1730
1731 if ((pcg = pc->pc_allocfrom) == NULL) {
1732 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1733 if (pcg->pcg_avail != 0) {
1734 pc->pc_allocfrom = pcg;
1735 goto have_group;
1736 }
1737 }
1738
1739 /*
1740 * No groups with any available objects. Allocate
1741 * a new object, construct it, and return it to
1742 * the caller. We will allocate a group, if necessary,
1743 * when the object is freed back to the cache.
1744 */
1745 pc->pc_misses++;
1746 simple_unlock(&pc->pc_slock);
1747 object = pool_get(pc->pc_pool, flags);
1748 if (object != NULL && pc->pc_ctor != NULL) {
1749 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1750 pool_put(pc->pc_pool, object);
1751 return (NULL);
1752 }
1753 }
1754 return (object);
1755 }
1756
1757 have_group:
1758 pc->pc_hits++;
1759 pc->pc_nitems--;
1760 object = pcg_get(pcg);
1761
1762 if (pcg->pcg_avail == 0)
1763 pc->pc_allocfrom = NULL;
1764
1765 simple_unlock(&pc->pc_slock);
1766
1767 return (object);
1768 }
1769
1770 /*
1771 * pool_cache_put:
1772 *
1773 * Put an object back to the pool cache.
1774 */
1775 void
1776 pool_cache_put(struct pool_cache *pc, void *object)
1777 {
1778 struct pool_cache_group *pcg;
1779 int s;
1780
1781 simple_lock(&pc->pc_slock);
1782
1783 if ((pcg = pc->pc_freeto) == NULL) {
1784 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1785 if (pcg->pcg_avail != PCG_NOBJECTS) {
1786 pc->pc_freeto = pcg;
1787 goto have_group;
1788 }
1789 }
1790
1791 /*
1792 * No empty groups to free the object to. Attempt to
1793 * allocate one.
1794 */
1795 simple_unlock(&pc->pc_slock);
1796 s = splvm();
1797 pcg = pool_get(&pcgpool, PR_NOWAIT);
1798 splx(s);
1799 if (pcg != NULL) {
1800 memset(pcg, 0, sizeof(*pcg));
1801 simple_lock(&pc->pc_slock);
1802 pc->pc_ngroups++;
1803 TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1804 if (pc->pc_freeto == NULL)
1805 pc->pc_freeto = pcg;
1806 goto have_group;
1807 }
1808
1809 /*
1810 * Unable to allocate a cache group; destruct the object
1811 * and free it back to the pool.
1812 */
1813 pool_cache_destruct_object(pc, object);
1814 return;
1815 }
1816
1817 have_group:
1818 pc->pc_nitems++;
1819 pcg_put(pcg, object);
1820
1821 if (pcg->pcg_avail == PCG_NOBJECTS)
1822 pc->pc_freeto = NULL;
1823
1824 simple_unlock(&pc->pc_slock);
1825 }
1826
1827 /*
1828 * pool_cache_destruct_object:
1829 *
1830 * Force destruction of an object and its release back into
1831 * the pool.
1832 */
1833 void
1834 pool_cache_destruct_object(struct pool_cache *pc, void *object)
1835 {
1836
1837 if (pc->pc_dtor != NULL)
1838 (*pc->pc_dtor)(pc->pc_arg, object);
1839 pool_put(pc->pc_pool, object);
1840 }
1841
1842 /*
1843 * pool_cache_do_invalidate:
1844 *
1845 * This internal function implements pool_cache_invalidate() and
1846 * pool_cache_reclaim().
1847 */
1848 static void
1849 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1850 void (*putit)(struct pool *, void *))
1851 {
1852 struct pool_cache_group *pcg, *npcg;
1853 void *object;
1854 int s;
1855
1856 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1857 pcg = npcg) {
1858 npcg = TAILQ_NEXT(pcg, pcg_list);
1859 while (pcg->pcg_avail != 0) {
1860 pc->pc_nitems--;
1861 object = pcg_get(pcg);
1862 if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1863 pc->pc_allocfrom = NULL;
1864 if (pc->pc_dtor != NULL)
1865 (*pc->pc_dtor)(pc->pc_arg, object);
1866 (*putit)(pc->pc_pool, object);
1867 }
1868 if (free_groups) {
1869 pc->pc_ngroups--;
1870 TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1871 if (pc->pc_freeto == pcg)
1872 pc->pc_freeto = NULL;
1873 s = splvm();
1874 pool_put(&pcgpool, pcg);
1875 splx(s);
1876 }
1877 }
1878 }
1879
1880 /*
1881 * pool_cache_invalidate:
1882 *
1883 * Invalidate a pool cache (destruct and release all of the
1884 * cached objects).
1885 */
1886 void
1887 pool_cache_invalidate(struct pool_cache *pc)
1888 {
1889
1890 simple_lock(&pc->pc_slock);
1891 pool_cache_do_invalidate(pc, 0, pool_put);
1892 simple_unlock(&pc->pc_slock);
1893 }
1894
1895 /*
1896 * pool_cache_reclaim:
1897 *
1898 * Reclaim a pool cache for pool_reclaim().
1899 */
1900 static void
1901 pool_cache_reclaim(struct pool_cache *pc)
1902 {
1903
1904 simple_lock(&pc->pc_slock);
1905 pool_cache_do_invalidate(pc, 1, pool_do_put);
1906 simple_unlock(&pc->pc_slock);
1907 }
1908