1 /* $NetBSD: subr_pool.c,v 1.24 1999/04/29 17:47:19 scottr Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #include "opt_poollog.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/errno.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/lock.h>
49 #include <sys/pool.h>
50 #include <sys/syslog.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_kern.h>
54
55 #include <uvm/uvm.h>
56
57 /*
58 * Pool resource management utility.
59 *
60 * Memory is allocated in pages which are split into pieces according
61 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
62 * in the pool structure and the individual pool items are on a linked list
63 * headed by `ph_itemlist' in each page header. The memory for building
64 * the page list is either taken from the allocated pages themselves (for
65 * small pool items) or taken from an internal pool of page headers (`phpool').
66 */
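/*
 * Illustrative usage sketch (the `foo' names below are hypothetical and
 * not part of this file): a typical dynamically allocated pool is
 * created once and then used from the appropriate spl level, e.g.
 *
 *	struct pool *foopool;
 *	struct foo *f;
 *
 *	foopool = pool_create(sizeof(struct foo), 0, 0, 0,
 *	    "foopl", 0, NULL, NULL, M_DEVBUF);
 *
 *	f = pool_get(foopool, PR_WAITOK);
 *	...
 *	pool_put(foopool, f);
 *
 * Passing NULL for the alloc/release hooks and 0 for the page size
 * selects the default page allocator and PAGE_SIZE pages; see
 * pool_create() and pool_init() below for the full set of arguments.
 */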
67
68 /* List of all pools */
69 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
70
71 /* Private pool for page header structures */
72 static struct pool phpool;
73
74 /* # of seconds to retain page after last use */
75 int pool_inactive_time = 10;
76
77 /* Next candidate for drainage (see pool_drain()) */
78 static struct pool *drainpp;
79
80 /* This spin lock protects both pool_head and drainpp. */
81 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
82
83 struct pool_item_header {
84 /* Page headers */
85 TAILQ_ENTRY(pool_item_header)
86 ph_pagelist; /* pool page list */
87 TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
88 LIST_ENTRY(pool_item_header)
89 ph_hashlist; /* Off-page page headers */
90 int ph_nmissing; /* # of chunks in use */
91 caddr_t ph_page; /* this page's address */
92 struct timeval ph_time; /* last referenced */
93 };
94
95 struct pool_item {
96 #ifdef DIAGNOSTIC
97 int pi_magic;
98 #define PI_MAGIC 0xdeadbeef
99 #endif
100 /* Other entries use only this list entry */
101 TAILQ_ENTRY(pool_item) pi_list;
102 };
103
104
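/*
 * Hash a page address into the off-page page-header hash table; only
 * used when the pool does not keep its page headers in-page (i.e. when
 * PR_PHINPAGE is not set).
 */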
105 #define PR_HASH_INDEX(pp,addr) \
106 (((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
107
108
109
110 static struct pool_item_header
111 *pr_find_pagehead __P((struct pool *, caddr_t));
112 static void pr_rmpage __P((struct pool *, struct pool_item_header *));
113 static int pool_catchup __P((struct pool *));
114 static void pool_prime_page __P((struct pool *, caddr_t));
115 static void *pool_page_alloc __P((unsigned long, int, int));
116 static void pool_page_free __P((void *, unsigned long, int));
117
118 #if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
119 static void pool_print1 __P((struct pool *, const char *));
120 #endif
121
122 #ifdef POOL_DIAGNOSTIC
123 /*
124 * Pool log entry. An array of these is allocated in pool_create().
125 */
126 struct pool_log {
127 const char *pl_file;
128 long pl_line;
129 int pl_action;
130 #define PRLOG_GET 1
131 #define PRLOG_PUT 2
132 void *pl_addr;
133 };
134
135 /* Number of entries in pool log buffers */
136 #ifndef POOL_LOGSIZE
137 #define POOL_LOGSIZE 10
138 #endif
139
140 int pool_logsize = POOL_LOGSIZE;
141
142 static void pr_log __P((struct pool *, void *, int, const char *, long));
143 static void pr_printlog __P((struct pool *));
144
145 static __inline__ void
146 pr_log(pp, v, action, file, line)
147 struct pool *pp;
148 void *v;
149 int action;
150 const char *file;
151 long line;
152 {
153 int n = pp->pr_curlogentry;
154 struct pool_log *pl;
155
156 if ((pp->pr_roflags & PR_LOGGING) == 0)
157 return;
158
159 /*
160 * Fill in the current entry. Wrap around and overwrite
161 * the oldest entry if necessary.
162 */
163 pl = &pp->pr_log[n];
164 pl->pl_file = file;
165 pl->pl_line = line;
166 pl->pl_action = action;
167 pl->pl_addr = v;
168 if (++n >= pp->pr_logsize)
169 n = 0;
170 pp->pr_curlogentry = n;
171 }
172
173 static void
174 pr_printlog(pp)
175 struct pool *pp;
176 {
177 int i = pp->pr_logsize;
178 int n = pp->pr_curlogentry;
179
180 if ((pp->pr_roflags & PR_LOGGING) == 0)
181 return;
182
183 pool_print1(pp, "printlog");
184
185 /*
186 * Print all entries in this pool's log.
187 */
188 while (i-- > 0) {
189 struct pool_log *pl = &pp->pr_log[n];
190 if (pl->pl_action != 0) {
191 printf("log entry %d:\n", i);
192 printf("\taction = %s, addr = %p\n",
193 pl->pl_action == PRLOG_GET ? "get" : "put",
194 pl->pl_addr);
195 printf("\tfile: %s at line %lu\n",
196 pl->pl_file, pl->pl_line);
197 }
198 if (++n >= pp->pr_logsize)
199 n = 0;
200 }
201 }
202 #else
203 #define pr_log(pp, v, action, file, line)
204 #define pr_printlog(pp)
205 #endif
206
207
208 /*
209 * Return the pool page header based on page address.
210 */
211 static __inline__ struct pool_item_header *
212 pr_find_pagehead(pp, page)
213 struct pool *pp;
214 caddr_t page;
215 {
216 struct pool_item_header *ph;
217
218 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
219 return ((struct pool_item_header *)(page + pp->pr_phoffset));
220
221 for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
222 ph != NULL;
223 ph = LIST_NEXT(ph, ph_hashlist)) {
224 if (ph->ph_page == page)
225 return (ph);
226 }
227 return (NULL);
228 }
229
230 /*
231 * Remove a page from the pool.
232 */
233 static __inline__ void
234 pr_rmpage(pp, ph)
235 struct pool *pp;
236 struct pool_item_header *ph;
237 {
238
239 /*
240 * If the page was idle, decrement the idle page count.
241 */
242 if (ph->ph_nmissing == 0) {
243 #ifdef DIAGNOSTIC
244 if (pp->pr_nidle == 0)
245 panic("pr_rmpage: nidle inconsistent");
246 if (pp->pr_nitems < pp->pr_itemsperpage)
247 panic("pr_rmpage: nitems inconsistent");
248 #endif
249 pp->pr_nidle--;
250 }
251
252 pp->pr_nitems -= pp->pr_itemsperpage;
253
254 /*
255 * Unlink a page from the pool and release it.
256 */
257 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
258 (*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
259 pp->pr_npages--;
260 pp->pr_npagefree++;
261
262 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
263 LIST_REMOVE(ph, ph_hashlist);
264 pool_put(&phpool, ph);
265 }
266
267 if (pp->pr_curpage == ph) {
268 /*
269 * Find a new non-empty page header, if any.
270 * Start search from the page head, to increase the
271 * chance for "high water" pages to be freed.
272 */
273 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
274 ph = TAILQ_NEXT(ph, ph_pagelist))
275 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
276 break;
277
278 pp->pr_curpage = ph;
279 }
280 }
281
282 /*
283 * Allocate and initialize a pool.
284 */
285 struct pool *
286 pool_create(size, align, ioff, nitems, wchan, pagesz, alloc, release, mtype)
287 size_t size;
288 u_int align;
289 u_int ioff;
290 int nitems;
291 const char *wchan;
292 size_t pagesz;
293 void *(*alloc) __P((unsigned long, int, int));
294 void (*release) __P((void *, unsigned long, int));
295 int mtype;
296 {
297 struct pool *pp;
298 int flags;
299
300 pp = (struct pool *)malloc(sizeof(*pp), M_POOL, M_NOWAIT);
301 if (pp == NULL)
302 return (NULL);
303
304 flags = PR_FREEHEADER;
305 #ifdef POOL_DIAGNOSTIC
306 if (pool_logsize != 0)
307 flags |= PR_LOGGING;
308 #endif
309
310 pool_init(pp, size, align, ioff, flags, wchan, pagesz,
311 alloc, release, mtype);
312
313 if (nitems != 0) {
314 if (pool_prime(pp, nitems, NULL) != 0) {
315 pool_destroy(pp);
316 return (NULL);
317 }
318 }
319
320 return (pp);
321 }
322
323 /*
324 * Initialize the given pool resource structure.
325 *
326 * We export this routine to allow other kernel parts to declare
327 * static pools that must be initialized before malloc() is available.
328 */
329 void
330 pool_init(pp, size, align, ioff, flags, wchan, pagesz, alloc, release, mtype)
331 struct pool *pp;
332 size_t size;
333 u_int align;
334 u_int ioff;
335 int flags;
336 const char *wchan;
337 size_t pagesz;
338 void *(*alloc) __P((unsigned long, int, int));
339 void (*release) __P((void *, unsigned long, int));
340 int mtype;
341 {
342 int off, slack, i;
343
344 /*
345 * Check arguments and construct default values.
346 */
347 if (!powerof2(pagesz) || pagesz > PAGE_SIZE)
348 panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
349
350 if (alloc == NULL && release == NULL) {
351 alloc = pool_page_alloc;
352 release = pool_page_free;
353 pagesz = PAGE_SIZE; /* Rounds to PAGE_SIZE anyhow. */
354 } else if ((alloc != NULL && release != NULL) == 0) {
355 /* If you specify one, you must specify both. */
356 panic("pool_init: must specify alloc and release together");
357 }
358
359 if (pagesz == 0)
360 pagesz = PAGE_SIZE;
361
362 if (align == 0)
363 align = ALIGN(1);
364
365 if (size < sizeof(struct pool_item))
366 size = sizeof(struct pool_item);
367
368 /*
369 * Initialize the pool structure.
370 */
371 TAILQ_INIT(&pp->pr_pagelist);
372 pp->pr_curpage = NULL;
373 pp->pr_npages = 0;
374 pp->pr_minitems = 0;
375 pp->pr_minpages = 0;
376 pp->pr_maxpages = UINT_MAX;
377 pp->pr_roflags = flags;
378 pp->pr_flags = 0;
379 pp->pr_size = ALIGN(size);
380 pp->pr_align = align;
381 pp->pr_wchan = wchan;
382 pp->pr_mtype = mtype;
383 pp->pr_alloc = alloc;
384 pp->pr_free = release;
385 pp->pr_pagesz = pagesz;
386 pp->pr_pagemask = ~(pagesz - 1);
387 pp->pr_pageshift = ffs(pagesz) - 1;
388 pp->pr_nitems = 0;
389 pp->pr_nout = 0;
390 pp->pr_hardlimit = UINT_MAX;
391 pp->pr_hardlimit_warning = NULL;
392 pp->pr_hardlimit_ratecap = 0;
393 memset(&pp->pr_hardlimit_warning_last, 0,
394 sizeof(pp->pr_hardlimit_warning_last));
395
396 /*
397 * Decide whether to put the page header off page to avoid
398 * wasting too large a part of the page. Off-page page headers
399 * go on a hash table, so we can match a returned item
400 * with its header based on the page address.
401 * We use 1/16 of the page size as the threshold (XXX: tune)
402 */
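/*
 * For example, with 4 KB pages the cutoff is 256 bytes: pools whose
 * (aligned) item size is below that keep the header at the end of the
 * page itself; larger items get an off-page header from `phpool'.
 */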
403 if (pp->pr_size < pagesz/16) {
404 /* Use the end of the page for the page header */
405 pp->pr_roflags |= PR_PHINPAGE;
406 pp->pr_phoffset = off =
407 pagesz - ALIGN(sizeof(struct pool_item_header));
408 } else {
409 /* The page header will be taken from our page header pool */
410 pp->pr_phoffset = 0;
411 off = pagesz;
412 for (i = 0; i < PR_HASHTABSIZE; i++) {
413 LIST_INIT(&pp->pr_hashtab[i]);
414 }
415 }
416
417 /*
418 * Alignment is to take place at `ioff' within the item. This means
419 * we must reserve up to `align - 1' bytes on the page to allow
420 * appropriate positioning of each item.
421 *
422 * Silently enforce `0 <= ioff < align'.
423 */
424 pp->pr_itemoffset = ioff = ioff % align;
425 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
426
427 /*
428 * Use the slack between the chunks and the page header
429 * for "cache coloring".
430 */
431 slack = off - pp->pr_itemsperpage * pp->pr_size;
432 pp->pr_maxcolor = (slack / align) * align;
433 pp->pr_curcolor = 0;
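/*
 * For instance (illustrative numbers only), with 64 bytes of slack and
 * 8-byte alignment, successive pages start their items at offsets
 * 0, 8, ..., 64 and then wrap back to 0, so equivalent items on
 * different pages tend to land in different cache lines.
 */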
434
435 pp->pr_nget = 0;
436 pp->pr_nfail = 0;
437 pp->pr_nput = 0;
438 pp->pr_npagealloc = 0;
439 pp->pr_npagefree = 0;
440 pp->pr_hiwat = 0;
441 pp->pr_nidle = 0;
442
443 #ifdef POOL_DIAGNOSTIC
444 if ((flags & PR_LOGGING) != 0) {
445 pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
446 M_TEMP, M_NOWAIT);
447 if (pp->pr_log == NULL)
448 pp->pr_roflags &= ~PR_LOGGING;
449 pp->pr_curlogentry = 0;
450 pp->pr_logsize = pool_logsize;
451 }
452 #endif
453
454 simple_lock_init(&pp->pr_slock);
455
456 /*
457 * Initialize private page header pool if we haven't done so yet.
458 * XXX LOCKING.
459 */
460 if (phpool.pr_size == 0) {
461 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
462 0, "phpool", 0, 0, 0, 0);
463 }
464
465 /* Insert into the list of all pools. */
466 simple_lock(&pool_head_slock);
467 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
468 simple_unlock(&pool_head_slock);
469 }
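/*
 * Illustrative sketch of the static-pool case mentioned above (the
 * `example' names are hypothetical): a subsystem that needs pool
 * allocations before malloc() is available can declare its pool
 * statically and call pool_init() directly from early bootstrap code:
 *
 *	static struct pool examplepool;
 *
 *	pool_init(&examplepool, sizeof(struct example_item), 0, 0,
 *	    0, "examplepl", 0, NULL, NULL, M_TEMP);
 *
 * With NULL alloc/release hooks the default page allocator is used,
 * and a page size of 0 defaults to PAGE_SIZE; this mirrors how the
 * private `phpool' is initialized just above.
 */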
470
471 /*
472 * De-commission a pool resource.
473 */
474 void
475 pool_destroy(pp)
476 struct pool *pp;
477 {
478 struct pool_item_header *ph;
479
480 #ifdef DIAGNOSTIC
481 if (pp->pr_nout != 0) {
482 pr_printlog(pp);
483 panic("pool_destroy: pool busy: still out: %u\n",
484 pp->pr_nout);
485 }
486 #endif
487
488 /* Remove all pages */
489 if ((pp->pr_roflags & PR_STATIC) == 0)
490 while ((ph = pp->pr_pagelist.tqh_first) != NULL)
491 pr_rmpage(pp, ph);
492
493 /* Remove from global pool list */
494 simple_lock(&pool_head_slock);
495 TAILQ_REMOVE(&pool_head, pp, pr_poollist);
496 /* XXX Only clear this if we were drainpp? */
497 drainpp = NULL;
498 simple_unlock(&pool_head_slock);
499
500 #ifdef POOL_DIAGNOSTIC
501 if ((pp->pr_roflags & PR_LOGGING) != 0)
502 free(pp->pr_log, M_TEMP);
503 #endif
504
505 if (pp->pr_roflags & PR_FREEHEADER)
506 free(pp, M_POOL);
507 }
508
509
510 /*
511 * Grab an item from the pool; must be called at appropriate spl level
512 */
513 #ifdef POOL_DIAGNOSTIC
514 void *
515 _pool_get(pp, flags, file, line)
516 struct pool *pp;
517 int flags;
518 const char *file;
519 long line;
520 #else
521 void *
522 pool_get(pp, flags)
523 struct pool *pp;
524 int flags;
525 #endif
526 {
527 void *v;
528 struct pool_item *pi;
529 struct pool_item_header *ph;
530
531 #ifdef DIAGNOSTIC
532 if ((pp->pr_roflags & PR_STATIC) && (flags & PR_MALLOCOK)) {
533 pr_printlog(pp);
534 panic("pool_get: static");
535 }
536 #endif
537
538 if (curproc == NULL && (flags & PR_WAITOK) != 0)
539 panic("pool_get: must have NOWAIT");
540
541 simple_lock(&pp->pr_slock);
542
543 startover:
544 /*
545 * Check to see if we've reached the hard limit. If we have,
546 * and we can wait, then wait until an item has been returned to
547 * the pool.
548 */
549 #ifdef DIAGNOSTIC
550 if (pp->pr_nout > pp->pr_hardlimit) {
551 simple_unlock(&pp->pr_slock);
552 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
553 }
554 #endif
555 if (pp->pr_nout == pp->pr_hardlimit) {
556 if (flags & PR_WAITOK) {
557 /*
558 * XXX: A warning isn't logged in this case. Should
559 * it be?
560 */
561 pp->pr_flags |= PR_WANTED;
562 simple_unlock(&pp->pr_slock);
563 tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
564 simple_lock(&pp->pr_slock);
565 goto startover;
566 }
567 if (pp->pr_hardlimit_warning != NULL) {
568 /*
569 * Log a message that the hard limit has been hit.
570 */
571 struct timeval curtime, logdiff;
572 int s = splclock();
573 curtime = mono_time;
574 splx(s);
575 timersub(&curtime, &pp->pr_hardlimit_warning_last,
576 &logdiff);
577 if (logdiff.tv_sec >= pp->pr_hardlimit_ratecap) {
578 pp->pr_hardlimit_warning_last = curtime;
579 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
580 }
581 }
582
583 if (flags & PR_URGENT)
584 panic("pool_get: urgent");
585
586 pp->pr_nfail++;
587
588 simple_unlock(&pp->pr_slock);
589 return (NULL);
590 }
591
592 /*
593 * The convention we use is that if `curpage' is not NULL, then
594 * it points at a non-empty bucket. In particular, `curpage'
595 * never points at a page header which has PR_PHINPAGE set and
596 * has no items in its bucket.
597 */
598 if ((ph = pp->pr_curpage) == NULL) {
599 void *v;
600
601 #ifdef DIAGNOSTIC
602 if (pp->pr_nitems != 0) {
603 simple_unlock(&pp->pr_slock);
604 printf("pool_get: %s: curpage NULL, nitems %u\n",
605 pp->pr_wchan, pp->pr_nitems);
606 panic("pool_get: nitems inconsistent\n");
607 }
608 #endif
609
610 /*
611 * Call the back-end page allocator for more memory.
612 * Release the pool lock, as the back-end page allocator
613 * may block.
614 */
615 simple_unlock(&pp->pr_slock);
616 v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
617 simple_lock(&pp->pr_slock);
618
619 if (v == NULL) {
620 /*
621 * We were unable to allocate a page, but
622 * we released the lock during allocation,
623 * so perhaps items were freed back to the
624 * pool. Check for this case.
625 */
626 if (pp->pr_curpage != NULL)
627 goto startover;
628
629 if (flags & PR_URGENT)
630 panic("pool_get: urgent");
631
632 if ((flags & PR_WAITOK) == 0) {
633 pp->pr_nfail++;
634 simple_unlock(&pp->pr_slock);
635 return (NULL);
636 }
637
638 /*
639 * Wait for items to be returned to this pool.
640 *
641 * XXX: we actually want to wait just until
642 * the page allocator has memory again. Depending
643 * on this pool's usage, we might get stuck here
644 * for a long time.
645 *
646 * XXX: maybe we should wake up once a second and
647 * try again?
648 */
649 pp->pr_flags |= PR_WANTED;
650 simple_unlock(&pp->pr_slock);
651 tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
652 simple_lock(&pp->pr_slock);
653 goto startover;
654 }
655
656 /* We have more memory; add it to the pool */
657 pp->pr_npagealloc++;
658 pool_prime_page(pp, v);
659
660 /* Start the allocation process over. */
661 goto startover;
662 }
663
664 if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
665 simple_unlock(&pp->pr_slock);
666 panic("pool_get: %s: page empty", pp->pr_wchan);
667 }
668 #ifdef DIAGNOSTIC
669 if (pp->pr_nitems == 0) {
670 simple_unlock(&pp->pr_slock);
671 printf("pool_get: %s: items on itemlist, nitems %u\n",
672 pp->pr_wchan, pp->pr_nitems);
673 panic("pool_get: nitems inconsistent\n");
674 }
675 #endif
676 pr_log(pp, v, PRLOG_GET, file, line);
677
678 #ifdef DIAGNOSTIC
679 if (pi->pi_magic != PI_MAGIC) {
680 pr_printlog(pp);
681 panic("pool_get(%s): free list modified: magic=%x; page %p;"
682 " item addr %p\n",
683 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
684 }
685 #endif
686
687 /*
688 * Remove from item list.
689 */
690 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
691 pp->pr_nitems--;
692 pp->pr_nout++;
693 if (ph->ph_nmissing == 0) {
694 #ifdef DIAGNOSTIC
695 if (pp->pr_nidle == 0)
696 panic("pool_get: nidle inconsistent");
697 #endif
698 pp->pr_nidle--;
699 }
700 ph->ph_nmissing++;
701 if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
702 #ifdef DIAGNOSTIC
703 if (ph->ph_nmissing != pp->pr_itemsperpage) {
704 simple_unlock(&pp->pr_slock);
705 panic("pool_get: %s: nmissing inconsistent",
706 pp->pr_wchan);
707 }
708 #endif
709 /*
710 * Find a new non-empty page header, if any.
711 * Start search from the page head, to increase
712 * the chance for "high water" pages to be freed.
713 *
714 * Migrate empty pages to the end of the list. This
715 * will speed the update of curpage as pages become
716 * idle. Empty pages intermingled with idle pages
717 * is no big deal. As soon as a page becomes un-empty,
718 * it will move back to the head of the list.
719 */
720 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
721 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
722 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
723 ph = TAILQ_NEXT(ph, ph_pagelist))
724 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
725 break;
726
727 pp->pr_curpage = ph;
728 }
729
730 pp->pr_nget++;
731
732 /*
733 * If we have a low water mark and we are now below that low
734 * water mark, add more items to the pool.
735 */
736 if (pp->pr_nitems < pp->pr_minitems && pool_catchup(pp) != 0) {
737 /*
738 * XXX: Should we log a warning? Should we set up a timeout
739 * to try again in a second or so? The latter could break
740 * a caller's assumptions about interrupt protection, etc.
741 */
742 }
743
744 simple_unlock(&pp->pr_slock);
745 return (v);
746 }
747
748 /*
749 * Return resource to the pool; must be called at appropriate spl level
750 */
751 #ifdef POOL_DIAGNOSTIC
752 void
753 _pool_put(pp, v, file, line)
754 struct pool *pp;
755 void *v;
756 const char *file;
757 long line;
758 #else
759 void
760 pool_put(pp, v)
761 struct pool *pp;
762 void *v;
763 #endif
764 {
765 struct pool_item *pi = v;
766 struct pool_item_header *ph;
767 caddr_t page;
768 int s;
769
770 page = (caddr_t)((u_long)v & pp->pr_pagemask);
771
772 simple_lock(&pp->pr_slock);
773
774 pr_log(pp, v, PRLOG_PUT, file, line);
775
776 if ((ph = pr_find_pagehead(pp, page)) == NULL) {
777 pr_printlog(pp);
778 panic("pool_put: %s: page header missing", pp->pr_wchan);
779 }
780
781 /*
782 * Return to item list.
783 */
784 #ifdef DIAGNOSTIC
785 pi->pi_magic = PI_MAGIC;
786 #endif
787 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
788 ph->ph_nmissing--;
789 pp->pr_nput++;
790 pp->pr_nitems++;
791 pp->pr_nout--;
792
793 /* Cancel "pool empty" condition if it exists */
794 if (pp->pr_curpage == NULL)
795 pp->pr_curpage = ph;
796
797 if (pp->pr_flags & PR_WANTED) {
798 pp->pr_flags &= ~PR_WANTED;
799 if (ph->ph_nmissing == 0)
800 pp->pr_nidle++;
801 simple_unlock(&pp->pr_slock);
802 wakeup((caddr_t)pp);
803 return;
804 }
805
806 /*
807 * If this page is now complete, do one of two things:
808 *
809 * (1) If we have more pages than the page high water
810 * mark, free the page back to the system.
811 *
812 * (2) Move it to the end of the page list, so that
813 * we minimize our chances of fragmenting the
814 * pool. Idle pages migrate to the end of the list
815 * (along with completely empty pages, so that we find
816 * un-empty pages more quickly when we update curpage)
817 * so they can be more easily swept up by the
818 * pagedaemon when pages are scarce.
819 */
820 if (ph->ph_nmissing == 0) {
821 pp->pr_nidle++;
822 if (pp->pr_npages > pp->pr_maxpages) {
823 pr_rmpage(pp, ph);
824 } else {
825 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
826 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
827
828 /*
829 * Update the timestamp on the page. A page must
830 * be idle for some period of time before it can
831 * be reclaimed by the pagedaemon. This minimizes
832 * ping-pong'ing for memory.
833 */
834 s = splclock();
835 ph->ph_time = mono_time;
836 splx(s);
837
838 /*
839 * Update the current page pointer. Just look for
840 * the first page with any free items.
841 *
842 * XXX: Maybe we want an option to look for the
843 * page with the fewest available items, to minimize
844 * fragmentation?
845 */
846 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
847 ph = TAILQ_NEXT(ph, ph_pagelist))
848 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
849 break;
850
851 pp->pr_curpage = ph;
852 }
853 }
854 /*
855 * If the page has just become un-empty, move it to the head of
856 * the list, and make it the current page. The next allocation
857 * will get the item from this page, instead of further fragmenting
858 * the pool.
859 */
860 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
861 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
862 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
863 pp->pr_curpage = ph;
864 }
865
866 simple_unlock(&pp->pr_slock);
867
868 }
869
870 /*
871 * Add N items to the pool.
872 */
873 int
874 pool_prime(pp, n, storage)
875 struct pool *pp;
876 int n;
877 caddr_t storage;
878 {
879 caddr_t cp;
880 int newnitems, newpages;
881
882 #ifdef DIAGNOSTIC
883 if (storage && !(pp->pr_roflags & PR_STATIC))
884 panic("pool_prime: static");
885 /* !storage && static caught below */
886 #endif
887
888 simple_lock(&pp->pr_slock);
889
890 newnitems = pp->pr_minitems + n;
891 newpages =
892 roundup(newnitems, pp->pr_itemsperpage) / pp->pr_itemsperpage
893 - pp->pr_minpages;
894
895 while (newpages-- > 0) {
896 if (pp->pr_roflags & PR_STATIC) {
897 cp = storage;
898 storage += pp->pr_pagesz;
899 } else {
900 simple_unlock(&pp->pr_slock);
901 cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
902 simple_lock(&pp->pr_slock);
903 }
904
905 if (cp == NULL) {
906 simple_unlock(&pp->pr_slock);
907 return (ENOMEM);
908 }
909
910 pool_prime_page(pp, cp);
911 pp->pr_minpages++;
912 }
913
914 pp->pr_minitems = newnitems;
915
916 if (pp->pr_minpages >= pp->pr_maxpages)
917 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
918
919 simple_unlock(&pp->pr_slock);
920 return (0);
921 }
922
923 /*
924 * Add a page worth of items to the pool.
925 *
926 * Note, we must be called with the pool descriptor LOCKED.
927 */
928 static void
929 pool_prime_page(pp, storage)
930 struct pool *pp;
931 caddr_t storage;
932 {
933 struct pool_item *pi;
934 struct pool_item_header *ph;
935 caddr_t cp = storage;
936 unsigned int align = pp->pr_align;
937 unsigned int ioff = pp->pr_itemoffset;
938 int n;
939
940 if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
941 ph = (struct pool_item_header *)(cp + pp->pr_phoffset);
942 } else {
943 ph = pool_get(&phpool, PR_URGENT);
944 LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
945 ph, ph_hashlist);
946 }
947
948 /*
949 * Insert page header.
950 */
951 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
952 TAILQ_INIT(&ph->ph_itemlist);
953 ph->ph_page = storage;
954 ph->ph_nmissing = 0;
955 memset(&ph->ph_time, 0, sizeof(ph->ph_time));
956
957 pp->pr_nidle++;
958
959 /*
960 * Color this page.
961 */
962 cp = (caddr_t)(cp + pp->pr_curcolor);
963 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
964 pp->pr_curcolor = 0;
965
966 /*
967 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
968 */
969 if (ioff != 0)
970 cp = (caddr_t)(cp + (align - ioff));
971
972 /*
973 * Insert remaining chunks on the bucket list.
974 */
975 n = pp->pr_itemsperpage;
976 pp->pr_nitems += n;
977
978 while (n--) {
979 pi = (struct pool_item *)cp;
980
981 /* Insert on page list */
982 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
983 #ifdef DIAGNOSTIC
984 pi->pi_magic = PI_MAGIC;
985 #endif
986 cp = (caddr_t)(cp + pp->pr_size);
987 }
988
989 /*
990 * If the pool was depleted, point at the new page.
991 */
992 if (pp->pr_curpage == NULL)
993 pp->pr_curpage = ph;
994
995 if (++pp->pr_npages > pp->pr_hiwat)
996 pp->pr_hiwat = pp->pr_npages;
997 }
998
999 /*
1000 * Like pool_prime(), except this is used by pool_get() when nitems
1001 * drops below the low water mark. This is used to catch up nitems
1002 * with the low water mark.
1003 *
1004 * Note 1, we never wait for memory here, we let the caller decide what to do.
1005 *
1006 * Note 2, this doesn't work with static pools.
1007 *
1008 * Note 3, we must be called with the pool already locked, and we return
1009 * with it locked.
1010 */
1011 static int
1012 pool_catchup(pp)
1013 struct pool *pp;
1014 {
1015 caddr_t cp;
1016 int error = 0;
1017
1018 if (pp->pr_roflags & PR_STATIC) {
1019 /*
1020 * We dropped below the low water mark, and this is not a
1021 * good thing. Log a warning.
1022 *
1023 * XXX: rate-limit this?
1024 */
1025 printf("WARNING: static pool `%s' dropped below low water "
1026 "mark\n", pp->pr_wchan);
1027 return (0);
1028 }
1029
1030 while (pp->pr_nitems < pp->pr_minitems) {
1031 /*
1032 * Call the page back-end allocator for more memory.
1033 *
1034 * XXX: We never wait, so should we bother unlocking
1035 * the pool descriptor?
1036 */
1037 simple_unlock(&pp->pr_slock);
1038 cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
1039 simple_lock(&pp->pr_slock);
1040 if (cp == NULL) {
1041 error = ENOMEM;
1042 break;
1043 }
1044 pool_prime_page(pp, cp);
1045 }
1046
1047 return (error);
1048 }
1049
1050 void
1051 pool_setlowat(pp, n)
1052 pool_handle_t pp;
1053 int n;
1054 {
1055 int error;
1056
1057 simple_lock(&pp->pr_slock);
1058
1059 pp->pr_minitems = n;
1060 pp->pr_minpages = (n == 0)
1061 ? 0
1062 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1063
1064 /* Make sure we're caught up with the newly-set low water mark. */
1065 if ((error = pool_catchup(pp)) != 0) {
1066 /*
1067 * XXX: Should we log a warning? Should we set up a timeout
1068 * to try again in a second or so? The latter could break
1069 * a caller's assumptions about interrupt protection, etc.
1070 */
1071 }
1072
1073 simple_unlock(&pp->pr_slock);
1074 }
1075
1076 void
1077 pool_sethiwat(pp, n)
1078 pool_handle_t pp;
1079 int n;
1080 {
1081
1082 simple_lock(&pp->pr_slock);
1083
1084 pp->pr_maxpages = (n == 0)
1085 ? 0
1086 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1087
1088 simple_unlock(&pp->pr_slock);
1089 }
1090
1091 void
1092 pool_sethardlimit(pp, n, warnmess, ratecap)
1093 pool_handle_t pp;
1094 int n;
1095 const char *warnmess;
1096 int ratecap;
1097 {
1098
1099 simple_lock(&pp->pr_slock);
1100
1101 pp->pr_hardlimit = n;
1102 pp->pr_hardlimit_warning = warnmess;
1103 pp->pr_hardlimit_ratecap = ratecap;
1104 memset(&pp->pr_hardlimit_warning_last, 0,
1105 sizeof(pp->pr_hardlimit_warning_last));
1106
1107 /*
1108 * In-line version of pool_sethiwat(), because we don't want to
1109 * release the lock.
1110 */
1111 pp->pr_maxpages = (n == 0)
1112 ? 0
1113 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1114
1115 simple_unlock(&pp->pr_slock);
1116 }
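/*
 * Illustrative sketch (hypothetical pool and numbers): a subsystem
 * might combine these knobs to keep a small reserve primed while
 * capping total consumption, e.g.
 *
 *	pool_sethardlimit(foopool, 1024,
 *	    "WARNING: foopool limit reached", 60);
 *	pool_setlowat(foopool, 16);
 *
 * which tries to keep at least 16 free items in the pool, never allows
 * more than 1024 items outstanding (pool_get() fails, or sleeps if
 * PR_WAITOK is set), and logs the warning at most once per 60 seconds.
 */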
1117
1118 /*
1119 * Default page allocator.
1120 */
1121 static void *
1122 pool_page_alloc(sz, flags, mtype)
1123 unsigned long sz;
1124 int flags;
1125 int mtype;
1126 {
1127 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1128
1129 return ((void *)uvm_km_alloc_poolpage(waitok));
1130 }
1131
1132 static void
1133 pool_page_free(v, sz, mtype)
1134 void *v;
1135 unsigned long sz;
1136 int mtype;
1137 {
1138
1139 uvm_km_free_poolpage((vaddr_t)v);
1140 }
1141
1142 /*
1143 * Alternate pool page allocator for pools that know they will
1144 * never be accessed in interrupt context.
1145 */
1146 void *
1147 pool_page_alloc_nointr(sz, flags, mtype)
1148 unsigned long sz;
1149 int flags;
1150 int mtype;
1151 {
1152 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1153
1154 return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1155 waitok));
1156 }
1157
1158 void
1159 pool_page_free_nointr(v, sz, mtype)
1160 void *v;
1161 unsigned long sz;
1162 int mtype;
1163 {
1164
1165 uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1166 }
1167
1168
1169 /*
1170 * Release all complete pages that have not been used recently.
1171 */
1172 void
1173 pool_reclaim(pp)
1174 pool_handle_t pp;
1175 {
1176 struct pool_item_header *ph, *phnext;
1177 struct timeval curtime;
1178 int s;
1179
1180 if (pp->pr_roflags & PR_STATIC)
1181 return;
1182
1183 if (simple_lock_try(&pp->pr_slock) == 0)
1184 return;
1185
1186 s = splclock();
1187 curtime = mono_time;
1188 splx(s);
1189
1190 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
1191 phnext = TAILQ_NEXT(ph, ph_pagelist);
1192
1193 /* Check our minimum page claim */
1194 if (pp->pr_npages <= pp->pr_minpages)
1195 break;
1196
1197 if (ph->ph_nmissing == 0) {
1198 struct timeval diff;
1199 timersub(&curtime, &ph->ph_time, &diff);
1200 if (diff.tv_sec < pool_inactive_time)
1201 continue;
1202
1203 /*
1204 * If freeing this page would put us below
1205 * the low water mark, stop now.
1206 */
1207 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1208 pp->pr_minitems)
1209 break;
1210
1211 pr_rmpage(pp, ph);
1212 }
1213 }
1214
1215 simple_unlock(&pp->pr_slock);
1216 }
1217
1218
1219 /*
1220 * Drain pools, one at a time.
1221 *
1222 * Note, we must never be called from an interrupt context.
1223 */
1224 void
1225 pool_drain(arg)
1226 void *arg;
1227 {
1228 struct pool *pp;
1229 int s;
1230
1231 s = splimp();
1232 simple_lock(&pool_head_slock);
1233
1234 if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL)
1235 goto out;
1236
1237 pp = drainpp;
1238 drainpp = TAILQ_NEXT(pp, pr_poollist);
1239
1240 pool_reclaim(pp);
1241
1242 out:
1243 simple_unlock(&pool_head_slock);
1244 splx(s);
1245 }
1246
1247
1248 #if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
1249 /*
1250 * Diagnostic helpers.
1251 */
1252 void
1253 pool_print(pp, label)
1254 struct pool *pp;
1255 const char *label;
1256 {
1257 int s;
1258
1259 s = splimp();
1260 simple_lock(&pp->pr_slock);
1261 pool_print1(pp, label);
1262 simple_unlock(&pp->pr_slock);
1263 splx(s);
1264 }
1265
1266 static void
1267 pool_print1(pp, label)
1268 struct pool *pp;
1269 const char *label;
1270 {
1271
1272 if (label != NULL)
1273 printf("%s: ", label);
1274
1275 printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
1276 " npages %u minitems %u itemsperpage %u itemoffset %u\n"
1277 " nidle %lu\n",
1278 pp->pr_wchan,
1279 pp->pr_nget,
1280 pp->pr_nput,
1281 pp->pr_npagealloc,
1282 pp->pr_npagefree,
1283 pp->pr_npages,
1284 pp->pr_minitems,
1285 pp->pr_itemsperpage,
1286 pp->pr_itemoffset,
1287 pp->pr_nidle);
1288 }
1289
1290 int
1291 pool_chk(pp, label)
1292 struct pool *pp;
1293 char *label;
1294 {
1295 struct pool_item_header *ph;
1296 int r = 0;
1297
1298 simple_lock(&pp->pr_slock);
1299
1300 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
1301 ph = TAILQ_NEXT(ph, ph_pagelist)) {
1302
1303 struct pool_item *pi;
1304 int n;
1305 caddr_t page;
1306
1307 page = (caddr_t)((u_long)ph & pp->pr_pagemask);
1308 if (page != ph->ph_page &&
1309 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1310 if (label != NULL)
1311 printf("%s: ", label);
1312 printf("pool(%p:%s): page inconsistency: page %p;"
1313 " at page head addr %p (p %p)\n", pp,
1314 pp->pr_wchan, ph->ph_page,
1315 ph, page);
1316 r++;
1317 goto out;
1318 }
1319
1320 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1321 pi != NULL;
1322 pi = TAILQ_NEXT(pi,pi_list), n++) {
1323
1324 #ifdef DIAGNOSTIC
1325 if (pi->pi_magic != PI_MAGIC) {
1326 if (label != NULL)
1327 printf("%s: ", label);
1328 printf("pool(%s): free list modified: magic=%x;"
1329 " page %p; item ordinal %d;"
1330 " addr %p (p %p)\n",
1331 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1332 n, pi, page);
1333 panic("pool");
1334 }
1335 #endif
1336 page = (caddr_t)((u_long)pi & pp->pr_pagemask);
1337 if (page == ph->ph_page)
1338 continue;
1339
1340 if (label != NULL)
1341 printf("%s: ", label);
1342 printf("pool(%p:%s): page inconsistency: page %p;"
1343 " item ordinal %d; addr %p (p %p)\n", pp,
1344 pp->pr_wchan, ph->ph_page,
1345 n, pi, page);
1346 r++;
1347 goto out;
1348 }
1349 }
1350 out:
1351 simple_unlock(&pp->pr_slock);
1352 return (r);
1353 }
1354 #endif /* POOL_DIAGNOSTIC || DEBUG */
1355