/*	$NetBSD: subr_pool.c,v 1.21.2.2 1999/04/07 00:34:55 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `ph_itemlist' in each page header. The memory for building
 * the page list is either taken from the allocated pages themselves (for
 * small pool items) or taken from an internal pool of page headers (`phpool').
 */
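
/*
 * A rough usage sketch (struct foo, the "foopl" wait channel and the
 * M_DEVBUF malloc type are illustrative assumptions, not part of this
 * file); real callers pick their own item and malloc types:
 */
#if 0
struct foo { int foo_refcnt; };		/* hypothetical pool item */

static void
example_pool_usage()
{
	struct pool *foopool;
	struct foo *f;

	/* Create a pool of struct foo, using the default page allocator. */
	foopool = pool_create(sizeof(struct foo), 0, 0, 0, "foopl",
	    0, NULL, NULL, M_DEVBUF);
	if (foopool == NULL)
		return;

	/* Allocate an item, possibly sleeping until one is available. */
	f = pool_get(foopool, PR_WAITOK);

	/* ... use the item ... */

	/* Return the item and tear the pool down again. */
	pool_put(foopool, f);
	pool_destroy(foopool);
}
#endif /* example */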

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
static struct pool phpool;

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
				ph_hashlist;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
};

struct pool_item {
#ifdef DIAGNOSTIC
	int pi_magic;
#define PI_MAGIC 0xdeadbeef
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};


#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))



static struct pool_item_header
		*pr_find_pagehead __P((struct pool *, caddr_t));
static void	pr_rmpage __P((struct pool *, struct pool_item_header *));
static int	pool_catchup __P((struct pool *));
static void	pool_prime_page __P((struct pool *, caddr_t));
static void	*pool_page_alloc __P((unsigned long, int, int));
static void	pool_page_free __P((void *, unsigned long, int));

#if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
static void pool_print1 __P((struct pool *, const char *));
#endif

#ifdef POOL_DIAGNOSTIC
/*
 * Pool log entry. An array of these is allocated in pool_create().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

static void	pr_log __P((struct pool *, void *, int, const char *, long));
static void	pr_printlog __P((struct pool *));

static __inline__ void
pr_log(pp, v, action, file, line)
	struct pool	*pp;
	void		*v;
	int		action;
	const char	*file;
	long		line;
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(pp)
	struct pool *pp;
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	pool_print1(pp, "printlog");

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			printf("log entry %d:\n", i);
			printf("\taction = %s, addr = %p\n",
			    pl->pl_action == PRLOG_GET ? "get" : "put",
			    pl->pl_addr);
			printf("\tfile: %s at line %lu\n",
			    pl->pl_file, pl->pl_line);
		}
		if (++n >= pp->pr_logsize)
			n = 0;
	}
}
#else
#define	pr_log(pp, v, action, file, line)
#define	pr_printlog(pp)
#endif


/*
 * Return the pool page header based on page address.
 */
static __inline__ struct pool_item_header *
pr_find_pagehead(pp, page)
	struct pool *pp;
	caddr_t page;
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
	     ph != NULL;
	     ph = LIST_NEXT(ph, ph_hashlist)) {
		if (ph->ph_page == page)
			return (ph);
	}
	return (NULL);
}

/*
 * Remove a page from the pool.
 */
static __inline__ void
pr_rmpage(pp, ph)
	struct pool *pp;
	struct pool_item_header *ph;
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it.
	 */
	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
	(*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
	pp->pr_npages--;
	pp->pr_npagefree++;

	if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
		LIST_REMOVE(ph, ph_hashlist);
		pool_put(&phpool, ph);
	}

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
		     ph = TAILQ_NEXT(ph, ph_pagelist))
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}
}

/*
 * Allocate and initialize a pool.
 */
struct pool *
pool_create(size, align, ioff, nitems, wchan, pagesz, alloc, release, mtype)
	size_t	size;
	u_int	align;
	u_int	ioff;
	int	nitems;
	const char *wchan;
	size_t	pagesz;
	void	*(*alloc) __P((unsigned long, int, int));
	void	(*release) __P((void *, unsigned long, int));
	int	mtype;
{
	struct pool *pp;
	int flags;

	pp = (struct pool *)malloc(sizeof(*pp), M_POOL, M_NOWAIT);
	if (pp == NULL)
		return (NULL);

	flags = PR_FREEHEADER;
#ifdef POOL_DIAGNOSTIC
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

	pool_init(pp, size, align, ioff, flags, wchan, pagesz,
	    alloc, release, mtype);

	if (nitems != 0) {
		if (pool_prime(pp, nitems, NULL) != 0) {
			pool_destroy(pp);
			return (NULL);
		}
	}

	return (pp);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(pp, size, align, ioff, flags, wchan, pagesz, alloc, release, mtype)
	struct pool	*pp;
	size_t		size;
	u_int		align;
	u_int		ioff;
	int		flags;
	const char	*wchan;
	size_t		pagesz;
	void		*(*alloc) __P((unsigned long, int, int));
	void		(*release) __P((void *, unsigned long, int));
	int		mtype;
{
	int off, slack, i;

	/*
	 * Check arguments and construct default values.
	 */
	if (!powerof2(pagesz) || pagesz > PAGE_SIZE)
		panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);

	if (alloc == NULL && release == NULL) {
		alloc = pool_page_alloc;
		release = pool_page_free;
		pagesz = PAGE_SIZE;	/* Rounds to PAGE_SIZE anyhow. */
	} else if ((alloc != NULL && release != NULL) == 0) {
		/* If you specify one, you must specify both. */
		panic("pool_init: must specify alloc and release together");
	}

	if (pagesz == 0)
		pagesz = PAGE_SIZE;

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INIT(&pp->pr_pagelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = ALIGN(size);
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_mtype = mtype;
	pp->pr_alloc = alloc;
	pp->pr_free = release;
	pp->pr_pagesz = pagesz;
	pp->pr_pagemask = ~(pagesz - 1);
	pp->pr_pageshift = ffs(pagesz) - 1;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap = 0;
	memset(&pp->pr_hardlimit_warning_last, 0,
	    sizeof(pp->pr_hardlimit_warning_last));

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go on a hash table, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < pagesz/16) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off =
		    pagesz - ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = pagesz;
		for (i = 0; i < PR_HASHTABSIZE; i++) {
			LIST_INIT(&pp->pr_hashtab[i]);
		}
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if ((flags & PR_LOGGING) != 0) {
		pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		    M_TEMP, M_NOWAIT);
		if (pp->pr_log == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool if we haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", 0, 0, 0, 0);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);
}
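
/*
 * A worked example of the layout arithmetic above, assuming a 4096-byte
 * page, a 64-byte item, the default alignment of ALIGN(1) (typically 4)
 * and ioff = 0; the figures are illustrative only and the header size
 * varies per port:
 *
 *	pr_size is below 4096/16, so the header lives in the page itself
 *	and off = pr_phoffset = 4096 - ALIGN(sizeof(struct pool_item_header)),
 *	roughly 4056 with a ~40-byte header.
 *
 *	pr_itemsperpage = 4056 / 64 = 63, leaving slack = 4056 - 63*64 = 24
 *	bytes, so successive pages are "colored" at offsets 0, 4, 8, ..., 24
 *	to spread the items over different cache lines.
 */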

/*
 * Decommission a pool resource.
 */
void
pool_destroy(pp)
	struct pool *pp;
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp);
		panic("pool_destroy: pool busy: still out: %u\n",
		    pp->pr_nout);
	}
#endif

	/* Remove all pages */
	if ((pp->pr_roflags & PR_STATIC) == 0)
		while ((ph = pp->pr_pagelist.tqh_first) != NULL)
			pr_rmpage(pp, ph);

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	/* XXX Only clear this if we were drainpp? */
	drainpp = NULL;
	simple_unlock(&pool_head_slock);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif

	if (pp->pr_roflags & PR_FREEHEADER)
		free(pp, M_POOL);
}


/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void *
_pool_get(pp, flags, file, line)
	struct pool *pp;
	int flags;
	const char *file;
	long line;
#else
void *
pool_get(pp, flags)
	struct pool *pp;
	int flags;
#endif
{
	void *v;
	struct pool_item *pi;
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if ((pp->pr_roflags & PR_STATIC) && (flags & PR_MALLOCOK)) {
		pr_printlog(pp);
		panic("pool_get: static");
	}
#endif

	if (curproc == NULL && (flags & PR_WAITOK) != 0)
		panic("pool_get: must have NOWAIT");

	simple_lock(&pp->pr_slock);

 startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit) {
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if (flags & PR_WAITOK) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			simple_unlock(&pp->pr_slock);
			tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
			simple_lock(&pp->pr_slock);
			goto startover;
		}
		if (pp->pr_hardlimit_warning != NULL) {
			/*
			 * Log a message that the hard limit has been hit.
			 */
			struct timeval curtime, logdiff;
			int s = splclock();
			curtime = mono_time;
			splx(s);
			timersub(&curtime, &pp->pr_hardlimit_warning_last,
			    &logdiff);
			if (logdiff.tv_sec >= pp->pr_hardlimit_ratecap) {
				pp->pr_hardlimit_warning_last = curtime;
				log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
			}
		}

		if (flags & PR_URGENT)
			panic("pool_get: urgent");

		pp->pr_nfail++;

		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
		void *v;

#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent\n");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		simple_unlock(&pp->pr_slock);
		v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
		simple_lock(&pp->pr_slock);

		if (v == NULL) {
			/*
			 * We were unable to allocate a page, but
			 * we released the lock during allocation,
			 * so perhaps items were freed back to the
			 * pool. Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if (flags & PR_URGENT)
				panic("pool_get: urgent");

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: we actually want to wait just until
			 * the page allocator has memory again. Depending
			 * on this pool's usage, we might get stuck here
			 * for a long time.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			simple_unlock(&pp->pr_slock);
			tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
			simple_lock(&pp->pr_slock);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pp->pr_npagealloc++;
		pool_prime_page(pp, v);

		/* Start the allocation process over. */
		goto startover;
	}

	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		simple_unlock(&pp->pr_slock);
		printf("pool_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_get: nitems inconsistent\n");
	}
#endif
	pr_log(pp, v, PRLOG_GET, file, line);

#ifdef DIAGNOSTIC
	if (pi->pi_magic != PI_MAGIC) {
		pr_printlog(pp);
		panic("pool_get(%s): free list modified: magic=%x; page %p;"
		    " item addr %p\n",
		    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
	}
#endif

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;
	}
	ph->ph_nmissing++;
	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
#ifdef DIAGNOSTIC
		if (ph->ph_nmissing != pp->pr_itemsperpage) {
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase
		 * the chance for "high water" pages to be freed.
		 *
		 * Migrate empty pages to the end of the list. This
		 * will speed the update of curpage as pages become
		 * idle. Empty pages intermingled with idle pages
		 * are no big deal. As soon as a page becomes un-empty,
		 * it will move back to the head of the list.
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
		for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
		     ph = TAILQ_NEXT(ph, ph_pagelist))
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (pp->pr_nitems < pp->pr_minitems && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
	return (v);
}
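
/*
 * Callers are responsible for blocking interrupts around pool_get() and
 * pool_put() when a pool is also used from interrupt context.  A rough
 * sketch (the pool and item type are illustrative assumptions):
 */
#if 0
static void
example_intrsafe_get()
{
	extern struct pool foo_pool;	/* hypothetical, shared with interrupt code */
	struct foo *f;
	int s;

	s = splimp();			/* block interrupts that use the pool */
	f = pool_get(&foo_pool, PR_NOWAIT);
	splx(s);

	if (f == NULL)
		return;			/* allocation failed; caller copes */

	/* ... use the item ... */

	s = splimp();
	pool_put(&foo_pool, f);
	splx(s);
}
#endif /* example */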

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(pp, v, file, line)
	struct pool *pp;
	void *v;
	const char *file;
	long line;
#else
void
pool_put(pp, v)
	struct pool *pp;
	void *v;
#endif
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	page = (caddr_t)((u_long)v & pp->pr_pagemask);

	simple_lock(&pp->pr_slock);

	pr_log(pp, v, PRLOG_PUT, file, line);

	if ((ph = pr_find_pagehead(pp, page)) == NULL) {
		pr_printlog(pp);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#endif
	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		simple_unlock(&pp->pr_slock);
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now complete, do one of two things:
	 *
	 * (1) If we have more pages than the page high water
	 *     mark, free the page back to the system.
	 *
	 * (2) Move it to the end of the page list, so that
	 *     we minimize our chances of fragmenting the
	 *     pool. Idle pages migrate to the end (along with
	 *     completely empty pages, so that we find un-empty
	 *     pages more quickly when we update curpage) of the
	 *     list so they can be more easily swept up by
	 *     the pagedaemon when pages are scarce.
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_maxpages) {
			pr_rmpage(pp, ph);
		} else {
			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page. A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon. This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);

			/*
			 * Update the current page pointer. Just look for
			 * the first page with any free items.
			 *
			 * XXX: Maybe we want an option to look for the
			 * page with the fewest available items, to minimize
			 * fragmentation?
			 */
			for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
			     ph = TAILQ_NEXT(ph, ph_pagelist))
				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
					break;

			pp->pr_curpage = ph;
		}
	}
	/*
	 * If the page has just become un-empty, move it to the head of
	 * the list, and make it the current page. The next allocation
	 * will get the item from this page, instead of further fragmenting
	 * the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}

	simple_unlock(&pp->pr_slock);

}

/*
 * Add N items to the pool.
 */
int
pool_prime(pp, n, storage)
	struct pool *pp;
	int n;
	caddr_t storage;
{
	caddr_t cp;
	int newnitems, newpages;

#ifdef DIAGNOSTIC
	if (storage && !(pp->pr_roflags & PR_STATIC))
		panic("pool_prime: static");
	/* !storage && static caught below */
#endif

	simple_lock(&pp->pr_slock);

	newnitems = pp->pr_minitems + n;
	newpages =
	    roundup(newnitems, pp->pr_itemsperpage) / pp->pr_itemsperpage
	    - pp->pr_minpages;

	while (newpages-- > 0) {
		if (pp->pr_roflags & PR_STATIC) {
			cp = storage;
			storage += pp->pr_pagesz;
		} else {
			simple_unlock(&pp->pr_slock);
			cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
			simple_lock(&pp->pr_slock);
		}

		if (cp == NULL) {
			simple_unlock(&pp->pr_slock);
			return (ENOMEM);
		}

		pool_prime_page(pp, cp);
		pp->pr_minpages++;
	}

	pp->pr_minitems = newnitems;

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return (0);
}
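
/*
 * A rough sketch of priming a static pool from caller-supplied pages
 * (the item type, page count and storage are illustrative assumptions;
 * the storage must be page-aligned for the page-mask arithmetic to work):
 */
#if 0
static struct pool foo_pool;
static char foo_pages[4 * NBPG];	/* must be page-aligned in practice */

static void
example_static_prime()
{

	pool_init(&foo_pool, sizeof(struct foo), 0, 0, PR_STATIC,
	    "foopl", 0, NULL, NULL, M_DEVBUF);
	/* Hand the pool four pages' worth of items up front. */
	pool_prime(&foo_pool, 4 * foo_pool.pr_itemsperpage, foo_pages);
}
#endif /* example */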

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(pp, storage)
	struct pool *pp;
	caddr_t storage;
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		ph = (struct pool_item_header *)(cp + pp->pr_phoffset);
	} else {
		ph = pool_get(&phpool, PR_URGENT);
		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
		    ph, ph_hashlist);
	}

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	memset(&ph->ph_time, 0, sizeof(ph->ph_time));

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Like pool_prime(), except this is used by pool_get() when nitems
 * drops below the low water mark. This is used to catch up nitems
 * with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, this doesn't work with static pools.
 *
 * Note 3, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(pp)
	struct pool *pp;
{
	caddr_t cp;
	int error = 0;

	if (pp->pr_roflags & PR_STATIC) {
		/*
		 * We dropped below the low water mark, and this is not a
		 * good thing. Log a warning.
		 *
		 * XXX: rate-limit this?
		 */
		printf("WARNING: static pool `%s' dropped below low water "
		    "mark\n", pp->pr_wchan);
		return (0);
	}

	while (pp->pr_nitems < pp->pr_minitems) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
		 */
		simple_unlock(&pp->pr_slock);
		cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
		simple_lock(&pp->pr_slock);
		if (cp == NULL) {
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp);
	}

	return (error);
}

void
pool_setlowat(pp, n)
	pool_handle_t	pp;
	int		n;
{
	int error;

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if ((error = pool_catchup(pp)) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}

void
pool_sethiwat(pp, n)
	pool_handle_t	pp;
	int		n;
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(pp, n, warnmess, ratecap)
	pool_handle_t pp;
	int n;
	const char *warnmess;
	int ratecap;
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap = ratecap;
	memset(&pp->pr_hardlimit_warning_last, 0,
	    sizeof(pp->pr_hardlimit_warning_last));

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}
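
/*
 * A rough sketch of tuning a pool's water marks and hard limit; the pool
 * and the numbers are illustrative assumptions.  The low and high water
 * marks and the hard limit are all expressed in items, and the rate cap
 * is the minimum number of seconds between warnings:
 */
#if 0
static void
example_pool_limits()
{

	pool_setlowat(&foo_pool, 16);	/* keep at least 16 items primed */
	pool_sethiwat(&foo_pool, 64);	/* free idle pages beyond ~64 items' worth */
	pool_sethardlimit(&foo_pool, 128,
	    "WARNING: foo_pool limit reached", 60);
}
#endif /* example */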

/*
 * Default page allocator.
 */
static void *
pool_page_alloc(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage(waitok));
}

static void
pool_page_free(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

	uvm_km_free_poolpage((vaddr_t)v);
}

/*
 * Alternate pool page allocator for pools that know they will
 * never be accessed in interrupt context.
 */
void *
pool_page_alloc_nointr(sz, flags, mtype)
	unsigned long sz;
	int flags;
	int mtype;
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
	    waitok));
}

void
pool_page_free_nointr(v, sz, mtype)
	void *v;
	unsigned long sz;
	int mtype;
{

	uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
}
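
/*
 * A rough sketch of a pool that is only ever used from process context,
 * so it takes its pages from kernel_map via the "nointr" back-end above
 * (struct bar and the malloc type are illustrative assumptions):
 */
#if 0
static struct pool *barpool;

static void
example_nointr_pool()
{

	barpool = pool_create(sizeof(struct bar), 0, 0, 0, "barpl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_DEVBUF);
}
#endif /* example */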


/*
 * Release all complete pages that have not been used recently.
 */
void
pool_reclaim(pp)
	pool_handle_t pp;
{
	struct pool_item_header *ph, *phnext;
	struct timeval curtime;
	int s;

	if (pp->pr_roflags & PR_STATIC)
		return;

	if (simple_lock_try(&pp->pr_slock) == 0)
		return;

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		if (ph->ph_nmissing == 0) {
			struct timeval diff;
			timersub(&curtime, &ph->ph_time, &diff);
			if (diff.tv_sec < pool_inactive_time)
				continue;

			/*
			 * If freeing this page would put us below
			 * the low water mark, stop now.
			 */
			if ((pp->pr_nitems - pp->pr_itemsperpage) <
			    pp->pr_minitems)
				break;

			pr_rmpage(pp, ph);
		}
	}

	simple_unlock(&pp->pr_slock);
}


/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(arg)
	void *arg;
{
	struct pool *pp;
	int s;

	s = splimp();
	simple_lock(&pool_head_slock);

	if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL)
		goto out;

	pp = drainpp;
	drainpp = TAILQ_NEXT(pp, pr_poollist);

	pool_reclaim(pp);

 out:
	simple_unlock(&pool_head_slock);
	splx(s);
}


#if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
/*
 * Diagnostic helpers.
 */
void
pool_print(pp, label)
	struct pool *pp;
	const char *label;
{
	int s;

	s = splimp();
	simple_lock(&pp->pr_slock);
	pool_print1(pp, label);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

static void
pool_print1(pp, label)
	struct pool *pp;
	const char *label;
{

	if (label != NULL)
		printf("%s: ", label);

	printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
	    "	npages %u minitems %u itemsperpage %u itemoffset %u\n"
	    "	nidle %lu\n",
	    pp->pr_wchan,
	    pp->pr_nget,
	    pp->pr_nput,
	    pp->pr_npagealloc,
	    pp->pr_npagefree,
	    pp->pr_npages,
	    pp->pr_minitems,
	    pp->pr_itemsperpage,
	    pp->pr_itemoffset,
	    pp->pr_nidle);
}

int
pool_chk(pp, label)
	struct pool	*pp;
	char		*label;
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
	     ph = TAILQ_NEXT(ph, ph_pagelist)) {

		struct pool_item *pi;
		int n;
		caddr_t page;

		page = (caddr_t)((u_long)ph & pp->pr_pagemask);
		if (page != ph->ph_page &&
		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " at page head addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    ph, page);
			r++;
			goto out;
		}

		for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
		     pi != NULL;
		     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
			if (pi->pi_magic != PI_MAGIC) {
				if (label != NULL)
					printf("%s: ", label);
				printf("pool(%s): free list modified: magic=%x;"
				    " page %p; item ordinal %d;"
				    " addr %p (p %p)\n",
				    pp->pr_wchan, pi->pi_magic, ph->ph_page,
				    n, pi, page);
				panic("pool");
			}
#endif
			page = (caddr_t)((u_long)pi & pp->pr_pagemask);
			if (page == ph->ph_page)
				continue;

			if (label != NULL)
				printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p (p %p)\n", pp,
			    pp->pr_wchan, ph->ph_page,
			    n, pi, page);
			r++;
			goto out;
		}
	}
 out:
	simple_unlock(&pp->pr_slock);
	return (r);
}
#endif /* POOL_DIAGNOSTIC || DEBUG */