1 /* $NetBSD: subr_pool.c,v 1.21.2.1 1999/04/04 17:20:14 chs Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/errno.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/lock.h>
47 #include <sys/pool.h>
48 #include <sys/syslog.h>
49
50 #include <vm/vm.h>
51 #include <vm/vm_kern.h>
52
53 #include <uvm/uvm.h>
54
55 /*
56 * Pool resource management utility.
57 *
58 * Memory is allocated in pages which are split into pieces according
59 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
60 * in the pool structure and the individual pool items are on a linked list
61 * headed by `ph_itemlist' in each page header. The memory for building
62 * the page list is either taken from the allocated pages themselves (for
63 * small pool items) or taken from an internal pool of page headers (`phpool').
64 */
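/*
 * A minimal usage sketch (the `struct foo' names and the M_DEVBUF malloc
 * type are only illustrative): a consumer creates a pool once, then gets
 * and puts fixed-size items from it, at whatever spl level protects the
 * pool's other users.
 *
 *	struct pool *foo_pool;
 *	struct foo *f;
 *
 *	foo_pool = pool_create(sizeof(struct foo), 0, 0, 0, "foopl",
 *	    0, NULL, NULL, M_DEVBUF);
 *
 *	f = pool_get(foo_pool, PR_WAITOK);
 *	...
 *	pool_put(foo_pool, f);
 *
 *	pool_destroy(foo_pool);
 */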
65
66 /* List of all pools */
67 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
68
69 /* Private pool for page header structures */
70 static struct pool phpool;
71
72 /* # of seconds to retain page after last use */
73 int pool_inactive_time = 10;
74
75 /* Next candidate for drainage (see pool_drain()) */
76 static struct pool *drainpp = NULL;
77
78 struct pool_item_header {
79 /* Page headers */
80 TAILQ_ENTRY(pool_item_header)
81 ph_pagelist; /* pool page list */
82 TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
83 LIST_ENTRY(pool_item_header)
84 ph_hashlist; /* Off-page page headers */
85 int ph_nmissing; /* # of chunks in use */
86 caddr_t ph_page; /* this page's address */
87 struct timeval ph_time; /* last referenced */
88 };
89
90 struct pool_item {
91 #ifdef DIAGNOSTIC
92 int pi_magic;
93 #define PI_MAGIC 0xdeadbeef
94 #endif
95 /* Other entries use only this list entry */
96 TAILQ_ENTRY(pool_item) pi_list;
97 };
98
99
100 #define PR_HASH_INDEX(pp,addr) \
101 (((u_long)(addr) >> (pp)->pr_pageshift) & (PR_HASHTABSIZE - 1))
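/*
 * For example, assuming 4K pages (pr_pageshift == 12) and a PR_HASHTABSIZE
 * of 8, a page at address 0xc12f3000 hashes to bucket
 * (0xc12f3000 >> 12) & 7 == 3.
 */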
102
103
104
105 static struct pool_item_header
106 *pr_find_pagehead __P((struct pool *, caddr_t));
107 static void pr_rmpage __P((struct pool *, struct pool_item_header *));
108 static int pool_catchup __P((struct pool *));
109 static void pool_prime_page __P((struct pool *, caddr_t));
110 static void *pool_page_alloc __P((unsigned long, int, int));
111 static void pool_page_free __P((void *, unsigned long, int));
112
113 #if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
114 static void pool_print1 __P((struct pool *, const char *));
115 #endif
116
117 #ifdef POOL_DIAGNOSTIC
118 /*
119 * Pool log entry. An array of these is allocated in pool_init().
120 */
121 struct pool_log {
122 const char *pl_file;
123 long pl_line;
124 int pl_action;
125 #define PRLOG_GET 1
126 #define PRLOG_PUT 2
127 void *pl_addr;
128 };
129
130 /* Number of entries in pool log buffers */
131 #ifndef POOL_LOGSIZE
132 #define POOL_LOGSIZE 10
133 #endif
134
135 int pool_logsize = POOL_LOGSIZE;
136
137 static void pr_log __P((struct pool *, void *, int, const char *, long));
138 static void pr_printlog __P((struct pool *));
139
140 static __inline__ void
141 pr_log(pp, v, action, file, line)
142 struct pool *pp;
143 void *v;
144 int action;
145 const char *file;
146 long line;
147 {
148 int n = pp->pr_curlogentry;
149 struct pool_log *pl;
150
151 if ((pp->pr_roflags & PR_LOGGING) == 0)
152 return;
153
154 /*
155 * Fill in the current entry. Wrap around and overwrite
156 * the oldest entry if necessary.
157 */
158 pl = &pp->pr_log[n];
159 pl->pl_file = file;
160 pl->pl_line = line;
161 pl->pl_action = action;
162 pl->pl_addr = v;
163 if (++n >= pp->pr_logsize)
164 n = 0;
165 pp->pr_curlogentry = n;
166 }
167
168 static void
169 pr_printlog(pp)
170 struct pool *pp;
171 {
172 int i = pp->pr_logsize;
173 int n = pp->pr_curlogentry;
174
175 if ((pp->pr_roflags & PR_LOGGING) == 0)
176 return;
177
178 pool_print1(pp, "printlog");
179
180 /*
181 * Print all entries in this pool's log.
182 */
183 while (i-- > 0) {
184 struct pool_log *pl = &pp->pr_log[n];
185 if (pl->pl_action != 0) {
186 printf("log entry %d:\n", i);
187 printf("\taction = %s, addr = %p\n",
188 pl->pl_action == PRLOG_GET ? "get" : "put",
189 pl->pl_addr);
190 printf("\tfile: %s at line %lu\n",
191 pl->pl_file, pl->pl_line);
192 }
193 if (++n >= pp->pr_logsize)
194 n = 0;
195 }
196 }
197 #else
198 #define pr_log(pp, v, action, file, line)
199 #define pr_printlog(pp)
200 #endif
201
202
203 /*
204 * Return the pool page header based on page address.
205 */
206 static __inline__ struct pool_item_header *
207 pr_find_pagehead(pp, page)
208 struct pool *pp;
209 caddr_t page;
210 {
211 struct pool_item_header *ph;
212
213 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
214 return ((struct pool_item_header *)(page + pp->pr_phoffset));
215
216 for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
217 ph != NULL;
218 ph = LIST_NEXT(ph, ph_hashlist)) {
219 if (ph->ph_page == page)
220 return (ph);
221 }
222 return (NULL);
223 }
224
225 /*
226 * Remove a page from the pool.
227 */
228 static __inline__ void
229 pr_rmpage(pp, ph)
230 struct pool *pp;
231 struct pool_item_header *ph;
232 {
233
234 /*
235 * If the page was idle, decrement the idle page count.
236 */
237 if (ph->ph_nmissing == 0) {
238 #ifdef DIAGNOSTIC
239 if (pp->pr_nidle == 0)
240 panic("pr_rmpage: nidle inconsistent");
241 if (pp->pr_nitems < pp->pr_itemsperpage)
242 panic("pr_rmpage: nitems inconsistent");
243 #endif
244 pp->pr_nidle--;
245 }
246
247 pp->pr_nitems -= pp->pr_itemsperpage;
248
249 /*
250 * Unlink a page from the pool and release it.
251 */
252 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
253 (*pp->pr_free)(ph->ph_page, pp->pr_pagesz, pp->pr_mtype);
254 pp->pr_npages--;
255 pp->pr_npagefree++;
256
257 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
258 LIST_REMOVE(ph, ph_hashlist);
259 pool_put(&phpool, ph);
260 }
261
262 if (pp->pr_curpage == ph) {
263 /*
264 * Find a new non-empty page header, if any.
265 * Start search from the page head, to increase the
266 * chance for "high water" pages to be freed.
267 */
268 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
269 ph = TAILQ_NEXT(ph, ph_pagelist))
270 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
271 break;
272
273 pp->pr_curpage = ph;
274 }
275 }
276
277 /*
278 * Allocate and initialize a pool.
279 */
280 struct pool *
281 pool_create(size, align, ioff, nitems, wchan, pagesz, alloc, release, mtype)
282 size_t size;
283 u_int align;
284 u_int ioff;
285 int nitems;
286 const char *wchan;
287 size_t pagesz;
288 void *(*alloc) __P((unsigned long, int, int));
289 void (*release) __P((void *, unsigned long, int));
290 int mtype;
291 {
292 struct pool *pp;
293 int flags;
294
295 pp = (struct pool *)malloc(sizeof(*pp), M_POOL, M_NOWAIT);
296 if (pp == NULL)
297 return (NULL);
298
299 flags = PR_FREEHEADER;
300 #ifdef POOL_DIAGNOSTIC
301 if (pool_logsize != 0)
302 flags |= PR_LOGGING;
303 #endif
304
305 pool_init(pp, size, align, ioff, flags, wchan, pagesz,
306 alloc, release, mtype);
307
308 if (nitems != 0) {
309 if (pool_prime(pp, nitems, NULL) != 0) {
310 pool_destroy(pp);
311 return (NULL);
312 }
313 }
314
315 return (pp);
316 }
317
318 /*
319 * Initialize the given pool resource structure.
320 *
321 * We export this routine to allow other kernel parts to declare
322 * static pools that must be initialized before malloc() is available.
323 */
324 void
325 pool_init(pp, size, align, ioff, flags, wchan, pagesz, alloc, release, mtype)
326 struct pool *pp;
327 size_t size;
328 u_int align;
329 u_int ioff;
330 int flags;
331 const char *wchan;
332 size_t pagesz;
333 void *(*alloc) __P((unsigned long, int, int));
334 void (*release) __P((void *, unsigned long, int));
335 int mtype;
336 {
337 int off, slack, i;
338
339 /*
340 * Check arguments and construct default values.
341 */
342 if (!powerof2(pagesz) || pagesz > PAGE_SIZE)
343 panic("pool_init: page size invalid (%lx)\n", (u_long)pagesz);
344
345 if (alloc == NULL && release == NULL) {
346 alloc = pool_page_alloc;
347 release = pool_page_free;
348 pagesz = PAGE_SIZE; /* Rounds to PAGE_SIZE anyhow. */
349 } else if ((alloc != NULL && release != NULL) == 0) {
350 /* If you specify one, you must specify both. */
351 panic("pool_init: must specify alloc and release together");
352 }
353
354 if (pagesz == 0)
355 pagesz = PAGE_SIZE;
356
357 if (align == 0)
358 align = ALIGN(1);
359
360 if (size < sizeof(struct pool_item))
361 size = sizeof(struct pool_item);
362
363 /*
364 * Initialize the pool structure.
365 */
366 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
367 TAILQ_INIT(&pp->pr_pagelist);
368 pp->pr_curpage = NULL;
369 pp->pr_npages = 0;
370 pp->pr_minitems = 0;
371 pp->pr_minpages = 0;
372 pp->pr_maxpages = UINT_MAX;
373 pp->pr_roflags = flags;
374 pp->pr_flags = 0;
375 pp->pr_size = ALIGN(size);
376 pp->pr_align = align;
377 pp->pr_wchan = wchan;
378 pp->pr_mtype = mtype;
379 pp->pr_alloc = alloc;
380 pp->pr_free = release;
381 pp->pr_pagesz = pagesz;
382 pp->pr_pagemask = ~(pagesz - 1);
383 pp->pr_pageshift = ffs(pagesz) - 1;
384 pp->pr_nitems = 0;
385 pp->pr_nout = 0;
386 pp->pr_hardlimit = UINT_MAX;
387 pp->pr_hardlimit_warning = NULL;
388 pp->pr_hardlimit_ratecap = 0;
389 memset(&pp->pr_hardlimit_warning_last, 0,
390 sizeof(pp->pr_hardlimit_warning_last));
391
392 /*
393 * Decide whether to put the page header off page to avoid
394 * wasting too large a part of the page. Off-page page headers
395 * go on a hash table, so we can match a returned item
396 * with its header based on the page address.
397 * We use 1/16 of the page size as the threshold (XXX: tune)
398 */
399 if (pp->pr_size < pagesz/16) {
400 /* Use the end of the page for the page header */
401 pp->pr_roflags |= PR_PHINPAGE;
402 pp->pr_phoffset = off =
403 pagesz - ALIGN(sizeof(struct pool_item_header));
404 } else {
405 /* The page header will be taken from our page header pool */
406 pp->pr_phoffset = 0;
407 off = pagesz;
408 for (i = 0; i < PR_HASHTABSIZE; i++) {
409 LIST_INIT(&pp->pr_hashtab[i]);
410 }
411 }
412
413 /*
414 * Alignment is to take place at `ioff' within the item. This means
415 * we must reserve up to `align - 1' bytes on the page to allow
416 * appropriate positioning of each item.
417 *
418 * Silently enforce `0 <= ioff < align'.
419 */
420 pp->pr_itemoffset = ioff = ioff % align;
421 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
422
423 /*
424 * Use the slack between the chunks and the page header
425 * for "cache coloring".
426 */
427 slack = off - pp->pr_itemsperpage * pp->pr_size;
428 pp->pr_maxcolor = (slack / align) * align;
429 pp->pr_curcolor = 0;
430
431 pp->pr_nget = 0;
432 pp->pr_nfail = 0;
433 pp->pr_nput = 0;
434 pp->pr_npagealloc = 0;
435 pp->pr_npagefree = 0;
436 pp->pr_hiwat = 0;
437 pp->pr_nidle = 0;
438
439 #ifdef POOL_DIAGNOSTIC
440 if ((flags & PR_LOGGING) != 0) {
441 pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
442 M_TEMP, M_NOWAIT);
443 if (pp->pr_log == NULL)
444 pp->pr_roflags &= ~PR_LOGGING;
445 pp->pr_curlogentry = 0;
446 pp->pr_logsize = pool_logsize;
447 }
448 #endif
449
450 simple_lock_init(&pp->pr_slock);
451
452 /*
453 * Initialize private page header pool if we haven't done so yet.
454 */
455 if (phpool.pr_size == 0) {
456 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
457 0, "phpool", 0, 0, 0, 0);
458 }
459
460 return;
461 }
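/*
 * As noted in the comment above pool_init(), a subsystem may embed a pool
 * in its own (static) data and initialize it during bootstrap, before
 * malloc() is usable.  A sketch, with hypothetical names:
 *
 *	static struct pool bar_pool;
 *
 *	void
 *	bar_bootstrap()
 *	{
 *		pool_init(&bar_pool, sizeof(struct bar), 0, 0, 0,
 *		    "barpl", 0, NULL, NULL, M_DEVBUF);
 *	}
 */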
462
463 /*
464 * De-commission a pool resource.
465 */
466 void
467 pool_destroy(pp)
468 struct pool *pp;
469 {
470 struct pool_item_header *ph;
471
472 #ifdef DIAGNOSTIC
473 if (pp->pr_nout != 0) {
474 pr_printlog(pp);
475 panic("pool_destroy: pool busy: still out: %u\n",
476 pp->pr_nout);
477 }
478 #endif
479
480 /* Remove all pages */
481 if ((pp->pr_roflags & PR_STATIC) == 0)
482 while ((ph = pp->pr_pagelist.tqh_first) != NULL)
483 pr_rmpage(pp, ph);
484
485 /* Remove from global pool list */
486 TAILQ_REMOVE(&pool_head, pp, pr_poollist);
487 drainpp = NULL;
488
489 #ifdef POOL_DIAGNOSTIC
490 if ((pp->pr_roflags & PR_LOGGING) != 0)
491 free(pp->pr_log, M_TEMP);
492 #endif
493
494 if (pp->pr_roflags & PR_FREEHEADER)
495 free(pp, M_POOL);
496 }
497
498
499 /*
500 * Grab an item from the pool; must be called at appropriate spl level
501 */
502 #ifdef POOL_DIAGNOSTIC
503 void *
504 _pool_get(pp, flags, file, line)
505 struct pool *pp;
506 int flags;
507 const char *file;
508 long line;
509 #else
510 void *
511 pool_get(pp, flags)
512 struct pool *pp;
513 int flags;
514 #endif
515 {
516 void *v;
517 struct pool_item *pi;
518 struct pool_item_header *ph;
519
520 #ifdef DIAGNOSTIC
521 if ((pp->pr_roflags & PR_STATIC) && (flags & PR_MALLOCOK)) {
522 pr_printlog(pp);
523 panic("pool_get: static");
524 }
525 #endif
526
527 if (curproc == NULL && (flags & PR_WAITOK) != 0)
528 panic("pool_get: must have NOWAIT");
529
530 simple_lock(&pp->pr_slock);
531
532 startover:
533 /*
534 * Check to see if we've reached the hard limit. If we have,
535 * and we can wait, then wait until an item has been returned to
536 * the pool.
537 */
538 #ifdef DIAGNOSTIC
539 if (pp->pr_nout > pp->pr_hardlimit) {
540 simple_unlock(&pp->pr_slock);
541 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
542 }
543 #endif
544 if (pp->pr_nout == pp->pr_hardlimit) {
545 if (flags & PR_WAITOK) {
546 /*
547 * XXX: A warning isn't logged in this case. Should
548 * it be?
549 */
550 pp->pr_flags |= PR_WANTED;
551 simple_unlock(&pp->pr_slock);
552 tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
553 simple_lock(&pp->pr_slock);
554 goto startover;
555 }
556 if (pp->pr_hardlimit_warning != NULL) {
557 /*
558 * Log a message that the hard limit has been hit.
559 */
560 struct timeval curtime, logdiff;
561 int s = splclock();
562 curtime = mono_time;
563 splx(s);
564 timersub(&curtime, &pp->pr_hardlimit_warning_last,
565 &logdiff);
566 if (logdiff.tv_sec >= pp->pr_hardlimit_ratecap) {
567 pp->pr_hardlimit_warning_last = curtime;
568 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
569 }
570 }
571
572 if (flags & PR_URGENT)
573 panic("pool_get: urgent");
574
575 pp->pr_nfail++;
576
577 simple_unlock(&pp->pr_slock);
578 return (NULL);
579 }
580
581 /*
582 * The convention we use is that if `curpage' is not NULL, then
583 * it points at a non-empty bucket. In particular, `curpage'
584 * never points at a page header which has PR_PHINPAGE set and
585 * has no items in its bucket.
586 */
587 if ((ph = pp->pr_curpage) == NULL) {
588 void *v;
589
590 #ifdef DIAGNOSTIC
591 if (pp->pr_nitems != 0) {
592 simple_unlock(&pp->pr_slock);
593 printf("pool_get: %s: curpage NULL, nitems %u\n",
594 pp->pr_wchan, pp->pr_nitems);
595 panic("pool_get: nitems inconsistent\n");
596 }
597 #endif
598
599 /*
600 * Call the back-end page allocator for more memory.
601 * Release the pool lock, as the back-end page allocator
602 * may block.
603 */
604 simple_unlock(&pp->pr_slock);
605 v = (*pp->pr_alloc)(pp->pr_pagesz, flags, pp->pr_mtype);
606 simple_lock(&pp->pr_slock);
607
608 if (v == NULL) {
609 /*
610 * We were unable to allocate a page, but
611 * we released the lock during allocation,
612 * so perhaps items were freed back to the
613 * pool. Check for this case.
614 */
615 if (pp->pr_curpage != NULL)
616 goto startover;
617
618 if (flags & PR_URGENT)
619 panic("pool_get: urgent");
620
621 if ((flags & PR_WAITOK) == 0) {
622 pp->pr_nfail++;
623 simple_unlock(&pp->pr_slock);
624 return (NULL);
625 }
626
627 /*
628 * Wait for items to be returned to this pool.
629 *
630 * XXX: we actually want to wait just until
631 * the page allocator has memory again. Depending
632 * on this pool's usage, we might get stuck here
633 * for a long time.
634 *
635 * XXX: maybe we should wake up once a second and
636 * try again?
637 */
638 pp->pr_flags |= PR_WANTED;
639 simple_unlock(&pp->pr_slock);
640 tsleep((caddr_t)pp, PSWP, pp->pr_wchan, 0);
641 simple_lock(&pp->pr_slock);
642 goto startover;
643 }
644
645 /* We have more memory; add it to the pool */
646 pp->pr_npagealloc++;
647 pool_prime_page(pp, v);
648
649 /* Start the allocation process over. */
650 goto startover;
651 }
652
653 if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
654 simple_unlock(&pp->pr_slock);
655 panic("pool_get: %s: page empty", pp->pr_wchan);
656 }
657 #ifdef DIAGNOSTIC
658 if (pp->pr_nitems == 0) {
659 simple_unlock(&pp->pr_slock);
660 printf("pool_get: %s: items on itemlist, nitems %u\n",
661 pp->pr_wchan, pp->pr_nitems);
662 panic("pool_get: nitems inconsistent\n");
663 }
664 #endif
665 pr_log(pp, v, PRLOG_GET, file, line);
666
667 #ifdef DIAGNOSTIC
668 if (pi->pi_magic != PI_MAGIC) {
669 pr_printlog(pp);
670 panic("pool_get(%s): free list modified: magic=%x; page %p;"
671 " item addr %p\n",
672 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
673 }
674 #endif
675
676 /*
677 * Remove from item list.
678 */
679 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
680 pp->pr_nitems--;
681 pp->pr_nout++;
682 if (ph->ph_nmissing == 0) {
683 #ifdef DIAGNOSTIC
684 if (pp->pr_nidle == 0)
685 panic("pool_get: nidle inconsistent");
686 #endif
687 pp->pr_nidle--;
688 }
689 ph->ph_nmissing++;
690 if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
691 #ifdef DIAGNOSTIC
692 if (ph->ph_nmissing != pp->pr_itemsperpage) {
693 simple_unlock(&pp->pr_slock);
694 panic("pool_get: %s: nmissing inconsistent",
695 pp->pr_wchan);
696 }
697 #endif
698 /*
699 * Find a new non-empty page header, if any.
700 * Start search from the page head, to increase
701 * the chance for "high water" pages to be freed.
702 *
703 * Migrate empty pages to the end of the list. This
704 * will speed the update of curpage as pages become
705 * idle. Empty pages intermingled with idle pages
706 * are no big deal. As soon as a page becomes un-empty,
707 * it will move back to the head of the list.
708 */
709 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
710 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
711 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
712 ph = TAILQ_NEXT(ph, ph_pagelist))
713 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
714 break;
715
716 pp->pr_curpage = ph;
717 }
718
719 pp->pr_nget++;
720
721 /*
722 * If we have a low water mark and we are now below that low
723 * water mark, add more items to the pool.
724 */
725 if (pp->pr_nitems < pp->pr_minitems && pool_catchup(pp) != 0) {
726 /*
727 * XXX: Should we log a warning? Should we set up a timeout
728 * to try again in a second or so? The latter could break
729 * a caller's assumptions about interrupt protection, etc.
730 */
731 }
732
733 simple_unlock(&pp->pr_slock);
734 return (v);
735 }
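/*
 * Illustrative caller pattern (names hypothetical): code that may run in
 * interrupt context must pass PR_NOWAIT and handle a NULL return, while
 * process-context code may pass PR_WAITOK and sleep until an item or a
 * new page becomes available.
 *
 *	struct foo *f;
 *	int s;
 *
 *	s = splimp();
 *	f = pool_get(foo_pool, PR_NOWAIT);
 *	splx(s);
 *	if (f == NULL)
 *		return (ENOBUFS);
 */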
736
737 /*
738 * Return resource to the pool; must be called at appropriate spl level
739 */
740 #ifdef POOL_DIAGNOSTIC
741 void
742 _pool_put(pp, v, file, line)
743 struct pool *pp;
744 void *v;
745 const char *file;
746 long line;
747 #else
748 void
749 pool_put(pp, v)
750 struct pool *pp;
751 void *v;
752 #endif
753 {
754 struct pool_item *pi = v;
755 struct pool_item_header *ph;
756 caddr_t page;
757 int s;
758
759 page = (caddr_t)((u_long)v & pp->pr_pagemask);
760
761 simple_lock(&pp->pr_slock);
762
763 pr_log(pp, v, PRLOG_PUT, file, line);
764
765 if ((ph = pr_find_pagehead(pp, page)) == NULL) {
766 pr_printlog(pp);
767 panic("pool_put: %s: page header missing", pp->pr_wchan);
768 }
769
770 /*
771 * Return to item list.
772 */
773 #ifdef DIAGNOSTIC
774 pi->pi_magic = PI_MAGIC;
775 #endif
776 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
777 ph->ph_nmissing--;
778 pp->pr_nput++;
779 pp->pr_nitems++;
780 pp->pr_nout--;
781
782 /* Cancel "pool empty" condition if it exists */
783 if (pp->pr_curpage == NULL)
784 pp->pr_curpage = ph;
785
786 if (pp->pr_flags & PR_WANTED) {
787 pp->pr_flags &= ~PR_WANTED;
788 if (ph->ph_nmissing == 0)
789 pp->pr_nidle++;
790 simple_unlock(&pp->pr_slock);
791 wakeup((caddr_t)pp);
792 return;
793 }
794
795 /*
796 * If this page is now complete, do one of two things:
797 *
798 * (1) If we have more pages than the page high water
799 * mark, free the page back to the system.
800 *
801 * (2) Otherwise, move it to the end of the page list, to
802 * minimize our chances of fragmenting the pool. Idle
803 * pages (along with completely empty pages, which helps
804 * us find un-empty pages more quickly when we update
805 * curpage) migrate to the end of the list, where they
806 * can be more easily swept up by the pagedaemon when
807 * pages are scarce.
808 */
809 if (ph->ph_nmissing == 0) {
810 pp->pr_nidle++;
811 if (pp->pr_npages > pp->pr_maxpages) {
812 pr_rmpage(pp, ph);
813 } else {
814 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
815 TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
816
817 /*
818 * Update the timestamp on the page. A page must
819 * be idle for some period of time before it can
820 * be reclaimed by the pagedaemon. This minimizes
821 * ping-pong'ing for memory.
822 */
823 s = splclock();
824 ph->ph_time = mono_time;
825 splx(s);
826
827 /*
828 * Update the current page pointer. Just look for
829 * the first page with any free items.
830 *
831 * XXX: Maybe we want an option to look for the
832 * page with the fewest available items, to minimize
833 * fragmentation?
834 */
835 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
836 ph = TAILQ_NEXT(ph, ph_pagelist))
837 if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
838 break;
839
840 pp->pr_curpage = ph;
841 }
842 }
843 /*
844 * If the page has just become un-empty, move it to the head of
845 * the list, and make it the current page. The next allocation
846 * will get the item from this page, instead of further fragmenting
847 * the pool.
848 */
849 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
850 TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
851 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
852 pp->pr_curpage = ph;
853 }
854
855 simple_unlock(&pp->pr_slock);
856
857 }
858
859 /*
860 * Add N items to the pool.
861 */
862 int
863 pool_prime(pp, n, storage)
864 struct pool *pp;
865 int n;
866 caddr_t storage;
867 {
868 caddr_t cp;
869 int newnitems, newpages;
870
871 #ifdef DIAGNOSTIC
872 if (storage && !(pp->pr_roflags & PR_STATIC))
873 panic("pool_prime: static");
874 /* !storage && static caught below */
875 #endif
876
877 simple_lock(&pp->pr_slock);
878
879 newnitems = pp->pr_minitems + n;
880 newpages =
881 roundup(newnitems, pp->pr_itemsperpage) / pp->pr_itemsperpage
882 - pp->pr_minpages;
883
884 while (newpages-- > 0) {
885 if (pp->pr_roflags & PR_STATIC) {
886 cp = storage;
887 storage += pp->pr_pagesz;
888 } else {
889 simple_unlock(&pp->pr_slock);
890 cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
891 simple_lock(&pp->pr_slock);
892 }
893
894 if (cp == NULL) {
895 simple_unlock(&pp->pr_slock);
896 return (ENOMEM);
897 }
898
899 pool_prime_page(pp, cp);
900 pp->pr_minpages++;
901 }
902
903 pp->pr_minitems = newnitems;
904
905 if (pp->pr_minpages >= pp->pr_maxpages)
906 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
907
908 simple_unlock(&pp->pr_slock);
909 return (0);
910 }
911
912 /*
913 * Add a page worth of items to the pool.
914 *
915 * Note, we must be called with the pool descriptor LOCKED.
916 */
917 static void
918 pool_prime_page(pp, storage)
919 struct pool *pp;
920 caddr_t storage;
921 {
922 struct pool_item *pi;
923 struct pool_item_header *ph;
924 caddr_t cp = storage;
925 unsigned int align = pp->pr_align;
926 unsigned int ioff = pp->pr_itemoffset;
927 int n;
928
929 if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
930 ph = (struct pool_item_header *)(cp + pp->pr_phoffset);
931 } else {
932 ph = pool_get(&phpool, PR_URGENT);
933 LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
934 ph, ph_hashlist);
935 }
936
937 /*
938 * Insert page header.
939 */
940 TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
941 TAILQ_INIT(&ph->ph_itemlist);
942 ph->ph_page = storage;
943 ph->ph_nmissing = 0;
944 memset(&ph->ph_time, 0, sizeof(ph->ph_time));
945
946 pp->pr_nidle++;
947
948 /*
949 * Color this page.
950 */
951 cp = (caddr_t)(cp + pp->pr_curcolor);
952 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
953 pp->pr_curcolor = 0;
954
955 /*
956 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
957 */
958 if (ioff != 0)
959 cp = (caddr_t)(cp + (align - ioff));
960
961 /*
962 * Insert remaining chunks on the bucket list.
963 */
964 n = pp->pr_itemsperpage;
965 pp->pr_nitems += n;
966
967 while (n--) {
968 pi = (struct pool_item *)cp;
969
970 /* Insert on page list */
971 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
972 #ifdef DIAGNOSTIC
973 pi->pi_magic = PI_MAGIC;
974 #endif
975 cp = (caddr_t)(cp + pp->pr_size);
976 }
977
978 /*
979 * If the pool was depleted, point at the new page.
980 */
981 if (pp->pr_curpage == NULL)
982 pp->pr_curpage = ph;
983
984 if (++pp->pr_npages > pp->pr_hiwat)
985 pp->pr_hiwat = pp->pr_npages;
986 }
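/*
 * Coloring example: with a pr_align of 32 and pr_maxcolor of 96, successive
 * pages place their first item at offsets 0, 32, 64, 96, 0, ... within the
 * page, so items from different pages do not all compete for the same
 * cache lines.
 */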
987
988 /*
989 * Like pool_prime(), except this is used by pool_get() when nitems
990 * drops below the low water mark. This is used to catch up nitems
991 * with the low water mark.
992 *
993 * Note 1, we never wait for memory here, we let the caller decide what to do.
994 *
995 * Note 2, this doesn't work with static pools.
996 *
997 * Note 3, we must be called with the pool already locked, and we return
998 * with it locked.
999 */
1000 static int
1001 pool_catchup(pp)
1002 struct pool *pp;
1003 {
1004 caddr_t cp;
1005 int error = 0;
1006
1007 if (pp->pr_roflags & PR_STATIC) {
1008 /*
1009 * We dropped below the low water mark, and this is not a
1010 * good thing. Log a warning.
1011 *
1012 * XXX: rate-limit this?
1013 */
1014 printf("WARNING: static pool `%s' dropped below low water "
1015 "mark\n", pp->pr_wchan);
1016 return (0);
1017 }
1018
1019 while (pp->pr_nitems < pp->pr_minitems) {
1020 /*
1021 * Call the page back-end allocator for more memory.
1022 *
1023 * XXX: We never wait, so should we bother unlocking
1024 * the pool descriptor?
1025 */
1026 simple_unlock(&pp->pr_slock);
1027 cp = (*pp->pr_alloc)(pp->pr_pagesz, 0, pp->pr_mtype);
1028 simple_lock(&pp->pr_slock);
1029 if (cp == NULL) {
1030 error = ENOMEM;
1031 break;
1032 }
1033 pool_prime_page(pp, cp);
1034 }
1035
1036 return (error);
1037 }
1038
1039 void
1040 pool_setlowat(pp, n)
1041 pool_handle_t pp;
1042 int n;
1043 {
1044 int error;
1045
1046 simple_lock(&pp->pr_slock);
1047
1048 pp->pr_minitems = n;
1049 pp->pr_minpages = (n == 0)
1050 ? 0
1051 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1052
1053 /* Make sure we're caught up with the newly-set low water mark. */
1054 if ((error = pool_catchup(pp)) != 0) {
1055 /*
1056 * XXX: Should we log a warning? Should we set up a timeout
1057 * to try again in a second or so? The latter could break
1058 * a caller's assumptions about interrupt protection, etc.
1059 */
1060 }
1061
1062 simple_unlock(&pp->pr_slock);
1063 }
1064
1065 void
1066 pool_sethiwat(pp, n)
1067 pool_handle_t pp;
1068 int n;
1069 {
1070
1071 simple_lock(&pp->pr_slock);
1072
1073 pp->pr_maxpages = (n == 0)
1074 ? 0
1075 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1076
1077 simple_unlock(&pp->pr_slock);
1078 }
1079
1080 void
1081 pool_sethardlimit(pp, n, warnmess, ratecap)
1082 pool_handle_t pp;
1083 int n;
1084 const char *warnmess;
1085 int ratecap;
1086 {
1087
1088 simple_lock(&pp->pr_slock);
1089
1090 pp->pr_hardlimit = n;
1091 pp->pr_hardlimit_warning = warnmess;
1092 pp->pr_hardlimit_ratecap = ratecap;
1093 memset(&pp->pr_hardlimit_warning_last, 0,
1094 sizeof(pp->pr_hardlimit_warning_last));
1095
1096 /*
1097 * In-line version of pool_sethiwat(), because we don't want to
1098 * release the lock.
1099 */
1100 pp->pr_maxpages = (n == 0)
1101 ? 0
1102 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1103
1104 simple_unlock(&pp->pr_slock);
1105 }
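/*
 * A hypothetical tuning sequence: keep at least 16 items primed, let idle
 * pages accumulate up to roughly 128 items' worth, and refuse allocations
 * beyond 256 outstanding items, logging the warning at most once a minute.
 *
 *	pool_setlowat(foo_pool, 16);
 *	pool_sethiwat(foo_pool, 128);
 *	pool_sethardlimit(foo_pool, 256, "WARNING: foo_pool limit reached", 60);
 */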
1106
1107 /*
1108 * Default page allocator.
1109 */
1110 static void *
1111 pool_page_alloc(sz, flags, mtype)
1112 unsigned long sz;
1113 int flags;
1114 int mtype;
1115 {
1116 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1117
1118 return ((void *)uvm_km_alloc_poolpage(waitok));
1119 }
1120
1121 static void
1122 pool_page_free(v, sz, mtype)
1123 void *v;
1124 unsigned long sz;
1125 int mtype;
1126 {
1127
1128 uvm_km_free_poolpage((vaddr_t)v);
1129 }
1130
1131 /*
1132 * Alternate pool page allocator for pools that know they will
1133 * never be accessed in interrupt context.
1134 */
1135 void *
1136 pool_page_alloc_nointr(sz, flags, mtype)
1137 unsigned long sz;
1138 int flags;
1139 int mtype;
1140 {
1141 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
1142
1143 return ((void *)uvm_km_alloc_poolpage1(kernel_map, uvm.kernel_object,
1144 waitok));
1145 }
1146
1147 void
1148 pool_page_free_nointr(v, sz, mtype)
1149 void *v;
1150 unsigned long sz;
1151 int mtype;
1152 {
1153
1154 uvm_km_free_poolpage1(kernel_map, (vaddr_t)v);
1155 }
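/*
 * Example: a pool never used from interrupt context can take its pages
 * from kernel_map by passing these hooks to pool_init() (names below are
 * illustrative).
 *
 *	pool_init(&baz_pool, sizeof(struct baz), 0, 0, 0, "bazpl",
 *	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_DEVBUF);
 */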
1156
1157
1158 /*
1159 * Release all complete pages that have not been used recently.
1160 */
1161 void
1162 pool_reclaim(pp)
1163 pool_handle_t pp;
1164 {
1165 struct pool_item_header *ph, *phnext;
1166 struct timeval curtime;
1167 int s;
1168
1169 if (pp->pr_roflags & PR_STATIC)
1170 return;
1171
1172 if (simple_lock_try(&pp->pr_slock) == 0)
1173 return;
1174
1175 s = splclock();
1176 curtime = mono_time;
1177 splx(s);
1178
1179 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
1180 phnext = TAILQ_NEXT(ph, ph_pagelist);
1181
1182 /* Check our minimum page claim */
1183 if (pp->pr_npages <= pp->pr_minpages)
1184 break;
1185
1186 if (ph->ph_nmissing == 0) {
1187 struct timeval diff;
1188 timersub(&curtime, &ph->ph_time, &diff);
1189 if (diff.tv_sec < pool_inactive_time)
1190 continue;
1191
1192 /*
1193 * If freeing this page would put us below
1194 * the low water mark, stop now.
1195 */
1196 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1197 pp->pr_minitems)
1198 break;
1199
1200 pr_rmpage(pp, ph);
1201 }
1202 }
1203
1204 simple_unlock(&pp->pr_slock);
1205 }
1206
1207
1208 /*
1209 * Drain pools, one at a time.
1210 *
1211 * Note, we must never be called from an interrupt context.
1212 */
1213 void
1214 pool_drain(arg)
1215 void *arg;
1216 {
1217 struct pool *pp;
1218 int s = splimp();
1219
1220 /* XXX:lock pool head */
1221 if (drainpp == NULL && (drainpp = TAILQ_FIRST(&pool_head)) == NULL) {
1222 splx(s);
1223 return;
1224 }
1225
1226 pp = drainpp;
1227 drainpp = TAILQ_NEXT(pp, pr_poollist);
1228 /* XXX:unlock pool head */
1229
1230 pool_reclaim(pp);
1231 splx(s);
1232 }
1233
1234
1235 #if defined(POOL_DIAGNOSTIC) || defined(DEBUG)
1236 /*
1237 * Diagnostic helpers.
1238 */
1239 void
1240 pool_print(pp, label)
1241 struct pool *pp;
1242 const char *label;
1243 {
1244 int s;
1245
1246 s = splimp();
1247 simple_lock(&pp->pr_slock);
1248 pool_print1(pp, label);
1249 simple_unlock(&pp->pr_slock);
1250 splx(s);
1251 }
1252
1253 static void
1254 pool_print1(pp, label)
1255 struct pool *pp;
1256 const char *label;
1257 {
1258
1259 if (label != NULL)
1260 printf("%s: ", label);
1261
1262 printf("pool %s: nalloc %lu nfree %lu npagealloc %lu npagefree %lu\n"
1263 " npages %u minitems %u itemsperpage %u itemoffset %u\n"
1264 " nidle %lu\n",
1265 pp->pr_wchan,
1266 pp->pr_nget,
1267 pp->pr_nput,
1268 pp->pr_npagealloc,
1269 pp->pr_npagefree,
1270 pp->pr_npages,
1271 pp->pr_minitems,
1272 pp->pr_itemsperpage,
1273 pp->pr_itemoffset,
1274 pp->pr_nidle);
1275 }
1276
1277 int
1278 pool_chk(pp, label)
1279 struct pool *pp;
1280 char *label;
1281 {
1282 struct pool_item_header *ph;
1283 int r = 0;
1284
1285 simple_lock(&pp->pr_slock);
1286
1287 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL;
1288 ph = TAILQ_NEXT(ph, ph_pagelist)) {
1289
1290 struct pool_item *pi;
1291 int n;
1292 caddr_t page;
1293
1294 page = (caddr_t)((u_long)ph & pp->pr_pagemask);
1295 if (page != ph->ph_page &&
1296 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1297 if (label != NULL)
1298 printf("%s: ", label);
1299 printf("pool(%p:%s): page inconsistency: page %p;"
1300 " at page head addr %p (p %p)\n", pp,
1301 pp->pr_wchan, ph->ph_page,
1302 ph, page);
1303 r++;
1304 goto out;
1305 }
1306
1307 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1308 pi != NULL;
1309 pi = TAILQ_NEXT(pi,pi_list), n++) {
1310
1311 #ifdef DIAGNOSTIC
1312 if (pi->pi_magic != PI_MAGIC) {
1313 if (label != NULL)
1314 printf("%s: ", label);
1315 printf("pool(%s): free list modified: magic=%x;"
1316 " page %p; item ordinal %d;"
1317 " addr %p (p %p)\n",
1318 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1319 n, pi, page);
1320 panic("pool");
1321 }
1322 #endif
1323 page = (caddr_t)((u_long)pi & pp->pr_pagemask);
1324 if (page == ph->ph_page)
1325 continue;
1326
1327 if (label != NULL)
1328 printf("%s: ", label);
1329 printf("pool(%p:%s): page inconsistency: page %p;"
1330 " item ordinal %d; addr %p (p %p)\n", pp,
1331 pp->pr_wchan, ph->ph_page,
1332 n, pi, page);
1333 r++;
1334 goto out;
1335 }
1336 }
1337 out:
1338 simple_unlock(&pp->pr_slock);
1339 return (r);
1340 }
1341 #endif /* POOL_DIAGNOSTIC || DEBUG */
1342