/*	$NetBSD: subr_pool.c,v 1.295 2025/05/26 08:32:11 bouyer Exp $	*/

/*
 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018,
 *     2020, 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center; by Andrew Doran, and by
 * Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.295 2025/05/26 08:32:11 bouyer Exp $");

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#include "opt_lockdebug.h"
#include "opt_pool.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/bitops.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/vmem.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/debug.h>
#include <sys/lock.h>
#include <sys/lockdebug.h>
#include <sys/xcall.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/asan.h>
#include <sys/msan.h>
#include <sys/fault.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively.  The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 *
 * An illustrative usage sketch of the pool(9) interface implemented here
 * follows the forward declarations below.
 */

/* List of all pools. Non static as needed by 'vmstat -m' */
TAILQ_HEAD(, pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
#define	PHPOOL_MAX	8
static struct pool phpool[PHPOOL_MAX];
#define	PHPOOL_FREELIST_NELEM(idx) \
	(((idx) == 0) ? BITMAP_MIN_SIZE : BITMAP_SIZE * (1 << (idx)))

#if !defined(KMSAN) && (defined(DIAGNOSTIC) || defined(KASAN))
#define POOL_REDZONE
#endif

#if defined(POOL_QUARANTINE)
#define POOL_NOCACHE
#endif

#ifdef POOL_REDZONE
# ifdef KASAN
#  define POOL_REDZONE_SIZE 8
# else
#  define POOL_REDZONE_SIZE 2
# endif
static void pool_redzone_init(struct pool *, size_t);
static void pool_redzone_fill(struct pool *, void *);
static void pool_redzone_check(struct pool *, void *);
static void pool_cache_redzone_check(pool_cache_t, void *);
#else
# define pool_redzone_init(pp, sz)		__nothing
# define pool_redzone_fill(pp, ptr)		__nothing
# define pool_redzone_check(pp, ptr)		__nothing
# define pool_cache_redzone_check(pc, ptr)	__nothing
#endif

#ifdef KMSAN
static inline void pool_get_kmsan(struct pool *, void *);
static inline void pool_put_kmsan(struct pool *, void *);
static inline void pool_cache_get_kmsan(pool_cache_t, void *);
static inline void pool_cache_put_kmsan(pool_cache_t, void *);
#else
#define pool_get_kmsan(pp, ptr)			__nothing
#define pool_put_kmsan(pp, ptr)			__nothing
#define pool_cache_get_kmsan(pc, ptr)		__nothing
#define pool_cache_put_kmsan(pc, ptr)		__nothing
#endif

#ifdef POOL_QUARANTINE
static void pool_quarantine_init(struct pool *);
static void pool_quarantine_flush(struct pool *);
static bool pool_put_quarantine(struct pool *, void *,
    struct pool_pagelist *);
#else
#define pool_quarantine_init(a)			__nothing
#define pool_quarantine_flush(a)		__nothing
#define pool_put_quarantine(a, b, c)		false
#endif

#ifdef POOL_NOCACHE
static bool pool_cache_put_nocache(pool_cache_t, void *);
#else
#define pool_cache_put_nocache(a, b)		false
#endif

#define NO_CTOR		__FPTRCAST(int (*)(void *, void *, int), nullop)
#define NO_DTOR		__FPTRCAST(void (*)(void *, void *), nullop)

#define pc_has_pser(pc) (((pc)->pc_roflags & PR_PSERIALIZE) != 0)
#define pc_has_ctor(pc) ((pc)->pc_ctor != NO_CTOR)
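/*
 * Illustrative sketch of how a hypothetical consumer uses the pool(9)
 * interface implemented in this file.  The "example_*" names are invented
 * for illustration only; pool_init(), pool_setlowat(), pool_get(),
 * pool_put() and pool_destroy() are the real entry points defined below
 * and declared in <sys/pool.h>.  Kept under #if 0 so it is never built.
 */
#if 0
struct example_item {
	uint64_t	ei_id;
	char		ei_buf[56];
};

static struct pool example_pool;

static void
example_init(void)
{
	/*
	 * Carve example_item-sized pieces out of pages obtained from the
	 * default backend allocator (palloc == NULL), at IPL_NONE.
	 */
	pool_init(&example_pool, sizeof(struct example_item), 0, 0, 0,
	    "examplepl", NULL, IPL_NONE);

	/* Try to keep at least 32 items available at all times. */
	pool_setlowat(&example_pool, 32);
}

static struct example_item *
example_alloc(bool canwait)
{
	/* PR_WAITOK may sleep; PR_NOWAIT returns NULL on failure. */
	return pool_get(&example_pool, canwait ? PR_WAITOK : PR_NOWAIT);
}

static void
example_free(struct example_item *ei)
{
	/* Items always go back to the pool they were allocated from. */
	pool_put(&example_pool, ei);
}

static void
example_fini(void)
{
	/* All items must have been returned before the pool is destroyed. */
	pool_destroy(&example_pool);
}
#endif	/* 0 */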
#define pc_has_dtor(pc) ((pc)->pc_dtor != NO_DTOR) 148 1.229 maxv 149 1.279 thorpej #define pp_has_pser(pp) (((pp)->pr_roflags & PR_PSERIALIZE) != 0) 150 1.279 thorpej 151 1.279 thorpej #define pool_barrier() xc_barrier(0) 152 1.279 thorpej 153 1.258 maxv /* 154 1.258 maxv * Pool backend allocators. 155 1.258 maxv * 156 1.258 maxv * Each pool has a backend allocator that handles allocation, deallocation, 157 1.258 maxv * and any additional draining that might be needed. 158 1.258 maxv * 159 1.258 maxv * We provide two standard allocators: 160 1.258 maxv * 161 1.258 maxv * pool_allocator_kmem - the default when no allocator is specified 162 1.258 maxv * 163 1.258 maxv * pool_allocator_nointr - used for pools that will not be accessed 164 1.258 maxv * in interrupt context. 165 1.258 maxv */ 166 1.258 maxv void *pool_page_alloc(struct pool *, int); 167 1.258 maxv void pool_page_free(struct pool *, void *); 168 1.258 maxv 169 1.98 yamt static void *pool_page_alloc_meta(struct pool *, int); 170 1.98 yamt static void pool_page_free_meta(struct pool *, void *); 171 1.98 yamt 172 1.258 maxv struct pool_allocator pool_allocator_kmem = { 173 1.258 maxv .pa_alloc = pool_page_alloc, 174 1.258 maxv .pa_free = pool_page_free, 175 1.258 maxv .pa_pagesz = 0 176 1.258 maxv }; 177 1.258 maxv 178 1.258 maxv struct pool_allocator pool_allocator_nointr = { 179 1.258 maxv .pa_alloc = pool_page_alloc, 180 1.258 maxv .pa_free = pool_page_free, 181 1.258 maxv .pa_pagesz = 0 182 1.258 maxv }; 183 1.258 maxv 184 1.134 ad struct pool_allocator pool_allocator_meta = { 185 1.191 para .pa_alloc = pool_page_alloc_meta, 186 1.191 para .pa_free = pool_page_free_meta, 187 1.191 para .pa_pagesz = 0 188 1.98 yamt }; 189 1.98 yamt 190 1.208 chs #define POOL_ALLOCATOR_BIG_BASE 13 191 1.258 maxv static struct pool_allocator pool_allocator_big[] = { 192 1.258 maxv { 193 1.258 maxv .pa_alloc = pool_page_alloc, 194 1.258 maxv .pa_free = pool_page_free, 195 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 0), 196 1.258 maxv }, 197 1.258 maxv { 198 1.258 maxv .pa_alloc = pool_page_alloc, 199 1.258 maxv .pa_free = pool_page_free, 200 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 1), 201 1.258 maxv }, 202 1.258 maxv { 203 1.258 maxv .pa_alloc = pool_page_alloc, 204 1.258 maxv .pa_free = pool_page_free, 205 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 2), 206 1.258 maxv }, 207 1.258 maxv { 208 1.258 maxv .pa_alloc = pool_page_alloc, 209 1.258 maxv .pa_free = pool_page_free, 210 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 3), 211 1.258 maxv }, 212 1.258 maxv { 213 1.258 maxv .pa_alloc = pool_page_alloc, 214 1.258 maxv .pa_free = pool_page_free, 215 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 4), 216 1.258 maxv }, 217 1.258 maxv { 218 1.258 maxv .pa_alloc = pool_page_alloc, 219 1.258 maxv .pa_free = pool_page_free, 220 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 5), 221 1.258 maxv }, 222 1.258 maxv { 223 1.258 maxv .pa_alloc = pool_page_alloc, 224 1.258 maxv .pa_free = pool_page_free, 225 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 6), 226 1.258 maxv }, 227 1.258 maxv { 228 1.258 maxv .pa_alloc = pool_page_alloc, 229 1.258 maxv .pa_free = pool_page_free, 230 1.258 maxv .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 7), 231 1.273 jdolecek }, 232 1.273 jdolecek { 233 1.273 jdolecek .pa_alloc = pool_page_alloc, 234 1.273 jdolecek .pa_free = pool_page_free, 235 1.273 jdolecek .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 8), 236 1.273 jdolecek }, 237 1.273 jdolecek { 238 
1.273 jdolecek .pa_alloc = pool_page_alloc, 239 1.273 jdolecek .pa_free = pool_page_free, 240 1.273 jdolecek .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 9), 241 1.273 jdolecek }, 242 1.273 jdolecek { 243 1.273 jdolecek .pa_alloc = pool_page_alloc, 244 1.273 jdolecek .pa_free = pool_page_free, 245 1.273 jdolecek .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 10), 246 1.273 jdolecek }, 247 1.273 jdolecek { 248 1.273 jdolecek .pa_alloc = pool_page_alloc, 249 1.273 jdolecek .pa_free = pool_page_free, 250 1.273 jdolecek .pa_pagesz = 1 << (POOL_ALLOCATOR_BIG_BASE + 11), 251 1.258 maxv } 252 1.258 maxv }; 253 1.258 maxv 254 1.208 chs static int pool_bigidx(size_t); 255 1.208 chs 256 1.3 pk /* # of seconds to retain page after last use */ 257 1.3 pk int pool_inactive_time = 10; 258 1.3 pk 259 1.3 pk /* Next candidate for drainage (see pool_drain()) */ 260 1.236 maxv static struct pool *drainpp; 261 1.23 thorpej 262 1.134 ad /* This lock protects both pool_head and drainpp. */ 263 1.134 ad static kmutex_t pool_head_lock; 264 1.134 ad static kcondvar_t pool_busy; 265 1.3 pk 266 1.178 elad /* This lock protects initialization of a potentially shared pool allocator */ 267 1.178 elad static kmutex_t pool_allocator_lock; 268 1.178 elad 269 1.245 maxv static unsigned int poolid_counter = 0; 270 1.245 maxv 271 1.135 yamt typedef uint32_t pool_item_bitmap_t; 272 1.135 yamt #define BITMAP_SIZE (CHAR_BIT * sizeof(pool_item_bitmap_t)) 273 1.135 yamt #define BITMAP_MASK (BITMAP_SIZE - 1) 274 1.256 maxv #define BITMAP_MIN_SIZE (CHAR_BIT * sizeof(((struct pool_item_header *)NULL)->ph_u2)) 275 1.99 yamt 276 1.3 pk struct pool_item_header { 277 1.3 pk /* Page headers */ 278 1.88 chs LIST_ENTRY(pool_item_header) 279 1.3 pk ph_pagelist; /* pool page list */ 280 1.245 maxv union { 281 1.245 maxv /* !PR_PHINPAGE */ 282 1.245 maxv struct { 283 1.245 maxv SPLAY_ENTRY(pool_item_header) 284 1.245 maxv phu_node; /* off-page page headers */ 285 1.245 maxv } phu_offpage; 286 1.245 maxv /* PR_PHINPAGE */ 287 1.245 maxv struct { 288 1.245 maxv unsigned int phu_poolid; 289 1.245 maxv } phu_onpage; 290 1.245 maxv } ph_u1; 291 1.128 christos void * ph_page; /* this page's address */ 292 1.151 yamt uint32_t ph_time; /* last referenced */ 293 1.135 yamt uint16_t ph_nmissing; /* # of chunks in use */ 294 1.141 yamt uint16_t ph_off; /* start offset in page */ 295 1.97 yamt union { 296 1.242 maxv /* !PR_USEBMAP */ 297 1.97 yamt struct { 298 1.102 chs LIST_HEAD(, pool_item) 299 1.97 yamt phu_itemlist; /* chunk list for this page */ 300 1.97 yamt } phu_normal; 301 1.242 maxv /* PR_USEBMAP */ 302 1.97 yamt struct { 303 1.141 yamt pool_item_bitmap_t phu_bitmap[1]; 304 1.97 yamt } phu_notouch; 305 1.245 maxv } ph_u2; 306 1.3 pk }; 307 1.245 maxv #define ph_node ph_u1.phu_offpage.phu_node 308 1.245 maxv #define ph_poolid ph_u1.phu_onpage.phu_poolid 309 1.245 maxv #define ph_itemlist ph_u2.phu_normal.phu_itemlist 310 1.245 maxv #define ph_bitmap ph_u2.phu_notouch.phu_bitmap 311 1.3 pk 312 1.240 maxv #define PHSIZE ALIGN(sizeof(struct pool_item_header)) 313 1.240 maxv 314 1.256 maxv CTASSERT(offsetof(struct pool_item_header, ph_u2) + 315 1.256 maxv BITMAP_MIN_SIZE / CHAR_BIT == sizeof(struct pool_item_header)); 316 1.256 maxv 317 1.229 maxv #if defined(DIAGNOSTIC) && !defined(KASAN) 318 1.229 maxv #define POOL_CHECK_MAGIC 319 1.229 maxv #endif 320 1.229 maxv 321 1.1 pk struct pool_item { 322 1.229 maxv #ifdef POOL_CHECK_MAGIC 323 1.82 thorpej u_int pi_magic; 324 1.33 chs #endif 325 1.134 ad #define PI_MAGIC 0xdeaddeadU 326 1.3 pk /* Other 
   entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems ||				\
	 (pp)->pr_npages < (pp)->pr_minpages)
#define	POOL_OBJ_TO_PAGE(pp, v)						\
	(void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references up
 * to PCG_NUMOBJECTS constructed objects.  When a cache allocates an
 * object from the pool, it calls the object's constructor and places it
 * into a cache group.  When a cache group frees an object back to the
 * pool, it first calls the object's destructor.  This allows the object
 * to persist in constructed form while freed to the cache.
 *
 * The pool references each cache, so that when a pool is drained by the
 * pagedaemon, it can drain each individual cache as well.  Each time a
 * cache is drained, the most idle cache group is freed to the pool in
 * its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

static struct pool pcg_normal_pool;
static struct pool pcg_large_pool;
static struct pool cache_pool;
static struct pool cache_cpu_pool;

static pcg_t *volatile pcg_large_cache __cacheline_aligned;
static pcg_t *volatile pcg_normal_cache __cacheline_aligned;

/* List of all caches.
*/ 370 1.145 ad TAILQ_HEAD(,pool_cache) pool_cache_head = 371 1.145 ad TAILQ_HEAD_INITIALIZER(pool_cache_head); 372 1.145 ad 373 1.162 ad int pool_cache_disable; /* global disable for caching */ 374 1.169 yamt static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */ 375 1.145 ad 376 1.271 ad static bool pool_cache_put_slow(pool_cache_t, pool_cache_cpu_t *, int, 377 1.162 ad void *); 378 1.271 ad static bool pool_cache_get_slow(pool_cache_t, pool_cache_cpu_t *, int, 379 1.162 ad void **, paddr_t *, int); 380 1.134 ad static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t); 381 1.271 ad static int pool_cache_invalidate_groups(pool_cache_t, pcg_t *); 382 1.175 jym static void pool_cache_invalidate_cpu(pool_cache_t, u_int); 383 1.196 jym static void pool_cache_transfer(pool_cache_t); 384 1.271 ad static int pool_pcg_get(pcg_t *volatile *, pcg_t **); 385 1.271 ad static int pool_pcg_put(pcg_t *volatile *, pcg_t *); 386 1.271 ad static pcg_t * pool_pcg_trunc(pcg_t *volatile *); 387 1.3 pk 388 1.42 thorpej static int pool_catchup(struct pool *); 389 1.128 christos static void pool_prime_page(struct pool *, void *, 390 1.55 thorpej struct pool_item_header *); 391 1.88 chs static void pool_update_curpage(struct pool *); 392 1.66 thorpej 393 1.113 yamt static int pool_grow(struct pool *, int); 394 1.117 yamt static void *pool_allocator_alloc(struct pool *, int); 395 1.117 yamt static void pool_allocator_free(struct pool *, void *); 396 1.3 pk 397 1.97 yamt static void pool_print_pagelist(struct pool *, struct pool_pagelist *, 398 1.199 christos void (*)(const char *, ...) __printflike(1, 2)); 399 1.42 thorpej static void pool_print1(struct pool *, const char *, 400 1.199 christos void (*)(const char *, ...) __printflike(1, 2)); 401 1.3 pk 402 1.88 chs static int pool_chk_page(struct pool *, const char *, 403 1.88 chs struct pool_item_header *); 404 1.88 chs 405 1.234 maxv /* -------------------------------------------------------------------------- */ 406 1.234 maxv 407 1.135 yamt static inline unsigned int 408 1.234 maxv pr_item_bitmap_index(const struct pool *pp, const struct pool_item_header *ph, 409 1.97 yamt const void *v) 410 1.97 yamt { 411 1.97 yamt const char *cp = v; 412 1.135 yamt unsigned int idx; 413 1.97 yamt 414 1.242 maxv KASSERT(pp->pr_roflags & PR_USEBMAP); 415 1.128 christos idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size; 416 1.237 maxv 417 1.237 maxv if (__predict_false(idx >= pp->pr_itemsperpage)) { 418 1.237 maxv panic("%s: [%s] %u >= %u", __func__, pp->pr_wchan, idx, 419 1.237 maxv pp->pr_itemsperpage); 420 1.237 maxv } 421 1.237 maxv 422 1.97 yamt return idx; 423 1.97 yamt } 424 1.97 yamt 425 1.110 perry static inline void 426 1.234 maxv pr_item_bitmap_put(const struct pool *pp, struct pool_item_header *ph, 427 1.97 yamt void *obj) 428 1.97 yamt { 429 1.234 maxv unsigned int idx = pr_item_bitmap_index(pp, ph, obj); 430 1.135 yamt pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE); 431 1.223 kamil pool_item_bitmap_t mask = 1U << (idx & BITMAP_MASK); 432 1.97 yamt 433 1.237 maxv if (__predict_false((*bitmap & mask) != 0)) { 434 1.237 maxv panic("%s: [%s] %p already freed", __func__, pp->pr_wchan, obj); 435 1.237 maxv } 436 1.237 maxv 437 1.135 yamt *bitmap |= mask; 438 1.97 yamt } 439 1.97 yamt 440 1.110 perry static inline void * 441 1.234 maxv pr_item_bitmap_get(const struct pool *pp, struct pool_item_header *ph) 442 1.97 yamt { 443 1.135 yamt pool_item_bitmap_t *bitmap = ph->ph_bitmap; 444 1.135 yamt unsigned int idx; 
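	/*
	 * Scan the bitmap one word at a time: a set bit marks a free
	 * item.  ffs32() yields the lowest set bit, which is cleared
	 * below to mark the corresponding item as allocated.
	 */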
445 1.135 yamt int i; 446 1.97 yamt 447 1.135 yamt for (i = 0; ; i++) { 448 1.135 yamt int bit; 449 1.97 yamt 450 1.135 yamt KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage); 451 1.135 yamt bit = ffs32(bitmap[i]); 452 1.135 yamt if (bit) { 453 1.135 yamt pool_item_bitmap_t mask; 454 1.135 yamt 455 1.135 yamt bit--; 456 1.135 yamt idx = (i * BITMAP_SIZE) + bit; 457 1.222 kamil mask = 1U << bit; 458 1.135 yamt KASSERT((bitmap[i] & mask) != 0); 459 1.135 yamt bitmap[i] &= ~mask; 460 1.135 yamt break; 461 1.135 yamt } 462 1.135 yamt } 463 1.135 yamt KASSERT(idx < pp->pr_itemsperpage); 464 1.128 christos return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size; 465 1.97 yamt } 466 1.97 yamt 467 1.135 yamt static inline void 468 1.234 maxv pr_item_bitmap_init(const struct pool *pp, struct pool_item_header *ph) 469 1.135 yamt { 470 1.135 yamt pool_item_bitmap_t *bitmap = ph->ph_bitmap; 471 1.135 yamt const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE); 472 1.135 yamt int i; 473 1.135 yamt 474 1.135 yamt for (i = 0; i < n; i++) { 475 1.135 yamt bitmap[i] = (pool_item_bitmap_t)-1; 476 1.135 yamt } 477 1.135 yamt } 478 1.135 yamt 479 1.234 maxv /* -------------------------------------------------------------------------- */ 480 1.234 maxv 481 1.234 maxv static inline void 482 1.234 maxv pr_item_linkedlist_put(const struct pool *pp, struct pool_item_header *ph, 483 1.234 maxv void *obj) 484 1.234 maxv { 485 1.234 maxv struct pool_item *pi = obj; 486 1.234 maxv 487 1.279 thorpej KASSERT(!pp_has_pser(pp)); 488 1.279 thorpej 489 1.234 maxv #ifdef POOL_CHECK_MAGIC 490 1.234 maxv pi->pi_magic = PI_MAGIC; 491 1.234 maxv #endif 492 1.234 maxv 493 1.234 maxv if (pp->pr_redzone) { 494 1.234 maxv /* 495 1.234 maxv * Mark the pool_item as valid. The rest is already 496 1.234 maxv * invalid. 497 1.234 maxv */ 498 1.248 maxv kasan_mark(pi, sizeof(*pi), sizeof(*pi), 0); 499 1.234 maxv } 500 1.234 maxv 501 1.234 maxv LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 502 1.234 maxv } 503 1.234 maxv 504 1.234 maxv static inline void * 505 1.234 maxv pr_item_linkedlist_get(struct pool *pp, struct pool_item_header *ph) 506 1.234 maxv { 507 1.234 maxv struct pool_item *pi; 508 1.234 maxv void *v; 509 1.234 maxv 510 1.234 maxv v = pi = LIST_FIRST(&ph->ph_itemlist); 511 1.234 maxv if (__predict_false(v == NULL)) { 512 1.234 maxv mutex_exit(&pp->pr_lock); 513 1.234 maxv panic("%s: [%s] page empty", __func__, pp->pr_wchan); 514 1.234 maxv } 515 1.234 maxv KASSERTMSG((pp->pr_nitems > 0), 516 1.234 maxv "%s: [%s] nitems %u inconsistent on itemlist", 517 1.234 maxv __func__, pp->pr_wchan, pp->pr_nitems); 518 1.234 maxv #ifdef POOL_CHECK_MAGIC 519 1.234 maxv KASSERTMSG((pi->pi_magic == PI_MAGIC), 520 1.234 maxv "%s: [%s] free list modified: " 521 1.234 maxv "magic=%x; page %p; item addr %p", __func__, 522 1.234 maxv pp->pr_wchan, pi->pi_magic, ph->ph_page, pi); 523 1.234 maxv #endif 524 1.234 maxv 525 1.234 maxv /* 526 1.234 maxv * Remove from item list. 
527 1.234 maxv */ 528 1.234 maxv LIST_REMOVE(pi, pi_list); 529 1.234 maxv 530 1.234 maxv return v; 531 1.234 maxv } 532 1.234 maxv 533 1.234 maxv /* -------------------------------------------------------------------------- */ 534 1.234 maxv 535 1.253 maxv static inline void 536 1.253 maxv pr_phinpage_check(struct pool *pp, struct pool_item_header *ph, void *page, 537 1.253 maxv void *object) 538 1.253 maxv { 539 1.253 maxv if (__predict_false((void *)ph->ph_page != page)) { 540 1.253 maxv panic("%s: [%s] item %p not part of pool", __func__, 541 1.253 maxv pp->pr_wchan, object); 542 1.253 maxv } 543 1.253 maxv if (__predict_false((char *)object < (char *)page + ph->ph_off)) { 544 1.253 maxv panic("%s: [%s] item %p below item space", __func__, 545 1.253 maxv pp->pr_wchan, object); 546 1.253 maxv } 547 1.253 maxv if (__predict_false(ph->ph_poolid != pp->pr_poolid)) { 548 1.253 maxv panic("%s: [%s] item %p poolid %u != %u", __func__, 549 1.253 maxv pp->pr_wchan, object, ph->ph_poolid, pp->pr_poolid); 550 1.253 maxv } 551 1.253 maxv } 552 1.253 maxv 553 1.253 maxv static inline void 554 1.253 maxv pc_phinpage_check(pool_cache_t pc, void *object) 555 1.253 maxv { 556 1.253 maxv struct pool_item_header *ph; 557 1.253 maxv struct pool *pp; 558 1.253 maxv void *page; 559 1.253 maxv 560 1.253 maxv pp = &pc->pc_pool; 561 1.253 maxv page = POOL_OBJ_TO_PAGE(pp, object); 562 1.253 maxv ph = (struct pool_item_header *)page; 563 1.253 maxv 564 1.253 maxv pr_phinpage_check(pp, ph, page, object); 565 1.253 maxv } 566 1.253 maxv 567 1.253 maxv /* -------------------------------------------------------------------------- */ 568 1.253 maxv 569 1.110 perry static inline int 570 1.88 chs phtree_compare(struct pool_item_header *a, struct pool_item_header *b) 571 1.88 chs { 572 1.121 yamt 573 1.121 yamt /* 574 1.236 maxv * We consider pool_item_header with smaller ph_page bigger. This 575 1.236 maxv * unnatural ordering is for the benefit of pr_find_pagehead. 576 1.121 yamt */ 577 1.88 chs if (a->ph_page < b->ph_page) 578 1.236 maxv return 1; 579 1.121 yamt else if (a->ph_page > b->ph_page) 580 1.236 maxv return -1; 581 1.88 chs else 582 1.236 maxv return 0; 583 1.88 chs } 584 1.88 chs 585 1.88 chs SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare); 586 1.88 chs SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare); 587 1.88 chs 588 1.141 yamt static inline struct pool_item_header * 589 1.141 yamt pr_find_pagehead_noalign(struct pool *pp, void *v) 590 1.141 yamt { 591 1.141 yamt struct pool_item_header *ph, tmp; 592 1.141 yamt 593 1.141 yamt tmp.ph_page = (void *)(uintptr_t)v; 594 1.141 yamt ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 595 1.141 yamt if (ph == NULL) { 596 1.141 yamt ph = SPLAY_ROOT(&pp->pr_phtree); 597 1.141 yamt if (ph != NULL && phtree_compare(&tmp, ph) >= 0) { 598 1.141 yamt ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph); 599 1.141 yamt } 600 1.141 yamt KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0); 601 1.141 yamt } 602 1.141 yamt 603 1.141 yamt return ph; 604 1.141 yamt } 605 1.141 yamt 606 1.3 pk /* 607 1.121 yamt * Return the pool page header based on item address. 
608 1.3 pk */ 609 1.110 perry static inline struct pool_item_header * 610 1.121 yamt pr_find_pagehead(struct pool *pp, void *v) 611 1.3 pk { 612 1.88 chs struct pool_item_header *ph, tmp; 613 1.3 pk 614 1.121 yamt if ((pp->pr_roflags & PR_NOALIGN) != 0) { 615 1.141 yamt ph = pr_find_pagehead_noalign(pp, v); 616 1.121 yamt } else { 617 1.253 maxv void *page = POOL_OBJ_TO_PAGE(pp, v); 618 1.121 yamt if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 619 1.241 maxv ph = (struct pool_item_header *)page; 620 1.253 maxv pr_phinpage_check(pp, ph, page, v); 621 1.121 yamt } else { 622 1.121 yamt tmp.ph_page = page; 623 1.121 yamt ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp); 624 1.121 yamt } 625 1.121 yamt } 626 1.3 pk 627 1.121 yamt KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) || 628 1.128 christos ((char *)ph->ph_page <= (char *)v && 629 1.128 christos (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz)); 630 1.88 chs return ph; 631 1.3 pk } 632 1.3 pk 633 1.101 thorpej static void 634 1.101 thorpej pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq) 635 1.101 thorpej { 636 1.101 thorpej struct pool_item_header *ph; 637 1.101 thorpej 638 1.101 thorpej while ((ph = LIST_FIRST(pq)) != NULL) { 639 1.101 thorpej LIST_REMOVE(ph, ph_pagelist); 640 1.101 thorpej pool_allocator_free(pp, ph->ph_page); 641 1.134 ad if ((pp->pr_roflags & PR_PHINPAGE) == 0) 642 1.101 thorpej pool_put(pp->pr_phpool, ph); 643 1.101 thorpej } 644 1.101 thorpej } 645 1.101 thorpej 646 1.3 pk /* 647 1.3 pk * Remove a page from the pool. 648 1.3 pk */ 649 1.110 perry static inline void 650 1.61 chs pr_rmpage(struct pool *pp, struct pool_item_header *ph, 651 1.61 chs struct pool_pagelist *pq) 652 1.3 pk { 653 1.3 pk 654 1.134 ad KASSERT(mutex_owned(&pp->pr_lock)); 655 1.91 yamt 656 1.3 pk /* 657 1.7 thorpej * If the page was idle, decrement the idle page count. 658 1.3 pk */ 659 1.6 thorpej if (ph->ph_nmissing == 0) { 660 1.207 riastrad KASSERT(pp->pr_nidle != 0); 661 1.207 riastrad KASSERTMSG((pp->pr_nitems >= pp->pr_itemsperpage), 662 1.251 christos "%s: [%s] nitems=%u < itemsperpage=%u", __func__, 663 1.251 christos pp->pr_wchan, pp->pr_nitems, pp->pr_itemsperpage); 664 1.6 thorpej pp->pr_nidle--; 665 1.6 thorpej } 666 1.7 thorpej 667 1.20 thorpej pp->pr_nitems -= pp->pr_itemsperpage; 668 1.20 thorpej 669 1.7 thorpej /* 670 1.101 thorpej * Unlink the page from the pool and queue it for release. 671 1.7 thorpej */ 672 1.88 chs LIST_REMOVE(ph, ph_pagelist); 673 1.245 maxv if (pp->pr_roflags & PR_PHINPAGE) { 674 1.245 maxv if (__predict_false(ph->ph_poolid != pp->pr_poolid)) { 675 1.245 maxv panic("%s: [%s] ph %p poolid %u != %u", 676 1.245 maxv __func__, pp->pr_wchan, ph, ph->ph_poolid, 677 1.245 maxv pp->pr_poolid); 678 1.245 maxv } 679 1.245 maxv } else { 680 1.91 yamt SPLAY_REMOVE(phtree, &pp->pr_phtree, ph); 681 1.245 maxv } 682 1.101 thorpej LIST_INSERT_HEAD(pq, ph, ph_pagelist); 683 1.101 thorpej 684 1.7 thorpej pp->pr_npages--; 685 1.7 thorpej pp->pr_npagefree++; 686 1.6 thorpej 687 1.88 chs pool_update_curpage(pp); 688 1.3 pk } 689 1.3 pk 690 1.3 pk /* 691 1.94 simonb * Initialize all the pools listed in the "pools" link set. 
692 1.94 simonb */ 693 1.94 simonb void 694 1.117 yamt pool_subsystem_init(void) 695 1.94 simonb { 696 1.192 rmind size_t size; 697 1.191 para int idx; 698 1.94 simonb 699 1.134 ad mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE); 700 1.179 mlelstv mutex_init(&pool_allocator_lock, MUTEX_DEFAULT, IPL_NONE); 701 1.134 ad cv_init(&pool_busy, "poolbusy"); 702 1.134 ad 703 1.191 para /* 704 1.191 para * Initialize private page header pool and cache magazine pool if we 705 1.191 para * haven't done so yet. 706 1.191 para */ 707 1.191 para for (idx = 0; idx < PHPOOL_MAX; idx++) { 708 1.191 para static char phpool_names[PHPOOL_MAX][6+1+6+1]; 709 1.191 para int nelem; 710 1.191 para size_t sz; 711 1.191 para 712 1.191 para nelem = PHPOOL_FREELIST_NELEM(idx); 713 1.256 maxv KASSERT(nelem != 0); 714 1.191 para snprintf(phpool_names[idx], sizeof(phpool_names[idx]), 715 1.191 para "phpool-%d", nelem); 716 1.256 maxv sz = offsetof(struct pool_item_header, 717 1.256 maxv ph_bitmap[howmany(nelem, BITMAP_SIZE)]); 718 1.191 para pool_init(&phpool[idx], sz, 0, 0, 0, 719 1.191 para phpool_names[idx], &pool_allocator_meta, IPL_VM); 720 1.117 yamt } 721 1.191 para 722 1.191 para size = sizeof(pcg_t) + 723 1.191 para (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t); 724 1.191 para pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0, 725 1.191 para "pcgnormal", &pool_allocator_meta, IPL_VM); 726 1.191 para 727 1.191 para size = sizeof(pcg_t) + 728 1.191 para (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t); 729 1.191 para pool_init(&pcg_large_pool, size, coherency_unit, 0, 0, 730 1.191 para "pcglarge", &pool_allocator_meta, IPL_VM); 731 1.134 ad 732 1.156 ad pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit, 733 1.191 para 0, 0, "pcache", &pool_allocator_meta, IPL_NONE); 734 1.134 ad 735 1.156 ad pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit, 736 1.191 para 0, 0, "pcachecpu", &pool_allocator_meta, IPL_NONE); 737 1.94 simonb } 738 1.94 simonb 739 1.240 maxv static inline bool 740 1.240 maxv pool_init_is_phinpage(const struct pool *pp) 741 1.240 maxv { 742 1.240 maxv size_t pagesize; 743 1.240 maxv 744 1.240 maxv if (pp->pr_roflags & PR_PHINPAGE) { 745 1.240 maxv return true; 746 1.240 maxv } 747 1.240 maxv if (pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) { 748 1.240 maxv return false; 749 1.240 maxv } 750 1.240 maxv 751 1.240 maxv pagesize = pp->pr_alloc->pa_pagesz; 752 1.240 maxv 753 1.240 maxv /* 754 1.240 maxv * Threshold: the item size is below 1/16 of a page size, and below 755 1.240 maxv * 8 times the page header size. The latter ensures we go off-page 756 1.240 maxv * if the page header would make us waste a rather big item. 757 1.240 maxv */ 758 1.240 maxv if (pp->pr_size < MIN(pagesize / 16, PHSIZE * 8)) { 759 1.240 maxv return true; 760 1.240 maxv } 761 1.240 maxv 762 1.240 maxv /* Put the header into the page if it doesn't waste any items. */ 763 1.240 maxv if (pagesize / pp->pr_size == (pagesize - PHSIZE) / pp->pr_size) { 764 1.240 maxv return true; 765 1.240 maxv } 766 1.240 maxv 767 1.240 maxv return false; 768 1.240 maxv } 769 1.240 maxv 770 1.242 maxv static inline bool 771 1.242 maxv pool_init_is_usebmap(const struct pool *pp) 772 1.242 maxv { 773 1.243 maxv size_t bmapsize; 774 1.243 maxv 775 1.242 maxv if (pp->pr_roflags & PR_NOTOUCH) { 776 1.242 maxv return true; 777 1.242 maxv } 778 1.242 maxv 779 1.243 maxv /* 780 1.256 maxv * If we're off-page, go with a bitmap. 
781 1.256 maxv */ 782 1.256 maxv if (!(pp->pr_roflags & PR_PHINPAGE)) { 783 1.256 maxv return true; 784 1.256 maxv } 785 1.256 maxv 786 1.256 maxv /* 787 1.243 maxv * If we're on-page, and the page header can already contain a bitmap 788 1.243 maxv * big enough to cover all the items of the page, go with a bitmap. 789 1.243 maxv */ 790 1.243 maxv bmapsize = roundup(PHSIZE, pp->pr_align) - 791 1.243 maxv offsetof(struct pool_item_header, ph_bitmap[0]); 792 1.243 maxv KASSERT(bmapsize % sizeof(pool_item_bitmap_t) == 0); 793 1.243 maxv if (pp->pr_itemsperpage <= bmapsize * CHAR_BIT) { 794 1.243 maxv return true; 795 1.243 maxv } 796 1.243 maxv 797 1.242 maxv return false; 798 1.242 maxv } 799 1.242 maxv 800 1.94 simonb /* 801 1.3 pk * Initialize the given pool resource structure. 802 1.3 pk * 803 1.3 pk * We export this routine to allow other kernel parts to declare 804 1.195 rmind * static pools that must be initialized before kmem(9) is available. 805 1.3 pk */ 806 1.3 pk void 807 1.42 thorpej pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags, 808 1.129 ad const char *wchan, struct pool_allocator *palloc, int ipl) 809 1.3 pk { 810 1.116 simonb struct pool *pp1; 811 1.240 maxv size_t prsize; 812 1.237 maxv int itemspace, slack; 813 1.3 pk 814 1.238 maxv /* XXX ioff will be removed. */ 815 1.238 maxv KASSERT(ioff == 0); 816 1.238 maxv 817 1.116 simonb #ifdef DEBUG 818 1.198 christos if (__predict_true(!cold)) 819 1.198 christos mutex_enter(&pool_head_lock); 820 1.116 simonb /* 821 1.116 simonb * Check that the pool hasn't already been initialised and 822 1.116 simonb * added to the list of all pools. 823 1.116 simonb */ 824 1.145 ad TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 825 1.116 simonb if (pp == pp1) 826 1.213 christos panic("%s: [%s] already initialised", __func__, 827 1.116 simonb wchan); 828 1.116 simonb } 829 1.198 christos if (__predict_true(!cold)) 830 1.198 christos mutex_exit(&pool_head_lock); 831 1.116 simonb #endif 832 1.116 simonb 833 1.291 chs if (palloc == NULL) { 834 1.291 chs if (size > PAGE_SIZE) { 835 1.291 chs int bigidx = pool_bigidx(size); 836 1.291 chs 837 1.291 chs palloc = &pool_allocator_big[bigidx]; 838 1.291 chs flags |= PR_NOALIGN; 839 1.291 chs } else if (ipl == IPL_NONE) { 840 1.291 chs palloc = &pool_allocator_nointr; 841 1.291 chs } else { 842 1.291 chs palloc = &pool_allocator_kmem; 843 1.291 chs } 844 1.291 chs } 845 1.244 maxv 846 1.180 mlelstv if (!cold) 847 1.180 mlelstv mutex_enter(&pool_allocator_lock); 848 1.178 elad if (palloc->pa_refcnt++ == 0) { 849 1.112 bjh21 if (palloc->pa_pagesz == 0) 850 1.66 thorpej palloc->pa_pagesz = PAGE_SIZE; 851 1.66 thorpej 852 1.66 thorpej TAILQ_INIT(&palloc->pa_list); 853 1.66 thorpej 854 1.134 ad mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM); 855 1.66 thorpej palloc->pa_pagemask = ~(palloc->pa_pagesz - 1); 856 1.66 thorpej palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1; 857 1.4 thorpej } 858 1.180 mlelstv if (!cold) 859 1.180 mlelstv mutex_exit(&pool_allocator_lock); 860 1.3 pk 861 1.279 thorpej /* 862 1.279 thorpej * PR_PSERIALIZE implies PR_NOTOUCH; freed objects must remain 863 1.279 thorpej * valid until the the backing page is returned to the system. 
864 1.279 thorpej */ 865 1.279 thorpej if (flags & PR_PSERIALIZE) { 866 1.279 thorpej flags |= PR_NOTOUCH; 867 1.279 thorpej } 868 1.279 thorpej 869 1.3 pk if (align == 0) 870 1.3 pk align = ALIGN(1); 871 1.14 thorpej 872 1.204 maxv prsize = size; 873 1.204 maxv if ((flags & PR_NOTOUCH) == 0 && prsize < sizeof(struct pool_item)) 874 1.204 maxv prsize = sizeof(struct pool_item); 875 1.3 pk 876 1.204 maxv prsize = roundup(prsize, align); 877 1.207 riastrad KASSERTMSG((prsize <= palloc->pa_pagesz), 878 1.213 christos "%s: [%s] pool item size (%zu) larger than page size (%u)", 879 1.213 christos __func__, wchan, prsize, palloc->pa_pagesz); 880 1.35 pk 881 1.3 pk /* 882 1.3 pk * Initialize the pool structure. 883 1.3 pk */ 884 1.88 chs LIST_INIT(&pp->pr_emptypages); 885 1.88 chs LIST_INIT(&pp->pr_fullpages); 886 1.88 chs LIST_INIT(&pp->pr_partpages); 887 1.134 ad pp->pr_cache = NULL; 888 1.3 pk pp->pr_curpage = NULL; 889 1.3 pk pp->pr_npages = 0; 890 1.3 pk pp->pr_minitems = 0; 891 1.3 pk pp->pr_minpages = 0; 892 1.292 chs pp->pr_maxitems = UINT_MAX; 893 1.3 pk pp->pr_maxpages = UINT_MAX; 894 1.20 thorpej pp->pr_roflags = flags; 895 1.20 thorpej pp->pr_flags = 0; 896 1.204 maxv pp->pr_size = prsize; 897 1.233 maxv pp->pr_reqsize = size; 898 1.3 pk pp->pr_align = align; 899 1.3 pk pp->pr_wchan = wchan; 900 1.66 thorpej pp->pr_alloc = palloc; 901 1.245 maxv pp->pr_poolid = atomic_inc_uint_nv(&poolid_counter); 902 1.20 thorpej pp->pr_nitems = 0; 903 1.20 thorpej pp->pr_nout = 0; 904 1.20 thorpej pp->pr_hardlimit = UINT_MAX; 905 1.20 thorpej pp->pr_hardlimit_warning = NULL; 906 1.31 thorpej pp->pr_hardlimit_ratecap.tv_sec = 0; 907 1.31 thorpej pp->pr_hardlimit_ratecap.tv_usec = 0; 908 1.31 thorpej pp->pr_hardlimit_warning_last.tv_sec = 0; 909 1.31 thorpej pp->pr_hardlimit_warning_last.tv_usec = 0; 910 1.68 thorpej pp->pr_drain_hook = NULL; 911 1.68 thorpej pp->pr_drain_hook_arg = NULL; 912 1.125 ad pp->pr_freecheck = NULL; 913 1.255 maxv pp->pr_redzone = false; 914 1.204 maxv pool_redzone_init(pp, size); 915 1.249 maxv pool_quarantine_init(pp); 916 1.3 pk 917 1.3 pk /* 918 1.240 maxv * Decide whether to put the page header off-page to avoid wasting too 919 1.240 maxv * large a part of the page or too big an item. Off-page page headers 920 1.240 maxv * go on a hash table, so we can match a returned item with its header 921 1.240 maxv * based on the page address. 922 1.3 pk */ 923 1.240 maxv if (pool_init_is_phinpage(pp)) { 924 1.241 maxv /* Use the beginning of the page for the page header */ 925 1.241 maxv itemspace = palloc->pa_pagesz - roundup(PHSIZE, align); 926 1.241 maxv pp->pr_itemoffset = roundup(PHSIZE, align); 927 1.239 maxv pp->pr_roflags |= PR_PHINPAGE; 928 1.2 pk } else { 929 1.3 pk /* The page header will be taken from our page header pool */ 930 1.237 maxv itemspace = palloc->pa_pagesz; 931 1.241 maxv pp->pr_itemoffset = 0; 932 1.88 chs SPLAY_INIT(&pp->pr_phtree); 933 1.2 pk } 934 1.1 pk 935 1.243 maxv pp->pr_itemsperpage = itemspace / pp->pr_size; 936 1.243 maxv KASSERT(pp->pr_itemsperpage != 0); 937 1.243 maxv 938 1.242 maxv /* 939 1.242 maxv * Decide whether to use a bitmap or a linked list to manage freed 940 1.242 maxv * items. 941 1.242 maxv */ 942 1.242 maxv if (pool_init_is_usebmap(pp)) { 943 1.242 maxv pp->pr_roflags |= PR_USEBMAP; 944 1.242 maxv } 945 1.242 maxv 946 1.242 maxv /* 947 1.256 maxv * If we're off-page, then we're using a bitmap; choose the appropriate 948 1.256 maxv * pool to allocate page headers, whose size varies depending on the 949 1.256 maxv * bitmap. 
If we're on-page, nothing to do. 950 1.242 maxv */ 951 1.256 maxv if (!(pp->pr_roflags & PR_PHINPAGE)) { 952 1.97 yamt int idx; 953 1.97 yamt 954 1.256 maxv KASSERT(pp->pr_roflags & PR_USEBMAP); 955 1.256 maxv 956 1.97 yamt for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx); 957 1.97 yamt idx++) { 958 1.97 yamt /* nothing */ 959 1.97 yamt } 960 1.97 yamt if (idx >= PHPOOL_MAX) { 961 1.97 yamt /* 962 1.97 yamt * if you see this panic, consider to tweak 963 1.97 yamt * PHPOOL_MAX and PHPOOL_FREELIST_NELEM. 964 1.97 yamt */ 965 1.213 christos panic("%s: [%s] too large itemsperpage(%d) for " 966 1.242 maxv "PR_USEBMAP", __func__, 967 1.97 yamt pp->pr_wchan, pp->pr_itemsperpage); 968 1.97 yamt } 969 1.97 yamt pp->pr_phpool = &phpool[idx]; 970 1.242 maxv } else { 971 1.97 yamt pp->pr_phpool = NULL; 972 1.97 yamt } 973 1.3 pk 974 1.3 pk /* 975 1.3 pk * Use the slack between the chunks and the page header 976 1.3 pk * for "cache coloring". 977 1.3 pk */ 978 1.237 maxv slack = itemspace - pp->pr_itemsperpage * pp->pr_size; 979 1.239 maxv pp->pr_maxcolor = rounddown(slack, align); 980 1.3 pk pp->pr_curcolor = 0; 981 1.3 pk 982 1.3 pk pp->pr_nget = 0; 983 1.3 pk pp->pr_nfail = 0; 984 1.3 pk pp->pr_nput = 0; 985 1.3 pk pp->pr_npagealloc = 0; 986 1.3 pk pp->pr_npagefree = 0; 987 1.1 pk pp->pr_hiwat = 0; 988 1.8 thorpej pp->pr_nidle = 0; 989 1.134 ad pp->pr_refcnt = 0; 990 1.3 pk 991 1.157 ad mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl); 992 1.134 ad cv_init(&pp->pr_cv, wchan); 993 1.134 ad pp->pr_ipl = ipl; 994 1.1 pk 995 1.145 ad /* Insert into the list of all pools. */ 996 1.181 mlelstv if (!cold) 997 1.134 ad mutex_enter(&pool_head_lock); 998 1.145 ad TAILQ_FOREACH(pp1, &pool_head, pr_poollist) { 999 1.145 ad if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0) 1000 1.145 ad break; 1001 1.145 ad } 1002 1.145 ad if (pp1 == NULL) 1003 1.145 ad TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist); 1004 1.145 ad else 1005 1.145 ad TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist); 1006 1.181 mlelstv if (!cold) 1007 1.134 ad mutex_exit(&pool_head_lock); 1008 1.134 ad 1009 1.167 skrll /* Insert this into the list of pools using this allocator. */ 1010 1.181 mlelstv if (!cold) 1011 1.134 ad mutex_enter(&palloc->pa_lock); 1012 1.145 ad TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list); 1013 1.181 mlelstv if (!cold) 1014 1.134 ad mutex_exit(&palloc->pa_lock); 1015 1.1 pk } 1016 1.1 pk 1017 1.1 pk /* 1018 1.283 andvar * De-commission a pool resource. 1019 1.1 pk */ 1020 1.1 pk void 1021 1.42 thorpej pool_destroy(struct pool *pp) 1022 1.1 pk { 1023 1.101 thorpej struct pool_pagelist pq; 1024 1.3 pk struct pool_item_header *ph; 1025 1.43 thorpej 1026 1.249 maxv pool_quarantine_flush(pp); 1027 1.249 maxv 1028 1.101 thorpej /* Remove from global pool list */ 1029 1.134 ad mutex_enter(&pool_head_lock); 1030 1.134 ad while (pp->pr_refcnt != 0) 1031 1.134 ad cv_wait(&pool_busy, &pool_head_lock); 1032 1.145 ad TAILQ_REMOVE(&pool_head, pp, pr_poollist); 1033 1.101 thorpej if (drainpp == pp) 1034 1.101 thorpej drainpp = NULL; 1035 1.134 ad mutex_exit(&pool_head_lock); 1036 1.101 thorpej 1037 1.101 thorpej /* Remove this pool from its allocator's list of pools. 
*/ 1038 1.134 ad mutex_enter(&pp->pr_alloc->pa_lock); 1039 1.66 thorpej TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list); 1040 1.134 ad mutex_exit(&pp->pr_alloc->pa_lock); 1041 1.66 thorpej 1042 1.178 elad mutex_enter(&pool_allocator_lock); 1043 1.178 elad if (--pp->pr_alloc->pa_refcnt == 0) 1044 1.178 elad mutex_destroy(&pp->pr_alloc->pa_lock); 1045 1.178 elad mutex_exit(&pool_allocator_lock); 1046 1.178 elad 1047 1.134 ad mutex_enter(&pp->pr_lock); 1048 1.101 thorpej 1049 1.134 ad KASSERT(pp->pr_cache == NULL); 1050 1.207 riastrad KASSERTMSG((pp->pr_nout == 0), 1051 1.251 christos "%s: [%s] pool busy: still out: %u", __func__, pp->pr_wchan, 1052 1.251 christos pp->pr_nout); 1053 1.101 thorpej KASSERT(LIST_EMPTY(&pp->pr_fullpages)); 1054 1.101 thorpej KASSERT(LIST_EMPTY(&pp->pr_partpages)); 1055 1.101 thorpej 1056 1.3 pk /* Remove all pages */ 1057 1.101 thorpej LIST_INIT(&pq); 1058 1.88 chs while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1059 1.101 thorpej pr_rmpage(pp, ph, &pq); 1060 1.101 thorpej 1061 1.134 ad mutex_exit(&pp->pr_lock); 1062 1.3 pk 1063 1.101 thorpej pr_pagelist_free(pp, &pq); 1064 1.134 ad cv_destroy(&pp->pr_cv); 1065 1.134 ad mutex_destroy(&pp->pr_lock); 1066 1.1 pk } 1067 1.1 pk 1068 1.68 thorpej void 1069 1.68 thorpej pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg) 1070 1.68 thorpej { 1071 1.68 thorpej 1072 1.68 thorpej /* XXX no locking -- must be used just after pool_init() */ 1073 1.207 riastrad KASSERTMSG((pp->pr_drain_hook == NULL), 1074 1.213 christos "%s: [%s] already set", __func__, pp->pr_wchan); 1075 1.68 thorpej pp->pr_drain_hook = fn; 1076 1.68 thorpej pp->pr_drain_hook_arg = arg; 1077 1.68 thorpej } 1078 1.68 thorpej 1079 1.88 chs static struct pool_item_header * 1080 1.128 christos pool_alloc_item_header(struct pool *pp, void *storage, int flags) 1081 1.55 thorpej { 1082 1.55 thorpej struct pool_item_header *ph; 1083 1.55 thorpej 1084 1.55 thorpej if ((pp->pr_roflags & PR_PHINPAGE) != 0) 1085 1.241 maxv ph = storage; 1086 1.134 ad else 1087 1.97 yamt ph = pool_get(pp->pr_phpool, flags); 1088 1.55 thorpej 1089 1.236 maxv return ph; 1090 1.55 thorpej } 1091 1.1 pk 1092 1.1 pk /* 1093 1.134 ad * Grab an item from the pool. 1094 1.1 pk */ 1095 1.3 pk void * 1096 1.56 sommerfe pool_get(struct pool *pp, int flags) 1097 1.1 pk { 1098 1.3 pk struct pool_item_header *ph; 1099 1.55 thorpej void *v; 1100 1.1 pk 1101 1.215 christos KASSERT(!(flags & PR_NOWAIT) != !(flags & PR_WAITOK)); 1102 1.207 riastrad KASSERTMSG((pp->pr_itemsperpage != 0), 1103 1.213 christos "%s: [%s] pr_itemsperpage is zero, " 1104 1.213 christos "pool not initialized?", __func__, pp->pr_wchan); 1105 1.207 riastrad KASSERTMSG((!(cpu_intr_p() || cpu_softintr_p()) 1106 1.207 riastrad || pp->pr_ipl != IPL_NONE || cold || panicstr != NULL), 1107 1.213 christos "%s: [%s] is IPL_NONE, but called from interrupt context", 1108 1.213 christos __func__, pp->pr_wchan); 1109 1.155 ad if (flags & PR_WAITOK) { 1110 1.154 yamt ASSERT_SLEEPABLE(); 1111 1.155 ad } 1112 1.1 pk 1113 1.270 maxv if (flags & PR_NOWAIT) { 1114 1.270 maxv if (fault_inject()) 1115 1.270 maxv return NULL; 1116 1.270 maxv } 1117 1.270 maxv 1118 1.134 ad mutex_enter(&pp->pr_lock); 1119 1.20 thorpej startover: 1120 1.20 thorpej /* 1121 1.20 thorpej * Check to see if we've reached the hard limit. If we have, 1122 1.20 thorpej * and we can wait, then wait until an item has been returned to 1123 1.20 thorpej * the pool. 
1124 1.20 thorpej */ 1125 1.207 riastrad KASSERTMSG((pp->pr_nout <= pp->pr_hardlimit), 1126 1.213 christos "%s: %s: crossed hard limit", __func__, pp->pr_wchan); 1127 1.34 thorpej if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) { 1128 1.68 thorpej if (pp->pr_drain_hook != NULL) { 1129 1.68 thorpej /* 1130 1.68 thorpej * Since the drain hook is going to free things 1131 1.68 thorpej * back to the pool, unlock, call the hook, re-lock, 1132 1.68 thorpej * and check the hardlimit condition again. 1133 1.68 thorpej */ 1134 1.134 ad mutex_exit(&pp->pr_lock); 1135 1.68 thorpej (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags); 1136 1.134 ad mutex_enter(&pp->pr_lock); 1137 1.68 thorpej if (pp->pr_nout < pp->pr_hardlimit) 1138 1.68 thorpej goto startover; 1139 1.68 thorpej } 1140 1.68 thorpej 1141 1.29 sommerfe if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) { 1142 1.20 thorpej /* 1143 1.20 thorpej * XXX: A warning isn't logged in this case. Should 1144 1.20 thorpej * it be? 1145 1.20 thorpej */ 1146 1.20 thorpej pp->pr_flags |= PR_WANTED; 1147 1.212 christos do { 1148 1.212 christos cv_wait(&pp->pr_cv, &pp->pr_lock); 1149 1.212 christos } while (pp->pr_flags & PR_WANTED); 1150 1.20 thorpej goto startover; 1151 1.20 thorpej } 1152 1.31 thorpej 1153 1.31 thorpej /* 1154 1.31 thorpej * Log a message that the hard limit has been hit. 1155 1.31 thorpej */ 1156 1.31 thorpej if (pp->pr_hardlimit_warning != NULL && 1157 1.31 thorpej ratecheck(&pp->pr_hardlimit_warning_last, 1158 1.31 thorpej &pp->pr_hardlimit_ratecap)) 1159 1.31 thorpej log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning); 1160 1.21 thorpej 1161 1.21 thorpej pp->pr_nfail++; 1162 1.21 thorpej 1163 1.134 ad mutex_exit(&pp->pr_lock); 1164 1.216 christos KASSERT((flags & (PR_NOWAIT|PR_LIMITFAIL)) != 0); 1165 1.236 maxv return NULL; 1166 1.20 thorpej } 1167 1.20 thorpej 1168 1.3 pk /* 1169 1.3 pk * The convention we use is that if `curpage' is not NULL, then 1170 1.3 pk * it points at a non-empty bucket. In particular, `curpage' 1171 1.3 pk * never points at a page header which has PR_PHINPAGE set and 1172 1.3 pk * has no items in its bucket. 1173 1.3 pk */ 1174 1.20 thorpej if ((ph = pp->pr_curpage) == NULL) { 1175 1.113 yamt int error; 1176 1.113 yamt 1177 1.207 riastrad KASSERTMSG((pp->pr_nitems == 0), 1178 1.213 christos "%s: [%s] curpage NULL, inconsistent nitems %u", 1179 1.213 christos __func__, pp->pr_wchan, pp->pr_nitems); 1180 1.20 thorpej 1181 1.21 thorpej /* 1182 1.21 thorpej * Call the back-end page allocator for more memory. 1183 1.21 thorpej * Release the pool lock, as the back-end page allocator 1184 1.21 thorpej * may block. 1185 1.21 thorpej */ 1186 1.113 yamt error = pool_grow(pp, flags); 1187 1.113 yamt if (error != 0) { 1188 1.21 thorpej /* 1189 1.210 mlelstv * pool_grow aborts when another thread 1190 1.210 mlelstv * is allocating a new page. Retry if it 1191 1.210 mlelstv * waited for it. 1192 1.210 mlelstv */ 1193 1.210 mlelstv if (error == ERESTART) 1194 1.210 mlelstv goto startover; 1195 1.210 mlelstv 1196 1.210 mlelstv /* 1197 1.55 thorpej * We were unable to allocate a page or item 1198 1.55 thorpej * header, but we released the lock during 1199 1.55 thorpej * allocation, so perhaps items were freed 1200 1.55 thorpej * back to the pool. Check for this case. 
1201 1.21 thorpej */ 1202 1.21 thorpej if (pp->pr_curpage != NULL) 1203 1.21 thorpej goto startover; 1204 1.15 pk 1205 1.117 yamt pp->pr_nfail++; 1206 1.134 ad mutex_exit(&pp->pr_lock); 1207 1.265 chs KASSERT((flags & (PR_NOWAIT|PR_LIMITFAIL)) != 0); 1208 1.236 maxv return NULL; 1209 1.1 pk } 1210 1.3 pk 1211 1.20 thorpej /* Start the allocation process over. */ 1212 1.20 thorpej goto startover; 1213 1.3 pk } 1214 1.242 maxv if (pp->pr_roflags & PR_USEBMAP) { 1215 1.207 riastrad KASSERTMSG((ph->ph_nmissing < pp->pr_itemsperpage), 1216 1.251 christos "%s: [%s] pool page empty", __func__, pp->pr_wchan); 1217 1.234 maxv v = pr_item_bitmap_get(pp, ph); 1218 1.97 yamt } else { 1219 1.234 maxv v = pr_item_linkedlist_get(pp, ph); 1220 1.97 yamt } 1221 1.20 thorpej pp->pr_nitems--; 1222 1.20 thorpej pp->pr_nout++; 1223 1.6 thorpej if (ph->ph_nmissing == 0) { 1224 1.207 riastrad KASSERT(pp->pr_nidle > 0); 1225 1.6 thorpej pp->pr_nidle--; 1226 1.88 chs 1227 1.88 chs /* 1228 1.88 chs * This page was previously empty. Move it to the list of 1229 1.88 chs * partially-full pages. This page is already curpage. 1230 1.88 chs */ 1231 1.88 chs LIST_REMOVE(ph, ph_pagelist); 1232 1.88 chs LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1233 1.6 thorpej } 1234 1.3 pk ph->ph_nmissing++; 1235 1.97 yamt if (ph->ph_nmissing == pp->pr_itemsperpage) { 1236 1.242 maxv KASSERTMSG(((pp->pr_roflags & PR_USEBMAP) || 1237 1.207 riastrad LIST_EMPTY(&ph->ph_itemlist)), 1238 1.213 christos "%s: [%s] nmissing (%u) inconsistent", __func__, 1239 1.213 christos pp->pr_wchan, ph->ph_nmissing); 1240 1.3 pk /* 1241 1.88 chs * This page is now full. Move it to the full list 1242 1.88 chs * and select a new current page. 1243 1.3 pk */ 1244 1.88 chs LIST_REMOVE(ph, ph_pagelist); 1245 1.88 chs LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist); 1246 1.88 chs pool_update_curpage(pp); 1247 1.1 pk } 1248 1.3 pk 1249 1.3 pk pp->pr_nget++; 1250 1.20 thorpej 1251 1.20 thorpej /* 1252 1.20 thorpej * If we have a low water mark and we are now below that low 1253 1.20 thorpej * water mark, add more items to the pool. 1254 1.20 thorpej */ 1255 1.53 thorpej if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1256 1.20 thorpej /* 1257 1.20 thorpej * XXX: Should we log a warning? Should we set up a timeout 1258 1.20 thorpej * to try again in a second or so? The latter could break 1259 1.20 thorpej * a caller's assumptions about interrupt protection, etc. 1260 1.20 thorpej */ 1261 1.20 thorpej } 1262 1.20 thorpej 1263 1.134 ad mutex_exit(&pp->pr_lock); 1264 1.238 maxv KASSERT((((vaddr_t)v) & (pp->pr_align - 1)) == 0); 1265 1.125 ad FREECHECK_OUT(&pp->pr_freecheck, v); 1266 1.204 maxv pool_redzone_fill(pp, v); 1267 1.262 maxv pool_get_kmsan(pp, v); 1268 1.232 christos if (flags & PR_ZERO) 1269 1.233 maxv memset(v, 0, pp->pr_reqsize); 1270 1.232 christos return v; 1271 1.1 pk } 1272 1.1 pk 1273 1.1 pk /* 1274 1.43 thorpej * Internal version of pool_put(). Pool is already locked/entered. 
1275 1.1 pk */ 1276 1.43 thorpej static void 1277 1.101 thorpej pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq) 1278 1.1 pk { 1279 1.3 pk struct pool_item_header *ph; 1280 1.3 pk 1281 1.134 ad KASSERT(mutex_owned(&pp->pr_lock)); 1282 1.204 maxv pool_redzone_check(pp, v); 1283 1.262 maxv pool_put_kmsan(pp, v); 1284 1.125 ad FREECHECK_IN(&pp->pr_freecheck, v); 1285 1.134 ad LOCKDEBUG_MEM_CHECK(v, pp->pr_size); 1286 1.61 chs 1287 1.207 riastrad KASSERTMSG((pp->pr_nout > 0), 1288 1.213 christos "%s: [%s] putting with none out", __func__, pp->pr_wchan); 1289 1.3 pk 1290 1.121 yamt if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) { 1291 1.213 christos panic("%s: [%s] page header missing", __func__, pp->pr_wchan); 1292 1.3 pk } 1293 1.28 thorpej 1294 1.3 pk /* 1295 1.3 pk * Return to item list. 1296 1.3 pk */ 1297 1.242 maxv if (pp->pr_roflags & PR_USEBMAP) { 1298 1.234 maxv pr_item_bitmap_put(pp, ph, v); 1299 1.97 yamt } else { 1300 1.234 maxv pr_item_linkedlist_put(pp, ph, v); 1301 1.97 yamt } 1302 1.79 thorpej KDASSERT(ph->ph_nmissing != 0); 1303 1.3 pk ph->ph_nmissing--; 1304 1.3 pk pp->pr_nput++; 1305 1.20 thorpej pp->pr_nitems++; 1306 1.20 thorpej pp->pr_nout--; 1307 1.3 pk 1308 1.3 pk /* Cancel "pool empty" condition if it exists */ 1309 1.3 pk if (pp->pr_curpage == NULL) 1310 1.3 pk pp->pr_curpage = ph; 1311 1.3 pk 1312 1.3 pk if (pp->pr_flags & PR_WANTED) { 1313 1.3 pk pp->pr_flags &= ~PR_WANTED; 1314 1.134 ad cv_broadcast(&pp->pr_cv); 1315 1.3 pk } 1316 1.3 pk 1317 1.3 pk /* 1318 1.88 chs * If this page is now empty, do one of two things: 1319 1.21 thorpej * 1320 1.88 chs * (1) If we have more pages than the page high water mark, 1321 1.96 thorpej * free the page back to the system. ONLY CONSIDER 1322 1.90 thorpej * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE 1323 1.90 thorpej * CLAIM. 1324 1.21 thorpej * 1325 1.88 chs * (2) Otherwise, move the page to the empty page list. 1326 1.88 chs * 1327 1.88 chs * Either way, select a new current page (so we use a partially-full 1328 1.88 chs * page if one is available). 1329 1.3 pk */ 1330 1.3 pk if (ph->ph_nmissing == 0) { 1331 1.6 thorpej pp->pr_nidle++; 1332 1.267 chs if (pp->pr_nitems - pp->pr_itemsperpage >= pp->pr_minitems && 1333 1.267 chs pp->pr_npages > pp->pr_minpages && 1334 1.292 chs (pp->pr_npages > pp->pr_maxpages || 1335 1.292 chs pp->pr_nitems > pp->pr_maxitems)) { 1336 1.101 thorpej pr_rmpage(pp, ph, pq); 1337 1.3 pk } else { 1338 1.88 chs LIST_REMOVE(ph, ph_pagelist); 1339 1.88 chs LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1340 1.3 pk 1341 1.21 thorpej /* 1342 1.21 thorpej * Update the timestamp on the page. A page must 1343 1.21 thorpej * be idle for some period of time before it can 1344 1.21 thorpej * be reclaimed by the pagedaemon. This minimizes 1345 1.21 thorpej * ping-pong'ing for memory. 1346 1.151 yamt * 1347 1.151 yamt * note for 64-bit time_t: truncating to 32-bit is not 1348 1.151 yamt * a problem for our usage. 1349 1.21 thorpej */ 1350 1.151 yamt ph->ph_time = time_uptime; 1351 1.1 pk } 1352 1.88 chs pool_update_curpage(pp); 1353 1.1 pk } 1354 1.88 chs 1355 1.21 thorpej /* 1356 1.88 chs * If the page was previously completely full, move it to the 1357 1.88 chs * partially-full list and make it the current page. The next 1358 1.88 chs * allocation will get the item from this page, instead of 1359 1.88 chs * further fragmenting the pool. 
1360 1.21 thorpej */ 1361 1.21 thorpej else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) { 1362 1.88 chs LIST_REMOVE(ph, ph_pagelist); 1363 1.88 chs LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist); 1364 1.21 thorpej pp->pr_curpage = ph; 1365 1.21 thorpej } 1366 1.43 thorpej } 1367 1.43 thorpej 1368 1.56 sommerfe void 1369 1.56 sommerfe pool_put(struct pool *pp, void *v) 1370 1.56 sommerfe { 1371 1.101 thorpej struct pool_pagelist pq; 1372 1.101 thorpej 1373 1.101 thorpej LIST_INIT(&pq); 1374 1.56 sommerfe 1375 1.134 ad mutex_enter(&pp->pr_lock); 1376 1.249 maxv if (!pool_put_quarantine(pp, v, &pq)) { 1377 1.249 maxv pool_do_put(pp, v, &pq); 1378 1.249 maxv } 1379 1.134 ad mutex_exit(&pp->pr_lock); 1380 1.56 sommerfe 1381 1.102 chs pr_pagelist_free(pp, &pq); 1382 1.56 sommerfe } 1383 1.57 sommerfe 1384 1.74 thorpej /* 1385 1.113 yamt * pool_grow: grow a pool by a page. 1386 1.113 yamt * 1387 1.113 yamt * => called with pool locked. 1388 1.113 yamt * => unlock and relock the pool. 1389 1.113 yamt * => return with pool locked. 1390 1.113 yamt */ 1391 1.113 yamt 1392 1.113 yamt static int 1393 1.113 yamt pool_grow(struct pool *pp, int flags) 1394 1.113 yamt { 1395 1.236 maxv struct pool_item_header *ph; 1396 1.237 maxv char *storage; 1397 1.236 maxv 1398 1.209 riastrad /* 1399 1.209 riastrad * If there's a pool_grow in progress, wait for it to complete 1400 1.209 riastrad * and try again from the top. 1401 1.209 riastrad */ 1402 1.209 riastrad if (pp->pr_flags & PR_GROWING) { 1403 1.209 riastrad if (flags & PR_WAITOK) { 1404 1.209 riastrad do { 1405 1.209 riastrad cv_wait(&pp->pr_cv, &pp->pr_lock); 1406 1.209 riastrad } while (pp->pr_flags & PR_GROWING); 1407 1.209 riastrad return ERESTART; 1408 1.209 riastrad } else { 1409 1.294 bouyer if (pp->pr_flags & PR_GROWINGNOWAIT) { 1410 1.294 bouyer /* 1411 1.294 bouyer * This needs an unlock/relock dance so 1412 1.294 bouyer * that the other caller has a chance to 1413 1.294 bouyer * run and actually do the thing. Note 1414 1.294 bouyer * that this is effectively a busy-wait. 1415 1.294 bouyer */ 1416 1.294 bouyer mutex_exit(&pp->pr_lock); 1417 1.294 bouyer mutex_enter(&pp->pr_lock); 1418 1.294 bouyer return ERESTART; 1419 1.294 bouyer } 1420 1.209 riastrad return EWOULDBLOCK; 1421 1.209 riastrad } 1422 1.209 riastrad } 1423 1.209 riastrad pp->pr_flags |= PR_GROWING; 1424 1.220 christos if (flags & PR_WAITOK) 1425 1.220 christos mutex_exit(&pp->pr_lock); 1426 1.220 christos else 1427 1.219 mrg pp->pr_flags |= PR_GROWINGNOWAIT; 1428 1.113 yamt 1429 1.237 maxv storage = pool_allocator_alloc(pp, flags); 1430 1.237 maxv if (__predict_false(storage == NULL)) 1431 1.216 christos goto out; 1432 1.216 christos 1433 1.237 maxv ph = pool_alloc_item_header(pp, storage, flags); 1434 1.216 christos if (__predict_false(ph == NULL)) { 1435 1.237 maxv pool_allocator_free(pp, storage); 1436 1.209 riastrad goto out; 1437 1.113 yamt } 1438 1.113 yamt 1439 1.220 christos if (flags & PR_WAITOK) 1440 1.220 christos mutex_enter(&pp->pr_lock); 1441 1.237 maxv pool_prime_page(pp, storage, ph); 1442 1.113 yamt pp->pr_npagealloc++; 1443 1.216 christos KASSERT(pp->pr_flags & PR_GROWING); 1444 1.219 mrg pp->pr_flags &= ~(PR_GROWING|PR_GROWINGNOWAIT); 1445 1.209 riastrad /* 1446 1.209 riastrad * If anyone was waiting for pool_grow, notify them that we 1447 1.209 riastrad * may have just done it. 
1448 1.209 riastrad */ 1449 1.216 christos cv_broadcast(&pp->pr_cv); 1450 1.216 christos return 0; 1451 1.216 christos out: 1452 1.220 christos if (flags & PR_WAITOK) 1453 1.220 christos mutex_enter(&pp->pr_lock); 1454 1.209 riastrad KASSERT(pp->pr_flags & PR_GROWING); 1455 1.219 mrg pp->pr_flags &= ~(PR_GROWING|PR_GROWINGNOWAIT); 1456 1.216 christos return ENOMEM; 1457 1.113 yamt } 1458 1.113 yamt 1459 1.267 chs void 1460 1.74 thorpej pool_prime(struct pool *pp, int n) 1461 1.74 thorpej { 1462 1.74 thorpej 1463 1.134 ad mutex_enter(&pp->pr_lock); 1464 1.267 chs pp->pr_minpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1465 1.267 chs if (pp->pr_maxpages <= pp->pr_minpages) 1466 1.74 thorpej pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */ 1467 1.267 chs while (pp->pr_npages < pp->pr_minpages) 1468 1.267 chs (void) pool_grow(pp, PR_WAITOK); 1469 1.134 ad mutex_exit(&pp->pr_lock); 1470 1.74 thorpej } 1471 1.55 thorpej 1472 1.55 thorpej /* 1473 1.3 pk * Add a page worth of items to the pool. 1474 1.21 thorpej * 1475 1.21 thorpej * Note, we must be called with the pool descriptor LOCKED. 1476 1.3 pk */ 1477 1.55 thorpej static void 1478 1.128 christos pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph) 1479 1.3 pk { 1480 1.236 maxv const unsigned int align = pp->pr_align; 1481 1.3 pk struct pool_item *pi; 1482 1.128 christos void *cp = storage; 1483 1.55 thorpej int n; 1484 1.36 pk 1485 1.134 ad KASSERT(mutex_owned(&pp->pr_lock)); 1486 1.207 riastrad KASSERTMSG(((pp->pr_roflags & PR_NOALIGN) || 1487 1.207 riastrad (((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) == 0)), 1488 1.213 christos "%s: [%s] unaligned page: %p", __func__, pp->pr_wchan, cp); 1489 1.3 pk 1490 1.3 pk /* 1491 1.3 pk * Insert page header. 1492 1.3 pk */ 1493 1.88 chs LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist); 1494 1.102 chs LIST_INIT(&ph->ph_itemlist); 1495 1.3 pk ph->ph_page = storage; 1496 1.3 pk ph->ph_nmissing = 0; 1497 1.151 yamt ph->ph_time = time_uptime; 1498 1.245 maxv if (pp->pr_roflags & PR_PHINPAGE) 1499 1.245 maxv ph->ph_poolid = pp->pr_poolid; 1500 1.245 maxv else 1501 1.88 chs SPLAY_INSERT(phtree, &pp->pr_phtree, ph); 1502 1.3 pk 1503 1.6 thorpej pp->pr_nidle++; 1504 1.6 thorpej 1505 1.3 pk /* 1506 1.241 maxv * The item space starts after the on-page header, if any. 1507 1.241 maxv */ 1508 1.241 maxv ph->ph_off = pp->pr_itemoffset; 1509 1.241 maxv 1510 1.241 maxv /* 1511 1.3 pk * Color this page. 1512 1.3 pk */ 1513 1.241 maxv ph->ph_off += pp->pr_curcolor; 1514 1.141 yamt cp = (char *)cp + ph->ph_off; 1515 1.3 pk if ((pp->pr_curcolor += align) > pp->pr_maxcolor) 1516 1.3 pk pp->pr_curcolor = 0; 1517 1.3 pk 1518 1.238 maxv KASSERT((((vaddr_t)cp) & (align - 1)) == 0); 1519 1.125 ad 1520 1.3 pk /* 1521 1.3 pk * Insert remaining chunks on the bucket list. 
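 *
 * Two bookkeeping schemes are used below: with PR_USEBMAP the free
 * items are tracked by a per-page bitmap (pr_item_bitmap_init());
 * otherwise a struct pool_item free list is threaded through the
 * items themselves, each stamped with PI_MAGIC when POOL_CHECK_MAGIC
 * is enabled.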
1522 1.3 pk */ 1523 1.3 pk n = pp->pr_itemsperpage; 1524 1.20 thorpej pp->pr_nitems += n; 1525 1.3 pk 1526 1.242 maxv if (pp->pr_roflags & PR_USEBMAP) { 1527 1.234 maxv pr_item_bitmap_init(pp, ph); 1528 1.97 yamt } else { 1529 1.97 yamt while (n--) { 1530 1.97 yamt pi = (struct pool_item *)cp; 1531 1.78 thorpej 1532 1.238 maxv KASSERT((((vaddr_t)pi) & (align - 1)) == 0); 1533 1.3 pk 1534 1.97 yamt /* Insert on page list */ 1535 1.102 chs LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list); 1536 1.229 maxv #ifdef POOL_CHECK_MAGIC 1537 1.97 yamt pi->pi_magic = PI_MAGIC; 1538 1.3 pk #endif 1539 1.128 christos cp = (char *)cp + pp->pr_size; 1540 1.125 ad 1541 1.238 maxv KASSERT((((vaddr_t)cp) & (align - 1)) == 0); 1542 1.97 yamt } 1543 1.3 pk } 1544 1.3 pk 1545 1.3 pk /* 1546 1.3 pk * If the pool was depleted, point at the new page. 1547 1.3 pk */ 1548 1.3 pk if (pp->pr_curpage == NULL) 1549 1.3 pk pp->pr_curpage = ph; 1550 1.3 pk 1551 1.3 pk if (++pp->pr_npages > pp->pr_hiwat) 1552 1.3 pk pp->pr_hiwat = pp->pr_npages; 1553 1.3 pk } 1554 1.3 pk 1555 1.20 thorpej /* 1556 1.52 thorpej * Used by pool_get() when nitems drops below the low water mark. This 1557 1.88 chs * is used to catch up pr_nitems with the low water mark. 1558 1.20 thorpej * 1559 1.21 thorpej * Note 1, we never wait for memory here, we let the caller decide what to do. 1560 1.20 thorpej * 1561 1.73 thorpej * Note 2, we must be called with the pool already locked, and we return 1562 1.20 thorpej * with it locked. 1563 1.20 thorpej */ 1564 1.20 thorpej static int 1565 1.42 thorpej pool_catchup(struct pool *pp) 1566 1.20 thorpej { 1567 1.20 thorpej int error = 0; 1568 1.20 thorpej 1569 1.54 thorpej while (POOL_NEEDS_CATCHUP(pp)) { 1570 1.113 yamt error = pool_grow(pp, PR_NOWAIT); 1571 1.113 yamt if (error) { 1572 1.214 christos if (error == ERESTART) 1573 1.214 christos continue; 1574 1.20 thorpej break; 1575 1.20 thorpej } 1576 1.20 thorpej } 1577 1.113 yamt return error; 1578 1.20 thorpej } 1579 1.20 thorpej 1580 1.88 chs static void 1581 1.88 chs pool_update_curpage(struct pool *pp) 1582 1.88 chs { 1583 1.88 chs 1584 1.88 chs pp->pr_curpage = LIST_FIRST(&pp->pr_partpages); 1585 1.88 chs if (pp->pr_curpage == NULL) { 1586 1.88 chs pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages); 1587 1.88 chs } 1588 1.289 riastrad KASSERTMSG((pp->pr_curpage == NULL) == (pp->pr_nitems == 0), 1589 1.289 riastrad "pp=%p curpage=%p nitems=%u", pp, pp->pr_curpage, pp->pr_nitems); 1590 1.88 chs } 1591 1.88 chs 1592 1.3 pk void 1593 1.42 thorpej pool_setlowat(struct pool *pp, int n) 1594 1.3 pk { 1595 1.15 pk 1596 1.134 ad mutex_enter(&pp->pr_lock); 1597 1.3 pk pp->pr_minitems = n; 1598 1.20 thorpej 1599 1.20 thorpej /* Make sure we're caught up with the newly-set low water mark. */ 1600 1.75 simonb if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1601 1.20 thorpej /* 1602 1.20 thorpej * XXX: Should we log a warning? Should we set up a timeout 1603 1.20 thorpej * to try again in a second or so? The latter could break 1604 1.20 thorpej * a caller's assumptions about interrupt protection, etc. 
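 *
 * Note that pool_catchup() only makes PR_NOWAIT allocations, and it
 * is run again from pool_get() whenever nitems drops below the low
 * water mark, so a failure here is retried on later allocations
 * rather than lost.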
1605 1.20 thorpej */ 1606 1.20 thorpej } 1607 1.21 thorpej 1608 1.134 ad mutex_exit(&pp->pr_lock); 1609 1.3 pk } 1610 1.3 pk 1611 1.3 pk void 1612 1.42 thorpej pool_sethiwat(struct pool *pp, int n) 1613 1.3 pk { 1614 1.15 pk 1615 1.134 ad mutex_enter(&pp->pr_lock); 1616 1.21 thorpej 1617 1.267 chs pp->pr_maxitems = n; 1618 1.21 thorpej 1619 1.134 ad mutex_exit(&pp->pr_lock); 1620 1.3 pk } 1621 1.3 pk 1622 1.20 thorpej void 1623 1.42 thorpej pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1624 1.20 thorpej { 1625 1.20 thorpej 1626 1.134 ad mutex_enter(&pp->pr_lock); 1627 1.20 thorpej 1628 1.20 thorpej pp->pr_hardlimit = n; 1629 1.20 thorpej pp->pr_hardlimit_warning = warnmess; 1630 1.31 thorpej pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1631 1.31 thorpej pp->pr_hardlimit_warning_last.tv_sec = 0; 1632 1.31 thorpej pp->pr_hardlimit_warning_last.tv_usec = 0; 1633 1.20 thorpej 1634 1.267 chs pp->pr_maxpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1635 1.21 thorpej 1636 1.134 ad mutex_exit(&pp->pr_lock); 1637 1.20 thorpej } 1638 1.3 pk 1639 1.277 simonb unsigned int 1640 1.277 simonb pool_nget(struct pool *pp) 1641 1.277 simonb { 1642 1.277 simonb 1643 1.277 simonb return pp->pr_nget; 1644 1.277 simonb } 1645 1.277 simonb 1646 1.277 simonb unsigned int 1647 1.277 simonb pool_nput(struct pool *pp) 1648 1.277 simonb { 1649 1.277 simonb 1650 1.277 simonb return pp->pr_nput; 1651 1.277 simonb } 1652 1.277 simonb 1653 1.3 pk /* 1654 1.3 pk * Release all complete pages that have not been used recently. 1655 1.184 rmind * 1656 1.197 jym * Must not be called from interrupt context. 1657 1.3 pk */ 1658 1.66 thorpej int 1659 1.56 sommerfe pool_reclaim(struct pool *pp) 1660 1.3 pk { 1661 1.3 pk struct pool_item_header *ph, *phnext; 1662 1.61 chs struct pool_pagelist pq; 1663 1.281 riastrad struct pool_cache *pc; 1664 1.151 yamt uint32_t curtime; 1665 1.134 ad bool klock; 1666 1.134 ad int rv; 1667 1.3 pk 1668 1.288 riastrad KASSERT(!cpu_intr_p()); 1669 1.288 riastrad KASSERT(!cpu_softintr_p()); 1670 1.184 rmind 1671 1.68 thorpej if (pp->pr_drain_hook != NULL) { 1672 1.68 thorpej /* 1673 1.68 thorpej * The drain hook must be called with the pool unlocked. 1674 1.68 thorpej */ 1675 1.68 thorpej (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1676 1.68 thorpej } 1677 1.68 thorpej 1678 1.134 ad /* 1679 1.157 ad * XXXSMP Because we do not want to cause non-MPSAFE code 1680 1.157 ad * to block. 1681 1.134 ad */ 1682 1.134 ad if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK || 1683 1.134 ad pp->pr_ipl == IPL_SOFTSERIAL) { 1684 1.134 ad KERNEL_LOCK(1, NULL); 1685 1.134 ad klock = true; 1686 1.134 ad } else 1687 1.134 ad klock = false; 1688 1.134 ad 1689 1.134 ad /* Reclaim items from the pool's cache (if any). 
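 *
 * Constructed objects held in the cache keep their underlying pool
 * items allocated, so the cache is invalidated first; only then can
 * the scan below find completely idle pages to return to the
 * allocator.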
*/ 1690 1.281 riastrad if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) 1691 1.281 riastrad pool_cache_invalidate(pc); 1692 1.134 ad 1693 1.134 ad if (mutex_tryenter(&pp->pr_lock) == 0) { 1694 1.134 ad if (klock) { 1695 1.134 ad KERNEL_UNLOCK_ONE(NULL); 1696 1.134 ad } 1697 1.236 maxv return 0; 1698 1.134 ad } 1699 1.68 thorpej 1700 1.88 chs LIST_INIT(&pq); 1701 1.43 thorpej 1702 1.151 yamt curtime = time_uptime; 1703 1.21 thorpej 1704 1.88 chs for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1705 1.88 chs phnext = LIST_NEXT(ph, ph_pagelist); 1706 1.3 pk 1707 1.3 pk /* Check our minimum page claim */ 1708 1.3 pk if (pp->pr_npages <= pp->pr_minpages) 1709 1.3 pk break; 1710 1.3 pk 1711 1.88 chs KASSERT(ph->ph_nmissing == 0); 1712 1.191 para if (curtime - ph->ph_time < pool_inactive_time) 1713 1.88 chs continue; 1714 1.21 thorpej 1715 1.88 chs /* 1716 1.267 chs * If freeing this page would put us below the minimum free items 1717 1.267 chs * or the minimum pages, stop now. 1718 1.88 chs */ 1719 1.267 chs if (pp->pr_nitems - pp->pr_itemsperpage < pp->pr_minitems || 1720 1.267 chs pp->pr_npages - 1 < pp->pr_minpages) 1721 1.88 chs break; 1722 1.21 thorpej 1723 1.88 chs pr_rmpage(pp, ph, &pq); 1724 1.3 pk } 1725 1.3 pk 1726 1.134 ad mutex_exit(&pp->pr_lock); 1727 1.134 ad 1728 1.134 ad if (LIST_EMPTY(&pq)) 1729 1.134 ad rv = 0; 1730 1.134 ad else { 1731 1.134 ad pr_pagelist_free(pp, &pq); 1732 1.134 ad rv = 1; 1733 1.134 ad } 1734 1.134 ad 1735 1.134 ad if (klock) { 1736 1.134 ad KERNEL_UNLOCK_ONE(NULL); 1737 1.134 ad } 1738 1.66 thorpej 1739 1.236 maxv return rv; 1740 1.3 pk } 1741 1.3 pk 1742 1.3 pk /* 1743 1.197 jym * Drain pools, one at a time. The drained pool is returned within ppp. 1744 1.131 ad * 1745 1.134 ad * Note, must never be called from interrupt context. 1746 1.3 pk */ 1747 1.197 jym bool 1748 1.197 jym pool_drain(struct pool **ppp) 1749 1.3 pk { 1750 1.197 jym bool reclaimed; 1751 1.3 pk struct pool *pp; 1752 1.134 ad 1753 1.145 ad KASSERT(!TAILQ_EMPTY(&pool_head)); 1754 1.3 pk 1755 1.61 chs pp = NULL; 1756 1.134 ad 1757 1.134 ad /* Find next pool to drain, and add a reference. */ 1758 1.134 ad mutex_enter(&pool_head_lock); 1759 1.134 ad do { 1760 1.134 ad if (drainpp == NULL) { 1761 1.145 ad drainpp = TAILQ_FIRST(&pool_head); 1762 1.134 ad } 1763 1.134 ad if (drainpp != NULL) { 1764 1.134 ad pp = drainpp; 1765 1.145 ad drainpp = TAILQ_NEXT(pp, pr_poollist); 1766 1.134 ad } 1767 1.134 ad /* 1768 1.134 ad * Skip completely idle pools. We depend on at least 1769 1.134 ad * one pool in the system being active. 1770 1.134 ad */ 1771 1.134 ad } while (pp == NULL || pp->pr_npages == 0); 1772 1.134 ad pp->pr_refcnt++; 1773 1.134 ad mutex_exit(&pool_head_lock); 1774 1.134 ad 1775 1.134 ad /* Drain the cache (if any) and pool.. */ 1776 1.186 pooka reclaimed = pool_reclaim(pp); 1777 1.134 ad 1778 1.134 ad /* Finally, unlock the pool. */ 1779 1.134 ad mutex_enter(&pool_head_lock); 1780 1.134 ad pp->pr_refcnt--; 1781 1.134 ad cv_broadcast(&pool_busy); 1782 1.134 ad mutex_exit(&pool_head_lock); 1783 1.186 pooka 1784 1.197 jym if (ppp != NULL) 1785 1.197 jym *ppp = pp; 1786 1.197 jym 1787 1.186 pooka return reclaimed; 1788 1.3 pk } 1789 1.3 pk 1790 1.3 pk /* 1791 1.217 mrg * Calculate the total number of pages consumed by pools. 
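 *
 * Each pool contributes pr_npages * pa_pagesz bytes; pools flagged
 * PR_RECURSIVE additionally have pr_nout * pr_size subtracted so that
 * storage already accounted to another pool is not counted twice, and
 * the byte total is converted to a page count with atop().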
1792 1.217 mrg */ 1793 1.217 mrg int 1794 1.217 mrg pool_totalpages(void) 1795 1.217 mrg { 1796 1.250 skrll 1797 1.250 skrll mutex_enter(&pool_head_lock); 1798 1.250 skrll int pages = pool_totalpages_locked(); 1799 1.250 skrll mutex_exit(&pool_head_lock); 1800 1.250 skrll 1801 1.250 skrll return pages; 1802 1.250 skrll } 1803 1.250 skrll 1804 1.250 skrll int 1805 1.250 skrll pool_totalpages_locked(void) 1806 1.250 skrll { 1807 1.217 mrg struct pool *pp; 1808 1.218 mrg uint64_t total = 0; 1809 1.217 mrg 1810 1.218 mrg TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1811 1.285 simonb uint64_t bytes = 1812 1.285 simonb (uint64_t)pp->pr_npages * pp->pr_alloc->pa_pagesz; 1813 1.218 mrg 1814 1.218 mrg if ((pp->pr_roflags & PR_RECURSIVE) != 0) 1815 1.285 simonb bytes -= ((uint64_t)pp->pr_nout * pp->pr_size); 1816 1.218 mrg total += bytes; 1817 1.218 mrg } 1818 1.217 mrg 1819 1.218 mrg return atop(total); 1820 1.217 mrg } 1821 1.217 mrg 1822 1.217 mrg /* 1823 1.3 pk * Diagnostic helpers. 1824 1.3 pk */ 1825 1.21 thorpej 1826 1.25 thorpej void 1827 1.108 yamt pool_printall(const char *modif, void (*pr)(const char *, ...)) 1828 1.108 yamt { 1829 1.108 yamt struct pool *pp; 1830 1.108 yamt 1831 1.145 ad TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 1832 1.108 yamt pool_printit(pp, modif, pr); 1833 1.108 yamt } 1834 1.108 yamt } 1835 1.108 yamt 1836 1.108 yamt void 1837 1.42 thorpej pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1838 1.25 thorpej { 1839 1.25 thorpej 1840 1.25 thorpej if (pp == NULL) { 1841 1.25 thorpej (*pr)("Must specify a pool to print.\n"); 1842 1.25 thorpej return; 1843 1.25 thorpej } 1844 1.25 thorpej 1845 1.25 thorpej pool_print1(pp, modif, pr); 1846 1.25 thorpej } 1847 1.25 thorpej 1848 1.21 thorpej static void 1849 1.124 yamt pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl, 1850 1.97 yamt void (*pr)(const char *, ...)) 1851 1.88 chs { 1852 1.88 chs struct pool_item_header *ph; 1853 1.88 chs 1854 1.88 chs LIST_FOREACH(ph, pl, ph_pagelist) { 1855 1.151 yamt (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n", 1856 1.151 yamt ph->ph_page, ph->ph_nmissing, ph->ph_time); 1857 1.229 maxv #ifdef POOL_CHECK_MAGIC 1858 1.229 maxv struct pool_item *pi; 1859 1.242 maxv if (!(pp->pr_roflags & PR_USEBMAP)) { 1860 1.102 chs LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1861 1.97 yamt if (pi->pi_magic != PI_MAGIC) { 1862 1.97 yamt (*pr)("\t\t\titem %p, magic 0x%x\n", 1863 1.97 yamt pi, pi->pi_magic); 1864 1.97 yamt } 1865 1.88 chs } 1866 1.88 chs } 1867 1.88 chs #endif 1868 1.88 chs } 1869 1.88 chs } 1870 1.88 chs 1871 1.88 chs static void 1872 1.42 thorpej pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1873 1.3 pk { 1874 1.25 thorpej struct pool_item_header *ph; 1875 1.134 ad pool_cache_t pc; 1876 1.134 ad pcg_t *pcg; 1877 1.134 ad pool_cache_cpu_t *cc; 1878 1.271 ad uint64_t cpuhit, cpumiss, pchit, pcmiss; 1879 1.271 ad uint32_t nfull; 1880 1.275 mrg int i; 1881 1.275 mrg bool print_log = false, print_pagelist = false, print_cache = false; 1882 1.275 mrg bool print_short = false, skip_empty = false; 1883 1.25 thorpej char c; 1884 1.25 thorpej 1885 1.25 thorpej while ((c = *modif++) != '\0') { 1886 1.25 thorpej if (c == 'l') 1887 1.275 mrg print_log = true; 1888 1.25 thorpej if (c == 'p') 1889 1.275 mrg print_pagelist = true; 1890 1.44 thorpej if (c == 'c') 1891 1.275 mrg print_cache = true; 1892 1.275 mrg if (c == 's') 1893 1.275 mrg print_short = true; 1894 1.275 mrg if (c == 'S') 1895 1.275 mrg skip_empty = true; 
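		/*
		 * Summary of the modifiers handled above: 'l' selects
		 * print_log, 'p' the page lists, 'c' the cache groups,
		 * 's' the one-line summary, and 'S' skips pools that
		 * have never served a pool_get().
		 */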
1896 1.25 thorpej } 1897 1.25 thorpej 1898 1.275 mrg if (skip_empty && pp->pr_nget == 0) 1899 1.275 mrg return; 1900 1.275 mrg 1901 1.281 riastrad if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) { 1902 1.275 mrg (*pr)("POOLCACHE"); 1903 1.134 ad } else { 1904 1.134 ad (*pr)("POOL"); 1905 1.134 ad } 1906 1.134 ad 1907 1.275 mrg /* Single line output. */ 1908 1.275 mrg if (print_short) { 1909 1.292 chs (*pr)(" %s:%p:%u:%u:%u:%u:%u:%u:%u:%u:%u:%u:%zu\n", 1910 1.275 mrg pp->pr_wchan, pp, pp->pr_size, pp->pr_align, pp->pr_npages, 1911 1.275 mrg pp->pr_nitems, pp->pr_nout, pp->pr_nget, pp->pr_nput, 1912 1.292 chs pp->pr_npagealloc, pp->pr_npagefree, pp->pr_nidle, 1913 1.292 chs (size_t)pp->pr_npagealloc * pp->pr_alloc->pa_pagesz); 1914 1.275 mrg return; 1915 1.275 mrg } 1916 1.275 mrg 1917 1.292 chs (*pr)(" %s: itemsize %u, totalmem %zu align %u, ioff %u, roflags 0x%08x\n", 1918 1.292 chs pp->pr_wchan, pp->pr_size, 1919 1.292 chs (size_t)pp->pr_npagealloc * pp->pr_alloc->pa_pagesz, 1920 1.292 chs pp->pr_align, pp->pr_itemoffset, pp->pr_roflags); 1921 1.275 mrg (*pr)("\tpool %p, alloc %p\n", pp, pp->pr_alloc); 1922 1.25 thorpej (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1923 1.25 thorpej pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1924 1.25 thorpej (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1925 1.25 thorpej pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1926 1.25 thorpej 1927 1.134 ad (*pr)("\tnget %lu, nfail %lu, nput %lu\n", 1928 1.25 thorpej pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1929 1.25 thorpej (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1930 1.25 thorpej pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1931 1.25 thorpej 1932 1.275 mrg if (!print_pagelist) 1933 1.25 thorpej goto skip_pagelist; 1934 1.25 thorpej 1935 1.88 chs if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL) 1936 1.88 chs (*pr)("\n\tempty page list:\n"); 1937 1.97 yamt pool_print_pagelist(pp, &pp->pr_emptypages, pr); 1938 1.88 chs if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL) 1939 1.88 chs (*pr)("\n\tfull page list:\n"); 1940 1.97 yamt pool_print_pagelist(pp, &pp->pr_fullpages, pr); 1941 1.88 chs if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL) 1942 1.88 chs (*pr)("\n\tpartial-page list:\n"); 1943 1.97 yamt pool_print_pagelist(pp, &pp->pr_partpages, pr); 1944 1.88 chs 1945 1.25 thorpej if (pp->pr_curpage == NULL) 1946 1.25 thorpej (*pr)("\tno current page\n"); 1947 1.25 thorpej else 1948 1.25 thorpej (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1949 1.25 thorpej 1950 1.25 thorpej skip_pagelist: 1951 1.275 mrg if (print_log) 1952 1.25 thorpej goto skip_log; 1953 1.25 thorpej 1954 1.25 thorpej (*pr)("\n"); 1955 1.3 pk 1956 1.25 thorpej skip_log: 1957 1.44 thorpej 1958 1.102 chs #define PR_GROUPLIST(pcg) \ 1959 1.102 chs (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \ 1960 1.142 ad for (i = 0; i < pcg->pcg_size; i++) { \ 1961 1.102 chs if (pcg->pcg_objects[i].pcgo_pa != \ 1962 1.102 chs POOL_PADDR_INVALID) { \ 1963 1.102 chs (*pr)("\t\t\t%p, 0x%llx\n", \ 1964 1.102 chs pcg->pcg_objects[i].pcgo_va, \ 1965 1.102 chs (unsigned long long) \ 1966 1.102 chs pcg->pcg_objects[i].pcgo_pa); \ 1967 1.102 chs } else { \ 1968 1.102 chs (*pr)("\t\t\t%p\n", \ 1969 1.102 chs pcg->pcg_objects[i].pcgo_va); \ 1970 1.102 chs } \ 1971 1.102 chs } 1972 1.102 chs 1973 1.134 ad if (pc != NULL) { 1974 1.134 ad cpuhit = 0; 1975 1.134 ad cpumiss = 0; 1976 1.271 ad pcmiss = 0; 1977 1.271 ad nfull = 0; 1978 
1.183 ad for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 1979 1.134 ad if ((cc = pc->pc_cpus[i]) == NULL) 1980 1.134 ad continue; 1981 1.134 ad cpuhit += cc->cc_hits; 1982 1.134 ad cpumiss += cc->cc_misses; 1983 1.271 ad pcmiss += cc->cc_pcmisses; 1984 1.271 ad nfull += cc->cc_nfull; 1985 1.134 ad } 1986 1.271 ad pchit = cpumiss - pcmiss; 1987 1.134 ad (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss); 1988 1.271 ad (*pr)("\tcache layer hits %llu misses %llu\n", pchit, pcmiss); 1989 1.271 ad (*pr)("\tcache layer full groups %u\n", nfull); 1990 1.134 ad if (print_cache) { 1991 1.134 ad (*pr)("\tfull cache groups:\n"); 1992 1.134 ad for (pcg = pc->pc_fullgroups; pcg != NULL; 1993 1.134 ad pcg = pcg->pcg_next) { 1994 1.134 ad PR_GROUPLIST(pcg); 1995 1.134 ad } 1996 1.103 chs } 1997 1.44 thorpej } 1998 1.102 chs #undef PR_GROUPLIST 1999 1.88 chs } 2000 1.88 chs 2001 1.88 chs static int 2002 1.88 chs pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph) 2003 1.88 chs { 2004 1.88 chs struct pool_item *pi; 2005 1.128 christos void *page; 2006 1.88 chs int n; 2007 1.88 chs 2008 1.121 yamt if ((pp->pr_roflags & PR_NOALIGN) == 0) { 2009 1.253 maxv page = POOL_OBJ_TO_PAGE(pp, ph); 2010 1.121 yamt if (page != ph->ph_page && 2011 1.121 yamt (pp->pr_roflags & PR_PHINPAGE) != 0) { 2012 1.121 yamt if (label != NULL) 2013 1.121 yamt printf("%s: ", label); 2014 1.121 yamt printf("pool(%p:%s): page inconsistency: page %p;" 2015 1.121 yamt " at page head addr %p (p %p)\n", pp, 2016 1.121 yamt pp->pr_wchan, ph->ph_page, 2017 1.121 yamt ph, page); 2018 1.121 yamt return 1; 2019 1.121 yamt } 2020 1.88 chs } 2021 1.3 pk 2022 1.242 maxv if ((pp->pr_roflags & PR_USEBMAP) != 0) 2023 1.97 yamt return 0; 2024 1.97 yamt 2025 1.102 chs for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0; 2026 1.88 chs pi != NULL; 2027 1.102 chs pi = LIST_NEXT(pi,pi_list), n++) { 2028 1.88 chs 2029 1.229 maxv #ifdef POOL_CHECK_MAGIC 2030 1.88 chs if (pi->pi_magic != PI_MAGIC) { 2031 1.88 chs if (label != NULL) 2032 1.88 chs printf("%s: ", label); 2033 1.88 chs printf("pool(%s): free list modified: magic=%x;" 2034 1.121 yamt " page %p; item ordinal %d; addr %p\n", 2035 1.88 chs pp->pr_wchan, pi->pi_magic, ph->ph_page, 2036 1.121 yamt n, pi); 2037 1.88 chs panic("pool"); 2038 1.88 chs } 2039 1.88 chs #endif 2040 1.121 yamt if ((pp->pr_roflags & PR_NOALIGN) != 0) { 2041 1.121 yamt continue; 2042 1.121 yamt } 2043 1.253 maxv page = POOL_OBJ_TO_PAGE(pp, pi); 2044 1.88 chs if (page == ph->ph_page) 2045 1.88 chs continue; 2046 1.88 chs 2047 1.88 chs if (label != NULL) 2048 1.88 chs printf("%s: ", label); 2049 1.88 chs printf("pool(%p:%s): page inconsistency: page %p;" 2050 1.88 chs " item ordinal %d; addr %p (p %p)\n", pp, 2051 1.88 chs pp->pr_wchan, ph->ph_page, 2052 1.88 chs n, pi, page); 2053 1.88 chs return 1; 2054 1.88 chs } 2055 1.88 chs return 0; 2056 1.3 pk } 2057 1.3 pk 2058 1.88 chs 2059 1.3 pk int 2060 1.42 thorpej pool_chk(struct pool *pp, const char *label) 2061 1.3 pk { 2062 1.3 pk struct pool_item_header *ph; 2063 1.3 pk int r = 0; 2064 1.3 pk 2065 1.134 ad mutex_enter(&pp->pr_lock); 2066 1.88 chs LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 2067 1.88 chs r = pool_chk_page(pp, label, ph); 2068 1.88 chs if (r) { 2069 1.88 chs goto out; 2070 1.88 chs } 2071 1.88 chs } 2072 1.88 chs LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 2073 1.88 chs r = pool_chk_page(pp, label, ph); 2074 1.88 chs if (r) { 2075 1.3 pk goto out; 2076 1.3 pk } 2077 1.88 chs } 2078 1.88 chs LIST_FOREACH(ph, 
&pp->pr_partpages, ph_pagelist) { 2079 1.88 chs r = pool_chk_page(pp, label, ph); 2080 1.88 chs if (r) { 2081 1.3 pk goto out; 2082 1.3 pk } 2083 1.3 pk } 2084 1.88 chs 2085 1.3 pk out: 2086 1.134 ad mutex_exit(&pp->pr_lock); 2087 1.236 maxv return r; 2088 1.43 thorpej } 2089 1.43 thorpej 2090 1.43 thorpej /* 2091 1.43 thorpej * pool_cache_init: 2092 1.43 thorpej * 2093 1.43 thorpej * Initialize a pool cache. 2094 1.134 ad */ 2095 1.134 ad pool_cache_t 2096 1.134 ad pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags, 2097 1.134 ad const char *wchan, struct pool_allocator *palloc, int ipl, 2098 1.134 ad int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg) 2099 1.134 ad { 2100 1.134 ad pool_cache_t pc; 2101 1.134 ad 2102 1.134 ad pc = pool_get(&cache_pool, PR_WAITOK); 2103 1.134 ad if (pc == NULL) 2104 1.134 ad return NULL; 2105 1.134 ad 2106 1.134 ad pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan, 2107 1.134 ad palloc, ipl, ctor, dtor, arg); 2108 1.134 ad 2109 1.134 ad return pc; 2110 1.134 ad } 2111 1.134 ad 2112 1.134 ad /* 2113 1.134 ad * pool_cache_bootstrap: 2114 1.43 thorpej * 2115 1.134 ad * Kernel-private version of pool_cache_init(). The caller 2116 1.134 ad * provides initial storage. 2117 1.43 thorpej */ 2118 1.43 thorpej void 2119 1.134 ad pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align, 2120 1.134 ad u_int align_offset, u_int flags, const char *wchan, 2121 1.134 ad struct pool_allocator *palloc, int ipl, 2122 1.134 ad int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), 2123 1.43 thorpej void *arg) 2124 1.43 thorpej { 2125 1.134 ad CPU_INFO_ITERATOR cii; 2126 1.145 ad pool_cache_t pc1; 2127 1.134 ad struct cpu_info *ci; 2128 1.134 ad struct pool *pp; 2129 1.280 riastrad unsigned int ppflags; 2130 1.134 ad 2131 1.134 ad pp = &pc->pc_pool; 2132 1.280 riastrad ppflags = flags; 2133 1.134 ad if (ctor == NULL) { 2134 1.261 christos ctor = NO_CTOR; 2135 1.134 ad } 2136 1.134 ad if (dtor == NULL) { 2137 1.261 christos dtor = NO_DTOR; 2138 1.279 thorpej } else { 2139 1.279 thorpej /* 2140 1.279 thorpej * If we have a destructor, then the pool layer does not 2141 1.279 thorpej * need to worry about PR_PSERIALIZE. 2142 1.279 thorpej */ 2143 1.279 thorpej ppflags &= ~PR_PSERIALIZE; 2144 1.134 ad } 2145 1.43 thorpej 2146 1.279 thorpej pool_init(pp, size, align, align_offset, ppflags, wchan, palloc, ipl); 2147 1.279 thorpej 2148 1.134 ad pc->pc_fullgroups = NULL; 2149 1.134 ad pc->pc_partgroups = NULL; 2150 1.43 thorpej pc->pc_ctor = ctor; 2151 1.43 thorpej pc->pc_dtor = dtor; 2152 1.43 thorpej pc->pc_arg = arg; 2153 1.134 ad pc->pc_refcnt = 0; 2154 1.279 thorpej pc->pc_roflags = flags; 2155 1.136 yamt pc->pc_freecheck = NULL; 2156 1.134 ad 2157 1.142 ad if ((flags & PR_LARGECACHE) != 0) { 2158 1.142 ad pc->pc_pcgsize = PCG_NOBJECTS_LARGE; 2159 1.163 ad pc->pc_pcgpool = &pcg_large_pool; 2160 1.271 ad pc->pc_pcgcache = &pcg_large_cache; 2161 1.142 ad } else { 2162 1.142 ad pc->pc_pcgsize = PCG_NOBJECTS_NORMAL; 2163 1.163 ad pc->pc_pcgpool = &pcg_normal_pool; 2164 1.271 ad pc->pc_pcgcache = &pcg_normal_cache; 2165 1.142 ad } 2166 1.142 ad 2167 1.134 ad /* Allocate per-CPU caches. */ 2168 1.134 ad memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus)); 2169 1.134 ad pc->pc_ncpu = 0; 2170 1.139 ad if (ncpu < 2) { 2171 1.137 ad /* XXX For sparc: boot CPU is not attached yet. 
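 *
 * Only the boot CPU's per-CPU data is set up here; CPUs that attach
 * later are handled by pool_cache_cpu_init() below, which walks every
 * cache on pool_cache_head.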
*/ 2172 1.137 ad pool_cache_cpu_init1(curcpu(), pc); 2173 1.137 ad } else { 2174 1.137 ad for (CPU_INFO_FOREACH(cii, ci)) { 2175 1.137 ad pool_cache_cpu_init1(ci, pc); 2176 1.137 ad } 2177 1.134 ad } 2178 1.145 ad 2179 1.145 ad /* Add to list of all pools. */ 2180 1.145 ad if (__predict_true(!cold)) 2181 1.134 ad mutex_enter(&pool_head_lock); 2182 1.145 ad TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) { 2183 1.145 ad if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0) 2184 1.145 ad break; 2185 1.145 ad } 2186 1.145 ad if (pc1 == NULL) 2187 1.145 ad TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist); 2188 1.145 ad else 2189 1.145 ad TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist); 2190 1.145 ad if (__predict_true(!cold)) 2191 1.134 ad mutex_exit(&pool_head_lock); 2192 1.145 ad 2193 1.281 riastrad atomic_store_release(&pp->pr_cache, pc); 2194 1.43 thorpej } 2195 1.43 thorpej 2196 1.43 thorpej /* 2197 1.43 thorpej * pool_cache_destroy: 2198 1.43 thorpej * 2199 1.43 thorpej * Destroy a pool cache. 2200 1.43 thorpej */ 2201 1.43 thorpej void 2202 1.134 ad pool_cache_destroy(pool_cache_t pc) 2203 1.43 thorpej { 2204 1.191 para 2205 1.191 para pool_cache_bootstrap_destroy(pc); 2206 1.191 para pool_put(&cache_pool, pc); 2207 1.191 para } 2208 1.191 para 2209 1.191 para /* 2210 1.191 para * pool_cache_bootstrap_destroy: 2211 1.191 para * 2212 1.191 para * Destroy a pool cache. 2213 1.191 para */ 2214 1.191 para void 2215 1.191 para pool_cache_bootstrap_destroy(pool_cache_t pc) 2216 1.191 para { 2217 1.134 ad struct pool *pp = &pc->pc_pool; 2218 1.175 jym u_int i; 2219 1.134 ad 2220 1.134 ad /* Remove it from the global list. */ 2221 1.134 ad mutex_enter(&pool_head_lock); 2222 1.134 ad while (pc->pc_refcnt != 0) 2223 1.134 ad cv_wait(&pool_busy, &pool_head_lock); 2224 1.145 ad TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist); 2225 1.134 ad mutex_exit(&pool_head_lock); 2226 1.43 thorpej 2227 1.43 thorpej /* First, invalidate the entire cache. */ 2228 1.43 thorpej pool_cache_invalidate(pc); 2229 1.43 thorpej 2230 1.134 ad /* Disassociate it from the pool. */ 2231 1.134 ad mutex_enter(&pp->pr_lock); 2232 1.281 riastrad atomic_store_relaxed(&pp->pr_cache, NULL); 2233 1.134 ad mutex_exit(&pp->pr_lock); 2234 1.134 ad 2235 1.134 ad /* Destroy per-CPU data */ 2236 1.183 ad for (i = 0; i < __arraycount(pc->pc_cpus); i++) 2237 1.175 jym pool_cache_invalidate_cpu(pc, i); 2238 1.134 ad 2239 1.134 ad /* Finally, destroy it. */ 2240 1.134 ad pool_destroy(pp); 2241 1.134 ad } 2242 1.134 ad 2243 1.134 ad /* 2244 1.134 ad * pool_cache_cpu_init1: 2245 1.134 ad * 2246 1.134 ad * Called for each pool_cache whenever a new CPU is attached. 2247 1.134 ad */ 2248 1.134 ad static void 2249 1.134 ad pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc) 2250 1.134 ad { 2251 1.134 ad pool_cache_cpu_t *cc; 2252 1.137 ad int index; 2253 1.134 ad 2254 1.137 ad index = ci->ci_index; 2255 1.137 ad 2256 1.183 ad KASSERT(index < __arraycount(pc->pc_cpus)); 2257 1.134 ad 2258 1.137 ad if ((cc = pc->pc_cpus[index]) != NULL) { 2259 1.134 ad return; 2260 1.134 ad } 2261 1.134 ad 2262 1.134 ad /* 2263 1.134 ad * The first CPU is 'free'. This needs to be the case for 2264 1.134 ad * bootstrap - we may not be able to allocate yet. 
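 *
 * pc_cpu0 is embedded in the pool_cache itself, so the first CPU
 * needs no allocation at all; every subsequent CPU gets its
 * pool_cache_cpu_t from cache_cpu_pool with PR_WAITOK.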
2265 1.134 ad */ 2266 1.134 ad if (pc->pc_ncpu == 0) { 2267 1.134 ad cc = &pc->pc_cpu0; 2268 1.134 ad pc->pc_ncpu = 1; 2269 1.134 ad } else { 2270 1.134 ad pc->pc_ncpu++; 2271 1.134 ad cc = pool_get(&cache_cpu_pool, PR_WAITOK); 2272 1.134 ad } 2273 1.134 ad 2274 1.271 ad cc->cc_current = __UNCONST(&pcg_dummy); 2275 1.271 ad cc->cc_previous = __UNCONST(&pcg_dummy); 2276 1.271 ad cc->cc_pcgcache = pc->pc_pcgcache; 2277 1.134 ad cc->cc_hits = 0; 2278 1.134 ad cc->cc_misses = 0; 2279 1.271 ad cc->cc_pcmisses = 0; 2280 1.271 ad cc->cc_contended = 0; 2281 1.271 ad cc->cc_nfull = 0; 2282 1.271 ad cc->cc_npart = 0; 2283 1.134 ad 2284 1.137 ad pc->pc_cpus[index] = cc; 2285 1.43 thorpej } 2286 1.43 thorpej 2287 1.134 ad /* 2288 1.134 ad * pool_cache_cpu_init: 2289 1.134 ad * 2290 1.134 ad * Called whenever a new CPU is attached. 2291 1.134 ad */ 2292 1.134 ad void 2293 1.134 ad pool_cache_cpu_init(struct cpu_info *ci) 2294 1.43 thorpej { 2295 1.134 ad pool_cache_t pc; 2296 1.134 ad 2297 1.134 ad mutex_enter(&pool_head_lock); 2298 1.145 ad TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) { 2299 1.134 ad pc->pc_refcnt++; 2300 1.134 ad mutex_exit(&pool_head_lock); 2301 1.43 thorpej 2302 1.134 ad pool_cache_cpu_init1(ci, pc); 2303 1.43 thorpej 2304 1.134 ad mutex_enter(&pool_head_lock); 2305 1.134 ad pc->pc_refcnt--; 2306 1.134 ad cv_broadcast(&pool_busy); 2307 1.134 ad } 2308 1.134 ad mutex_exit(&pool_head_lock); 2309 1.43 thorpej } 2310 1.43 thorpej 2311 1.134 ad /* 2312 1.134 ad * pool_cache_reclaim: 2313 1.134 ad * 2314 1.134 ad * Reclaim memory from a pool cache. 2315 1.134 ad */ 2316 1.134 ad bool 2317 1.134 ad pool_cache_reclaim(pool_cache_t pc) 2318 1.43 thorpej { 2319 1.43 thorpej 2320 1.134 ad return pool_reclaim(&pc->pc_pool); 2321 1.134 ad } 2322 1.43 thorpej 2323 1.278 thorpej static inline void 2324 1.278 thorpej pool_cache_pre_destruct(pool_cache_t pc) 2325 1.278 thorpej { 2326 1.278 thorpej /* 2327 1.279 thorpej * Perform a passive serialization barrier before destructing 2328 1.279 thorpej * a batch of one or more objects. 2329 1.278 thorpej */ 2330 1.279 thorpej if (__predict_false(pc_has_pser(pc))) { 2331 1.279 thorpej pool_barrier(); 2332 1.278 thorpej } 2333 1.278 thorpej } 2334 1.278 thorpej 2335 1.136 yamt static void 2336 1.136 yamt pool_cache_destruct_object1(pool_cache_t pc, void *object) 2337 1.136 yamt { 2338 1.136 yamt (*pc->pc_dtor)(pc->pc_arg, object); 2339 1.136 yamt pool_put(&pc->pc_pool, object); 2340 1.136 yamt } 2341 1.136 yamt 2342 1.134 ad /* 2343 1.134 ad * pool_cache_destruct_object: 2344 1.134 ad * 2345 1.134 ad * Force destruction of an object and its release back into 2346 1.134 ad * the pool. 2347 1.134 ad */ 2348 1.134 ad void 2349 1.134 ad pool_cache_destruct_object(pool_cache_t pc, void *object) 2350 1.134 ad { 2351 1.134 ad 2352 1.136 yamt FREECHECK_IN(&pc->pc_freecheck, object); 2353 1.136 yamt 2354 1.278 thorpej pool_cache_pre_destruct(pc); 2355 1.136 yamt pool_cache_destruct_object1(pc, object); 2356 1.43 thorpej } 2357 1.43 thorpej 2358 1.134 ad /* 2359 1.134 ad * pool_cache_invalidate_groups: 2360 1.134 ad * 2361 1.271 ad * Invalidate a chain of groups and destruct all objects. Return the 2362 1.271 ad * number of groups that were invalidated. 
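 *
 * A single passive-serialization barrier (pool_cache_pre_destruct())
 * covers the whole chain, rather than one barrier per object.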
2363 1.134 ad */ 2364 1.271 ad static int 2365 1.134 ad pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg) 2366 1.102 chs { 2367 1.134 ad void *object; 2368 1.134 ad pcg_t *next; 2369 1.271 ad int i, n; 2370 1.134 ad 2371 1.278 thorpej if (pcg == NULL) { 2372 1.278 thorpej return 0; 2373 1.278 thorpej } 2374 1.278 thorpej 2375 1.278 thorpej pool_cache_pre_destruct(pc); 2376 1.278 thorpej 2377 1.271 ad for (n = 0; pcg != NULL; pcg = next, n++) { 2378 1.134 ad next = pcg->pcg_next; 2379 1.134 ad 2380 1.134 ad for (i = 0; i < pcg->pcg_avail; i++) { 2381 1.134 ad object = pcg->pcg_objects[i].pcgo_va; 2382 1.136 yamt pool_cache_destruct_object1(pc, object); 2383 1.134 ad } 2384 1.102 chs 2385 1.142 ad if (pcg->pcg_size == PCG_NOBJECTS_LARGE) { 2386 1.142 ad pool_put(&pcg_large_pool, pcg); 2387 1.142 ad } else { 2388 1.142 ad KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL); 2389 1.142 ad pool_put(&pcg_normal_pool, pcg); 2390 1.142 ad } 2391 1.102 chs } 2392 1.271 ad return n; 2393 1.102 chs } 2394 1.102 chs 2395 1.43 thorpej /* 2396 1.134 ad * pool_cache_invalidate: 2397 1.43 thorpej * 2398 1.134 ad * Invalidate a pool cache (destruct and release all of the 2399 1.134 ad * cached objects). Does not reclaim objects from the pool. 2400 1.176 thorpej * 2401 1.176 thorpej * Note: For pool caches that provide constructed objects, there 2402 1.176 thorpej * is an assumption that another level of synchronization is occurring 2403 1.176 thorpej * between the input to the constructor and the cache invalidation. 2404 1.196 jym * 2405 1.196 jym * Invalidation is a costly process and should not be called from 2406 1.196 jym * interrupt context. 2407 1.43 thorpej */ 2408 1.134 ad void 2409 1.134 ad pool_cache_invalidate(pool_cache_t pc) 2410 1.134 ad { 2411 1.196 jym uint64_t where; 2412 1.271 ad pcg_t *pcg; 2413 1.271 ad int n, s; 2414 1.196 jym 2415 1.288 riastrad KASSERT(!cpu_intr_p()); 2416 1.288 riastrad KASSERT(!cpu_softintr_p()); 2417 1.176 thorpej 2418 1.177 jym if (ncpu < 2 || !mp_online) { 2419 1.176 thorpej /* 2420 1.176 thorpej * We might be called early enough in the boot process 2421 1.176 thorpej * for the CPU data structures to not be fully initialized. 2422 1.196 jym * In this case, transfer the content of the local CPU's 2423 1.196 jym * cache back into global cache as only this CPU is currently 2424 1.196 jym * running. 2425 1.176 thorpej */ 2426 1.196 jym pool_cache_transfer(pc); 2427 1.176 thorpej } else { 2428 1.176 thorpej /* 2429 1.196 jym * Signal all CPUs that they must transfer their local 2430 1.196 jym * cache back to the global pool then wait for the xcall to 2431 1.196 jym * complete. 2432 1.176 thorpej */ 2433 1.261 christos where = xc_broadcast(0, 2434 1.261 christos __FPTRCAST(xcfunc_t, pool_cache_transfer), pc, NULL); 2435 1.176 thorpej xc_wait(where); 2436 1.176 thorpej } 2437 1.196 jym 2438 1.271 ad /* Now dequeue and invalidate everything. 
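 *
 * This truncates the global empty-group caches as well as this
 * cache's own full and partial lists; the number of full and partial
 * groups destroyed is charged back to the current CPU's
 * cc_nfull/cc_npart counters below.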
*/ 2439 1.271 ad pcg = pool_pcg_trunc(&pcg_normal_cache); 2440 1.271 ad (void)pool_cache_invalidate_groups(pc, pcg); 2441 1.271 ad 2442 1.271 ad pcg = pool_pcg_trunc(&pcg_large_cache); 2443 1.271 ad (void)pool_cache_invalidate_groups(pc, pcg); 2444 1.271 ad 2445 1.271 ad pcg = pool_pcg_trunc(&pc->pc_fullgroups); 2446 1.271 ad n = pool_cache_invalidate_groups(pc, pcg); 2447 1.271 ad s = splvm(); 2448 1.271 ad ((pool_cache_cpu_t *)pc->pc_cpus[curcpu()->ci_index])->cc_nfull -= n; 2449 1.271 ad splx(s); 2450 1.271 ad 2451 1.271 ad pcg = pool_pcg_trunc(&pc->pc_partgroups); 2452 1.271 ad n = pool_cache_invalidate_groups(pc, pcg); 2453 1.271 ad s = splvm(); 2454 1.271 ad ((pool_cache_cpu_t *)pc->pc_cpus[curcpu()->ci_index])->cc_npart -= n; 2455 1.271 ad splx(s); 2456 1.134 ad } 2457 1.134 ad 2458 1.175 jym /* 2459 1.175 jym * pool_cache_invalidate_cpu: 2460 1.175 jym * 2461 1.175 jym * Invalidate all CPU-bound cached objects in pool cache, the CPU being 2462 1.175 jym * identified by its associated index. 2463 1.175 jym * It is caller's responsibility to ensure that no operation is 2464 1.175 jym * taking place on this pool cache while doing this invalidation. 2465 1.175 jym * WARNING: as no inter-CPU locking is enforced, trying to invalidate 2466 1.175 jym * pool cached objects from a CPU different from the one currently running 2467 1.175 jym * may result in an undefined behaviour. 2468 1.175 jym */ 2469 1.175 jym static void 2470 1.175 jym pool_cache_invalidate_cpu(pool_cache_t pc, u_int index) 2471 1.175 jym { 2472 1.175 jym pool_cache_cpu_t *cc; 2473 1.175 jym pcg_t *pcg; 2474 1.175 jym 2475 1.175 jym if ((cc = pc->pc_cpus[index]) == NULL) 2476 1.175 jym return; 2477 1.175 jym 2478 1.175 jym if ((pcg = cc->cc_current) != &pcg_dummy) { 2479 1.175 jym pcg->pcg_next = NULL; 2480 1.175 jym pool_cache_invalidate_groups(pc, pcg); 2481 1.175 jym } 2482 1.175 jym if ((pcg = cc->cc_previous) != &pcg_dummy) { 2483 1.175 jym pcg->pcg_next = NULL; 2484 1.175 jym pool_cache_invalidate_groups(pc, pcg); 2485 1.175 jym } 2486 1.175 jym if (cc != &pc->pc_cpu0) 2487 1.175 jym pool_put(&cache_cpu_pool, cc); 2488 1.175 jym 2489 1.175 jym } 2490 1.175 jym 2491 1.134 ad void 2492 1.134 ad pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg) 2493 1.134 ad { 2494 1.134 ad 2495 1.134 ad pool_set_drain_hook(&pc->pc_pool, fn, arg); 2496 1.134 ad } 2497 1.134 ad 2498 1.134 ad void 2499 1.134 ad pool_cache_setlowat(pool_cache_t pc, int n) 2500 1.134 ad { 2501 1.134 ad 2502 1.134 ad pool_setlowat(&pc->pc_pool, n); 2503 1.134 ad } 2504 1.134 ad 2505 1.134 ad void 2506 1.134 ad pool_cache_sethiwat(pool_cache_t pc, int n) 2507 1.134 ad { 2508 1.134 ad 2509 1.134 ad pool_sethiwat(&pc->pc_pool, n); 2510 1.134 ad } 2511 1.134 ad 2512 1.134 ad void 2513 1.134 ad pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap) 2514 1.134 ad { 2515 1.134 ad 2516 1.134 ad pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap); 2517 1.134 ad } 2518 1.134 ad 2519 1.267 chs void 2520 1.267 chs pool_cache_prime(pool_cache_t pc, int n) 2521 1.267 chs { 2522 1.267 chs 2523 1.267 chs pool_prime(&pc->pc_pool, n); 2524 1.267 chs } 2525 1.267 chs 2526 1.277 simonb unsigned int 2527 1.277 simonb pool_cache_nget(pool_cache_t pc) 2528 1.277 simonb { 2529 1.277 simonb 2530 1.277 simonb return pool_nget(&pc->pc_pool); 2531 1.277 simonb } 2532 1.277 simonb 2533 1.277 simonb unsigned int 2534 1.277 simonb pool_cache_nput(pool_cache_t pc) 2535 1.277 simonb { 2536 1.277 simonb 2537 1.277 simonb return 
pool_nput(&pc->pc_pool); 2538 1.277 simonb } 2539 1.277 simonb 2540 1.271 ad /* 2541 1.271 ad * pool_pcg_get: 2542 1.271 ad * 2543 1.271 ad * Get a cache group from the specified list. Return true if 2544 1.271 ad * contention was encountered. Must be called at IPL_VM because 2545 1.271 ad * of spin wait vs. kernel_lock. 2546 1.271 ad */ 2547 1.271 ad static int 2548 1.271 ad pool_pcg_get(pcg_t *volatile *head, pcg_t **pcgp) 2549 1.271 ad { 2550 1.271 ad int count = SPINLOCK_BACKOFF_MIN; 2551 1.271 ad pcg_t *o, *n; 2552 1.271 ad 2553 1.271 ad for (o = atomic_load_relaxed(head);; o = n) { 2554 1.271 ad if (__predict_false(o == &pcg_dummy)) { 2555 1.271 ad /* Wait for concurrent get to complete. */ 2556 1.271 ad SPINLOCK_BACKOFF(count); 2557 1.271 ad n = atomic_load_relaxed(head); 2558 1.271 ad continue; 2559 1.271 ad } 2560 1.271 ad if (__predict_false(o == NULL)) { 2561 1.271 ad break; 2562 1.271 ad } 2563 1.271 ad /* Lock out concurrent get/put. */ 2564 1.271 ad n = atomic_cas_ptr(head, o, __UNCONST(&pcg_dummy)); 2565 1.271 ad if (o == n) { 2566 1.271 ad /* Fetch pointer to next item and then unlock. */ 2567 1.271 ad membar_datadep_consumer(); /* alpha */ 2568 1.271 ad n = atomic_load_relaxed(&o->pcg_next); 2569 1.271 ad atomic_store_release(head, n); 2570 1.271 ad break; 2571 1.271 ad } 2572 1.271 ad } 2573 1.271 ad *pcgp = o; 2574 1.271 ad return count != SPINLOCK_BACKOFF_MIN; 2575 1.271 ad } 2576 1.271 ad 2577 1.271 ad /* 2578 1.271 ad * pool_pcg_trunc: 2579 1.271 ad * 2580 1.271 ad * Chop out entire list of pool cache groups. 2581 1.271 ad */ 2582 1.271 ad static pcg_t * 2583 1.271 ad pool_pcg_trunc(pcg_t *volatile *head) 2584 1.271 ad { 2585 1.271 ad int count = SPINLOCK_BACKOFF_MIN, s; 2586 1.271 ad pcg_t *o, *n; 2587 1.271 ad 2588 1.271 ad s = splvm(); 2589 1.271 ad for (o = atomic_load_relaxed(head);; o = n) { 2590 1.271 ad if (__predict_false(o == &pcg_dummy)) { 2591 1.271 ad /* Wait for concurrent get to complete. */ 2592 1.271 ad SPINLOCK_BACKOFF(count); 2593 1.271 ad n = atomic_load_relaxed(head); 2594 1.271 ad continue; 2595 1.271 ad } 2596 1.271 ad n = atomic_cas_ptr(head, o, NULL); 2597 1.271 ad if (o == n) { 2598 1.271 ad splx(s); 2599 1.271 ad membar_datadep_consumer(); /* alpha */ 2600 1.271 ad return o; 2601 1.271 ad } 2602 1.271 ad } 2603 1.271 ad } 2604 1.271 ad 2605 1.271 ad /* 2606 1.271 ad * pool_pcg_put: 2607 1.271 ad * 2608 1.271 ad * Put a pool cache group to the specified list. Return true if 2609 1.271 ad * contention was encountered. Must be called at IPL_VM because of 2610 1.271 ad * spin wait vs. kernel_lock. 2611 1.271 ad */ 2612 1.271 ad static int 2613 1.271 ad pool_pcg_put(pcg_t *volatile *head, pcg_t *pcg) 2614 1.271 ad { 2615 1.271 ad int count = SPINLOCK_BACKOFF_MIN; 2616 1.271 ad pcg_t *o, *n; 2617 1.271 ad 2618 1.271 ad for (o = atomic_load_relaxed(head);; o = n) { 2619 1.271 ad if (__predict_false(o == &pcg_dummy)) { 2620 1.271 ad /* Wait for concurrent get to complete. 
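 *
 * &pcg_dummy is the transient "locked" marker installed by
 * pool_pcg_get() while it unlinks the head; every lock-free operation
 * on the list spins until the real head pointer is stored back with
 * atomic_store_release().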
*/ 2621 1.271 ad SPINLOCK_BACKOFF(count); 2622 1.271 ad n = atomic_load_relaxed(head); 2623 1.271 ad continue; 2624 1.271 ad } 2625 1.271 ad pcg->pcg_next = o; 2626 1.282 riastrad membar_release(); 2627 1.271 ad n = atomic_cas_ptr(head, o, pcg); 2628 1.271 ad if (o == n) { 2629 1.271 ad return count != SPINLOCK_BACKOFF_MIN; 2630 1.271 ad } 2631 1.271 ad } 2632 1.271 ad } 2633 1.271 ad 2634 1.162 ad static bool __noinline 2635 1.271 ad pool_cache_get_slow(pool_cache_t pc, pool_cache_cpu_t *cc, int s, 2636 1.271 ad void **objectp, paddr_t *pap, int flags) 2637 1.43 thorpej { 2638 1.134 ad pcg_t *pcg, *cur; 2639 1.43 thorpej void *object; 2640 1.58 thorpej 2641 1.168 yamt KASSERT(cc->cc_current->pcg_avail == 0); 2642 1.168 yamt KASSERT(cc->cc_previous->pcg_avail == 0); 2643 1.168 yamt 2644 1.134 ad cc->cc_misses++; 2645 1.43 thorpej 2646 1.134 ad /* 2647 1.271 ad * If there's a full group, release our empty group back to the 2648 1.271 ad * cache. Install the full group as cc_current and return. 2649 1.134 ad */ 2650 1.271 ad cc->cc_contended += pool_pcg_get(&pc->pc_fullgroups, &pcg); 2651 1.271 ad if (__predict_true(pcg != NULL)) { 2652 1.271 ad KASSERT(pcg->pcg_avail == pcg->pcg_size); 2653 1.162 ad if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) { 2654 1.134 ad KASSERT(cur->pcg_avail == 0); 2655 1.271 ad (void)pool_pcg_put(cc->cc_pcgcache, cur); 2656 1.87 thorpej } 2657 1.271 ad cc->cc_nfull--; 2658 1.134 ad cc->cc_current = pcg; 2659 1.162 ad return true; 2660 1.134 ad } 2661 1.134 ad 2662 1.134 ad /* 2663 1.134 ad * Nothing available locally or in cache. Take the slow 2664 1.134 ad * path: fetch a new object from the pool and construct 2665 1.134 ad * it. 2666 1.134 ad */ 2667 1.271 ad cc->cc_pcmisses++; 2668 1.162 ad splx(s); 2669 1.134 ad 2670 1.134 ad object = pool_get(&pc->pc_pool, flags); 2671 1.134 ad *objectp = object; 2672 1.211 riastrad if (__predict_false(object == NULL)) { 2673 1.265 chs KASSERT((flags & (PR_NOWAIT|PR_LIMITFAIL)) != 0); 2674 1.162 ad return false; 2675 1.211 riastrad } 2676 1.125 ad 2677 1.162 ad if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) { 2678 1.134 ad pool_put(&pc->pc_pool, object); 2679 1.134 ad *objectp = NULL; 2680 1.162 ad return false; 2681 1.43 thorpej } 2682 1.43 thorpej 2683 1.238 maxv KASSERT((((vaddr_t)object) & (pc->pc_pool.pr_align - 1)) == 0); 2684 1.43 thorpej 2685 1.134 ad if (pap != NULL) { 2686 1.134 ad #ifdef POOL_VTOPHYS 2687 1.134 ad *pap = POOL_VTOPHYS(object); 2688 1.134 ad #else 2689 1.134 ad *pap = POOL_PADDR_INVALID; 2690 1.134 ad #endif 2691 1.102 chs } 2692 1.43 thorpej 2693 1.125 ad FREECHECK_OUT(&pc->pc_freecheck, object); 2694 1.162 ad return false; 2695 1.43 thorpej } 2696 1.43 thorpej 2697 1.43 thorpej /* 2698 1.134 ad * pool_cache_get{,_paddr}: 2699 1.43 thorpej * 2700 1.134 ad * Get an object from a pool cache (optionally returning 2701 1.134 ad * the physical address of the object). 
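 *
 * Minimal usage sketch, for illustration only ("foo_cache" and
 * "struct foo" are hypothetical, not from this file); pool_cache_get()
 * and pool_cache_put() are the usual wrappers that do not track a
 * physical address:
 *
 *	struct foo *f;
 *
 *	f = pool_cache_get(foo_cache, PR_WAITOK);
 *	if (f == NULL)
 *		return ENOMEM;
 *	...
 *	pool_cache_put(foo_cache, f);
 *
 * Even with PR_WAITOK the NULL check is needed when the cache has a
 * constructor, as noted at the end of this function.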
2702 1.43 thorpej */ 2703 1.134 ad void * 2704 1.134 ad pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) 2705 1.43 thorpej { 2706 1.134 ad pool_cache_cpu_t *cc; 2707 1.134 ad pcg_t *pcg; 2708 1.134 ad void *object; 2709 1.60 thorpej int s; 2710 1.43 thorpej 2711 1.215 christos KASSERT(!(flags & PR_NOWAIT) != !(flags & PR_WAITOK)); 2712 1.290 riastrad if (pc->pc_pool.pr_ipl == IPL_NONE && 2713 1.290 riastrad __predict_true(!cold) && 2714 1.290 riastrad __predict_true(panicstr == NULL)) { 2715 1.290 riastrad KASSERTMSG(!cpu_intr_p(), 2716 1.290 riastrad "%s: [%s] is IPL_NONE, but called from interrupt context", 2717 1.290 riastrad __func__, pc->pc_pool.pr_wchan); 2718 1.290 riastrad KASSERTMSG(!cpu_softintr_p(), 2719 1.290 riastrad "%s: [%s] is IPL_NONE," 2720 1.290 riastrad " but called from soft interrupt context", 2721 1.290 riastrad __func__, pc->pc_pool.pr_wchan); 2722 1.290 riastrad } 2723 1.184 rmind 2724 1.155 ad if (flags & PR_WAITOK) { 2725 1.154 yamt ASSERT_SLEEPABLE(); 2726 1.155 ad } 2727 1.125 ad 2728 1.270 maxv if (flags & PR_NOWAIT) { 2729 1.270 maxv if (fault_inject()) 2730 1.270 maxv return NULL; 2731 1.270 maxv } 2732 1.270 maxv 2733 1.162 ad /* Lock out interrupts and disable preemption. */ 2734 1.162 ad s = splvm(); 2735 1.165 yamt while (/* CONSTCOND */ true) { 2736 1.134 ad /* Try and allocate an object from the current group. */ 2737 1.162 ad cc = pc->pc_cpus[curcpu()->ci_index]; 2738 1.134 ad pcg = cc->cc_current; 2739 1.162 ad if (__predict_true(pcg->pcg_avail > 0)) { 2740 1.134 ad object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va; 2741 1.162 ad if (__predict_false(pap != NULL)) 2742 1.134 ad *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa; 2743 1.148 yamt #if defined(DIAGNOSTIC) 2744 1.134 ad pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL; 2745 1.163 ad KASSERT(pcg->pcg_avail < pcg->pcg_size); 2746 1.134 ad KASSERT(object != NULL); 2747 1.163 ad #endif 2748 1.134 ad cc->cc_hits++; 2749 1.162 ad splx(s); 2750 1.134 ad FREECHECK_OUT(&pc->pc_freecheck, object); 2751 1.204 maxv pool_redzone_fill(&pc->pc_pool, object); 2752 1.262 maxv pool_cache_get_kmsan(pc, object); 2753 1.134 ad return object; 2754 1.43 thorpej } 2755 1.43 thorpej 2756 1.43 thorpej /* 2757 1.134 ad * That failed. If the previous group isn't empty, swap 2758 1.134 ad * it with the current group and allocate from there. 2759 1.43 thorpej */ 2760 1.134 ad pcg = cc->cc_previous; 2761 1.162 ad if (__predict_true(pcg->pcg_avail > 0)) { 2762 1.134 ad cc->cc_previous = cc->cc_current; 2763 1.134 ad cc->cc_current = pcg; 2764 1.134 ad continue; 2765 1.43 thorpej } 2766 1.43 thorpej 2767 1.134 ad /* 2768 1.134 ad * Can't allocate from either group: try the slow path. 2769 1.134 ad * If get_slow() allocated an object for us, or if 2770 1.162 ad * no more objects are available, it will return false. 2771 1.134 ad * Otherwise, we need to retry. 2772 1.134 ad */ 2773 1.271 ad if (!pool_cache_get_slow(pc, cc, s, &object, pap, flags)) { 2774 1.269 maxv if (object != NULL) { 2775 1.269 maxv kmsan_orig(object, pc->pc_pool.pr_size, 2776 1.269 maxv KMSAN_TYPE_POOL, __RET_ADDR); 2777 1.269 maxv } 2778 1.165 yamt break; 2779 1.269 maxv } 2780 1.165 yamt } 2781 1.43 thorpej 2782 1.211 riastrad /* 2783 1.211 riastrad * We would like to KASSERT(object || (flags & PR_NOWAIT)), but 2784 1.211 riastrad * pool_cache_get can fail even in the PR_WAITOK case, if the 2785 1.211 riastrad * constructor fails. 
2786 1.211 riastrad */ 2787 1.134 ad return object; 2788 1.51 thorpej } 2789 1.51 thorpej 2790 1.162 ad static bool __noinline 2791 1.271 ad pool_cache_put_slow(pool_cache_t pc, pool_cache_cpu_t *cc, int s, void *object) 2792 1.51 thorpej { 2793 1.163 ad pcg_t *pcg, *cur; 2794 1.51 thorpej 2795 1.168 yamt KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size); 2796 1.168 yamt KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size); 2797 1.168 yamt 2798 1.134 ad cc->cc_misses++; 2799 1.43 thorpej 2800 1.171 ad /* 2801 1.271 ad * Try to get an empty group from the cache. If there are no empty 2802 1.271 ad * groups in the cache then allocate one. 2803 1.171 ad */ 2804 1.271 ad (void)pool_pcg_get(cc->cc_pcgcache, &pcg); 2805 1.271 ad if (__predict_false(pcg == NULL)) { 2806 1.171 ad if (__predict_true(!pool_cache_disable)) { 2807 1.171 ad pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); 2808 1.171 ad } 2809 1.171 ad if (__predict_true(pcg != NULL)) { 2810 1.171 ad pcg->pcg_avail = 0; 2811 1.171 ad pcg->pcg_size = pc->pc_pcgsize; 2812 1.171 ad } 2813 1.171 ad } 2814 1.171 ad 2815 1.162 ad /* 2816 1.271 ad * If there's a empty group, release our full group back to the 2817 1.271 ad * cache. Install the empty group to the local CPU and return. 2818 1.162 ad */ 2819 1.163 ad if (pcg != NULL) { 2820 1.134 ad KASSERT(pcg->pcg_avail == 0); 2821 1.162 ad if (__predict_false(cc->cc_previous == &pcg_dummy)) { 2822 1.146 ad cc->cc_previous = pcg; 2823 1.146 ad } else { 2824 1.162 ad cur = cc->cc_current; 2825 1.162 ad if (__predict_true(cur != &pcg_dummy)) { 2826 1.163 ad KASSERT(cur->pcg_avail == cur->pcg_size); 2827 1.271 ad cc->cc_contended += 2828 1.271 ad pool_pcg_put(&pc->pc_fullgroups, cur); 2829 1.271 ad cc->cc_nfull++; 2830 1.146 ad } 2831 1.146 ad cc->cc_current = pcg; 2832 1.146 ad } 2833 1.162 ad return true; 2834 1.102 chs } 2835 1.105 christos 2836 1.134 ad /* 2837 1.162 ad * Nothing available locally or in cache, and we didn't 2838 1.162 ad * allocate an empty group. Take the slow path and destroy 2839 1.162 ad * the object here and now. 2840 1.134 ad */ 2841 1.271 ad cc->cc_pcmisses++; 2842 1.162 ad splx(s); 2843 1.162 ad pool_cache_destruct_object(pc, object); 2844 1.105 christos 2845 1.162 ad return false; 2846 1.236 maxv } 2847 1.102 chs 2848 1.43 thorpej /* 2849 1.134 ad * pool_cache_put{,_paddr}: 2850 1.43 thorpej * 2851 1.134 ad * Put an object back to the pool cache (optionally caching the 2852 1.134 ad * physical address of the object). 2853 1.43 thorpej */ 2854 1.101 thorpej void 2855 1.134 ad pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa) 2856 1.43 thorpej { 2857 1.134 ad pool_cache_cpu_t *cc; 2858 1.134 ad pcg_t *pcg; 2859 1.134 ad int s; 2860 1.101 thorpej 2861 1.172 yamt KASSERT(object != NULL); 2862 1.262 maxv pool_cache_put_kmsan(pc, object); 2863 1.229 maxv pool_cache_redzone_check(pc, object); 2864 1.134 ad FREECHECK_IN(&pc->pc_freecheck, object); 2865 1.101 thorpej 2866 1.253 maxv if (pc->pc_pool.pr_roflags & PR_PHINPAGE) { 2867 1.253 maxv pc_phinpage_check(pc, object); 2868 1.253 maxv } 2869 1.253 maxv 2870 1.268 maxv if (pool_cache_put_nocache(pc, object)) { 2871 1.249 maxv return; 2872 1.249 maxv } 2873 1.249 maxv 2874 1.162 ad /* Lock out interrupts and disable preemption. */ 2875 1.162 ad s = splvm(); 2876 1.165 yamt while (/* CONSTCOND */ true) { 2877 1.134 ad /* If the current group isn't full, release it there. 
*/ 2878 1.162 ad cc = pc->pc_cpus[curcpu()->ci_index]; 2879 1.134 ad pcg = cc->cc_current; 2880 1.162 ad if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2881 1.134 ad pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object; 2882 1.134 ad pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa; 2883 1.134 ad pcg->pcg_avail++; 2884 1.134 ad cc->cc_hits++; 2885 1.162 ad splx(s); 2886 1.134 ad return; 2887 1.134 ad } 2888 1.43 thorpej 2889 1.134 ad /* 2890 1.162 ad * That failed. If the previous group isn't full, swap 2891 1.134 ad * it with the current group and try again. 2892 1.134 ad */ 2893 1.134 ad pcg = cc->cc_previous; 2894 1.162 ad if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) { 2895 1.134 ad cc->cc_previous = cc->cc_current; 2896 1.134 ad cc->cc_current = pcg; 2897 1.134 ad continue; 2898 1.134 ad } 2899 1.43 thorpej 2900 1.134 ad /* 2901 1.236 maxv * Can't free to either group: try the slow path. 2902 1.134 ad * If put_slow() releases the object for us, it 2903 1.162 ad * will return false. Otherwise we need to retry. 2904 1.134 ad */ 2905 1.271 ad if (!pool_cache_put_slow(pc, cc, s, object)) 2906 1.165 yamt break; 2907 1.165 yamt } 2908 1.43 thorpej } 2909 1.43 thorpej 2910 1.43 thorpej /* 2911 1.196 jym * pool_cache_transfer: 2912 1.43 thorpej * 2913 1.134 ad * Transfer objects from the per-CPU cache to the global cache. 2914 1.134 ad * Run within a cross-call thread. 2915 1.43 thorpej */ 2916 1.43 thorpej static void 2917 1.196 jym pool_cache_transfer(pool_cache_t pc) 2918 1.43 thorpej { 2919 1.134 ad pool_cache_cpu_t *cc; 2920 1.271 ad pcg_t *prev, *cur; 2921 1.162 ad int s; 2922 1.134 ad 2923 1.162 ad s = splvm(); 2924 1.162 ad cc = pc->pc_cpus[curcpu()->ci_index]; 2925 1.134 ad cur = cc->cc_current; 2926 1.169 yamt cc->cc_current = __UNCONST(&pcg_dummy); 2927 1.134 ad prev = cc->cc_previous; 2928 1.169 yamt cc->cc_previous = __UNCONST(&pcg_dummy); 2929 1.162 ad if (cur != &pcg_dummy) { 2930 1.142 ad if (cur->pcg_avail == cur->pcg_size) { 2931 1.271 ad (void)pool_pcg_put(&pc->pc_fullgroups, cur); 2932 1.271 ad cc->cc_nfull++; 2933 1.134 ad } else if (cur->pcg_avail == 0) { 2934 1.271 ad (void)pool_pcg_put(pc->pc_pcgcache, cur); 2935 1.134 ad } else { 2936 1.271 ad (void)pool_pcg_put(&pc->pc_partgroups, cur); 2937 1.271 ad cc->cc_npart++; 2938 1.134 ad } 2939 1.134 ad } 2940 1.162 ad if (prev != &pcg_dummy) { 2941 1.142 ad if (prev->pcg_avail == prev->pcg_size) { 2942 1.271 ad (void)pool_pcg_put(&pc->pc_fullgroups, prev); 2943 1.271 ad cc->cc_nfull++; 2944 1.134 ad } else if (prev->pcg_avail == 0) { 2945 1.271 ad (void)pool_pcg_put(pc->pc_pcgcache, prev); 2946 1.134 ad } else { 2947 1.271 ad (void)pool_pcg_put(&pc->pc_partgroups, prev); 2948 1.271 ad cc->cc_npart++; 2949 1.134 ad } 2950 1.134 ad } 2951 1.134 ad splx(s); 2952 1.3 pk } 2953 1.66 thorpej 2954 1.208 chs static int 2955 1.208 chs pool_bigidx(size_t size) 2956 1.208 chs { 2957 1.208 chs int i; 2958 1.208 chs 2959 1.208 chs for (i = 0; i < __arraycount(pool_allocator_big); i++) { 2960 1.208 chs if (1 << (i + POOL_ALLOCATOR_BIG_BASE) >= size) 2961 1.208 chs return i; 2962 1.208 chs } 2963 1.208 chs panic("pool item size %zu too large, use a custom allocator", size); 2964 1.208 chs } 2965 1.208 chs 2966 1.117 yamt static void * 2967 1.117 yamt pool_allocator_alloc(struct pool *pp, int flags) 2968 1.66 thorpej { 2969 1.117 yamt struct pool_allocator *pa = pp->pr_alloc; 2970 1.66 thorpej void *res; 2971 1.66 thorpej 2972 1.117 yamt res = (*pa->pa_alloc)(pp, flags); 2973 1.117 yamt return res; 2974 1.66 thorpej } 2975 1.66 
thorpej 2976 1.117 yamt static void 2977 1.66 thorpej pool_allocator_free(struct pool *pp, void *v) 2978 1.66 thorpej { 2979 1.66 thorpej struct pool_allocator *pa = pp->pr_alloc; 2980 1.66 thorpej 2981 1.229 maxv if (pp->pr_redzone) { 2982 1.279 thorpej KASSERT(!pp_has_pser(pp)); 2983 1.248 maxv kasan_mark(v, pa->pa_pagesz, pa->pa_pagesz, 0); 2984 1.279 thorpej } else if (__predict_false(pp_has_pser(pp))) { 2985 1.279 thorpej /* 2986 1.279 thorpej * Perform a passive serialization barrier before freeing 2987 1.279 thorpej * the pool page back to the system. 2988 1.279 thorpej */ 2989 1.279 thorpej pool_barrier(); 2990 1.229 maxv } 2991 1.66 thorpej (*pa->pa_free)(pp, v); 2992 1.66 thorpej } 2993 1.66 thorpej 2994 1.66 thorpej void * 2995 1.124 yamt pool_page_alloc(struct pool *pp, int flags) 2996 1.66 thorpej { 2997 1.192 rmind const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 2998 1.191 para vmem_addr_t va; 2999 1.192 rmind int ret; 3000 1.191 para 3001 1.192 rmind ret = uvm_km_kmem_alloc(kmem_va_arena, pp->pr_alloc->pa_pagesz, 3002 1.192 rmind vflags | VM_INSTANTFIT, &va); 3003 1.66 thorpej 3004 1.192 rmind return ret ? NULL : (void *)va; 3005 1.66 thorpej } 3006 1.66 thorpej 3007 1.66 thorpej void 3008 1.124 yamt pool_page_free(struct pool *pp, void *v) 3009 1.66 thorpej { 3010 1.66 thorpej 3011 1.191 para uvm_km_kmem_free(kmem_va_arena, (vaddr_t)v, pp->pr_alloc->pa_pagesz); 3012 1.98 yamt } 3013 1.98 yamt 3014 1.98 yamt static void * 3015 1.124 yamt pool_page_alloc_meta(struct pool *pp, int flags) 3016 1.98 yamt { 3017 1.192 rmind const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 3018 1.192 rmind vmem_addr_t va; 3019 1.192 rmind int ret; 3020 1.191 para 3021 1.192 rmind ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 3022 1.192 rmind vflags | VM_INSTANTFIT, &va); 3023 1.98 yamt 3024 1.192 rmind return ret ? 
NULL : (void *)va; 3025 1.98 yamt } 3026 1.98 yamt 3027 1.98 yamt static void 3028 1.124 yamt pool_page_free_meta(struct pool *pp, void *v) 3029 1.98 yamt { 3030 1.98 yamt 3031 1.192 rmind vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 3032 1.66 thorpej } 3033 1.66 thorpej 3034 1.262 maxv #ifdef KMSAN 3035 1.262 maxv static inline void 3036 1.262 maxv pool_get_kmsan(struct pool *pp, void *p) 3037 1.262 maxv { 3038 1.262 maxv kmsan_orig(p, pp->pr_size, KMSAN_TYPE_POOL, __RET_ADDR); 3039 1.262 maxv kmsan_mark(p, pp->pr_size, KMSAN_STATE_UNINIT); 3040 1.262 maxv } 3041 1.262 maxv 3042 1.262 maxv static inline void 3043 1.262 maxv pool_put_kmsan(struct pool *pp, void *p) 3044 1.262 maxv { 3045 1.262 maxv kmsan_mark(p, pp->pr_size, KMSAN_STATE_INITED); 3046 1.262 maxv } 3047 1.262 maxv 3048 1.262 maxv static inline void 3049 1.262 maxv pool_cache_get_kmsan(pool_cache_t pc, void *p) 3050 1.262 maxv { 3051 1.262 maxv if (__predict_false(pc_has_ctor(pc))) { 3052 1.262 maxv return; 3053 1.262 maxv } 3054 1.262 maxv pool_get_kmsan(&pc->pc_pool, p); 3055 1.262 maxv } 3056 1.262 maxv 3057 1.262 maxv static inline void 3058 1.262 maxv pool_cache_put_kmsan(pool_cache_t pc, void *p) 3059 1.262 maxv { 3060 1.262 maxv pool_put_kmsan(&pc->pc_pool, p); 3061 1.262 maxv } 3062 1.262 maxv #endif 3063 1.262 maxv 3064 1.249 maxv #ifdef POOL_QUARANTINE 3065 1.249 maxv static void 3066 1.249 maxv pool_quarantine_init(struct pool *pp) 3067 1.249 maxv { 3068 1.249 maxv pp->pr_quar.rotor = 0; 3069 1.249 maxv memset(&pp->pr_quar, 0, sizeof(pp->pr_quar)); 3070 1.249 maxv } 3071 1.249 maxv 3072 1.249 maxv static void 3073 1.249 maxv pool_quarantine_flush(struct pool *pp) 3074 1.249 maxv { 3075 1.249 maxv pool_quar_t *quar = &pp->pr_quar; 3076 1.249 maxv struct pool_pagelist pq; 3077 1.249 maxv size_t i; 3078 1.249 maxv 3079 1.249 maxv LIST_INIT(&pq); 3080 1.249 maxv 3081 1.249 maxv mutex_enter(&pp->pr_lock); 3082 1.249 maxv for (i = 0; i < POOL_QUARANTINE_DEPTH; i++) { 3083 1.249 maxv if (quar->list[i] == 0) 3084 1.249 maxv continue; 3085 1.249 maxv pool_do_put(pp, (void *)quar->list[i], &pq); 3086 1.249 maxv } 3087 1.249 maxv mutex_exit(&pp->pr_lock); 3088 1.249 maxv 3089 1.249 maxv pr_pagelist_free(pp, &pq); 3090 1.249 maxv } 3091 1.249 maxv 3092 1.249 maxv static bool 3093 1.249 maxv pool_put_quarantine(struct pool *pp, void *v, struct pool_pagelist *pq) 3094 1.249 maxv { 3095 1.249 maxv pool_quar_t *quar = &pp->pr_quar; 3096 1.249 maxv uintptr_t old; 3097 1.249 maxv 3098 1.249 maxv if (pp->pr_roflags & PR_NOTOUCH) { 3099 1.249 maxv return false; 3100 1.249 maxv } 3101 1.249 maxv 3102 1.249 maxv pool_redzone_check(pp, v); 3103 1.249 maxv 3104 1.249 maxv old = quar->list[quar->rotor]; 3105 1.249 maxv quar->list[quar->rotor] = (uintptr_t)v; 3106 1.249 maxv quar->rotor = (quar->rotor + 1) % POOL_QUARANTINE_DEPTH; 3107 1.249 maxv if (old != 0) { 3108 1.249 maxv pool_do_put(pp, (void *)old, pq); 3109 1.249 maxv } 3110 1.249 maxv 3111 1.249 maxv return true; 3112 1.249 maxv } 3113 1.268 maxv #endif 3114 1.249 maxv 3115 1.268 maxv #ifdef POOL_NOCACHE 3116 1.249 maxv static bool 3117 1.268 maxv pool_cache_put_nocache(pool_cache_t pc, void *p) 3118 1.249 maxv { 3119 1.249 maxv pool_cache_destruct_object(pc, p); 3120 1.249 maxv return true; 3121 1.249 maxv } 3122 1.249 maxv #endif 3123 1.249 maxv 3124 1.204 maxv #ifdef POOL_REDZONE 3125 1.204 maxv #if defined(_LP64) 3126 1.204 maxv # define PRIME 0x9e37fffffffc0000UL 3127 1.204 maxv #else /* defined(_LP64) */ 3128 1.204 maxv # define PRIME 0x9e3779b1 3129 
1.204 maxv #endif /* defined(_LP64) */ 3130 1.204 maxv #define STATIC_BYTE 0xFE 3131 1.204 maxv CTASSERT(POOL_REDZONE_SIZE > 1); 3132 1.204 maxv 3133 1.224 maxv #ifndef KASAN 3134 1.204 maxv static inline uint8_t 3135 1.204 maxv pool_pattern_generate(const void *p) 3136 1.204 maxv { 3137 1.204 maxv return (uint8_t)(((uintptr_t)p) * PRIME 3138 1.204 maxv >> ((sizeof(uintptr_t) - sizeof(uint8_t))) * CHAR_BIT); 3139 1.204 maxv } 3140 1.224 maxv #endif 3141 1.204 maxv 3142 1.204 maxv static void 3143 1.204 maxv pool_redzone_init(struct pool *pp, size_t requested_size) 3144 1.204 maxv { 3145 1.227 maxv size_t redzsz; 3146 1.204 maxv size_t nsz; 3147 1.204 maxv 3148 1.227 maxv #ifdef KASAN 3149 1.227 maxv redzsz = requested_size; 3150 1.227 maxv kasan_add_redzone(&redzsz); 3151 1.227 maxv redzsz -= requested_size; 3152 1.227 maxv #else 3153 1.227 maxv redzsz = POOL_REDZONE_SIZE; 3154 1.227 maxv #endif 3155 1.227 maxv 3156 1.204 maxv if (pp->pr_roflags & PR_NOTOUCH) { 3157 1.204 maxv pp->pr_redzone = false; 3158 1.204 maxv return; 3159 1.204 maxv } 3160 1.204 maxv 3161 1.204 maxv /* 3162 1.204 maxv * We may have extended the requested size earlier; check if 3163 1.204 maxv * there's naturally space in the padding for a red zone. 3164 1.204 maxv */ 3165 1.227 maxv if (pp->pr_size - requested_size >= redzsz) { 3166 1.229 maxv pp->pr_reqsize_with_redzone = requested_size + redzsz; 3167 1.204 maxv pp->pr_redzone = true; 3168 1.204 maxv return; 3169 1.204 maxv } 3170 1.204 maxv 3171 1.204 maxv /* 3172 1.204 maxv * No space in the natural padding; check if we can extend a 3173 1.204 maxv * bit the size of the pool. 3174 1.276 mrg * 3175 1.276 mrg * Avoid using redzone for allocations half of a page or larger. 3176 1.276 mrg * For pagesize items, we'd waste a whole new page (could be 3177 1.276 mrg * unmapped?), and for half pagesize items, approximately half 3178 1.276 mrg * the space is lost (eg, 4K pages, you get one 2K allocation.) 3179 1.204 maxv */ 3180 1.227 maxv nsz = roundup(pp->pr_size + redzsz, pp->pr_align); 3181 1.276 mrg if (nsz <= (pp->pr_alloc->pa_pagesz / 2)) { 3182 1.204 maxv /* Ok, we can */ 3183 1.204 maxv pp->pr_size = nsz; 3184 1.229 maxv pp->pr_reqsize_with_redzone = requested_size + redzsz; 3185 1.204 maxv pp->pr_redzone = true; 3186 1.204 maxv } else { 3187 1.204 maxv /* No space for a red zone... snif :'( */ 3188 1.204 maxv pp->pr_redzone = false; 3189 1.274 riastrad aprint_debug("pool redzone disabled for '%s'\n", pp->pr_wchan); 3190 1.204 maxv } 3191 1.204 maxv } 3192 1.204 maxv 3193 1.204 maxv static void 3194 1.204 maxv pool_redzone_fill(struct pool *pp, void *p) 3195 1.204 maxv { 3196 1.224 maxv if (!pp->pr_redzone) 3197 1.224 maxv return; 3198 1.279 thorpej KASSERT(!pp_has_pser(pp)); 3199 1.224 maxv #ifdef KASAN 3200 1.248 maxv kasan_mark(p, pp->pr_reqsize, pp->pr_reqsize_with_redzone, 3201 1.248 maxv KASAN_POOL_REDZONE); 3202 1.224 maxv #else 3203 1.204 maxv uint8_t *cp, pat; 3204 1.204 maxv const uint8_t *ep; 3205 1.204 maxv 3206 1.204 maxv cp = (uint8_t *)p + pp->pr_reqsize; 3207 1.204 maxv ep = cp + POOL_REDZONE_SIZE; 3208 1.204 maxv 3209 1.204 maxv /* 3210 1.204 maxv * We really don't want the first byte of the red zone to be '\0'; 3211 1.204 maxv * an off-by-one in a string may not be properly detected. 3212 1.204 maxv */ 3213 1.204 maxv pat = pool_pattern_generate(cp); 3214 1.204 maxv *cp = (pat == '\0') ? 
STATIC_BYTE: pat; 3215 1.204 maxv cp++; 3216 1.204 maxv 3217 1.204 maxv while (cp < ep) { 3218 1.204 maxv *cp = pool_pattern_generate(cp); 3219 1.204 maxv cp++; 3220 1.204 maxv } 3221 1.224 maxv #endif 3222 1.204 maxv } 3223 1.204 maxv 3224 1.204 maxv static void 3225 1.204 maxv pool_redzone_check(struct pool *pp, void *p) 3226 1.204 maxv { 3227 1.224 maxv if (!pp->pr_redzone) 3228 1.224 maxv return; 3229 1.279 thorpej KASSERT(!pp_has_pser(pp)); 3230 1.224 maxv #ifdef KASAN 3231 1.248 maxv kasan_mark(p, 0, pp->pr_reqsize_with_redzone, KASAN_POOL_FREED); 3232 1.224 maxv #else 3233 1.204 maxv uint8_t *cp, pat, expected; 3234 1.204 maxv const uint8_t *ep; 3235 1.204 maxv 3236 1.204 maxv cp = (uint8_t *)p + pp->pr_reqsize; 3237 1.204 maxv ep = cp + POOL_REDZONE_SIZE; 3238 1.204 maxv 3239 1.204 maxv pat = pool_pattern_generate(cp); 3240 1.204 maxv expected = (pat == '\0') ? STATIC_BYTE: pat; 3241 1.264 maxv if (__predict_false(*cp != expected)) { 3242 1.264 maxv panic("%s: [%s] 0x%02x != 0x%02x", __func__, 3243 1.264 maxv pp->pr_wchan, *cp, expected); 3244 1.204 maxv } 3245 1.204 maxv cp++; 3246 1.204 maxv 3247 1.204 maxv while (cp < ep) { 3248 1.204 maxv expected = pool_pattern_generate(cp); 3249 1.225 maxv if (__predict_false(*cp != expected)) { 3250 1.264 maxv panic("%s: [%s] 0x%02x != 0x%02x", __func__, 3251 1.264 maxv pp->pr_wchan, *cp, expected); 3252 1.204 maxv } 3253 1.204 maxv cp++; 3254 1.204 maxv } 3255 1.224 maxv #endif 3256 1.204 maxv } 3257 1.204 maxv 3258 1.229 maxv static void 3259 1.229 maxv pool_cache_redzone_check(pool_cache_t pc, void *p) 3260 1.229 maxv { 3261 1.229 maxv #ifdef KASAN 3262 1.279 thorpej /* 3263 1.279 thorpej * If there is a ctor/dtor, or if the cache objects use 3264 1.279 thorpej * passive serialization, leave the data as valid. 
3265 1.279 thorpej */ 3266 1.279 thorpej if (__predict_false(pc_has_ctor(pc) || pc_has_dtor(pc) || 3267 1.279 thorpej pc_has_pser(pc))) { 3268 1.229 maxv return; 3269 1.229 maxv } 3270 1.229 maxv #endif 3271 1.229 maxv pool_redzone_check(&pc->pc_pool, p); 3272 1.229 maxv } 3273 1.229 maxv 3274 1.204 maxv #endif /* POOL_REDZONE */ 3275 1.204 maxv 3276 1.141 yamt #if defined(DDB) 3277 1.141 yamt static bool 3278 1.141 yamt pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 3279 1.141 yamt { 3280 1.141 yamt 3281 1.141 yamt return (uintptr_t)ph->ph_page <= addr && 3282 1.141 yamt addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz; 3283 1.141 yamt } 3284 1.141 yamt 3285 1.143 yamt static bool 3286 1.143 yamt pool_in_item(struct pool *pp, void *item, uintptr_t addr) 3287 1.143 yamt { 3288 1.143 yamt 3289 1.143 yamt return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size; 3290 1.143 yamt } 3291 1.143 yamt 3292 1.143 yamt static bool 3293 1.143 yamt pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr) 3294 1.143 yamt { 3295 1.143 yamt int i; 3296 1.143 yamt 3297 1.143 yamt if (pcg == NULL) { 3298 1.143 yamt return false; 3299 1.143 yamt } 3300 1.144 yamt for (i = 0; i < pcg->pcg_avail; i++) { 3301 1.143 yamt if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) { 3302 1.143 yamt return true; 3303 1.143 yamt } 3304 1.143 yamt } 3305 1.143 yamt return false; 3306 1.143 yamt } 3307 1.143 yamt 3308 1.143 yamt static bool 3309 1.143 yamt pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr) 3310 1.143 yamt { 3311 1.143 yamt 3312 1.242 maxv if ((pp->pr_roflags & PR_USEBMAP) != 0) { 3313 1.234 maxv unsigned int idx = pr_item_bitmap_index(pp, ph, (void *)addr); 3314 1.143 yamt pool_item_bitmap_t *bitmap = 3315 1.143 yamt ph->ph_bitmap + (idx / BITMAP_SIZE); 3316 1.286 skrll pool_item_bitmap_t mask = 1U << (idx & BITMAP_MASK); 3317 1.143 yamt 3318 1.143 yamt return (*bitmap & mask) == 0; 3319 1.143 yamt } else { 3320 1.143 yamt struct pool_item *pi; 3321 1.143 yamt 3322 1.143 yamt LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) { 3323 1.143 yamt if (pool_in_item(pp, pi, addr)) { 3324 1.143 yamt return false; 3325 1.143 yamt } 3326 1.143 yamt } 3327 1.143 yamt return true; 3328 1.143 yamt } 3329 1.143 yamt } 3330 1.143 yamt 3331 1.141 yamt void 3332 1.141 yamt pool_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 3333 1.141 yamt { 3334 1.141 yamt struct pool *pp; 3335 1.141 yamt 3336 1.145 ad TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 3337 1.141 yamt struct pool_item_header *ph; 3338 1.281 riastrad struct pool_cache *pc; 3339 1.141 yamt uintptr_t item; 3340 1.143 yamt bool allocated = true; 3341 1.143 yamt bool incache = false; 3342 1.143 yamt bool incpucache = false; 3343 1.143 yamt char cpucachestr[32]; 3344 1.141 yamt 3345 1.141 yamt if ((pp->pr_roflags & PR_PHINPAGE) != 0) { 3346 1.141 yamt LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) { 3347 1.141 yamt if (pool_in_page(pp, ph, addr)) { 3348 1.141 yamt goto found; 3349 1.141 yamt } 3350 1.141 yamt } 3351 1.141 yamt LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) { 3352 1.141 yamt if (pool_in_page(pp, ph, addr)) { 3353 1.143 yamt allocated = 3354 1.143 yamt pool_allocated(pp, ph, addr); 3355 1.143 yamt goto found; 3356 1.143 yamt } 3357 1.143 yamt } 3358 1.143 yamt LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) { 3359 1.143 yamt if (pool_in_page(pp, ph, addr)) { 3360 1.143 yamt allocated = false; 3361 1.141 yamt goto found; 3362 1.141 yamt } 3363 1.141 
yamt } 3364 1.141 yamt continue; 3365 1.141 yamt } else { 3366 1.141 yamt ph = pr_find_pagehead_noalign(pp, (void *)addr); 3367 1.141 yamt if (ph == NULL || !pool_in_page(pp, ph, addr)) { 3368 1.141 yamt continue; 3369 1.141 yamt } 3370 1.143 yamt allocated = pool_allocated(pp, ph, addr); 3371 1.141 yamt } 3372 1.141 yamt found: 3373 1.281 riastrad if (allocated && 3374 1.281 riastrad (pc = atomic_load_consume(&pp->pr_cache)) != NULL) { 3375 1.143 yamt struct pool_cache_group *pcg; 3376 1.143 yamt int i; 3377 1.143 yamt 3378 1.143 yamt for (pcg = pc->pc_fullgroups; pcg != NULL; 3379 1.143 yamt pcg = pcg->pcg_next) { 3380 1.143 yamt if (pool_in_cg(pp, pcg, addr)) { 3381 1.143 yamt incache = true; 3382 1.143 yamt goto print; 3383 1.143 yamt } 3384 1.143 yamt } 3385 1.183 ad for (i = 0; i < __arraycount(pc->pc_cpus); i++) { 3386 1.143 yamt pool_cache_cpu_t *cc; 3387 1.143 yamt 3388 1.143 yamt if ((cc = pc->pc_cpus[i]) == NULL) { 3389 1.143 yamt continue; 3390 1.143 yamt } 3391 1.143 yamt if (pool_in_cg(pp, cc->cc_current, addr) || 3392 1.143 yamt pool_in_cg(pp, cc->cc_previous, addr)) { 3393 1.143 yamt struct cpu_info *ci = 3394 1.170 ad cpu_lookup(i); 3395 1.143 yamt 3396 1.143 yamt incpucache = true; 3397 1.143 yamt snprintf(cpucachestr, 3398 1.143 yamt sizeof(cpucachestr), 3399 1.143 yamt "cached by CPU %u", 3400 1.153 martin ci->ci_index); 3401 1.143 yamt goto print; 3402 1.143 yamt } 3403 1.143 yamt } 3404 1.143 yamt } 3405 1.143 yamt print: 3406 1.141 yamt item = (uintptr_t)ph->ph_page + ph->ph_off; 3407 1.141 yamt item = item + rounddown(addr - item, pp->pr_size); 3408 1.143 yamt (*pr)("%p is %p+%zu in POOL '%s' (%s)\n", 3409 1.141 yamt (void *)addr, item, (size_t)(addr - item), 3410 1.143 yamt pp->pr_wchan, 3411 1.143 yamt incpucache ? cpucachestr : 3412 1.143 yamt incache ? "cached" : allocated ? 
"allocated" : "free"); 3413 1.141 yamt } 3414 1.141 yamt } 3415 1.141 yamt #endif /* defined(DDB) */ 3416 1.203 joerg 3417 1.203 joerg static int 3418 1.203 joerg pool_sysctl(SYSCTLFN_ARGS) 3419 1.203 joerg { 3420 1.203 joerg struct pool_sysctl data; 3421 1.203 joerg struct pool *pp; 3422 1.203 joerg struct pool_cache *pc; 3423 1.203 joerg pool_cache_cpu_t *cc; 3424 1.203 joerg int error; 3425 1.203 joerg size_t i, written; 3426 1.203 joerg 3427 1.203 joerg if (oldp == NULL) { 3428 1.203 joerg *oldlenp = 0; 3429 1.203 joerg TAILQ_FOREACH(pp, &pool_head, pr_poollist) 3430 1.203 joerg *oldlenp += sizeof(data); 3431 1.203 joerg return 0; 3432 1.203 joerg } 3433 1.203 joerg 3434 1.203 joerg memset(&data, 0, sizeof(data)); 3435 1.203 joerg error = 0; 3436 1.203 joerg written = 0; 3437 1.281 riastrad mutex_enter(&pool_head_lock); 3438 1.203 joerg TAILQ_FOREACH(pp, &pool_head, pr_poollist) { 3439 1.203 joerg if (written + sizeof(data) > *oldlenp) 3440 1.203 joerg break; 3441 1.281 riastrad pp->pr_refcnt++; 3442 1.203 joerg strlcpy(data.pr_wchan, pp->pr_wchan, sizeof(data.pr_wchan)); 3443 1.203 joerg data.pr_pagesize = pp->pr_alloc->pa_pagesz; 3444 1.203 joerg data.pr_flags = pp->pr_roflags | pp->pr_flags; 3445 1.203 joerg #define COPY(field) data.field = pp->field 3446 1.203 joerg COPY(pr_size); 3447 1.203 joerg 3448 1.203 joerg COPY(pr_itemsperpage); 3449 1.203 joerg COPY(pr_nitems); 3450 1.203 joerg COPY(pr_nout); 3451 1.203 joerg COPY(pr_hardlimit); 3452 1.203 joerg COPY(pr_npages); 3453 1.203 joerg COPY(pr_minpages); 3454 1.203 joerg COPY(pr_maxpages); 3455 1.203 joerg 3456 1.203 joerg COPY(pr_nget); 3457 1.203 joerg COPY(pr_nfail); 3458 1.203 joerg COPY(pr_nput); 3459 1.203 joerg COPY(pr_npagealloc); 3460 1.203 joerg COPY(pr_npagefree); 3461 1.203 joerg COPY(pr_hiwat); 3462 1.203 joerg COPY(pr_nidle); 3463 1.203 joerg #undef COPY 3464 1.203 joerg 3465 1.203 joerg data.pr_cache_nmiss_pcpu = 0; 3466 1.203 joerg data.pr_cache_nhit_pcpu = 0; 3467 1.271 ad data.pr_cache_nmiss_global = 0; 3468 1.271 ad data.pr_cache_nempty = 0; 3469 1.271 ad data.pr_cache_ncontended = 0; 3470 1.271 ad data.pr_cache_npartial = 0; 3471 1.281 riastrad if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) { 3472 1.271 ad uint32_t nfull = 0; 3473 1.203 joerg data.pr_cache_meta_size = pc->pc_pcgsize; 3474 1.203 joerg for (i = 0; i < pc->pc_ncpu; ++i) { 3475 1.203 joerg cc = pc->pc_cpus[i]; 3476 1.203 joerg if (cc == NULL) 3477 1.203 joerg continue; 3478 1.271 ad data.pr_cache_ncontended += cc->cc_contended; 3479 1.206 knakahar data.pr_cache_nmiss_pcpu += cc->cc_misses; 3480 1.206 knakahar data.pr_cache_nhit_pcpu += cc->cc_hits; 3481 1.271 ad data.pr_cache_nmiss_global += cc->cc_pcmisses; 3482 1.271 ad nfull += cc->cc_nfull; /* 32-bit rollover! */ 3483 1.272 ad data.pr_cache_npartial += cc->cc_npart; 3484 1.203 joerg } 3485 1.271 ad data.pr_cache_nfull = nfull; 3486 1.203 joerg } else { 3487 1.203 joerg data.pr_cache_meta_size = 0; 3488 1.203 joerg data.pr_cache_nfull = 0; 3489 1.203 joerg } 3490 1.271 ad data.pr_cache_nhit_global = data.pr_cache_nmiss_pcpu - 3491 1.271 ad data.pr_cache_nmiss_global; 3492 1.203 joerg 3493 1.281 riastrad if (pp->pr_refcnt == UINT_MAX) /* XXX possible? 
*/ 3494 1.281 riastrad continue; 3495 1.281 riastrad mutex_exit(&pool_head_lock); 3496 1.203 joerg error = sysctl_copyout(l, &data, oldp, sizeof(data)); 3497 1.281 riastrad mutex_enter(&pool_head_lock); 3498 1.281 riastrad if (--pp->pr_refcnt == 0) 3499 1.281 riastrad cv_broadcast(&pool_busy); 3500 1.203 joerg if (error) 3501 1.203 joerg break; 3502 1.203 joerg written += sizeof(data); 3503 1.203 joerg oldp = (char *)oldp + sizeof(data); 3504 1.203 joerg } 3505 1.281 riastrad mutex_exit(&pool_head_lock); 3506 1.203 joerg 3507 1.203 joerg *oldlenp = written; 3508 1.203 joerg return error; 3509 1.203 joerg } 3510 1.203 joerg 3511 1.203 joerg SYSCTL_SETUP(sysctl_pool_setup, "sysctl kern.pool setup") 3512 1.203 joerg { 3513 1.203 joerg const struct sysctlnode *rnode = NULL; 3514 1.203 joerg 3515 1.203 joerg sysctl_createv(clog, 0, NULL, &rnode, 3516 1.203 joerg CTLFLAG_PERMANENT, 3517 1.203 joerg CTLTYPE_STRUCT, "pool", 3518 1.203 joerg SYSCTL_DESCR("Get pool statistics"), 3519 1.203 joerg pool_sysctl, 0, NULL, 0, 3520 1.203 joerg CTL_KERN, CTL_CREATE, CTL_EOL); 3521 1.203 joerg } 3522
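/*
 * Editor's illustration (not upstream code): the "kern.pool" node created
 * above hands back an array of struct pool_sysctl records, one per pool,
 * sized by the usual two-call sysctl protocol.  The userland sketch under
 * #if 0 assumes struct pool_sysctl is visible through <sys/pool.h>, as it
 * is for tools such as vmstat(1); error handling is abbreviated.
 */
#if 0	/* illustrative sketch only */
#include <sys/sysctl.h>
#include <sys/pool.h>

#include <err.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct pool_sysctl *data;
	size_t len, i;

	/* First call reports the required buffer size... */
	if (sysctlbyname("kern.pool", NULL, &len, NULL, 0) == -1)
		err(1, "sysctlbyname kern.pool (size)");
	if ((data = malloc(len)) == NULL)
		err(1, "malloc");
	/* ...second call copies out the records. */
	if (sysctlbyname("kern.pool", data, &len, NULL, 0) == -1)
		err(1, "sysctlbyname kern.pool");

	for (i = 0; i < len / sizeof(*data); i++)
		printf("%-16s size=%ju nget=%ju nput=%ju\n",
		    data[i].pr_wchan,
		    (uintmax_t)data[i].pr_size,
		    (uintmax_t)data[i].pr_nget,
		    (uintmax_t)data[i].pr_nput);
	free(data);
	return 0;
}
#endif	/* illustrative sketch only */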
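/*
 * Editor's illustration (not upstream code): a minimal pool_cache client,
 * placed here at the end of the file only to tie the per-CPU group
 * handling in pool_cache_put() and pool_cache_transfer() earlier in this
 * file back to the public API.  The "example_*" names are hypothetical;
 * the pool_cache_init() argument order follows pool_cache(9).
 */
#if 0	/* illustrative sketch only */
struct example_obj {
	int	eo_state;
};

static pool_cache_t example_cache;

static void
example_cache_setup(void)
{

	/* No ctor/dtor: objects come back from the cache unconstructed. */
	example_cache = pool_cache_init(sizeof(struct example_obj), 0, 0, 0,
	    "exobjpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

static void
example_cache_use(void)
{
	struct example_obj *obj;

	/* Fast path: served from the current per-CPU group when possible. */
	obj = pool_cache_get(example_cache, PR_WAITOK);
	obj->eo_state = 0;
	/* Freeing tries the per-CPU groups first, then the slow path. */
	pool_cache_put(example_cache, obj);
}
#endif	/* illustrative sketch only */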