      1 /*	$NetBSD: pool.h,v 1.96 2021/12/22 16:57:28 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 1999, 2000, 2007, 2020
      5  *     The NetBSD Foundation, Inc.
      6  * All rights reserved.
      7  *
      8  * This code is derived from software contributed to The NetBSD Foundation
      9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
     10  * Simulation Facility, NASA Ames Research Center.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     31  * POSSIBILITY OF SUCH DAMAGE.
     32  */
     33 
     34 #ifndef _SYS_POOL_H_
     35 #define _SYS_POOL_H_
     36 
     37 #include <sys/stdbool.h>
     38 #include <sys/stdint.h>
     39 
     40 struct pool_sysctl {
     41 	char pr_wchan[16];
     42 	uint64_t pr_flags;
     43 	uint64_t pr_size;
     44 	uint64_t pr_pagesize;
     45 	uint64_t pr_itemsperpage;
     46 	uint64_t pr_nitems;
     47 	uint64_t pr_nout;
     48 	uint64_t pr_hardlimit;
     49 	uint64_t pr_npages;
     50 	uint64_t pr_minpages;
     51 	uint64_t pr_maxpages;
     52 
     53 	uint64_t pr_nget;
     54 	uint64_t pr_nfail;
     55 	uint64_t pr_nput;
     56 	uint64_t pr_npagealloc;
     57 	uint64_t pr_npagefree;
     58 	uint64_t pr_hiwat;
     59 	uint64_t pr_nidle;
     60 
     61 	uint64_t pr_cache_meta_size;
     62 	uint64_t pr_cache_nfull;
     63 	uint64_t pr_cache_npartial;
     64 	uint64_t pr_cache_nempty;
     65 	uint64_t pr_cache_ncontended;
     66 	uint64_t pr_cache_nmiss_global;
     67 	uint64_t pr_cache_nhit_global;
     68 	uint64_t pr_cache_nmiss_pcpu;
     69 	uint64_t pr_cache_nhit_pcpu;
     70 };
     71 
     72 #ifdef _KERNEL
     73 #define __POOL_EXPOSE
     74 #endif
     75 
     76 #ifdef __POOL_EXPOSE
     77 #include <sys/param.h>
     78 #include <sys/mutex.h>
     79 #include <sys/condvar.h>
     80 #include <sys/queue.h>
     81 #include <sys/time.h>
     82 #include <sys/tree.h>
     83 #include <sys/callback.h>
     84 
     85 #ifdef _KERNEL_OPT
     86 #include "opt_pool.h"
     87 #endif
     88 
     89 #define POOL_PADDR_INVALID	((paddr_t) -1)
     90 
     91 struct pool;
     92 
     93 struct pool_allocator {
     94 	void		*(*pa_alloc)(struct pool *, int);
     95 	void		(*pa_free)(struct pool *, void *);
     96 	unsigned int	pa_pagesz;
     97 
     98 	/* The following fields are for internal use only. */
     99 	kmutex_t	pa_lock;
    100 	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
    101 	uint32_t	pa_refcnt;	/* number of pools using this allocator */
    102 	int		pa_pagemask;
    103 	int		pa_pageshift;
    104 };
    105 
    106 LIST_HEAD(pool_pagelist,pool_item_header);
    107 SPLAY_HEAD(phtree, pool_item_header);
    108 
    109 #define POOL_QUARANTINE_DEPTH	128
    110 typedef struct {
    111 	size_t rotor;
    112 	intptr_t list[POOL_QUARANTINE_DEPTH];
    113 } pool_quar_t;
    114 
    115 struct pool {
    116 	TAILQ_ENTRY(pool)
    117 			pr_poollist;
    118 	struct pool_pagelist
    119 			pr_emptypages;	/* Empty pages */
    120 	struct pool_pagelist
    121 			pr_fullpages;	/* Full pages */
    122 	struct pool_pagelist
    123 			pr_partpages;	/* Partially-allocated pages */
    124 	struct pool_item_header	*pr_curpage;
    125 	struct pool	*pr_phpool;	/* Pool item header pool */
    126 	struct pool_cache *pr_cache;	/* Cache for this pool */
    127 	unsigned int	pr_size;	/* Size of item */
    128 	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
    129 	unsigned int	pr_itemoffset;	/* offset of the item space */
    130 	unsigned int	pr_minitems;	/* minimum # of free items to keep */
    131 	unsigned int	pr_maxitems;	/* maximum # of free items to keep */
    132 	unsigned int	pr_minpages;	/* minimum # of pages to keep */
    133 	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
    134 	unsigned int	pr_npages;	/* # of pages allocated */
    135 	unsigned int	pr_itemsperpage;/* # items that fit in a page */
    136 	unsigned int	pr_poolid;	/* id of the pool */
    137 	unsigned int	pr_nitems;	/* number of free items in pool */
    138 	unsigned int	pr_nout;	/* # items currently allocated */
    139 	unsigned int	pr_hardlimit;	/* hard limit to number of allocated
    140 					   items */
    141 	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
    142 	struct pool_allocator *pr_alloc;/* back-end allocator */
    143 	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */
    144 
    145 	/* Drain hook. */
    146 	void		(*pr_drain_hook)(void *, int);
    147 	void		*pr_drain_hook_arg;
    148 
    149 	const char	*pr_wchan;	/* tsleep(9) identifier */
    150 	unsigned int	pr_flags;	/* r/w flags */
    151 	unsigned int	pr_roflags;	/* r/o flags */
    152 #define PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
    153 #define PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
    154 #define PR_WANTED	0x04	/* waiting for free objects */
    155 #define PR_PHINPAGE	0x40	/* page header in page */
    156 #define PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
    157 #define PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
    158 #define PR_NOTOUCH	0x400	/* don't use free items to keep internal state*/
    159 #define PR_NOALIGN	0x800	/* don't assume backend alignment */
    160 #define PR_LARGECACHE	0x1000	/* use large cache groups */
    161 #define PR_GROWING	0x2000	/* pool_grow in progress */
    162 #define PR_GROWINGNOWAIT 0x4000	/* pool_grow in progress by PR_NOWAIT alloc */
    163 #define PR_ZERO		0x8000	/* zero data before returning */
    164 #define PR_USEBMAP	0x10000	/* use a bitmap to manage freed items */
    165 #define PR_PSERIALIZE	0x20000	/* needs pserialize sync point before free */
    166 
    167 	/*
    168 	 * `pr_lock' protects the pool's data structures when removing
    169 	 * items from or returning items to the pool, or when reading
    170 	 * or updating read/write fields in the pool descriptor.
    171 	 *
    172 	 * We assume back-end page allocators provide their own locking
    173 	 * scheme.  They will be called with the pool descriptor _unlocked_,
    174 	 * since the page allocators may block.
    175 	 */
    176 	kmutex_t	pr_lock;
    177 	kcondvar_t	pr_cv;
    178 	int		pr_ipl;
    179 
    180 	struct phtree	pr_phtree;
    181 
    182 	int		pr_maxcolor;	/* Cache colouring */
    183 	int		pr_curcolor;
    184 	int		pr_phoffset;	/* unused */
    185 
    186 	/*
    187 	 * Warning message to be issued, and a per-time-delta rate cap,
    188 	 * if the hard limit is reached.
    189 	 */
    190 	const char	*pr_hardlimit_warning;
    191 	struct timeval	pr_hardlimit_ratecap;
    192 	struct timeval	pr_hardlimit_warning_last;
    193 
    194 	/*
    195 	 * Instrumentation
    196 	 */
    197 	unsigned long	pr_nget;	/* # of successful requests */
    198 	unsigned long	pr_nfail;	/* # of unsuccessful requests */
    199 	unsigned long	pr_nput;	/* # of releases */
    200 	unsigned long	pr_npagealloc;	/* # of pages allocated */
    201 	unsigned long	pr_npagefree;	/* # of pages released */
    202 	unsigned int	pr_hiwat;	/* max # of pages in pool */
    203 	unsigned long	pr_nidle;	/* # of idle pages */
    204 
    205 	/*
    206 	 * Diagnostic aides.
    207 	 */
    208 	void		*pr_freecheck;
    209 	void		*pr_qcache;
    210 	bool		pr_redzone;
    211 	size_t		pr_reqsize;
    212 	size_t		pr_reqsize_with_redzone;
    213 #ifdef POOL_QUARANTINE
    214 	pool_quar_t	pr_quar;
    215 #endif
    216 };
    217 
    218 /*
    219  * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
    220  * All groups will be aligned to COHERENCY_UNIT.
    221  */
    222 #ifdef _LP64
    223 #define PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
    224 #define PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
    225 #else
    226 #define PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
    227 #define PCG_NOBJECTS_LARGE	62	/* 508 byte group */
    228 #endif
    229 
    230 typedef struct pcgpair {
    231 	void	*pcgo_va;		/* object virtual address */
    232 	paddr_t	pcgo_pa;		/* object physical address */
    233 } pcgpair_t;
    234 
    235 /* The pool cache group. */
    236 typedef struct pool_cache_group {
    237 	struct pool_cache_group	*pcg_next;	/* link to next group */
    238 	u_int			pcg_avail;	/* # available objects */
    239 	u_int			pcg_size;	/* max number objects */
    240 	pcgpair_t 		pcg_objects[1];	/* the objects */
    241 } pcg_t;
    242 
    243 /* Pool cache CPU.  Sized to 64 bytes on _LP64. */
    244 typedef struct pool_cache_cpu {
    245 	struct pool_cache_group	*cc_current;
    246 	struct pool_cache_group	*cc_previous;
    247 	pcg_t *volatile 	*cc_pcgcache;
    248 	uint64_t		cc_misses;
    249 	uint64_t		cc_hits;
    250 	uint64_t		cc_pcmisses;
    251 	uint64_t		cc_contended;
    252 	uint32_t		cc_nfull;
    253 	uint32_t		cc_npart;
    254 } pool_cache_cpu_t;
    255 
    256 struct pool_cache {
    257 	/* Pool layer. */
    258 	struct pool	pc_pool;
    259 
    260 	/* Cache layer. */
    261 	TAILQ_ENTRY(pool_cache)
    262 			pc_cachelist;	/* entry on global cache list */
    263 	struct pool	*pc_pcgpool;	/* Pool of cache groups */
    264 	pcg_t *volatile *pc_pcgcache;	/* list of empty cache groups */
    265 	int		pc_pcgsize;	/* Use large cache groups? */
    266 	int		pc_ncpu;	/* number cpus set up */
    267 	int		(*pc_ctor)(void *, void *, int);
    268 	void		(*pc_dtor)(void *, void *);
    269 	void		*pc_arg;	/* for ctor/dtor */
    270 	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */
    271 	unsigned int	pc_roflags;	/* r/o cache flags */
    272 	void		*pc_cpus[MAXCPUS];
    273 
    274 	/* Diagnostic aides. */
    275 	void		*pc_freecheck;
    276 	bool		pc_redzone;
    277 	size_t		pc_reqsize;
    278 
    279 	/* Hot items. */
    280 	pcg_t *volatile pc_fullgroups	/* list of full cache groups */
    281 	    __aligned(CACHE_LINE_SIZE);
    282 	pcg_t *volatile pc_partgroups;	/* groups for reclamation */
    283 
    284 	/* Boot cpu. */
    285 	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
    286 };
    287 
    288 #endif /* __POOL_EXPOSE */
    289 
    290 typedef struct pool_cache *pool_cache_t;
    291 
    292 #ifdef _KERNEL
    293 /*
    294  * pool_allocator_kmem is the default that all pools get unless
    295  * otherwise specified.  pool_allocator_nointr is provided for
    296  * pools that know they will never be accessed in interrupt
    297  * context.
    298  */
    299 extern struct pool_allocator pool_allocator_kmem;
    300 extern struct pool_allocator pool_allocator_nointr;
    301 extern struct pool_allocator pool_allocator_meta;
    302 
    303 void		pool_subsystem_init(void);
    304 
    305 void		pool_init(struct pool *, size_t, u_int, u_int,
    306 		    int, const char *, struct pool_allocator *, int);
    307 void		pool_destroy(struct pool *);
    308 
    309 void		pool_set_drain_hook(struct pool *,
    310 		    void (*)(void *, int), void *);
    311 
    312 void		*pool_get(struct pool *, int);
    313 void		pool_put(struct pool *, void *);
    314 int		pool_reclaim(struct pool *);
    315 
    316 void		pool_prime(struct pool *, int);
    317 void		pool_setlowat(struct pool *, int);
    318 void		pool_sethiwat(struct pool *, int);
    319 void		pool_sethardlimit(struct pool *, int, const char *, int);
    320 bool		pool_drain(struct pool **);
    321 int		pool_totalpages(void);
    322 int		pool_totalpages_locked(void);
    323 
    324 unsigned int	pool_nget(struct pool *);
    325 unsigned int	pool_nput(struct pool *);
    326 
    327 /*
    328  * Debugging and diagnostic aides.
    329  */
    330 void		pool_printit(struct pool *, const char *,
    331     void (*)(const char *, ...) __printflike(1, 2));
    332 void		pool_printall(const char *, void (*)(const char *, ...)
    333     __printflike(1, 2));
    334 int		pool_chk(struct pool *, const char *);
    335 
    336 /*
    337  * Pool cache routines.
    338  */
    339 pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
    340 		    struct pool_allocator *, int, int (*)(void *, void *, int),
    341 		    void (*)(void *, void *), void *);
    342 void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
    343 		    const char *, struct pool_allocator *, int,
    344 		    int (*)(void *, void *, int), void (*)(void *, void *),
    345 		    void *);
    346 void		pool_cache_destroy(pool_cache_t);
    347 void		pool_cache_bootstrap_destroy(pool_cache_t);
    348 void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
    349 void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
    350 void		pool_cache_destruct_object(pool_cache_t, void *);
    351 void		pool_cache_invalidate(pool_cache_t);
    352 bool		pool_cache_reclaim(pool_cache_t);
    353 void		pool_cache_set_drain_hook(pool_cache_t,
    354 		    void (*)(void *, int), void *);
    355 void		pool_cache_setlowat(pool_cache_t, int);
    356 void		pool_cache_sethiwat(pool_cache_t, int);
    357 void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
    358 void		pool_cache_prime(pool_cache_t, int);
    359 void		pool_cache_cpu_init(struct cpu_info *);
    360 
    361 unsigned int	pool_cache_nget(pool_cache_t);
    362 unsigned int	pool_cache_nput(pool_cache_t);
    363 
    364 #define		pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
    365 #define		pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
    366 				          POOL_PADDR_INVALID)
    367 
    368 void		pool_whatis(uintptr_t, void (*)(const char *, ...)
    369     __printflike(1, 2));
    370 #endif /* _KERNEL */
    371 
    372 #endif /* _SYS_POOL_H_ */
    373