Home | History | Annotate | Line # | Download | only in kern
subr_kmem.c revision 1.38
      1 /*	$NetBSD: subr_kmem.c,v 1.38 2011/11/20 22:58:31 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c)2006 YAMAMOTO Takashi,
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     55  * SUCH DAMAGE.
     56  */
     57 
     58 /*
     59  * allocator of kernel wired memory.
     60  *
     61  * TODO:
     62  * -	is an "intrsafe" version worth having?  maybe.
     63  */
     64 
     65 #include <sys/cdefs.h>
     66 __KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.38 2011/11/20 22:58:31 christos Exp $");
     67 
     68 #include <sys/param.h>
     69 #include <sys/callback.h>
     70 #include <sys/kmem.h>
     71 #include <sys/vmem.h>
     72 #include <sys/debug.h>
     73 #include <sys/lockdebug.h>
     74 #include <sys/cpu.h>
     75 
     76 #include <uvm/uvm_extern.h>
     77 #include <uvm/uvm_map.h>
     78 #include <uvm/uvm_kmguard.h>
     79 
     80 #include <lib/libkern/libkern.h>
     81 
/* Allocation granularity handed to vmem_create() and pool_cache_init(). */
#define	KMEM_QUANTUM_SIZE	(ALIGNBYTES + 1)
/* Largest size served by the vmem quantum caches (qcache_max argument). */
#define	KMEM_QCACHE_MAX		(KMEM_QUANTUM_SIZE * 32)
/* Upper bound on the number of pool caches created by kmem_init(). */
#define	KMEM_CACHE_COUNT	16
     85 
     86 typedef struct kmem_cache {
     87 	pool_cache_t		kc_cache;
     88 	struct pool_allocator	kc_pa;
     89 	char			kc_name[12];
     90 } kmem_cache_t;
     91 
     92 static vmem_t *kmem_arena;
     93 static struct callback_entry kmem_kva_reclaim_entry;
     94 
     95 static kmem_cache_t kmem_cache[KMEM_CACHE_COUNT + 1];
     96 static size_t kmem_cache_max;
     97 static size_t kmem_cache_min;
     98 static size_t kmem_cache_mask;
     99 static int kmem_cache_shift;
    100 
/*
 * DEBUG kernels switch on all of the kmem self-checks below and carry
 * the state needed by the guard allocator and FREECHECK.
 */
#if defined(DEBUG)
int kmem_guard_depth = 0;
size_t kmem_guard_size;
static struct uvm_kmguard kmem_guard;
static void *kmem_freecheck;
#define	KMEM_POISON
#define	KMEM_REDZONE
#define	KMEM_SIZE
#define	KMEM_GUARD
#endif /* defined(DEBUG) */

/* KMEM_POISON: fill freed memory with a pattern and verify it later. */
#if defined(KMEM_POISON)
static void kmem_poison_fill(void *, size_t);
static void kmem_poison_check(void *, size_t);
#else /* defined(KMEM_POISON) */
#define	kmem_poison_fill(p, sz)		/* nothing */
#define	kmem_poison_check(p, sz)	/* nothing */
#endif /* defined(KMEM_POISON) */

/* KMEM_REDZONE: extra byte(s) appended to every allocation. */
#if defined(KMEM_REDZONE)
#define	REDZONE_SIZE	1
#else /* defined(KMEM_REDZONE) */
#define	REDZONE_SIZE	0
#endif /* defined(KMEM_REDZONE) */

/* KMEM_SIZE: header in front of every allocation recording its size. */
#if defined(KMEM_SIZE)
#define	SIZE_SIZE	(max(KMEM_QUANTUM_SIZE, sizeof(size_t)))
static void kmem_size_set(void *, size_t);
static void kmem_size_check(const void *, size_t);
#else
#define	SIZE_SIZE	0
#define	kmem_size_set(p, sz)	/* nothing */
#define	kmem_size_check(p, sz)	/* nothing */
#endif
    135 
    136 static int kmem_backend_alloc(void *, vmem_size_t, vmem_size_t *,
    137     vm_flag_t, vmem_addr_t *);
    138 static void kmem_backend_free(void *, vmem_addr_t, vmem_size_t);
    139 static int kmem_kva_reclaim_callback(struct callback_entry *, void *, void *);
    140 
    141 CTASSERT(KM_SLEEP == PR_WAITOK);
    142 CTASSERT(KM_NOSLEEP == PR_NOWAIT);
    143 
    144 static inline vm_flag_t
    145 kmf_to_vmf(km_flag_t kmflags)
    146 {
    147 	vm_flag_t vmflags;
    148 
    149 	KASSERT((kmflags & (KM_SLEEP|KM_NOSLEEP)) != 0);
    150 	KASSERT((~kmflags & (KM_SLEEP|KM_NOSLEEP)) != 0);
    151 
    152 	vmflags = 0;
    153 	if ((kmflags & KM_SLEEP) != 0) {
    154 		vmflags |= VM_SLEEP;
    155 	}
    156 	if ((kmflags & KM_NOSLEEP) != 0) {
    157 		vmflags |= VM_NOSLEEP;
    158 	}
    159 
    160 	return vmflags;
    161 }
    162 
    163 static void *
    164 kmem_poolpage_alloc(struct pool *pool, int prflags)
    165 {
    166 	vmem_addr_t addr;
    167 	int rc;
    168 
    169 	rc = vmem_alloc(kmem_arena, pool->pr_alloc->pa_pagesz,
    170 	    kmf_to_vmf(prflags) | VM_INSTANTFIT, &addr);
    171 	return (rc == 0) ? (void *)addr : NULL;
    172 
    173 }
    174 
    175 static void
    176 kmem_poolpage_free(struct pool *pool, void *addr)
    177 {
    178 
    179 	vmem_free(kmem_arena, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
    180 }
    181 
    182 /* ---- kmem API */
    183 
    184 /*
    185  * kmem_alloc: allocate wired memory.
    186  *
    187  * => must not be called from interrupt context.
    188  */
    189 
    190 void *
    191 kmem_alloc(size_t size, km_flag_t kmflags)
    192 {
    193 	kmem_cache_t *kc;
    194 	uint8_t *p;
    195 
    196 	KASSERT(!cpu_intr_p());
    197 	KASSERT(!cpu_softintr_p());
    198 	KASSERT(size > 0);
    199 
    200 #ifdef KMEM_GUARD
    201 	if (size <= kmem_guard_size) {
    202 		return uvm_kmguard_alloc(&kmem_guard, size,
    203 		    (kmflags & KM_SLEEP) != 0);
    204 	}
    205 #endif
    206 
    207 	size += REDZONE_SIZE + SIZE_SIZE;
    208 	if (size >= kmem_cache_min && size <= kmem_cache_max) {
    209 		kc = &kmem_cache[(size + kmem_cache_mask) >> kmem_cache_shift];
    210 		KASSERT(size <= kc->kc_pa.pa_pagesz);
    211 		kmflags &= (KM_SLEEP | KM_NOSLEEP);
    212 		p = pool_cache_get(kc->kc_cache, kmflags);
    213 	} else {
    214 		vmem_addr_t addr;
    215 
    216 		if (vmem_alloc(kmem_arena, size,
    217 		    kmf_to_vmf(kmflags) | VM_INSTANTFIT, &addr) == 0)
    218 			p = (void *)addr;
    219 		else
    220 			p = NULL;
    221 	}
    222 	if (__predict_true(p != NULL)) {
    223 		kmem_poison_check(p, kmem_roundup_size(size));
    224 		FREECHECK_OUT(&kmem_freecheck, p);
    225 		kmem_size_set(p, size);
    226 		p = (uint8_t *)p + SIZE_SIZE;
    227 	}
    228 	return p;
    229 }
    230 
    231 /*
    232  * kmem_zalloc: allocate wired memory.
    233  *
    234  * => must not be called from interrupt context.
    235  */
    236 
    237 void *
    238 kmem_zalloc(size_t size, km_flag_t kmflags)
    239 {
    240 	void *p;
    241 
    242 	p = kmem_alloc(size, kmflags);
    243 	if (p != NULL) {
    244 		memset(p, 0, size);
    245 	}
    246 	return p;
    247 }
    248 
    249 /*
    250  * kmem_free: free wired memory allocated by kmem_alloc.
    251  *
    252  * => must not be called from interrupt context.
    253  */
    254 
    255 void
    256 kmem_free(void *p, size_t size)
    257 {
    258 	kmem_cache_t *kc;
    259 
    260 	KASSERT(!cpu_intr_p());
    261 	KASSERT(!cpu_softintr_p());
    262 	KASSERT(p != NULL);
    263 	KASSERT(size > 0);
    264 
    265 #ifdef KMEM_GUARD
    266 	if (size <= kmem_guard_size) {
    267 		uvm_kmguard_free(&kmem_guard, size, p);
    268 		return;
    269 	}
    270 #endif
    271 	size += SIZE_SIZE;
    272 	p = (uint8_t *)p - SIZE_SIZE;
    273 	kmem_size_check(p, size + REDZONE_SIZE);
    274 	FREECHECK_IN(&kmem_freecheck, p);
    275 	LOCKDEBUG_MEM_CHECK(p, size);
    276 	kmem_poison_check((char *)p + size,
    277 	    kmem_roundup_size(size + REDZONE_SIZE) - size);
    278 	kmem_poison_fill(p, size);
    279 	size += REDZONE_SIZE;
    280 	if (size >= kmem_cache_min && size <= kmem_cache_max) {
    281 		kc = &kmem_cache[(size + kmem_cache_mask) >> kmem_cache_shift];
    282 		KASSERT(size <= kc->kc_pa.pa_pagesz);
    283 		pool_cache_put(kc->kc_cache, p);
    284 	} else {
    285 		vmem_free(kmem_arena, (vmem_addr_t)p, size);
    286 	}
    287 }
    288 
    289 
    290 void
    291 kmem_init(void)
    292 {
    293 	kmem_cache_t *kc;
    294 	size_t sz;
    295 	int i;
    296 
    297 #ifdef KMEM_GUARD
    298 	uvm_kmguard_init(&kmem_guard, &kmem_guard_depth, &kmem_guard_size,
    299 	    kernel_map);
    300 #endif
    301 
    302 	kmem_arena = vmem_create("kmem", 0, 0, KMEM_QUANTUM_SIZE,
    303 	    kmem_backend_alloc, kmem_backend_free, NULL, KMEM_QCACHE_MAX,
    304 	    VM_SLEEP, IPL_NONE);
    305 	callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback,
    306 	    &kmem_kva_reclaim_entry, kmem_arena, kmem_kva_reclaim_callback);
    307 
    308 	/*
    309 	 * kmem caches start at twice the size of the largest vmem qcache
    310 	 * and end at PAGE_SIZE or earlier.  assert that KMEM_QCACHE_MAX
    311 	 * is a power of two.
    312 	 */
    313 	KASSERT(ffs(KMEM_QCACHE_MAX) != 0);
    314 	KASSERT(KMEM_QCACHE_MAX - (1 << (ffs(KMEM_QCACHE_MAX) - 1)) == 0);
    315 	kmem_cache_shift = ffs(KMEM_QCACHE_MAX);
    316 	kmem_cache_min = 1 << kmem_cache_shift;
    317 	kmem_cache_mask = kmem_cache_min - 1;
    318 	for (i = 1; i <= KMEM_CACHE_COUNT; i++) {
    319 		sz = i << kmem_cache_shift;
    320 		if (sz > PAGE_SIZE) {
    321 			break;
    322 		}
    323 		kmem_cache_max = sz;
    324 		kc = &kmem_cache[i];
    325 		kc->kc_pa.pa_pagesz = sz;
    326 		kc->kc_pa.pa_alloc = kmem_poolpage_alloc;
    327 		kc->kc_pa.pa_free = kmem_poolpage_free;
    328 		sprintf(kc->kc_name, "kmem-%zu", sz);
    329 		kc->kc_cache = pool_cache_init(sz,
    330 		    KMEM_QUANTUM_SIZE, 0, PR_NOALIGN | PR_NOTOUCH,
    331 		    kc->kc_name, &kc->kc_pa, IPL_NONE,
    332 		    NULL, NULL, NULL);
    333 		KASSERT(kc->kc_cache != NULL);
    334 	}
    335 }
    336 
    337 size_t
    338 kmem_roundup_size(size_t size)
    339 {
    340 
    341 	return vmem_roundup_size(kmem_arena, size);
    342 }
    343 
    344 /* ---- uvm glue */
    345 
    346 static int
    347 kmem_backend_alloc(void *dummy, vmem_size_t size, vmem_size_t *resultsize,
    348     vm_flag_t vmflags, vmem_addr_t *addrp)
    349 {
    350 	uvm_flag_t uflags;
    351 	vaddr_t va;
    352 
    353 	KASSERT(dummy == NULL);
    354 	KASSERT(size != 0);
    355 	KASSERT((vmflags & (VM_SLEEP|VM_NOSLEEP)) != 0);
    356 	KASSERT((~vmflags & (VM_SLEEP|VM_NOSLEEP)) != 0);
    357 
    358 	if ((vmflags & VM_NOSLEEP) != 0) {
    359 		uflags = UVM_KMF_TRYLOCK | UVM_KMF_NOWAIT;
    360 	} else {
    361 		uflags = UVM_KMF_WAITVA;
    362 	}
    363 	*resultsize = size = round_page(size);
    364 	va = uvm_km_alloc(kernel_map, size, 0,
    365 	    uflags | UVM_KMF_WIRED | UVM_KMF_CANFAIL);
    366 	if (va == 0)
    367 		return ENOMEM;
    368 	kmem_poison_fill((void *)va, size);
    369 	*addrp = (vmem_addr_t)va;
    370 	return 0;
    371 }
    372 
    373 static void
    374 kmem_backend_free(void *dummy, vmem_addr_t addr, vmem_size_t size)
    375 {
    376 
    377 	KASSERT(dummy == NULL);
    378 	KASSERT(addr != 0);
    379 	KASSERT(size != 0);
    380 	KASSERT(size == round_page(size));
    381 
    382 	kmem_poison_check((void *)addr, size);
    383 	uvm_km_free(kernel_map, (vaddr_t)addr, size, UVM_KMF_WIRED);
    384 }
    385 
    386 static int
    387 kmem_kva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
    388 {
    389 	vmem_t *vm = obj;
    390 
    391 	vmem_reap(vm);
    392 	return CALLBACK_CHAIN_CONTINUE;
    393 }
    394 
    395 /* ---- debug */
    396 
    397 #if defined(KMEM_POISON)
    398 
/* Multiplier used to hash an address into its poison byte. */
#if defined(_LP64)
#define	PRIME	0x9e37fffffffc0001UL
#else /* defined(_LP64) */
#define	PRIME	0x9e3779b1
#endif /* defined(_LP64) */

/*
 * kmem_poison_pattern: poison byte for address p, i.e. the most
 * significant byte of (address * PRIME) computed in uintptr_t arithmetic.
 */
static inline uint8_t
kmem_poison_pattern(const void *p)
{
	const unsigned int topbyte_shift =
	    (sizeof(uintptr_t) - sizeof(uint8_t)) * CHAR_BIT;
	const uintptr_t hash = (uintptr_t)p * PRIME;

	return (uint8_t)(hash >> topbyte_shift);
}
    412 
/*
 * kmem_poison_fill: overwrite sz bytes at p, each with the poison byte
 * derived from its own address.
 */
static void
kmem_poison_fill(void *p, size_t sz)
{
	uint8_t *bytes = p;
	size_t i;

	for (i = 0; i < sz; i++) {
		bytes[i] = kmem_poison_pattern(&bytes[i]);
	}
}
    426 
/*
 * kmem_poison_check: verify that sz bytes at p still hold the pattern
 * written by kmem_poison_fill(); panic on the first byte that differs.
 */
static void
kmem_poison_check(void *p, size_t sz)
{
	uint8_t *bytes = p;
	size_t i;

	for (i = 0; i < sz; i++) {
		const uint8_t expected = kmem_poison_pattern(&bytes[i]);

		if (bytes[i] == expected) {
			continue;
		}
		panic("%s: %p: 0x%02x != 0x%02x\n",
		    __func__, &bytes[i], bytes[i], expected);
	}
}
    445 
    446 #endif /* defined(KMEM_POISON) */
    447 
    448 #if defined(KMEM_SIZE)
/*
 * kmem_size_set: record sz in the first sizeof(size_t) bytes at p.
 * memcpy is used, so p does not have to be aligned for size_t.
 */
static void
kmem_size_set(void *p, size_t sz)
{

	(void)memcpy(p, &sz, sizeof(sz));
}
    455 
    456 static void
    457 kmem_size_check(const void *p, size_t sz)
    458 {
    459 	size_t psz;
    460 
    461 	memcpy(&psz, p, sizeof(psz));
    462 	if (psz != sz) {
    463 		panic("kmem_free(%p, %zu) != allocated size %zu",
    464 		    (const uint8_t *)p + SIZE_SIZE, sz - SIZE_SIZE, psz);
    465 	}
    466 }
    467 #endif	/* defined(KMEM_SIZE) */
    468 
    469 /*
    470  * Used to dynamically allocate string with kmem accordingly to format.
    471  */
    472 char *
    473 kmem_asprintf(const char *fmt, ...)
    474 {
    475 	int size, len;
    476 	va_list va;
    477 	char *str;
    478 
    479 	va_start(va, fmt);
    480 	len = vsnprintf(NULL, 0, fmt, va);
    481 	va_end(va);
    482 
    483 	str = kmem_alloc(len + 1, KM_SLEEP);
    484 
    485 	va_start(va, fmt);
    486 	size = vsnprintf(str, len + 1, fmt, va);
    487 	va_end(va);
    488 
    489 	KASSERT(size == len);
    490 
    491 	return str;
    492 }
    493