1 /* $NetBSD: subr_vmem.c,v 1.73 2012/03/04 14:28:49 para Exp $ */
2
3 /*-
4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.73 2012/03/04 14:28:49 para Exp $");
38
39 #if defined(_KERNEL)
40 #include "opt_ddb.h"
41 #define QCACHE
42 #endif /* defined(_KERNEL) */
43
44 #include <sys/param.h>
45 #include <sys/hash.h>
46 #include <sys/queue.h>
47 #include <sys/bitops.h>
48
49 #if defined(_KERNEL)
50 #include <sys/systm.h>
51 #include <sys/kernel.h> /* hz */
52 #include <sys/callout.h>
53 #include <sys/kmem.h>
54 #include <sys/pool.h>
55 #include <sys/vmem.h>
56 #include <sys/workqueue.h>
57 #include <sys/atomic.h>
58 #include <uvm/uvm.h>
59 #include <uvm/uvm_extern.h>
60 #include <uvm/uvm_km.h>
61 #include <uvm/uvm_page.h>
62 #include <uvm/uvm_pdaemon.h>
63 #else /* defined(_KERNEL) */
64 #include "../sys/vmem.h"
65 #endif /* defined(_KERNEL) */
66
67
68 #if defined(_KERNEL)
69 #include <sys/evcnt.h>
70 #define VMEM_EVCNT_DEFINE(name) \
71 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
72 "vmemev", #name); \
73 EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
74 #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++
75 #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count--
76
77 VMEM_EVCNT_DEFINE(bt_pages)
78 VMEM_EVCNT_DEFINE(bt_count)
79 VMEM_EVCNT_DEFINE(bt_inuse)
80
81 #define LOCK_DECL(name) \
82 kmutex_t name; char lockpad[COHERENCY_UNIT - sizeof(kmutex_t)]
83
84 #define CONDVAR_DECL(name) \
85 kcondvar_t name;
86
87 #else /* defined(_KERNEL) */
88 #include <stdio.h>
89 #include <errno.h>
90 #include <assert.h>
91 #include <stdlib.h>
92 #include <string.h>
93
94 #define VMEM_EVCNT_INCR(ev) /* nothing */
95 #define VMEM_EVCNT_DECR(ev) /* nothing */
96
97 #define UNITTEST
98 #define KASSERT(a) assert(a)
99 #define LOCK_DECL(name) /* nothing */
100 #define CONDVAR_DECL(name) /* nothing */
101 #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */
102 #define VMEM_CONDVAR_BROADCAST(vm) /* nothing */
103 #define mutex_init(a, b, c) /* nothing */
104 #define mutex_destroy(a) /* nothing */
105 #define mutex_enter(a) /* nothing */
106 #define mutex_tryenter(a) true
107 #define mutex_exit(a) /* nothing */
108 #define mutex_owned(a) /* nothing */
109 #define ASSERT_SLEEPABLE() /* nothing */
110 #define panic(...) printf(__VA_ARGS__); abort()
111 #endif /* defined(_KERNEL) */
112
113 struct vmem;
114 struct vmem_btag;
115
116 #if defined(VMEM_SANITY)
117 static void vmem_check(vmem_t *);
118 #else /* defined(VMEM_SANITY) */
119 #define vmem_check(vm) /* nothing */
120 #endif /* defined(VMEM_SANITY) */
121
122 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
123
124 #define VMEM_HASHSIZE_MIN 1 /* XXX */
125 #define VMEM_HASHSIZE_MAX 65536 /* XXX */
126 #define VMEM_HASHSIZE_INIT 1
127
128 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)
129
130 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
131 LIST_HEAD(vmem_freelist, vmem_btag);
132 LIST_HEAD(vmem_hashlist, vmem_btag);
133
134 #if defined(QCACHE)
135 #define VMEM_QCACHE_IDX_MAX 32
136
137 #define QC_NAME_MAX 16
138
139 struct qcache {
140 pool_cache_t qc_cache;
141 vmem_t *qc_vmem;
142 char qc_name[QC_NAME_MAX];
143 };
144 typedef struct qcache qcache_t;
145 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache))
146 #endif /* defined(QCACHE) */
147
148 #define VMEM_NAME_MAX 16
149
150 /* vmem arena */
151 struct vmem {
152 CONDVAR_DECL(vm_cv);
153 LOCK_DECL(vm_lock);
154 vm_flag_t vm_flags;
155 vmem_import_t *vm_importfn;
156 vmem_release_t *vm_releasefn;
157 size_t vm_nfreetags;
158 LIST_HEAD(, vmem_btag) vm_freetags;
159 void *vm_arg;
160 struct vmem_seglist vm_seglist;
161 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
162 size_t vm_hashsize;
163 size_t vm_nbusytag;
164 struct vmem_hashlist *vm_hashlist;
165 struct vmem_hashlist vm_hash0;
166 size_t vm_quantum_mask;
167 int vm_quantum_shift;
168 size_t vm_size;
169 size_t vm_inuse;
170 char vm_name[VMEM_NAME_MAX+1];
171 LIST_ENTRY(vmem) vm_alllist;
172
173 #if defined(QCACHE)
174 /* quantum cache */
175 size_t vm_qcache_max;
176 struct pool_allocator vm_qcache_allocator;
177 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
178 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
179 #endif /* defined(QCACHE) */
180 };
181
182 #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock)
183 #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock)
184 #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock)
185 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
186 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock)
187 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock))
188
189 #if defined(_KERNEL)
190 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan)
191 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv)
192 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock)
193 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv)
194 #endif /* defined(_KERNEL) */
195
196 /* boundary tag */
197 struct vmem_btag {
198 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
199 union {
200 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
201 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
202 } bt_u;
203 #define bt_hashlist bt_u.u_hashlist
204 #define bt_freelist bt_u.u_freelist
205 vmem_addr_t bt_start;
206 vmem_size_t bt_size;
207 int bt_type;
208 };
209
210 #define BT_TYPE_SPAN 1
211 #define BT_TYPE_SPAN_STATIC 2
212 #define BT_TYPE_FREE 3
213 #define BT_TYPE_BUSY 4
214 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
215
216 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1)
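/*
 * a boundary tag describes either a whole span added to the arena
 * (BT_TYPE_SPAN for imported spans, BT_TYPE_SPAN_STATIC for spans added
 * with vmem_add) or a segment within a span (BT_TYPE_FREE, BT_TYPE_BUSY).
 * BT_END is the last address covered by a tag, inclusive.
 */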
217
218 typedef struct vmem_btag bt_t;
219
220 #if defined(_KERNEL)
221 static kmutex_t vmem_list_lock;
222 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
223 #endif /* defined(_KERNEL) */
224
225 /* ---- misc */
226
227 #define VMEM_ALIGNUP(addr, align) \
228 (-(-(addr) & -(align)))
229
230 #define VMEM_CROSS_P(addr1, addr2, boundary) \
231 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
232
233 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
234 #define SIZE2ORDER(size) ((int)ilog2(size))
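/*
 * for example, VMEM_ALIGNUP(0x123, 0x100) == 0x200 (align must be a
 * power of two), and VMEM_CROSS_P(a, b, boundary) is true iff a and b
 * fall into different boundary-sized blocks.  ORDER2SIZE(n) is 1 << n
 * and SIZE2ORDER(sz) is the index of sz's most significant set bit.
 */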
235
236 #if !defined(_KERNEL)
237 #define xmalloc(sz, flags) malloc(sz)
238 #define xfree(p, sz) free(p)
239 #define bt_alloc(vm, flags) malloc(sizeof(bt_t))
240 #define bt_free(vm, bt) free(bt)
241 #else /* defined(_KERNEL) */
242
243 #define xmalloc(sz, flags) \
244 kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
245 #define xfree(p, sz) kmem_free(p, sz);
246
247 #define BT_MINRESERVE 6
248 #define BT_MAXFREE 64
249 #define STATIC_VMEM_COUNT 5
250 #define STATIC_BT_COUNT 200
251 /* must be equal to or greater than the qcache multiplier for kmem_va_arena */
252 #define STATIC_QC_POOL_COUNT 8
253
254 static struct vmem static_vmems[STATIC_VMEM_COUNT];
255 static int static_vmem_count = STATIC_VMEM_COUNT;
256
257 static struct vmem_btag static_bts[STATIC_BT_COUNT];
258 static int static_bt_count = STATIC_BT_COUNT;
259
260 static struct pool_cache static_qc_pools[STATIC_QC_POOL_COUNT];
261 static int static_qc_pool_count = STATIC_QC_POOL_COUNT;
262
263 vmem_t *kmem_va_meta_arena;
264 vmem_t *kmem_meta_arena;
265
266 static kmutex_t vmem_btag_lock;
267 static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
268 static size_t vmem_btag_freelist_count = 0;
269 static size_t vmem_btag_count = STATIC_BT_COUNT;
270
271 /* ---- boundary tag */
272
273 #define BT_PER_PAGE (PAGE_SIZE / sizeof(bt_t))
274
275 static int bt_refill(vmem_t *vm, vm_flag_t flags);
276
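/*
 * boundary tags are themselves carved out of pages allocated from
 * kmem_meta_arena.  to keep that from recursing, a static array of tags
 * (static_bts) seeds the global freelist at bootstrap, every arena keeps
 * a small private reserve (vm_nfreetags), and the page allocation done
 * by bt_refillglobal() passes VM_POPULATING so that bt_alloc() does not
 * try to refill again while the refill is in progress.
 */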
277 static int
278 bt_refillglobal(vm_flag_t flags)
279 {
280 vmem_addr_t va;
281 bt_t *btp;
282 bt_t *bt;
283 int i;
284
285 mutex_enter(&vmem_btag_lock);
286 if (vmem_btag_freelist_count > (BT_MINRESERVE * 16)) {
287 mutex_exit(&vmem_btag_lock);
288 return 0;
289 }
290
291 if (vmem_alloc(kmem_meta_arena, PAGE_SIZE,
292 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) {
293 mutex_exit(&vmem_btag_lock);
294 return ENOMEM;
295 }
296 VMEM_EVCNT_INCR(bt_pages);
297
298 btp = (void *) va;
299 for (i = 0; i < (BT_PER_PAGE); i++) {
300 bt = btp;
301 memset(bt, 0, sizeof(*bt));
302 LIST_INSERT_HEAD(&vmem_btag_freelist, bt,
303 bt_freelist);
304 vmem_btag_freelist_count++;
305 vmem_btag_count++;
306 VMEM_EVCNT_INCR(bt_count);
307 btp++;
308 }
309 mutex_exit(&vmem_btag_lock);
310
311 bt_refill(kmem_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);
312 bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);
313 bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);
314
315 return 0;
316 }
317
318 static int
319 bt_refill(vmem_t *vm, vm_flag_t flags)
320 {
321 bt_t *bt;
322
323 bt_refillglobal(flags);
324
325 VMEM_LOCK(vm);
326 mutex_enter(&vmem_btag_lock);
327 while (!LIST_EMPTY(&vmem_btag_freelist) &&
328 vm->vm_nfreetags < (BT_MINRESERVE * 2)) {
329 bt = LIST_FIRST(&vmem_btag_freelist);
330 LIST_REMOVE(bt, bt_freelist);
331 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
332 vm->vm_nfreetags++;
333 vmem_btag_freelist_count--;
334 }
335 mutex_exit(&vmem_btag_lock);
336
337 if (vm->vm_nfreetags == 0) {
338 VMEM_UNLOCK(vm);
339 return ENOMEM;
340 }
341 VMEM_UNLOCK(vm);
342
343 return 0;
344 }
345
346 static inline bt_t *
347 bt_alloc(vmem_t *vm, vm_flag_t flags)
348 {
349 bt_t *bt;
350 again:
351 VMEM_LOCK(vm);
352 if (vm->vm_nfreetags < BT_MINRESERVE &&
353 (flags & VM_POPULATING) == 0) {
354 VMEM_UNLOCK(vm);
355 if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
356 return NULL;
357 }
358 goto again;
359 }
360 bt = LIST_FIRST(&vm->vm_freetags);
361 LIST_REMOVE(bt, bt_freelist);
362 vm->vm_nfreetags--;
363 VMEM_UNLOCK(vm);
364 VMEM_EVCNT_INCR(bt_inuse);
365
366 return bt;
367 }
368
369 static inline void
370 bt_free(vmem_t *vm, bt_t *bt)
371 {
372
373 VMEM_LOCK(vm);
374 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
375 vm->vm_nfreetags++;
376 while (vm->vm_nfreetags > BT_MAXFREE) {
377 bt = LIST_FIRST(&vm->vm_freetags);
378 LIST_REMOVE(bt, bt_freelist);
379 vm->vm_nfreetags--;
380 mutex_enter(&vmem_btag_lock);
381 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
382 vmem_btag_freelist_count++;
383 mutex_exit(&vmem_btag_lock);
384 }
385 VMEM_UNLOCK(vm);
386 VMEM_EVCNT_DECR(bt_inuse);
387 }
388
389 #endif /* defined(_KERNEL) */
390
391 /*
392 * freelist[0] ... [1, 1]
393 * freelist[1] ... [2, 3]
394 * freelist[2] ... [4, 7]
395 * freelist[3] ... [8, 15]
396 * :
397 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
398 * :
399 */
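/*
 * for example (hypothetical numbers): with a 4 KiB quantum
 * (vm_quantum_shift == 12), a free 20 KiB segment has a qsize of 5 and
 * is kept on freelist[2], which holds segments of 4..7 quanta.
 */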
400
401 static struct vmem_freelist *
402 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
403 {
404 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
405 const int idx = SIZE2ORDER(qsize);
406
407 KASSERT(size != 0 && qsize != 0);
408 KASSERT((size & vm->vm_quantum_mask) == 0);
409 KASSERT(idx >= 0);
410 KASSERT(idx < VMEM_MAXORDER);
411
412 return &vm->vm_freelist[idx];
413 }
414
415 /*
416 * bt_freehead_toalloc: return the freelist for the given size and allocation
417 * strategy.
418 *
419 * for VM_INSTANTFIT, return the list in which any block is guaranteed to be
420 * large enough for the requested size. otherwise, return the lowest list
421 * which may contain blocks large enough for the requested size.
422 */
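/*
 * continuing the example above: a request of 5 quanta has order 2.
 * VM_INSTANTFIT bumps it to freelist[3] (8..15 quanta), where any block
 * is guaranteed to fit; VM_BESTFIT starts at freelist[2], which may also
 * hold blocks of as few as 4 quanta, so the caller must check bt_size.
 */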
423
424 static struct vmem_freelist *
425 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
426 {
427 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
428 int idx = SIZE2ORDER(qsize);
429
430 KASSERT(size != 0 && qsize != 0);
431 KASSERT((size & vm->vm_quantum_mask) == 0);
432
433 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
434 idx++;
435 /* check too large request? */
436 }
437 KASSERT(idx >= 0);
438 KASSERT(idx < VMEM_MAXORDER);
439
440 return &vm->vm_freelist[idx];
441 }
442
443 /* ---- boundary tag hash */
444
445 static struct vmem_hashlist *
446 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
447 {
448 struct vmem_hashlist *list;
449 unsigned int hash;
450
451 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
452 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
453
454 return list;
455 }
456
457 static bt_t *
458 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
459 {
460 struct vmem_hashlist *list;
461 bt_t *bt;
462
463 list = bt_hashhead(vm, addr);
464 LIST_FOREACH(bt, list, bt_hashlist) {
465 if (bt->bt_start == addr) {
466 break;
467 }
468 }
469
470 return bt;
471 }
472
473 static void
474 bt_rembusy(vmem_t *vm, bt_t *bt)
475 {
476
477 KASSERT(vm->vm_nbusytag > 0);
478 vm->vm_inuse -= bt->bt_size;
479 vm->vm_nbusytag--;
480 LIST_REMOVE(bt, bt_hashlist);
481 }
482
483 static void
484 bt_insbusy(vmem_t *vm, bt_t *bt)
485 {
486 struct vmem_hashlist *list;
487
488 KASSERT(bt->bt_type == BT_TYPE_BUSY);
489
490 list = bt_hashhead(vm, bt->bt_start);
491 LIST_INSERT_HEAD(list, bt, bt_hashlist);
492 vm->vm_nbusytag++;
493 vm->vm_inuse += bt->bt_size;
494 }
495
496 /* ---- boundary tag list */
497
498 static void
499 bt_remseg(vmem_t *vm, bt_t *bt)
500 {
501
502 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
503 }
504
505 static void
506 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
507 {
508
509 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
510 }
511
512 static void
513 bt_insseg_tail(vmem_t *vm, bt_t *bt)
514 {
515
516 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
517 }
518
519 static void
520 bt_remfree(vmem_t *vm, bt_t *bt)
521 {
522
523 KASSERT(bt->bt_type == BT_TYPE_FREE);
524
525 LIST_REMOVE(bt, bt_freelist);
526 }
527
528 static void
529 bt_insfree(vmem_t *vm, bt_t *bt)
530 {
531 struct vmem_freelist *list;
532
533 list = bt_freehead_tofree(vm, bt->bt_size);
534 LIST_INSERT_HEAD(list, bt, bt_freelist);
535 }
536
537 /* ---- vmem internal functions */
538
539 #if defined(QCACHE)
540 static inline vm_flag_t
541 prf_to_vmf(int prflags)
542 {
543 vm_flag_t vmflags;
544
545 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
546 if ((prflags & PR_WAITOK) != 0) {
547 vmflags = VM_SLEEP;
548 } else {
549 vmflags = VM_NOSLEEP;
550 }
551 return vmflags;
552 }
553
554 static inline int
555 vmf_to_prf(vm_flag_t vmflags)
556 {
557 int prflags;
558
559 if ((vmflags & VM_SLEEP) != 0) {
560 prflags = PR_WAITOK;
561 } else {
562 prflags = PR_NOWAIT;
563 }
564 return prflags;
565 }
566
567 static size_t
568 qc_poolpage_size(size_t qcache_max)
569 {
570 int i;
571
572 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
573 /* nothing */
574 }
575 return ORDER2SIZE(i);
576 }
577
578 static void *
579 qc_poolpage_alloc(struct pool *pool, int prflags)
580 {
581 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
582 vmem_t *vm = qc->qc_vmem;
583 vmem_addr_t addr;
584
585 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
586 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0)
587 return NULL;
588 return (void *)addr;
589 }
590
591 static void
592 qc_poolpage_free(struct pool *pool, void *addr)
593 {
594 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
595 vmem_t *vm = qc->qc_vmem;
596
597 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
598 }
599
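/*
 * quantum cache: allocations of up to vm_qcache_max bytes are served
 * from per-size pool caches sitting in front of the arena, one cache
 * for each multiple of the quantum.  the pool pages backing those
 * caches are taken from the arena itself through qc_poolpage_alloc()
 * and qc_poolpage_free().
 */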
600 static void
601 qc_init(vmem_t *vm, size_t qcache_max, int ipl)
602 {
603 qcache_t *prevqc;
604 struct pool_allocator *pa;
605 int qcache_idx_max;
606 int i;
607
608 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
609 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
610 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
611 }
612 vm->vm_qcache_max = qcache_max;
613 pa = &vm->vm_qcache_allocator;
614 memset(pa, 0, sizeof(*pa));
615 pa->pa_alloc = qc_poolpage_alloc;
616 pa->pa_free = qc_poolpage_free;
617 pa->pa_pagesz = qc_poolpage_size(qcache_max);
618
619 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
620 prevqc = NULL;
621 for (i = qcache_idx_max; i > 0; i--) {
622 qcache_t *qc = &vm->vm_qcache_store[i - 1];
623 size_t size = i << vm->vm_quantum_shift;
624 pool_cache_t pc;
625
626 qc->qc_vmem = vm;
627 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
628 vm->vm_name, size);
629
630 if (vm->vm_flags & VM_BOOTSTRAP) {
631 KASSERT(static_qc_pool_count > 0);
632 pc = &static_qc_pools[--static_qc_pool_count];
633 pool_cache_bootstrap(pc, size,
634 ORDER2SIZE(vm->vm_quantum_shift), 0,
635 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */,
636 qc->qc_name, pa, ipl, NULL, NULL, NULL);
637 } else {
638 pc = pool_cache_init(size,
639 ORDER2SIZE(vm->vm_quantum_shift), 0,
640 PR_NOALIGN | PR_NOTOUCH /* XXX */,
641 qc->qc_name, pa, ipl, NULL, NULL, NULL);
642 }
643 qc->qc_cache = pc;
644 KASSERT(qc->qc_cache != NULL); /* XXX */
645 if (prevqc != NULL &&
646 qc->qc_cache->pc_pool.pr_itemsperpage ==
647 prevqc->qc_cache->pc_pool.pr_itemsperpage) {
648 if (vm->vm_flags & VM_BOOTSTRAP) {
649 pool_cache_bootstrap_destroy(pc);
650 //static_qc_pool_count++;
651 } else {
652 pool_cache_destroy(qc->qc_cache);
653 }
654 vm->vm_qcache[i - 1] = prevqc;
655 continue;
656 }
657 qc->qc_cache->pc_pool.pr_qcache = qc;
658 vm->vm_qcache[i - 1] = qc;
659 prevqc = qc;
660 }
661 }
662
663 static void
664 qc_destroy(vmem_t *vm)
665 {
666 const qcache_t *prevqc;
667 int i;
668 int qcache_idx_max;
669
670 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
671 prevqc = NULL;
672 for (i = 0; i < qcache_idx_max; i++) {
673 qcache_t *qc = vm->vm_qcache[i];
674
675 if (prevqc == qc) {
676 continue;
677 }
678 if (vm->vm_flags & VM_BOOTSTRAP) {
679 pool_cache_bootstrap_destroy(qc->qc_cache);
680 } else {
681 pool_cache_destroy(qc->qc_cache);
682 }
683 prevqc = qc;
684 }
685 }
686 #endif
687
688 #if defined(_KERNEL)
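/*
 * vmem_bootstrap() seeds the global boundary-tag freelist from the
 * static tag array.  vmem_init() then creates the metadata arenas:
 * kmem_va_meta_arena imports address space from the arena passed in
 * (presumably the kernel kmem arena), and kmem_meta_arena maps that
 * space with uvm_km_kmem_alloc() to back the boundary-tag pages
 * allocated by bt_refillglobal().
 */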
689 void
690 vmem_bootstrap(void)
691 {
692
693 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
694 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);
695
696 while (static_bt_count-- > 0) {
697 bt_t *bt = &static_bts[static_bt_count];
698 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
699 VMEM_EVCNT_INCR(bt_count);
700 vmem_btag_freelist_count++;
701 }
702 }
703
704 void
705 vmem_init(vmem_t *vm)
706 {
707
708 kmem_va_meta_arena = vmem_create("vmem-va", 0, 0, PAGE_SIZE,
709 vmem_alloc, vmem_free, vm,
710 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
711 IPL_VM);
712
713 kmem_meta_arena = vmem_create("vmem-meta", 0, 0, PAGE_SIZE,
714 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
715 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
716 }
717 #endif /* defined(_KERNEL) */
718
719 static int
720 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
721 int spanbttype)
722 {
723 bt_t *btspan;
724 bt_t *btfree;
725
726 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
727 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
728 KASSERT(spanbttype == BT_TYPE_SPAN ||
729 spanbttype == BT_TYPE_SPAN_STATIC);
730
731 btspan = bt_alloc(vm, flags);
732 if (btspan == NULL) {
733 return ENOMEM;
734 }
735 btfree = bt_alloc(vm, flags);
736 if (btfree == NULL) {
737 bt_free(vm, btspan);
738 return ENOMEM;
739 }
740
741 btspan->bt_type = spanbttype;
742 btspan->bt_start = addr;
743 btspan->bt_size = size;
744
745 btfree->bt_type = BT_TYPE_FREE;
746 btfree->bt_start = addr;
747 btfree->bt_size = size;
748
749 VMEM_LOCK(vm);
750 bt_insseg_tail(vm, btspan);
751 bt_insseg(vm, btfree, btspan);
752 bt_insfree(vm, btfree);
753 vm->vm_size += size;
754 VMEM_UNLOCK(vm);
755
756 return 0;
757 }
758
759 static void
760 vmem_destroy1(vmem_t *vm)
761 {
762
763 #if defined(QCACHE)
764 qc_destroy(vm);
765 #endif /* defined(QCACHE) */
766 if (vm->vm_hashlist != NULL) {
767 int i;
768
769 for (i = 0; i < vm->vm_hashsize; i++) {
770 bt_t *bt;
771
772 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
773 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
774 bt_free(vm, bt);
775 }
776 }
777 if (vm->vm_hashlist != &vm->vm_hash0) {
778 xfree(vm->vm_hashlist,
779 sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
780 }
781 }
782
783 while (vm->vm_nfreetags > 0) {
784 bt_t *bt = LIST_FIRST(&vm->vm_freetags);
785 LIST_REMOVE(bt, bt_freelist);
786 vm->vm_nfreetags--;
787 mutex_enter(&vmem_btag_lock);
788 #if defined (_KERNEL)
789 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
790 vmem_btag_freelist_count++;
791 #endif /* defined(_KERNEL) */
792 mutex_exit(&vmem_btag_lock);
793 }
794
795 VMEM_LOCK_DESTROY(vm);
796 xfree(vm, sizeof(*vm));
797 }
798
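/*
 * vmem_import: grow the arena by asking vm_importfn for another span.
 * VM_LARGEIMPORT arenas import eight times the requested size;
 * VM_XIMPORT arenas call the import function with the vmem_ximport_t
 * signature, which lets it report back the size it actually provided.
 */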
799 static int
800 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
801 {
802 vmem_addr_t addr;
803 int rc;
804
805 if (vm->vm_importfn == NULL) {
806 return EINVAL;
807 }
808
809 if (vm->vm_flags & VM_LARGEIMPORT) {
810 size *= 8;
811 }
812
813 if (vm->vm_flags & VM_XIMPORT) {
814 rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size,
815 &size, flags, &addr);
816 } else {
817 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
818 }
819 if (rc) {
820 return ENOMEM;
821 }
822
823 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) {
824 (*vm->vm_releasefn)(vm->vm_arg, addr, size);
825 return ENOMEM;
826 }
827
828 return 0;
829 }
830
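/*
 * vmem_rehash: resize the busy-tag hash table to newhashsize buckets
 * and move every busy tag into the new table.  the arena lock is only
 * tried, not waited for, so the caller must be prepared for EBUSY.
 */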
831 static int
832 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
833 {
834 bt_t *bt;
835 int i;
836 struct vmem_hashlist *newhashlist;
837 struct vmem_hashlist *oldhashlist;
838 size_t oldhashsize;
839
840 KASSERT(newhashsize > 0);
841
842 newhashlist =
843 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
844 if (newhashlist == NULL) {
845 return ENOMEM;
846 }
847 for (i = 0; i < newhashsize; i++) {
848 LIST_INIT(&newhashlist[i]);
849 }
850
851 if (!VMEM_TRYLOCK(vm)) {
852 xfree(newhashlist,
853 sizeof(struct vmem_hashlist *) * newhashsize);
854 return EBUSY;
855 }
856 oldhashlist = vm->vm_hashlist;
857 oldhashsize = vm->vm_hashsize;
858 vm->vm_hashlist = newhashlist;
859 vm->vm_hashsize = newhashsize;
860 if (oldhashlist == NULL) {
861 VMEM_UNLOCK(vm);
862 return 0;
863 }
864 for (i = 0; i < oldhashsize; i++) {
865 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
866 bt_rembusy(vm, bt); /* XXX */
867 bt_insbusy(vm, bt);
868 }
869 }
870 VMEM_UNLOCK(vm);
871
872 if (oldhashlist != &vm->vm_hash0) {
873 xfree(oldhashlist,
874 sizeof(struct vmem_hashlist *) * oldhashsize);
875 }
876
877 return 0;
878 }
879
880 /*
881 * vmem_fit: check if a bt can satisfy the given restrictions.
882 *
883 * it's the caller's responsibility to ensure the region is big enough
884 * before calling us.
885 */
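/*
 * for example (hypothetical values): align 0x1000 with phase 0x80 asks
 * for an address of the form N * 0x1000 + 0x80, and nocross 0x10000
 * additionally requires that the allocation not straddle a 64 KiB
 * boundary.
 */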
886
887 static int
888 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
889 vmem_size_t phase, vmem_size_t nocross,
890 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
891 {
892 vmem_addr_t start;
893 vmem_addr_t end;
894
895 KASSERT(size > 0);
896 KASSERT(bt->bt_size >= size); /* caller's responsibility */
897
898 /*
899 * XXX assumption: vmem_addr_t and vmem_size_t are
900 * unsigned integers of the same size.
901 */
902
903 start = bt->bt_start;
904 if (start < minaddr) {
905 start = minaddr;
906 }
907 end = BT_END(bt);
908 if (end > maxaddr) {
909 end = maxaddr;
910 }
911 if (start > end) {
912 return ENOMEM;
913 }
914
915 start = VMEM_ALIGNUP(start - phase, align) + phase;
916 if (start < bt->bt_start) {
917 start += align;
918 }
919 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
920 KASSERT(align < nocross);
921 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
922 }
923 if (start <= end && end - start >= size - 1) {
924 KASSERT((start & (align - 1)) == phase);
925 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
926 KASSERT(minaddr <= start);
927 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
928 KASSERT(bt->bt_start <= start);
929 KASSERT(BT_END(bt) - start >= size - 1);
930 *addrp = start;
931 return 0;
932 }
933 return ENOMEM;
934 }
935
936
937 /*
938 * vmem_create_internal: creates a vmem arena.
939 */
940
941 static vmem_t *
942 vmem_create_internal(const char *name, vmem_addr_t base, vmem_size_t size,
943 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
944 void *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
945 {
946 vmem_t *vm = NULL;
947 int i;
948
949 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
950 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
951 KASSERT(quantum > 0);
952
953 if (flags & VM_BOOTSTRAP) {
954 #if defined(_KERNEL)
955 KASSERT(static_vmem_count > 0);
956 vm = &static_vmems[--static_vmem_count];
957 #endif /* defined(_KERNEL) */
958 } else {
959 vm = xmalloc(sizeof(*vm), flags);
960 }
961 if (vm == NULL) {
962 return NULL;
963 }
964
965 VMEM_CONDVAR_INIT(vm, "vmem");
966 VMEM_LOCK_INIT(vm, ipl);
967 vm->vm_flags = flags;
968 vm->vm_nfreetags = 0;
969 LIST_INIT(&vm->vm_freetags);
970 strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
971 vm->vm_quantum_mask = quantum - 1;
972 vm->vm_quantum_shift = SIZE2ORDER(quantum);
973 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
974 vm->vm_importfn = importfn;
975 vm->vm_releasefn = releasefn;
976 vm->vm_arg = arg;
977 vm->vm_nbusytag = 0;
978 vm->vm_size = 0;
979 vm->vm_inuse = 0;
980 #if defined(QCACHE)
981 qc_init(vm, qcache_max, ipl);
982 #endif /* defined(QCACHE) */
983
984 CIRCLEQ_INIT(&vm->vm_seglist);
985 for (i = 0; i < VMEM_MAXORDER; i++) {
986 LIST_INIT(&vm->vm_freelist[i]);
987 }
988 vm->vm_hashlist = NULL;
989 if (flags & VM_BOOTSTRAP) {
990 vm->vm_hashsize = 1;
991 vm->vm_hashlist = &vm->vm_hash0;
992 } else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
993 vmem_destroy1(vm);
994 return NULL;
995 }
996
997 if (size != 0) {
998 if (vmem_add(vm, base, size, flags) != 0) {
999 vmem_destroy1(vm);
1000 return NULL;
1001 }
1002 }
1003
1004 #if defined(_KERNEL)
1005 if (flags & VM_BOOTSTRAP) {
1006 bt_refill(vm, VM_NOSLEEP);
1007 }
1008
1009 mutex_enter(&vmem_list_lock);
1010 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
1011 mutex_exit(&vmem_list_lock);
1012 #endif /* defined(_KERNEL) */
1013
1014 return vm;
1015 }
1016
1017
1018 /* ---- vmem API */
1019
1020 /*
1021 * vmem_create: create an arena.
1022 *
1023 * => must not be called from interrupt context.
1024 */
1025
1026 vmem_t *
1027 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
1028 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
1029 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
1030 {
1031
1032 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1033 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1034 KASSERT((flags & (VM_XIMPORT)) == 0);
1035
1036 return vmem_create_internal(name, base, size, quantum,
1037 importfn, releasefn, source, qcache_max, flags, ipl);
1038 }
1039
1040 /*
1041 * vmem_xcreate: create an arena that takes an alternative import function.
1042 *
1043 * => must not be called from interrupt context.
1044 */
1045
1046 vmem_t *
1047 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
1048 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
1049 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
1050 {
1051
1052 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1053 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1054 KASSERT((flags & (VM_XIMPORT)) == 0);
1055
1056 return vmem_create_internal(name, base, size, quantum,
1057 (vmem_import_t *)importfn, releasefn, source,
1058 qcache_max, flags | VM_XIMPORT, ipl);
1059 }
1060
1061 void
1062 vmem_destroy(vmem_t *vm)
1063 {
1064
1065 #if defined(_KERNEL)
1066 mutex_enter(&vmem_list_lock);
1067 LIST_REMOVE(vm, vm_alllist);
1068 mutex_exit(&vmem_list_lock);
1069 #endif /* defined(_KERNEL) */
1070
1071 vmem_destroy1(vm);
1072 }
1073
1074 vmem_size_t
1075 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
1076 {
1077
1078 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
1079 }
1080
1081 /*
1082 * vmem_alloc:
1083 *
1084 * => caller must ensure appropriate spl,
1085 * if the arena can be accessed from interrupt context.
1086 */
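/*
 * a minimal usage sketch (names are illustrative only):
 *
 *	vmem_addr_t va;
 *	if (vmem_alloc(arena, len, VM_SLEEP | VM_INSTANTFIT, &va) != 0)
 *		return ENOMEM;
 *	...use [va, va + len)...
 *	vmem_free(arena, va, len);
 */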
1087
1088 int
1089 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp)
1090 {
1091 const vm_flag_t strat __unused = flags & VM_FITMASK;
1092
1093 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1094 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1095
1096 KASSERT(size > 0);
1097 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1098 if ((flags & VM_SLEEP) != 0) {
1099 ASSERT_SLEEPABLE();
1100 }
1101
1102 #if defined(QCACHE)
1103 if (size <= vm->vm_qcache_max) {
1104 void *p;
1105 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
1106 qcache_t *qc = vm->vm_qcache[qidx - 1];
1107
1108 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags));
1109 if (addrp != NULL)
1110 *addrp = (vmem_addr_t)p;
1111 return (p == NULL) ? ENOMEM : 0;
1112 }
1113 #endif /* defined(QCACHE) */
1114
1115 return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
1116 flags, addrp);
1117 }
1118
1119 int
1120 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
1121 const vmem_size_t phase, const vmem_size_t nocross,
1122 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags,
1123 vmem_addr_t *addrp)
1124 {
1125 struct vmem_freelist *list;
1126 struct vmem_freelist *first;
1127 struct vmem_freelist *end;
1128 bt_t *bt;
1129 bt_t *btnew;
1130 bt_t *btnew2;
1131 const vmem_size_t size = vmem_roundup_size(vm, size0);
1132 vm_flag_t strat = flags & VM_FITMASK;
1133 vmem_addr_t start;
1134 int rc;
1135
1136 KASSERT(size0 > 0);
1137 KASSERT(size > 0);
1138 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1139 if ((flags & VM_SLEEP) != 0) {
1140 ASSERT_SLEEPABLE();
1141 }
1142 KASSERT((align & vm->vm_quantum_mask) == 0);
1143 KASSERT((align & (align - 1)) == 0);
1144 KASSERT((phase & vm->vm_quantum_mask) == 0);
1145 KASSERT((nocross & vm->vm_quantum_mask) == 0);
1146 KASSERT((nocross & (nocross - 1)) == 0);
1147 KASSERT((align == 0 && phase == 0) || phase < align);
1148 KASSERT(nocross == 0 || nocross >= size);
1149 KASSERT(minaddr <= maxaddr);
1150 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
1151
1152 if (align == 0) {
1153 align = vm->vm_quantum_mask + 1;
1154 }
1155
1156 /*
1157 * allocate boundary tags before acquiring the vmem lock.
1158 */
1159 btnew = bt_alloc(vm, flags);
1160 if (btnew == NULL) {
1161 return ENOMEM;
1162 }
1163 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
1164 if (btnew2 == NULL) {
1165 bt_free(vm, btnew);
1166 return ENOMEM;
1167 }
1168
1169 /*
1170 * choose a free block from which we allocate.
1171 */
1172 retry_strat:
1173 first = bt_freehead_toalloc(vm, size, strat);
1174 end = &vm->vm_freelist[VMEM_MAXORDER];
1175 retry:
1176 bt = NULL;
1177 VMEM_LOCK(vm);
1178 vmem_check(vm);
1179 if (strat == VM_INSTANTFIT) {
1180 /*
1181 * just choose the first block which satisfies our restrictions.
1182 *
1183 * note that we don't need to check the size of the blocks
1184 * because any blocks found on these lists should be larger than
1185 * the given size.
1186 */
1187 for (list = first; list < end; list++) {
1188 bt = LIST_FIRST(list);
1189 if (bt != NULL) {
1190 rc = vmem_fit(bt, size, align, phase,
1191 nocross, minaddr, maxaddr, &start);
1192 if (rc == 0) {
1193 goto gotit;
1194 }
1195 /*
1196 * don't bother to follow the bt_freelist link
1197 * here. the list can be very long and we are
1198 * told to run fast. blocks from the later free
1199 * lists are larger and have better chances to
1200 * satisfy our restrictions.
1201 */
1202 }
1203 }
1204 } else { /* VM_BESTFIT */
1205 /*
1206 * we assume that, for space efficiency, it's better to
1207 * allocate from a smaller block. thus we will start searching
1208 * from a lower-order list than VM_INSTANTFIT does.
1209 * however, don't bother to find the smallest block in a free
1210 * list because the list can be very long. we can revisit it
1211 * if/when it turns out to be a problem.
1212 *
1213 * note that the 'first' list can contain blocks smaller than
1214 * the requested size. thus we need to check bt_size.
1215 */
1216 for (list = first; list < end; list++) {
1217 LIST_FOREACH(bt, list, bt_freelist) {
1218 if (bt->bt_size >= size) {
1219 rc = vmem_fit(bt, size, align, phase,
1220 nocross, minaddr, maxaddr, &start);
1221 if (rc == 0) {
1222 goto gotit;
1223 }
1224 }
1225 }
1226 }
1227 }
1228 VMEM_UNLOCK(vm);
1229 #if 1
1230 if (strat == VM_INSTANTFIT) {
1231 strat = VM_BESTFIT;
1232 goto retry_strat;
1233 }
1234 #endif
1235 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) {
1236
1237 /*
1238 * XXX should try to import a region large enough to
1239 * satisfy restrictions?
1240 */
1241
1242 goto fail;
1243 }
1244 /* XXX eeek, minaddr & maxaddr not respected */
1245 if (vmem_import(vm, size, flags) == 0) {
1246 goto retry;
1247 }
1248 /* XXX */
1249
1250 if ((flags & VM_SLEEP) != 0) {
1251 #if defined(_KERNEL) && !defined(_RUMPKERNEL)
1252 mutex_spin_enter(&uvm_fpageqlock);
1253 uvm_kick_pdaemon();
1254 mutex_spin_exit(&uvm_fpageqlock);
1255 #endif
1256 VMEM_LOCK(vm);
1257 VMEM_CONDVAR_WAIT(vm);
1258 VMEM_UNLOCK(vm);
1259 goto retry;
1260 }
1261 fail:
1262 bt_free(vm, btnew);
1263 bt_free(vm, btnew2);
1264 return ENOMEM;
1265
1266 gotit:
1267 KASSERT(bt->bt_type == BT_TYPE_FREE);
1268 KASSERT(bt->bt_size >= size);
1269 bt_remfree(vm, bt);
1270 vmem_check(vm);
1271 if (bt->bt_start != start) {
1272 btnew2->bt_type = BT_TYPE_FREE;
1273 btnew2->bt_start = bt->bt_start;
1274 btnew2->bt_size = start - bt->bt_start;
1275 bt->bt_start = start;
1276 bt->bt_size -= btnew2->bt_size;
1277 bt_insfree(vm, btnew2);
1278 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
1279 btnew2 = NULL;
1280 vmem_check(vm);
1281 }
1282 KASSERT(bt->bt_start == start);
1283 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
1284 /* split */
1285 btnew->bt_type = BT_TYPE_BUSY;
1286 btnew->bt_start = bt->bt_start;
1287 btnew->bt_size = size;
1288 bt->bt_start = bt->bt_start + size;
1289 bt->bt_size -= size;
1290 bt_insfree(vm, bt);
1291 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
1292 bt_insbusy(vm, btnew);
1293 vmem_check(vm);
1294 VMEM_UNLOCK(vm);
1295 } else {
1296 bt->bt_type = BT_TYPE_BUSY;
1297 bt_insbusy(vm, bt);
1298 vmem_check(vm);
1299 VMEM_UNLOCK(vm);
1300 bt_free(vm, btnew);
1301 btnew = bt;
1302 }
1303 if (btnew2 != NULL) {
1304 bt_free(vm, btnew2);
1305 }
1306 KASSERT(btnew->bt_size >= size);
1307 btnew->bt_type = BT_TYPE_BUSY;
1308
1309 if (addrp != NULL)
1310 *addrp = btnew->bt_start;
1311 return 0;
1312 }
1313
1314 /*
1315 * vmem_free:
1316 *
1317 * => caller must ensure appropriate spl,
1318 * if the arena can be accessed from interrupt context.
1319 */
1320
1321 void
1322 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1323 {
1324
1325 KASSERT(size > 0);
1326
1327 #if defined(QCACHE)
1328 if (size <= vm->vm_qcache_max) {
1329 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
1330 qcache_t *qc = vm->vm_qcache[qidx - 1];
1331
1332 pool_cache_put(qc->qc_cache, (void *)addr);
1333 return;
1334 }
1335 #endif /* defined(QCACHE) */
1336
1337 vmem_xfree(vm, addr, size);
1338 }
1339
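/*
 * vmem_xfree: return a segment to the arena, coalescing it with any
 * free neighbours.  if the merge leaves an imported span entirely free
 * and the arena has a release function, the whole span is handed back
 * through vm_releasefn.
 */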
1340 void
1341 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1342 {
1343 bt_t *bt;
1344 bt_t *t;
1345 LIST_HEAD(, vmem_btag) tofree;
1346
1347 LIST_INIT(&tofree);
1348
1349 KASSERT(size > 0);
1350
1351 VMEM_LOCK(vm);
1352
1353 bt = bt_lookupbusy(vm, addr);
1354 KASSERT(bt != NULL);
1355 KASSERT(bt->bt_start == addr);
1356 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
1357 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1358 KASSERT(bt->bt_type == BT_TYPE_BUSY);
1359 bt_rembusy(vm, bt);
1360 bt->bt_type = BT_TYPE_FREE;
1361
1362 /* coalesce */
1363 t = CIRCLEQ_NEXT(bt, bt_seglist);
1364 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1365 KASSERT(BT_END(bt) < t->bt_start); /* YYY */
1366 bt_remfree(vm, t);
1367 bt_remseg(vm, t);
1368 bt->bt_size += t->bt_size;
1369 LIST_INSERT_HEAD(&tofree, t, bt_freelist);
1370 }
1371 t = CIRCLEQ_PREV(bt, bt_seglist);
1372 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1373 KASSERT(BT_END(t) < bt->bt_start); /* YYY */
1374 bt_remfree(vm, t);
1375 bt_remseg(vm, t);
1376 bt->bt_size += t->bt_size;
1377 bt->bt_start = t->bt_start;
1378 LIST_INSERT_HEAD(&tofree, t, bt_freelist);
1379 }
1380
1381 t = CIRCLEQ_PREV(bt, bt_seglist);
1382 KASSERT(t != NULL);
1383 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1384 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1385 t->bt_size == bt->bt_size) {
1386 vmem_addr_t spanaddr;
1387 vmem_size_t spansize;
1388
1389 KASSERT(t->bt_start == bt->bt_start);
1390 spanaddr = bt->bt_start;
1391 spansize = bt->bt_size;
1392 bt_remseg(vm, bt);
1393 LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
1394 bt_remseg(vm, t);
1395 LIST_INSERT_HEAD(&tofree, t, bt_freelist);
1396 vm->vm_size -= spansize;
1397 VMEM_CONDVAR_BROADCAST(vm);
1398 VMEM_UNLOCK(vm);
1399 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
1400 } else {
1401 bt_insfree(vm, bt);
1402 VMEM_CONDVAR_BROADCAST(vm);
1403 VMEM_UNLOCK(vm);
1404 }
1405
1406 while (!LIST_EMPTY(&tofree)) {
1407 t = LIST_FIRST(&tofree);
1408 LIST_REMOVE(t, bt_freelist);
1409 bt_free(vm, t);
1410 }
1411 }
1412
1413 /*
1414 * vmem_add:
1415 *
1416 * => caller must ensure appropriate spl,
1417 * if the arena can be accessed from interrupt context.
1418 */
1419
1420 int
1421 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1422 {
1423
1424 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1425 }
1426
1427 /*
1428 * vmem_size: information about the arena's size
1429 *
1430 * => return free/allocated size in arena
1431 */
1432 vmem_size_t
1433 vmem_size(vmem_t *vm, int typemask)
1434 {
1435
1436 switch (typemask) {
1437 case VMEM_ALLOC:
1438 return vm->vm_inuse;
1439 case VMEM_FREE:
1440 return vm->vm_size - vm->vm_inuse;
1441 case VMEM_FREE|VMEM_ALLOC:
1442 return vm->vm_size;
1443 default:
1444 panic("vmem_size");
1445 }
1446 }
1447
1448 /* ---- rehash */
1449
1450 #if defined(_KERNEL)
1451 static struct callout vmem_rehash_ch;
1452 static int vmem_rehash_interval;
1453 static struct workqueue *vmem_rehash_wq;
1454 static struct work vmem_rehash_wk;
1455
1456 static void
1457 vmem_rehash_all(struct work *wk, void *dummy)
1458 {
1459 vmem_t *vm;
1460
1461 KASSERT(wk == &vmem_rehash_wk);
1462 mutex_enter(&vmem_list_lock);
1463 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1464 size_t desired;
1465 size_t current;
1466
1467 if (!VMEM_TRYLOCK(vm)) {
1468 continue;
1469 }
1470 desired = vm->vm_nbusytag;
1471 current = vm->vm_hashsize;
1472 VMEM_UNLOCK(vm);
1473
1474 if (desired > VMEM_HASHSIZE_MAX) {
1475 desired = VMEM_HASHSIZE_MAX;
1476 } else if (desired < VMEM_HASHSIZE_MIN) {
1477 desired = VMEM_HASHSIZE_MIN;
1478 }
1479 if (desired > current * 2 || desired * 2 < current) {
1480 vmem_rehash(vm, desired, VM_NOSLEEP);
1481 }
1482 }
1483 mutex_exit(&vmem_list_lock);
1484
1485 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1486 }
1487
1488 static void
1489 vmem_rehash_all_kick(void *dummy)
1490 {
1491
1492 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
1493 }
1494
1495 void
1496 vmem_rehash_start(void)
1497 {
1498 int error;
1499
1500 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
1501 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
1502 if (error) {
1503 panic("%s: workqueue_create %d\n", __func__, error);
1504 }
1505 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
1506 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);
1507
1508 vmem_rehash_interval = hz * 10;
1509 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1510 }
1511 #endif /* defined(_KERNEL) */
1512
1513 /* ---- debug */
1514
1515 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)
1516
1517 static void bt_dump(const bt_t *, void (*)(const char *, ...));
1518
1519 static const char *
1520 bt_type_string(int type)
1521 {
1522 static const char * const table[] = {
1523 [BT_TYPE_BUSY] = "busy",
1524 [BT_TYPE_FREE] = "free",
1525 [BT_TYPE_SPAN] = "span",
1526 [BT_TYPE_SPAN_STATIC] = "static span",
1527 };
1528
1529 if (type >= __arraycount(table)) {
1530 return "BOGUS";
1531 }
1532 return table[type];
1533 }
1534
1535 static void
1536 bt_dump(const bt_t *bt, void (*pr)(const char *, ...))
1537 {
1538
1539 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
1540 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1541 bt->bt_type, bt_type_string(bt->bt_type));
1542 }
1543
1544 static void
1545 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...))
1546 {
1547 const bt_t *bt;
1548 int i;
1549
1550 (*pr)("vmem %p '%s'\n", vm, vm->vm_name);
1551 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1552 bt_dump(bt, pr);
1553 }
1554
1555 for (i = 0; i < VMEM_MAXORDER; i++) {
1556 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1557
1558 if (LIST_EMPTY(fl)) {
1559 continue;
1560 }
1561
1562 (*pr)("freelist[%d]\n", i);
1563 LIST_FOREACH(bt, fl, bt_freelist) {
1564 bt_dump(bt, pr);
1565 }
1566 }
1567 }
1568
1569 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */
1570
1571 #if defined(DDB)
1572 static bt_t *
1573 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
1574 {
1575 bt_t *bt;
1576
1577 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1578 if (BT_ISSPAN_P(bt)) {
1579 continue;
1580 }
1581 if (bt->bt_start <= addr && addr <= BT_END(bt)) {
1582 return bt;
1583 }
1584 }
1585
1586 return NULL;
1587 }
1588
1589 void
1590 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
1591 {
1592 vmem_t *vm;
1593
1594 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1595 bt_t *bt;
1596
1597 bt = vmem_whatis_lookup(vm, addr);
1598 if (bt == NULL) {
1599 continue;
1600 }
1601 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
1602 (void *)addr, (void *)bt->bt_start,
1603 (size_t)(addr - bt->bt_start), vm->vm_name,
1604 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
1605 }
1606 }
1607
1608 void
1609 vmem_printall(const char *modif, void (*pr)(const char *, ...))
1610 {
1611 const vmem_t *vm;
1612
1613 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1614 vmem_dump(vm, pr);
1615 }
1616 }
1617
1618 void
1619 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))
1620 {
1621 const vmem_t *vm = (const void *)addr;
1622
1623 vmem_dump(vm, pr);
1624 }
1625 #endif /* defined(DDB) */
1626
1627 #if defined(_KERNEL)
1628 #define vmem_printf printf
1629 #else
1630 #include <stdio.h>
1631 #include <stdarg.h>
1632
1633 static void
1634 vmem_printf(const char *fmt, ...)
1635 {
1636 va_list ap;
1637 va_start(ap, fmt);
1638 vprintf(fmt, ap);
1639 va_end(ap);
1640 }
1641 #endif
1642
1643 #if defined(VMEM_SANITY)
1644
1645 static bool
1646 vmem_check_sanity(vmem_t *vm)
1647 {
1648 const bt_t *bt, *bt2;
1649
1650 KASSERT(vm != NULL);
1651
1652 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1653 if (bt->bt_start > BT_END(bt)) {
1654 printf("corrupted tag\n");
1655 bt_dump(bt, vmem_printf);
1656 return false;
1657 }
1658 }
1659 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1660 CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
1661 if (bt == bt2) {
1662 continue;
1663 }
1664 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
1665 continue;
1666 }
1667 if (bt->bt_start <= BT_END(bt2) &&
1668 bt2->bt_start <= BT_END(bt)) {
1669 printf("overwrapped tags\n");
1670 bt_dump(bt, vmem_printf);
1671 bt_dump(bt2, vmem_printf);
1672 return false;
1673 }
1674 }
1675 }
1676
1677 return true;
1678 }
1679
1680 static void
1681 vmem_check(vmem_t *vm)
1682 {
1683
1684 if (!vmem_check_sanity(vm)) {
1685 panic("insanity vmem %p", vm);
1686 }
1687 }
1688
1689 #endif /* defined(VMEM_SANITY) */
1690
1691 #if defined(UNITTEST)
1692 int
1693 main(void)
1694 {
1695 int rc;
1696 vmem_t *vm;
1697 vmem_addr_t p;
1698 struct reg {
1699 vmem_addr_t p;
1700 vmem_size_t sz;
1701 bool x;
1702 } *reg = NULL;
1703 int nreg = 0;
1704 int nalloc = 0;
1705 int nfree = 0;
1706 vmem_size_t total = 0;
1707 #if 1
1708 vm_flag_t strat = VM_INSTANTFIT;
1709 #else
1710 vm_flag_t strat = VM_BESTFIT;
1711 #endif
1712
1713 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP,
1714 #ifdef _KERNEL
1715 IPL_NONE
1716 #else
1717 0
1718 #endif
1719 );
1720 if (vm == NULL) {
1721 printf("vmem_create\n");
1722 exit(EXIT_FAILURE);
1723 }
1724 vmem_dump(vm, vmem_printf);
1725
1726 rc = vmem_add(vm, 0, 50, VM_SLEEP);
1727 assert(rc == 0);
1728 rc = vmem_add(vm, 100, 200, VM_SLEEP);
1729 assert(rc == 0);
1730 rc = vmem_add(vm, 2000, 1, VM_SLEEP);
1731 assert(rc == 0);
1732 rc = vmem_add(vm, 40000, 65536, VM_SLEEP);
1733 assert(rc == 0);
1734 rc = vmem_add(vm, 10000, 10000, VM_SLEEP);
1735 assert(rc == 0);
1736 rc = vmem_add(vm, 500, 1000, VM_SLEEP);
1737 assert(rc == 0);
1738 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
1739 assert(rc == 0);
1740 rc = vmem_xalloc(vm, 0x101, 0, 0, 0,
1741 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
1742 assert(rc != 0);
1743 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p);
1744 assert(rc == 0 && p == 0);
1745 vmem_xfree(vm, p, 50);
1746 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p);
1747 assert(rc == 0 && p == 0);
1748 rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
1749 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p);
1750 assert(rc != 0);
1751 rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
1752 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p);
1753 assert(rc != 0);
1754 rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
1755 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
1756 assert(rc == 0);
1757 vmem_dump(vm, vmem_printf);
1758 for (;;) {
1759 struct reg *r;
1760 int t = rand() % 100;
1761
1762 if (t > 45) {
1763 /* alloc */
1764 vmem_size_t sz = rand() % 500 + 1;
1765 bool x;
1766 vmem_size_t align, phase, nocross;
1767 vmem_addr_t minaddr, maxaddr;
1768
1769 if (t > 70) {
1770 x = true;
1771 /* XXX */
1772 align = 1 << (rand() % 15);
1773 phase = rand() % 65536;
1774 nocross = 1 << (rand() % 15);
1775 if (align <= phase) {
1776 phase = 0;
1777 }
1778 if (VMEM_CROSS_P(phase, phase + sz - 1,
1779 nocross)) {
1780 nocross = 0;
1781 }
1782 do {
1783 minaddr = rand() % 50000;
1784 maxaddr = rand() % 70000;
1785 } while (minaddr > maxaddr);
1786 printf("=== xalloc %" PRIu64
1787 " align=%" PRIu64 ", phase=%" PRIu64
1788 ", nocross=%" PRIu64 ", min=%" PRIu64
1789 ", max=%" PRIu64 "\n",
1790 (uint64_t)sz,
1791 (uint64_t)align,
1792 (uint64_t)phase,
1793 (uint64_t)nocross,
1794 (uint64_t)minaddr,
1795 (uint64_t)maxaddr);
1796 rc = vmem_xalloc(vm, sz, align, phase, nocross,
1797 minaddr, maxaddr, strat|VM_SLEEP, &p);
1798 } else {
1799 x = false;
1800 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1801 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p);
1802 }
1803 printf("-> %" PRIu64 "\n", (uint64_t)p);
1804 vmem_dump(vm, vmem_printf);
1805 if (rc != 0) {
1806 if (x) {
1807 continue;
1808 }
1809 break;
1810 }
1811 nreg++;
1812 reg = realloc(reg, sizeof(*reg) * nreg);
1813 r = &reg[nreg - 1];
1814 r->p = p;
1815 r->sz = sz;
1816 r->x = x;
1817 total += sz;
1818 nalloc++;
1819 } else if (nreg != 0) {
1820 /* free */
1821 r = &reg[rand() % nreg];
1822 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1823 (uint64_t)r->p, (uint64_t)r->sz);
1824 if (r->x) {
1825 vmem_xfree(vm, r->p, r->sz);
1826 } else {
1827 vmem_free(vm, r->p, r->sz);
1828 }
1829 total -= r->sz;
1830 vmem_dump(vm, vmem_printf);
1831 *r = reg[nreg - 1];
1832 nreg--;
1833 nfree++;
1834 }
1835 printf("total=%" PRIu64 "\n", (uint64_t)total);
1836 }
1837 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1838 (uint64_t)total, nalloc, nfree);
1839 exit(EXIT_SUCCESS);
1840 }
1841 #endif /* defined(UNITTEST) */
1842