1 /*	$NetBSD: subr_vmem.c,v 1.36.2.1 2007/12/10 12:56:11 yamt Exp $	*/
2
3 /*-
4 * Copyright (c)2006, 2007 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.36.2.1 2007/12/10 12:56:11 yamt Exp $");
41
42 #define VMEM_DEBUG
43 #if defined(_KERNEL)
44 #define QCACHE
45 #endif /* defined(_KERNEL) */
46
47 #include <sys/param.h>
48 #include <sys/hash.h>
49 #include <sys/queue.h>
50
51 #if defined(_KERNEL)
52 #include <sys/systm.h>
53 #include <sys/kernel.h> /* hz */
54 #include <sys/callout.h>
55 #include <sys/lock.h>
56 #include <sys/once.h>
57 #include <sys/pool.h>
58 #include <sys/proc.h>
59 #include <sys/vmem.h>
60 #include <sys/kmem.h>
61 #include <sys/workqueue.h>
62
63 #include <uvm/uvm_extern.h>
64 #include <uvm/uvm_map.h>
65 #include <uvm/uvm_pdaemon.h>
66 #else /* defined(_KERNEL) */
67 #include "../sys/vmem.h"
68 #endif /* defined(_KERNEL) */
69
70 #if defined(_KERNEL)
71 #define LOCK_DECL(name) kmutex_t name
72 #else /* defined(_KERNEL) */
73 #include <errno.h>
74 #include <assert.h>
75 #include <stdlib.h>
76
77 #define KASSERT(a) assert(a)
78 #define LOCK_DECL(name) /* nothing */
79 #define mutex_init(a, b, c) /* nothing */
80 #define mutex_destroy(a) /* nothing */
81 #define mutex_enter(a) /* nothing */
82 #define mutex_exit(a) /* nothing */
83 #define mutex_owned(a) /* nothing */
84 #define ASSERT_SLEEPABLE(lk, msg) /* nothing */
85 #define IPL_VM 0
86 #endif /* defined(_KERNEL) */
87
88 struct vmem;
89 struct vmem_btag;
90
91 #if defined(VMEM_DEBUG)
92 void vmem_dump(const vmem_t *);
93 void vmem_dump_seglist(const vmem_t *);
94 void vmem_dump_freelist(const vmem_t *);
95 #if defined(QCACHE)
96 void vmem_dump_qc(const vmem_t *);
97 #endif /* defined(QCACHE) */
98 #endif /* defined(VMEM_DEBUG) */
99
100 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
101
102 #define VMEM_HASHSIZE_MIN 1 /* XXX */
103 #define VMEM_HASHSIZE_MAX 8192 /* XXX */
104 #define VMEM_HASHSIZE_INIT VMEM_HASHSIZE_MIN
105
106 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)
107
108 /* vm_flag_t (internal uses) */
109 #define VM_BTPAGE 0x00008000
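
/*
 * VM_BTPAGE is used internally when allocating a page to hold boundary
 * tags for a bootstrap (VMC_KVA) arena: vmem_xalloc() grabs a physical
 * page itself for such a request, and bt_alloc() does not try to
 * allocate yet another btpage while servicing it, so boundary tag
 * allocation cannot recurse.
 */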
110
111 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
112 LIST_HEAD(vmem_freelist, vmem_btag);
113 LIST_HEAD(vmem_hashlist, vmem_btag);
114 typedef struct vmem_hashlist vmem_hashlist_t;
115
116 #if defined(QCACHE)
117 #define VMEM_QCACHE_IDX_MAX 32
118
119 #define QC_NAME_MAX 16
120
121 struct qcache {
122 pool_cache_t qc_cache;
123 vmem_t *qc_vmem;
124 char qc_name[QC_NAME_MAX];
125 };
126 typedef struct qcache qcache_t;
127 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache))
128 #endif /* defined(QCACHE) */
129
130 /* vmem arena */
131 struct vmem {
132 LOCK_DECL(vm_lock);
133 vm_flag_t vm_flags;
134 int vm_freetags;
135 vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
136 vm_flag_t);
137 void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
138 vmem_t *vm_source;
139 struct vmem_seglist vm_seglist;
140 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
141 LIST_HEAD(, btpage_header) vm_btpagelist;
142 size_t vm_hashsize;
143 size_t vm_nbusytag;
144 vmem_hashlist_t *vm_hashlist;
145 size_t vm_quantum_mask;
146 int vm_quantum_shift;
147 const char *vm_name;
148 LIST_ENTRY(vmem) vm_alllist;
149
150 #if defined(QCACHE)
151 /* quantum cache */
152 size_t vm_qcache_max;
153 struct pool_allocator vm_qcache_allocator;
154 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
155 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
156 #endif /* defined(QCACHE) */
157 };
158
159 #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock)
160 #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock)
161 #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock)
162 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
163 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock)
164 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock))
165
166 #define vmem_bootstrap_p(vm) (((vm)->vm_flags & VMC_KVA) != 0)
167
168 /* boundary tag */
169 struct vmem_btag {
170 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
171 union {
172 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
173 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
174 SLIST_ENTRY(vmem_btag) u_sfreelist; /* in btpage_header */
175 SLIST_ENTRY(vmem_btag) u_tmplist; /* temp use in vmem_xfree */
176 } bt_u;
177 #define bt_hashlist bt_u.u_hashlist
178 #define bt_freelist bt_u.u_freelist
179 #define bt_sfreelist bt_u.u_sfreelist
180 #define bt_tmplist bt_u.u_tmplist
181 vmem_addr_t bt_start;
182 vmem_size_t bt_size;
183 int bt_type;
184 };
185
186 #define BT_TYPE_SPAN 1
187 #define BT_TYPE_SPAN_STATIC 2
188 #define BT_TYPE_FREE 3
189 #define BT_TYPE_BUSY 4
190 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
191
192 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)
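/* BT_END(bt): the address just past the end of the tag's range. */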
193
194 typedef struct vmem_btag bt_t;
195
196 /* ---- misc */
197
198 #define VMEM_ALIGNUP(addr, align) \
199 (-(-(addr) & -(align)))
200 #define VMEM_CROSS_P(addr1, addr2, boundary) \
201 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
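/*
 * For example, with the power-of-two "align"/"boundary" values used in
 * this file:
 *	VMEM_ALIGNUP(5, 4) == 8		VMEM_ALIGNUP(8, 4) == 8
 *	VMEM_CROSS_P(7, 8, 8) is true	(7 and 8 lie in different 8-byte blocks)
 *	VMEM_CROSS_P(8, 15, 8) is false
 */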
202
203 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
204
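/*
 * calc_order: return the largest n such that (1 << n) <= size,
 * i.e. floor(log2(size)).  e.g. calc_order(1) == 0, calc_order(7) == 2,
 * calc_order(8) == 3.
 */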
205 static int
206 calc_order(vmem_size_t size)
207 {
208 vmem_size_t target;
209 int i;
210
211 KASSERT(size != 0);
212
213 i = 0;
214 target = size >> 1;
215 while (ORDER2SIZE(i) <= target) {
216 i++;
217 }
218
219 KASSERT(ORDER2SIZE(i) <= size);
220 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
221
222 return i;
223 }
224
225 static void *
226 xmalloc(size_t sz, vm_flag_t flags)
227 {
228
229 #if defined(_KERNEL)
230 return kmem_alloc(sz, (flags & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
231 #else /* defined(_KERNEL) */
232 return malloc(sz);
233 #endif /* defined(_KERNEL) */
234 }
235
236 static void
237 xfree(void *p, size_t sz)
238 {
239
240 #if defined(_KERNEL)
241 kmem_free(p, sz);
242 #else /* defined(_KERNEL) */
243 	free(p);
244 #endif /* defined(_KERNEL) */
245 }
246
247 /* ---- static storage for bootstrap */
248
249 #define STATIC_POOL_NAME(type) static_ ## type
250 #define STATIC_POOL_IDX(type) static_ ## type ## _idx
251 #define STATIC_POOL_DEFINE(type, n) \
252 type STATIC_POOL_NAME(type)[(n)] __unused ; \
253 int STATIC_POOL_IDX(type) __unused
254 #define STATIC_POOL_ALLOC(var, type) \
255 (var) = &STATIC_POOL_NAME(type)[STATIC_POOL_IDX(type)++]; \
256 KASSERT(STATIC_POOL_ELEM_P(type, var))
257 #define STATIC_POOL_FREE(type, var) \
258 KASSERT(STATIC_POOL_ELEM_P(type, var)); \
259 KASSERT((var) == &STATIC_POOL_NAME(type)[STATIC_POOL_IDX(type)-1]); \
260 STATIC_POOL_IDX(type)--
261 #define STATIC_POOL_ELEM_P(type, var) \
262 (&STATIC_POOL_NAME(type)[0] <= (var) && \
263 (var) < &STATIC_POOL_NAME(type)[__arraycount(STATIC_POOL_NAME(type))])
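
/*
 * The STATIC_POOL_* macros implement a trivial LIFO allocator backed by
 * a small static array.  It is used only while bootstrapping the early
 * arenas, before kmem/pool are usable; STATIC_POOL_FREE asserts that
 * objects come back in the reverse order of allocation.
 */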
264
265 static STATIC_POOL_DEFINE(bt_t, 3);
266 static STATIC_POOL_DEFINE(vmem_t, 2);
267 static STATIC_POOL_DEFINE(vmem_hashlist_t, 2);
268 typedef struct pool_cache vmem_pool_cache_t; /* XXX */
269 static STATIC_POOL_DEFINE(vmem_pool_cache_t, VMEM_QCACHE_IDX_MAX+1);
270
271 /* ---- boundary tag */
272
273 #if defined(_KERNEL)
274 static struct pool_cache bt_cache;
275 #endif /* defined(_KERNEL) */
276
277 struct btpage_header {
278 LIST_ENTRY(btpage_header) bh_q;
279 int bh_nfree;
280 SLIST_HEAD(, vmem_btag) bh_freelist;
281 bt_t bh_bt[];
282 };
283 typedef struct btpage_header btpage_header_t;
284
285 #define BT_PER_PAGE \
286 ((PAGE_SIZE - sizeof(btpage_header_t)) / sizeof(bt_t))
287
288 static int
289 btpage_alloc(vmem_t *vm, vm_flag_t flags)
290 {
291 vmem_addr_t va;
292
293 va = vmem_xalloc(vm, PAGE_SIZE, PAGE_SIZE, 0, 0, 0, 0,
294 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_BTPAGE);
295 if (va == 0) {
296 return ENOMEM;
297 }
298 return 0;
299 }
300
301 static void
302 btpage_init(vmem_t *vm, struct vm_page *pg, vaddr_t va)
303 {
304 btpage_header_t *bh;
305 int i;
306
307 VMEM_ASSERT_LOCKED(vm);
308 KASSERT((va & PAGE_MASK) == 0);
309 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
310 pmap_update(pmap_kernel());
311 bh = (void *)va;
312 SLIST_INIT(&bh->bh_freelist);
313 for (i = 0; i < BT_PER_PAGE; i++) {
314 SLIST_INSERT_HEAD(&bh->bh_freelist, &bh->bh_bt[i],
315 bt_sfreelist);
316 }
317 LIST_INSERT_HEAD(&vm->vm_btpagelist, bh, bh_q);
318 bh->bh_nfree = BT_PER_PAGE;
319 vm->vm_freetags += bh->bh_nfree;
320 }
321
322 static void
323 btpage_free(vmem_t *vm, btpage_header_t *bh)
324 {
325
326 KASSERT(vmem_bootstrap_p(vm));
327 pmap_kremove((vaddr_t)bh, PAGE_SIZE);
328 pmap_update(pmap_kernel());
329 vmem_xfree(vm, (vmem_addr_t)bh, PAGE_SIZE);
330 }
331
332 static btpage_header_t *
333 btpage_lookup(bt_t *bt)
334 {
335
336 return (void *)trunc_page((vaddr_t)bt);
337 }
338
339 static bt_t *
340 bt_alloc_bootstrap(vmem_t *vm)
341 {
342 btpage_header_t *bh;
343 bt_t *bt;
344
345 KASSERT(vmem_bootstrap_p(vm));
346 VMEM_ASSERT_LOCKED(vm);
347 bh = LIST_FIRST(&vm->vm_btpagelist);
348 if (__predict_false(bh == NULL)) {
349 STATIC_POOL_ALLOC(bt, bt_t);
350 return bt;
351 }
352 KASSERT(bh->bh_nfree > 0);
353 bt = SLIST_FIRST(&bh->bh_freelist);
354 KASSERT(bt != NULL);
355 SLIST_REMOVE_HEAD(&bh->bh_freelist, bt_sfreelist);
356 bh->bh_nfree--;
357 vm->vm_freetags--;
358 if (SLIST_EMPTY(&bh->bh_freelist)) {
359 KASSERT(bh->bh_nfree == 0);
360 LIST_REMOVE(bh, bh_q);
361 }
362 return bt;
363 }
364
365 #define BT_MINRESERVE 1
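
/*
 * Keep at least BT_MINRESERVE boundary tags in reserve for a bootstrap
 * arena.  When the reserve is about to be consumed, bt_alloc() first
 * allocates another btpage; that btpage allocation itself (flagged with
 * VM_BTPAGE) is the only consumer allowed to dip into the reserve.
 */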
366
367 static bt_t *
368 bt_alloc(vmem_t *vm, vm_flag_t flags)
369 {
370 bt_t *bt;
371
372 #if defined(_KERNEL)
373 if (vmem_bootstrap_p(vm)) {
374 again:
375 VMEM_LOCK(vm);
376 if (vm->vm_freetags <= BT_MINRESERVE &&
377 (flags & VM_BTPAGE) == 0) {
378 VMEM_UNLOCK(vm);
379 if (btpage_alloc(vm, flags)) {
380 return NULL;
381 }
382 goto again;
383 }
384 bt = bt_alloc_bootstrap(vm);
385 VMEM_UNLOCK(vm);
386 } else {
387 bt = pool_cache_get(&bt_cache,
388 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
389 }
390 #else /* defined(_KERNEL) */
391 bt = malloc(sizeof *bt);
392 #endif /* defined(_KERNEL) */
393
394 return bt;
395 }
396
397 static void
398 bt_free(vmem_t *vm, bt_t *bt)
399 {
400
401 KASSERT(bt != NULL);
402 KASSERT(!STATIC_POOL_ELEM_P(bt_t, bt));
403 #if defined(_KERNEL)
404 if (vmem_bootstrap_p(vm)) {
405 btpage_header_t *bh;
406
407 bh = btpage_lookup(bt);
408 VMEM_LOCK(vm);
409 if (SLIST_EMPTY(&bh->bh_freelist)) {
410 KASSERT(bh->bh_nfree == 0);
411 LIST_INSERT_HEAD(&vm->vm_btpagelist, bh, bh_q);
412 }
413 SLIST_INSERT_HEAD(&bh->bh_freelist, bt, bt_sfreelist);
414 bh->bh_nfree++;
415 vm->vm_freetags++;
416 if (vm->vm_freetags >= BT_PER_PAGE + BT_MINRESERVE &&
417 bh->bh_nfree == BT_PER_PAGE) {
418 LIST_REMOVE(bh, bh_q);
419 vm->vm_freetags -= BT_PER_PAGE;
420 VMEM_UNLOCK(vm);
421 btpage_free(vm, bh);
422 } else {
423 VMEM_UNLOCK(vm);
424 }
425 } else {
426 pool_cache_put(&bt_cache, bt);
427 }
428 #else /* defined(_KERNEL) */
429 free(bt);
430 #endif /* defined(_KERNEL) */
431 }
432
433 /*
434 * freelist[0] ... [1, 1]
435 * freelist[1] ... [2, 3]
436 * freelist[2] ... [4, 7]
437 * freelist[3] ... [8, 15]
438 * :
439 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
440 * :
441 */
442
443 static struct vmem_freelist *
444 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
445 {
446 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
447 int idx;
448
449 KASSERT((size & vm->vm_quantum_mask) == 0);
450 KASSERT(size != 0);
451
452 idx = calc_order(qsize);
453 KASSERT(idx >= 0);
454 KASSERT(idx < VMEM_MAXORDER);
455
456 return &vm->vm_freelist[idx];
457 }
458
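/*
 * bt_freehead_toalloc: return the freelist to start searching from for
 * an allocation of "size".  For VM_INSTANTFIT, if the size is not an
 * exact power of two (in quantum units) the search starts one order
 * higher, so that any tag found on a non-empty list is guaranteed to be
 * large enough.
 */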
459 static struct vmem_freelist *
460 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
461 {
462 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
463 int idx;
464
465 KASSERT((size & vm->vm_quantum_mask) == 0);
466 KASSERT(size != 0);
467
468 idx = calc_order(qsize);
469 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
470 idx++;
471 			/* XXX: should overly large requests be rejected here? */
472 }
473 KASSERT(idx >= 0);
474 KASSERT(idx < VMEM_MAXORDER);
475
476 return &vm->vm_freelist[idx];
477 }
478
479 /* ---- boundary tag hash */
480
481 static vmem_hashlist_t *
482 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
483 {
484 vmem_hashlist_t *list;
485 unsigned int hash;
486
487 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
488 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
489
490 return list;
491 }
492
493 static bt_t *
494 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
495 {
496 vmem_hashlist_t *list;
497 bt_t *bt;
498
499 list = bt_hashhead(vm, addr);
500 LIST_FOREACH(bt, list, bt_hashlist) {
501 if (bt->bt_start == addr) {
502 break;
503 }
504 }
505
506 return bt;
507 }
508
509 static void
510 bt_rembusy(vmem_t *vm, bt_t *bt)
511 {
512
513 KASSERT(vm->vm_nbusytag > 0);
514 vm->vm_nbusytag--;
515 LIST_REMOVE(bt, bt_hashlist);
516 }
517
518 static void
519 bt_insbusy(vmem_t *vm, bt_t *bt)
520 {
521 vmem_hashlist_t *list;
522
523 KASSERT(bt->bt_type == BT_TYPE_BUSY);
524
525 list = bt_hashhead(vm, bt->bt_start);
526 LIST_INSERT_HEAD(list, bt, bt_hashlist);
527 vm->vm_nbusytag++;
528 }
529
530 /* ---- boundary tag list */
531
532 static void
533 bt_remseg(vmem_t *vm, bt_t *bt)
534 {
535
536 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
537 }
538
539 static void
540 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
541 {
542
543 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
544 }
545
546 static void
547 bt_insseg_tail(vmem_t *vm, bt_t *bt)
548 {
549
550 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
551 }
552
553 static void
554 bt_remfree(vmem_t *vm, bt_t *bt)
555 {
556
557 KASSERT(bt->bt_type == BT_TYPE_FREE);
558
559 LIST_REMOVE(bt, bt_freelist);
560 }
561
562 static void
563 bt_insfree(vmem_t *vm, bt_t *bt)
564 {
565 struct vmem_freelist *list;
566
567 list = bt_freehead_tofree(vm, bt->bt_size);
568 LIST_INSERT_HEAD(list, bt, bt_freelist);
569 }
570
571 /* ---- vmem internal functions */
572
573 #if defined(_KERNEL)
574 static kmutex_t vmem_list_lock;
575 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
576 #endif /* defined(_KERNEL) */
577
578 #if defined(QCACHE)
579 static inline vm_flag_t
580 prf_to_vmf(int prflags)
581 {
582 vm_flag_t vmflags;
583
584 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
585 if ((prflags & PR_WAITOK) != 0) {
586 vmflags = VM_SLEEP;
587 } else {
588 vmflags = VM_NOSLEEP;
589 }
590 return vmflags;
591 }
592
593 static inline int
594 vmf_to_prf(vm_flag_t vmflags)
595 {
596 int prflags;
597
598 if ((vmflags & VM_SLEEP) != 0) {
599 prflags = PR_WAITOK;
600 } else {
601 prflags = PR_NOWAIT;
602 }
603 return prflags;
604 }
605
606 static size_t
607 qc_poolpage_size(size_t qcache_max)
608 {
609 int i;
610
611 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
612 /* nothing */
613 }
614 return ORDER2SIZE(i);
615 }
616
617 static void *
618 qc_poolpage_alloc(struct pool *pool, int prflags)
619 {
620 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
621 vmem_t *vm = qc->qc_vmem;
622
623 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
624 prf_to_vmf(prflags) | VM_INSTANTFIT);
625 }
626
627 static void
628 qc_poolpage_free(struct pool *pool, void *addr)
629 {
630 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
631 vmem_t *vm = qc->qc_vmem;
632
633 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
634 }
635
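/*
 * qc_init: set up the quantum cache.  Allocations of up to qcache_max
 * bytes are served from per-size pool_caches layered on top of the
 * arena (one cache per multiple of the quantum); qc_poolpage_alloc()
 * and qc_poolpage_free() above refill those pools from the arena
 * itself.  Neighbouring sizes that would get the same number of items
 * per pool page share a single cache.
 */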
636 static void
637 qc_init(vmem_t *vm, size_t qcache_max, int ipl)
638 {
639 qcache_t *prevqc;
640 struct pool_allocator *pa;
641 int qcache_idx_max;
642 int i;
643
644 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
645 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
646 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
647 }
648 vm->vm_qcache_max = qcache_max;
649 pa = &vm->vm_qcache_allocator;
650 memset(pa, 0, sizeof(*pa));
651 pa->pa_alloc = qc_poolpage_alloc;
652 pa->pa_free = qc_poolpage_free;
653 pa->pa_pagesz = qc_poolpage_size(qcache_max);
654
655 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
656 prevqc = NULL;
657 for (i = qcache_idx_max; i > 0; i--) {
658 qcache_t *qc = &vm->vm_qcache_store[i - 1];
659 size_t size = i << vm->vm_quantum_shift;
660 pool_cache_t pc;
661
662 qc->qc_vmem = vm;
663 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
664 vm->vm_name, size);
665 if (!kmem_running_p()) {
666 STATIC_POOL_ALLOC(pc, vmem_pool_cache_t);
667 pool_cache_bootstrap(pc, size,
668 ORDER2SIZE(vm->vm_quantum_shift), 0,
669 PR_NOALIGN | PR_NOTOUCH /* XXX */,
670 qc->qc_name, pa, ipl, NULL, NULL, NULL);
671 } else {
672 pc = pool_cache_init(size,
673 ORDER2SIZE(vm->vm_quantum_shift), 0,
674 PR_NOALIGN | PR_NOTOUCH /* XXX */,
675 qc->qc_name, pa, ipl, NULL, NULL, NULL);
676 }
677 qc->qc_cache = pc;
678 if (prevqc != NULL &&
679 qc->qc_cache->pc_pool.pr_itemsperpage ==
680 prevqc->qc_cache->pc_pool.pr_itemsperpage) {
681 if (!kmem_running_p()) {
682 pool_cache_bootstrap_destroy(pc);
683 STATIC_POOL_FREE(vmem_pool_cache_t, pc);
684 } else {
685 pool_cache_destroy(pc);
686 }
687 vm->vm_qcache[i - 1] = prevqc;
688 continue;
689 }
690 qc->qc_cache->pc_pool.pr_qcache = qc;
691 vm->vm_qcache[i - 1] = qc;
692 prevqc = qc;
693 }
694 }
695
696 static void
697 qc_destroy(vmem_t *vm)
698 {
699 const qcache_t *prevqc;
700 int i;
701 int qcache_idx_max;
702
703 KASSERT(!vmem_bootstrap_p(vm));
704 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
705 prevqc = NULL;
706 for (i = 0; i < qcache_idx_max; i++) {
707 qcache_t *qc = vm->vm_qcache[i];
708
709 if (prevqc == qc) {
710 continue;
711 }
712 pool_cache_destroy(qc->qc_cache);
713 prevqc = qc;
714 }
715 }
716
717 static bool
718 qc_reap(vmem_t *vm)
719 {
720 const qcache_t *prevqc;
721 int i;
722 int qcache_idx_max;
723 bool didsomething = false;
724
725 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
726 prevqc = NULL;
727 for (i = 0; i < qcache_idx_max; i++) {
728 qcache_t *qc = vm->vm_qcache[i];
729
730 if (prevqc == qc) {
731 continue;
732 }
733 if (pool_cache_reclaim(qc->qc_cache) != 0) {
734 didsomething = true;
735 }
736 prevqc = qc;
737 }
738
739 return didsomething;
740 }
741 #endif /* defined(QCACHE) */
742
743 #if defined(_KERNEL)
744 static int
745 vmem_init(void)
746 {
747
748 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_NONE);
749 pool_cache_bootstrap(&bt_cache, sizeof(bt_t), 0, 0, 0, "vmembt",
750 NULL, IPL_VM, NULL, NULL, NULL);
751 return 0;
752 }
753 #endif /* defined(_KERNEL) */
754
755 static vmem_addr_t
756 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
757 int spanbttype)
758 {
759 bt_t *btspan;
760 bt_t *btfree;
761
762 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
763 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
764
765 if ((flags & VMC_KVA) != 0) {
766 KASSERT(vmem_bootstrap_p(vm));
767 KASSERT(CIRCLEQ_EMPTY(&vm->vm_seglist));
768 STATIC_POOL_ALLOC(btspan, bt_t);
769 STATIC_POOL_ALLOC(btfree, bt_t);
770 } else {
771 btspan = bt_alloc(vm, flags);
772 if (btspan == NULL) {
773 return VMEM_ADDR_NULL;
774 }
775 btfree = bt_alloc(vm, flags);
776 if (btfree == NULL) {
777 bt_free(vm, btspan);
778 return VMEM_ADDR_NULL;
779 }
780 }
781
782 btspan->bt_type = spanbttype;
783 btspan->bt_start = addr;
784 btspan->bt_size = size;
785
786 btfree->bt_type = BT_TYPE_FREE;
787 btfree->bt_start = addr;
788 btfree->bt_size = size;
789
790 VMEM_LOCK(vm);
791 bt_insseg_tail(vm, btspan);
792 bt_insseg(vm, btfree, btspan);
793 bt_insfree(vm, btfree);
794 VMEM_UNLOCK(vm);
795
796 if ((flags & VMC_KVA) != 0) {
797 bt_t *bt;
798
799 		/*
800 		 * leak a bt.  this ensures that a btpage is allocated now and,
801 		 * as one of its tags is never returned, never freed again.
802 		 */
803
804 bt = bt_alloc(vm, VM_NOSLEEP);
805 KASSERT(bt != NULL);
806
807 /*
808 * don't leave "btfree" on the segment list because
809 * bt_free() doesn't expect static tags.
810 */
811
812 bt = bt_alloc(vm, flags);
813 VMEM_LOCK(vm);
814 KASSERT(vm->vm_nbusytag == 1);
815 bt->bt_start = btfree->bt_start;
816 bt->bt_size = btfree->bt_size;
817 bt->bt_type = btfree->bt_type;
818 bt_insfree(vm, bt);
819 bt_insseg(vm, bt, btfree);
820 bt_remseg(vm, btfree);
821 bt_remfree(vm, btfree);
822 VMEM_UNLOCK(vm);
823 }
824
825 return addr;
826 }
827
828 static void
829 vmem_destroy1(vmem_t *vm)
830 {
831
832 KASSERT(!vmem_bootstrap_p(vm));
833
834 #if defined(QCACHE)
835 qc_destroy(vm);
836 #endif /* defined(QCACHE) */
837 if (vm->vm_hashlist != NULL) {
838 int i;
839
840 for (i = 0; i < vm->vm_hashsize; i++) {
841 bt_t *bt;
842
843 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
844 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
845 bt_free(vm, bt);
846 }
847 }
848 xfree(vm->vm_hashlist,
849 sizeof(vmem_hashlist_t *) * vm->vm_hashsize);
850 }
851 VMEM_LOCK_DESTROY(vm);
852 xfree(vm, sizeof(*vm));
853 }
854
855 static int
856 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
857 {
858 vmem_addr_t addr;
859
860 if (vm->vm_allocfn == NULL) {
861 return EINVAL;
862 }
863
864 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
865 if (addr == VMEM_ADDR_NULL) {
866 return ENOMEM;
867 }
868
869 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
870 (*vm->vm_freefn)(vm->vm_source, addr, size);
871 return ENOMEM;
872 }
873
874 return 0;
875 }
876
877 static int
878 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
879 {
880 bt_t *bt;
881 int i;
882 vmem_hashlist_t *newhashlist;
883 vmem_hashlist_t *oldhashlist;
884 size_t oldhashsize;
885
886 KASSERT(newhashsize > 0);
887
888 newhashlist = xmalloc(sizeof(vmem_hashlist_t *) * newhashsize, flags);
889 if (newhashlist == NULL) {
890 return ENOMEM;
891 }
892 for (i = 0; i < newhashsize; i++) {
893 LIST_INIT(&newhashlist[i]);
894 }
895
896 if (!VMEM_TRYLOCK(vm)) {
897 xfree(newhashlist, sizeof(vmem_hashlist_t *) * newhashsize);
898 return EBUSY;
899 }
900 oldhashlist = vm->vm_hashlist;
901 oldhashsize = vm->vm_hashsize;
902 vm->vm_hashlist = newhashlist;
903 vm->vm_hashsize = newhashsize;
904 if (oldhashlist == NULL) {
905 VMEM_UNLOCK(vm);
906 return 0;
907 }
908 for (i = 0; i < oldhashsize; i++) {
909 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
910 bt_rembusy(vm, bt); /* XXX */
911 bt_insbusy(vm, bt);
912 }
913 }
914 VMEM_UNLOCK(vm);
915
916 if (!STATIC_POOL_ELEM_P(vmem_hashlist_t, oldhashlist)) {
917 xfree(oldhashlist, sizeof(vmem_hashlist_t *) * oldhashsize);
918 }
919
920 return 0;
921 }
922
923 /*
924 * vmem_fit: check if a bt can satisfy the given restrictions.
925 */
926
927 static vmem_addr_t
928 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
929 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
930 {
931 vmem_addr_t start;
932 vmem_addr_t end;
933
934 KASSERT(bt->bt_size >= size);
935
936 /*
937 * XXX assumption: vmem_addr_t and vmem_size_t are
938 	 * unsigned integers of the same size.
939 */
940
941 start = bt->bt_start;
942 if (start < minaddr) {
943 start = minaddr;
944 }
945 end = BT_END(bt);
946 if (end > maxaddr - 1) {
947 end = maxaddr - 1;
948 }
949 if (start >= end) {
950 return VMEM_ADDR_NULL;
951 }
952
953 start = VMEM_ALIGNUP(start - phase, align) + phase;
954 if (start < bt->bt_start) {
955 start += align;
956 }
957 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
958 KASSERT(align < nocross);
959 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
960 }
961 if (start < end && end - start >= size) {
962 KASSERT((start & (align - 1)) == phase);
963 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
964 KASSERT(minaddr <= start);
965 KASSERT(maxaddr == 0 || start + size <= maxaddr);
966 KASSERT(bt->bt_start <= start);
967 KASSERT(start + size <= BT_END(bt));
968 return start;
969 }
970 return VMEM_ADDR_NULL;
971 }
972
973 /* ---- vmem API */
974
975 /*
976 * vmem_create: create an arena.
977 *
978 * => must not be called from interrupt context.
979 */
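/*
 * A minimal usage sketch (the arena parameters below are purely
 * illustrative):
 *
 *	vmem_t *vm;
 *	vmem_addr_t va;
 *
 *	vm = vmem_create("example", 0x1000, 0x10000, 1,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	va = vmem_alloc(vm, 100, VM_SLEEP | VM_INSTANTFIT);
 *	if (va != VMEM_ADDR_NULL)
 *		vmem_free(vm, va, 100);
 *	vmem_destroy(vm);
 */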
980
981 vmem_t *
982 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
983 vmem_size_t quantum,
984 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
985 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
986 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags,
987 int ipl)
988 {
989 vmem_t *vm;
990 int i;
991 #if defined(_KERNEL)
992 static ONCE_DECL(control);
993 #endif /* defined(_KERNEL) */
994
995 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
996 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
997
998 #if defined(_KERNEL)
999 if (RUN_ONCE(&control, vmem_init)) {
1000 return NULL;
1001 }
1002 #endif /* defined(_KERNEL) */
1003 if ((flags & (VMC_KVA|VMC_KMEM)) != 0) {
1004 STATIC_POOL_ALLOC(vm, vmem_t);
1005 } else {
1006 vm = xmalloc(sizeof(*vm), flags);
1007 if (vm == NULL) {
1008 return NULL;
1009 }
1010 }
1011
1012 VMEM_LOCK_INIT(vm, ipl);
1013 vm->vm_name = name;
1014 vm->vm_flags = flags;
1015 vm->vm_freetags = 0;
1016 vm->vm_quantum_mask = quantum - 1;
1017 vm->vm_quantum_shift = calc_order(quantum);
1018 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
1019 vm->vm_allocfn = allocfn;
1020 vm->vm_freefn = freefn;
1021 vm->vm_source = source;
1022 vm->vm_nbusytag = 0;
1023 #if defined(QCACHE)
1024 qc_init(vm, qcache_max, ipl);
1025 #endif /* defined(QCACHE) */
1026
1027 CIRCLEQ_INIT(&vm->vm_seglist);
1028 for (i = 0; i < VMEM_MAXORDER; i++) {
1029 LIST_INIT(&vm->vm_freelist[i]);
1030 }
1031 vm->vm_hashlist = NULL;
1032 if ((flags & (VMC_KVA|VMC_KMEM)) != 0) {
1033 STATIC_POOL_ALLOC(vm->vm_hashlist, vmem_hashlist_t);
1034 LIST_INIT(&vm->vm_hashlist[0]);
1035 vm->vm_hashsize = 1;
1036 } else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
1037 vmem_destroy1(vm);
1038 return NULL;
1039 }
1040
1041 if (size != 0) {
1042 if (vmem_add(vm, base, size, flags) == 0) {
1043 vmem_destroy1(vm);
1044 return NULL;
1045 }
1046 }
1047
1048 #if defined(_KERNEL)
1049 mutex_enter(&vmem_list_lock);
1050 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
1051 mutex_exit(&vmem_list_lock);
1052 #endif /* defined(_KERNEL) */
1053
1054 #if 0
1055 if (vmem_bootstrap_p(vm)) {
1056 vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags);
1057 }
1058 #endif
1059
1060 return vm;
1061 }
1062
1063 void
1064 vmem_destroy(vmem_t *vm)
1065 {
1066
1067 #if defined(_KERNEL)
1068 mutex_enter(&vmem_list_lock);
1069 LIST_REMOVE(vm, vm_alllist);
1070 mutex_exit(&vmem_list_lock);
1071 #endif /* defined(_KERNEL) */
1072
1073 vmem_destroy1(vm);
1074 }
1075
1076 vmem_size_t
1077 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
1078 {
1079
1080 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
1081 }
1082
1083 /*
1084 * vmem_alloc:
1085 *
1086 * => caller must ensure appropriate spl,
1087 * if the arena can be accessed from interrupt context.
1088 */
1089
1090 vmem_addr_t
1091 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
1092 {
1093 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
1094 const vm_flag_t strat __unused = flags & VM_FITMASK;
1095
1096 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1097 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1098
1099 KASSERT(size0 > 0);
1100 KASSERT(size > 0);
1101 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1102 if ((flags & VM_SLEEP) != 0) {
1103 ASSERT_SLEEPABLE(NULL, __func__);
1104 }
1105
1106 #if defined(QCACHE)
1107 if (size <= vm->vm_qcache_max) {
1108 int qidx = size >> vm->vm_quantum_shift;
1109 qcache_t *qc = vm->vm_qcache[qidx - 1];
1110
1111 return (vmem_addr_t)pool_cache_get(qc->qc_cache,
1112 vmf_to_prf(flags));
1113 }
1114 #endif /* defined(QCACHE) */
1115
1116 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
1117 }
1118
1119 vmem_addr_t
1120 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
1121 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
1122 vm_flag_t flags)
1123 {
1124 struct vmem_freelist *list;
1125 struct vmem_freelist *first;
1126 struct vmem_freelist *end;
1127 bt_t *bt;
1128 bt_t *btnew;
1129 bt_t *btnew2;
1130 const vmem_size_t size = vmem_roundup_size(vm, size0);
1131 vm_flag_t strat = flags & VM_FITMASK;
1132 vmem_addr_t start;
1133 struct vm_page *pg;
1134
1135 KASSERT(size0 > 0);
1136 KASSERT(size > 0);
1137 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1138 if ((flags & VM_SLEEP) != 0) {
1139 ASSERT_SLEEPABLE(NULL, __func__);
1140 }
1141 KASSERT((align & vm->vm_quantum_mask) == 0);
1142 KASSERT((align & (align - 1)) == 0);
1143 KASSERT((phase & vm->vm_quantum_mask) == 0);
1144 KASSERT((nocross & vm->vm_quantum_mask) == 0);
1145 KASSERT((nocross & (nocross - 1)) == 0);
1146 KASSERT((align == 0 && phase == 0) || phase < align);
1147 KASSERT(nocross == 0 || nocross >= size);
1148 KASSERT(maxaddr == 0 || minaddr < maxaddr);
1149 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
1150
1151 if (align == 0) {
1152 align = vm->vm_quantum_mask + 1;
1153 }
1154 pg = NULL;
1155 if ((flags & VM_BTPAGE) != 0) {
1156 KASSERT(size == PAGE_SIZE);
1157 KASSERT(align == PAGE_SIZE);
1158 while (pg == NULL) {
1159 pg = uvm_pagealloc(NULL, 0, NULL, 0);
1160 if (pg == NULL) {
1161 if ((flags & VM_NOSLEEP) != 0) {
1162 					return VMEM_ADDR_NULL;
1163 }
1164 uvm_wait("btpage");
1165 }
1166 }
1167 btnew = NULL; /* XXX: gcc */
1168 btnew2 = NULL;
1169 } else {
1170 btnew = bt_alloc(vm, flags);
1171 if (btnew == NULL) {
1172 return VMEM_ADDR_NULL;
1173 }
1174 /* XXX not necessary if no restrictions */
1175 btnew2 = bt_alloc(vm, flags);
1176 if (btnew2 == NULL) {
1177 bt_free(vm, btnew);
1178 return VMEM_ADDR_NULL;
1179 }
1180 }
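	/*
	 * btnew/btnew2 were allocated up front, before taking the arena
	 * lock: btnew becomes the busy tag if the chosen free tag has to
	 * be split at its tail, and btnew2 covers a split at its head
	 * when alignment/phase push the start past bt_start.  Whichever
	 * of the two ends up unused is freed again below.
	 */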
1181
1182 retry_strat:
1183 first = bt_freehead_toalloc(vm, size, strat);
1184 end = &vm->vm_freelist[VMEM_MAXORDER];
1185 retry:
1186 bt = NULL;
1187 VMEM_LOCK(vm);
1188 if (strat == VM_INSTANTFIT) {
1189 for (list = first; list < end; list++) {
1190 bt = LIST_FIRST(list);
1191 if (bt != NULL) {
1192 start = vmem_fit(bt, size, align, phase,
1193 nocross, minaddr, maxaddr);
1194 if (start != VMEM_ADDR_NULL) {
1195 goto gotit;
1196 }
1197 }
1198 }
1199 } else { /* VM_BESTFIT */
1200 for (list = first; list < end; list++) {
1201 LIST_FOREACH(bt, list, bt_freelist) {
1202 if (bt->bt_size >= size) {
1203 start = vmem_fit(bt, size, align, phase,
1204 nocross, minaddr, maxaddr);
1205 if (start != VMEM_ADDR_NULL) {
1206 goto gotit;
1207 }
1208 }
1209 }
1210 }
1211 }
1212 VMEM_UNLOCK(vm);
1213 #if 1
1214 if (strat == VM_INSTANTFIT) {
1215 strat = VM_BESTFIT;
1216 goto retry_strat;
1217 }
1218 #endif
1219 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
1220 nocross != 0 || minaddr != 0 || maxaddr != 0) {
1221
1222 /*
1223 * XXX should try to import a region large enough to
1224 * satisfy restrictions?
1225 */
1226
1227 goto fail;
1228 }
1229 if (vmem_import(vm, size, flags) == 0) {
1230 goto retry;
1231 }
1232 /* XXX */
1233 fail:
1234 if ((flags & VM_BTPAGE) != 0) {
1235 uvm_pagefree(pg);
1236 } else {
1237 bt_free(vm, btnew);
1238 bt_free(vm, btnew2);
1239 }
1240 return VMEM_ADDR_NULL;
1241
1242 gotit:
1243 #if defined(PMAP_GROWKERNEL)
1244 if ((vm->vm_flags & VMC_KVA) != 0) {
1245 uvm_growkernel(start + size);
1246 }
1247 #endif /* defined(PMAP_GROWKERNEL) */
1248 if ((flags & VM_BTPAGE) != 0) {
1249 vaddr_t va = (vaddr_t)start;
1250
1251 KASSERT(bt->bt_start == start);
1252 btnew = bt_alloc_bootstrap(vm);
1253 btpage_init(vm, pg, va);
1254 }
1255 KASSERT(bt->bt_type == BT_TYPE_FREE);
1256 KASSERT(bt->bt_size >= size);
1257 bt_remfree(vm, bt);
1258 if (bt->bt_start != start) {
1259 btnew2->bt_type = BT_TYPE_FREE;
1260 btnew2->bt_start = bt->bt_start;
1261 btnew2->bt_size = start - bt->bt_start;
1262 bt->bt_start = start;
1263 bt->bt_size -= btnew2->bt_size;
1264 bt_insfree(vm, btnew2);
1265 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
1266 btnew2 = NULL;
1267 }
1268 KASSERT(bt->bt_start == start);
1269 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
1270 /* split */
1271 btnew->bt_type = BT_TYPE_BUSY;
1272 btnew->bt_start = bt->bt_start;
1273 btnew->bt_size = size;
1274 bt->bt_start = bt->bt_start + size;
1275 bt->bt_size -= size;
1276 bt_insfree(vm, bt);
1277 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
1278 bt_insbusy(vm, btnew);
1279 VMEM_UNLOCK(vm);
1280 } else {
1281 bt->bt_type = BT_TYPE_BUSY;
1282 bt_insbusy(vm, bt);
1283 VMEM_UNLOCK(vm);
1284 bt_free(vm, btnew);
1285 btnew = bt;
1286 }
1287 if (btnew2 != NULL) {
1288 bt_free(vm, btnew2);
1289 }
1290 KASSERT(btnew->bt_size >= size);
1291 btnew->bt_type = BT_TYPE_BUSY;
1292
1293 return btnew->bt_start;
1294 }
1295
1296 /*
1297 * vmem_free:
1298 *
1299 * => caller must ensure appropriate spl,
1300 * if the arena can be accessed from interrupt context.
1301 */
1302
1303 void
1304 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1305 {
1306
1307 KASSERT(addr != VMEM_ADDR_NULL);
1308 KASSERT(size > 0);
1309
1310 #if defined(QCACHE)
1311 if (size <= vm->vm_qcache_max) {
1312 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
1313 qcache_t *qc = vm->vm_qcache[qidx - 1];
1314
1315 return pool_cache_put(qc->qc_cache, (void *)addr);
1316 }
1317 #endif /* defined(QCACHE) */
1318
1319 vmem_xfree(vm, addr, size);
1320 }
1321
1322 void
1323 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1324 {
1325 bt_t *bt;
1326 bt_t *t;
1327 SLIST_HEAD(, vmem_btag) tofree;
1328
1329 KASSERT(addr != VMEM_ADDR_NULL);
1330 KASSERT(size > 0);
1331
1332 SLIST_INIT(&tofree);
1333
1334 VMEM_LOCK(vm);
1335
1336 bt = bt_lookupbusy(vm, addr);
1337 KASSERT(bt != NULL);
1338 KASSERT(bt->bt_start == addr);
1339 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
1340 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1341 KASSERT(bt->bt_type == BT_TYPE_BUSY);
1342 bt_rembusy(vm, bt);
1343 bt->bt_type = BT_TYPE_FREE;
1344
1345 	/* coalesce with free neighbours; possibly return the whole span below */
1346 t = CIRCLEQ_NEXT(bt, bt_seglist);
1347 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1348 KASSERT(BT_END(bt) == t->bt_start);
1349 bt_remfree(vm, t);
1350 bt_remseg(vm, t);
1351 bt->bt_size += t->bt_size;
1352 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1353 }
1354 t = CIRCLEQ_PREV(bt, bt_seglist);
1355 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1356 KASSERT(BT_END(t) == bt->bt_start);
1357 bt_remfree(vm, t);
1358 bt_remseg(vm, t);
1359 bt->bt_size += t->bt_size;
1360 bt->bt_start = t->bt_start;
1361 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1362 }
1363
1364 t = CIRCLEQ_PREV(bt, bt_seglist);
1365 KASSERT(t != NULL);
1366 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1367 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1368 t->bt_size == bt->bt_size) {
1369 vmem_addr_t spanaddr;
1370 vmem_size_t spansize;
1371
1372 KASSERT(t->bt_start == bt->bt_start);
1373 spanaddr = bt->bt_start;
1374 spansize = bt->bt_size;
1375 bt_remseg(vm, bt);
1376 SLIST_INSERT_HEAD(&tofree, bt, bt_tmplist);
1377 bt_remseg(vm, t);
1378 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1379 VMEM_UNLOCK(vm);
1380 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1381 } else {
1382 bt_insfree(vm, bt);
1383 VMEM_UNLOCK(vm);
1384 }
1385 while ((t = SLIST_FIRST(&tofree)) != NULL) {
1386 SLIST_REMOVE_HEAD(&tofree, bt_tmplist);
1387 bt_free(vm, t);
1388 }
1389 }
1390
1391 /*
1392 * vmem_add:
1393 *
1394 * => caller must ensure appropriate spl,
1395 * if the arena can be accessed from interrupt context.
1396 */
1397
1398 vmem_addr_t
1399 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1400 {
1401
1402 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1403 }
1404
1405 /*
1406 * vmem_reap: reap unused resources.
1407 *
1408 * => return true if we successfully reaped something.
1409 */
1410
1411 bool
1412 vmem_reap(vmem_t *vm)
1413 {
1414 bool didsomething = false;
1415
1416 #if defined(QCACHE)
1417 didsomething = qc_reap(vm);
1418 #endif /* defined(QCACHE) */
1419 return didsomething;
1420 }
1421
1422 /* ---- rehash */
1423
1424 #if defined(_KERNEL)
1425 static struct callout vmem_rehash_ch;
1426 static int vmem_rehash_interval;
1427 static struct workqueue *vmem_rehash_wq;
1428 static struct work vmem_rehash_wk;
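
/*
 * Hash tables are resized lazily: a callout fires every
 * vmem_rehash_interval ticks and queues vmem_rehash_all() on a
 * workqueue, which grows or shrinks each arena's hash table towards
 * its current number of busy tags.
 */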
1429
1430 static void
1431 vmem_rehash_all(struct work *wk, void *dummy)
1432 {
1433 vmem_t *vm;
1434
1435 KASSERT(wk == &vmem_rehash_wk);
1436 mutex_enter(&vmem_list_lock);
1437 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1438 size_t desired;
1439 size_t current;
1440
1441 if (!VMEM_TRYLOCK(vm)) {
1442 continue;
1443 }
1444 desired = vm->vm_nbusytag;
1445 current = vm->vm_hashsize;
1446 VMEM_UNLOCK(vm);
1447
1448 if (desired > VMEM_HASHSIZE_MAX) {
1449 desired = VMEM_HASHSIZE_MAX;
1450 } else if (desired < VMEM_HASHSIZE_MIN) {
1451 desired = VMEM_HASHSIZE_MIN;
1452 }
1453 if (desired > current * 2 || desired * 2 < current) {
1454 vmem_rehash(vm, desired, VM_NOSLEEP);
1455 }
1456 }
1457 mutex_exit(&vmem_list_lock);
1458
1459 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1460 }
1461
1462 static void
1463 vmem_rehash_all_kick(void *dummy)
1464 {
1465
1466 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
1467 }
1468
1469 void
1470 vmem_rehash_start(void)
1471 {
1472 int error;
1473
1474 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
1475 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, 0);
1476 if (error) {
1477 panic("%s: workqueue_create %d\n", __func__, error);
1478 }
1479 callout_init(&vmem_rehash_ch, 0);
1480 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);
1481
1482 vmem_rehash_interval = hz * 10;
1483 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1484 }
1485 #endif /* defined(_KERNEL) */
1486
1487 /* ---- debug */
1488
1489 #if defined(VMEM_DEBUG)
1490
1491 #if !defined(_KERNEL)
1492 #include <stdio.h>
1493 #endif /* !defined(_KERNEL) */
1494
1495 void bt_dump(const bt_t *);
1496
1497 void
1498 bt_dump(const bt_t *bt)
1499 {
1500
1501 printf("\t%p: %" PRIu64 "(0x%" PRIx64 "), %" PRIu64 "(0x%" PRIx64
1502 "), %d\n",
1503 bt,
1504 (uint64_t)bt->bt_start, (uint64_t)bt->bt_start,
1505 (uint64_t)bt->bt_size, (uint64_t)bt->bt_size,
1506 bt->bt_type);
1507 }
1508
1509 void
1510 vmem_dump_seglist(const vmem_t *vm)
1511 {
1512 const bt_t *bt;
1513
1514 printf("vmem %p '%s' SEGLIST\n", vm, vm->vm_name);
1515
1516 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1517 bt_dump(bt);
1518 }
1519 }
1520
1521 void
1522 vmem_dump_freelist(const vmem_t *vm)
1523 {
1524 const bt_t *bt;
1525 int i;
1526
1527 printf("vmem %p '%s' FREELIST\n", vm, vm->vm_name);
1528
1529 for (i = 0; i < VMEM_MAXORDER; i++) {
1530 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1531
1532 if (LIST_EMPTY(fl)) {
1533 continue;
1534 }
1535
1536 printf("freelist[%d]\n", i);
1537 LIST_FOREACH(bt, fl, bt_freelist) {
1538 bt_dump(bt);
1539 }
1540 }
1541 }
1542
1543 #if defined(QCACHE)
1544 void
1545 vmem_dump_qc(const vmem_t *vm)
1546 {
1547 int qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
1548 int i;
1549 const qcache_t *prevqc;
1550
1551 printf("qcache_max=%zu\n", vm->vm_qcache_max);
1552
1553 prevqc = NULL;
1554 for (i = 0; i < qcache_idx_max; i++) {
1555 const qcache_t *qc;
1556
1557 qc = vm->vm_qcache[i];
1558 if (prevqc != qc) {
1559 printf("CACHE[%d] (%zu-) %p\n",
1560 i, (size_t)i << vm->vm_quantum_shift, qc->qc_cache);
1561 }
1562 prevqc = qc;
1563 }
1564 }
1565 #endif /* defined(QCACHE) */
1566
1567 void
1568 vmem_dump(const vmem_t *vm)
1569 {
1570
1571 printf("vmem %p '%s'\n", vm, vm->vm_name);
1572 vmem_dump_seglist(vm);
1573 vmem_dump_freelist(vm);
1574 #if defined(QCACHE)
1575 vmem_dump_qc(vm);
1576 #endif /* defined(QCACHE) */
1577 }
1578
1579 #if !defined(_KERNEL)
1580
1581 int
1582 main()
1583 {
1584 vmem_t *vm;
1585 vmem_addr_t p;
1586 struct reg {
1587 vmem_addr_t p;
1588 vmem_size_t sz;
1589 bool x;
1590 } *reg = NULL;
1591 int nreg = 0;
1592 int nalloc = 0;
1593 int nfree = 0;
1594 vmem_size_t total = 0;
1595 #if 1
1596 vm_flag_t strat = VM_INSTANTFIT;
1597 #else
1598 vm_flag_t strat = VM_BESTFIT;
1599 #endif
1600
1601 	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1602 	    NULL, NULL, NULL, 0, VM_SLEEP, 0 /* ipl */);
1603 if (vm == NULL) {
1604 printf("vmem_create\n");
1605 exit(EXIT_FAILURE);
1606 }
1607 vmem_dump(vm);
1608
1609 p = vmem_add(vm, 100, 200, VM_SLEEP);
1610 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1611 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1612 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1613 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1614 vmem_dump(vm);
1615 for (;;) {
1616 struct reg *r;
1617 int t = rand() % 100;
1618
1619 if (t > 45) {
1620 /* alloc */
1621 vmem_size_t sz = rand() % 500 + 1;
1622 bool x;
1623 vmem_size_t align, phase, nocross;
1624 vmem_addr_t minaddr, maxaddr;
1625
1626 if (t > 70) {
1627 x = true;
1628 /* XXX */
1629 align = 1 << (rand() % 15);
1630 phase = rand() % 65536;
1631 nocross = 1 << (rand() % 15);
1632 if (align <= phase) {
1633 phase = 0;
1634 }
1635 if (VMEM_CROSS_P(phase, phase + sz - 1,
1636 nocross)) {
1637 nocross = 0;
1638 }
1639 minaddr = rand() % 50000;
1640 maxaddr = rand() % 70000;
1641 if (minaddr > maxaddr) {
1642 minaddr = 0;
1643 maxaddr = 0;
1644 }
1645 printf("=== xalloc %" PRIu64
1646 " align=%" PRIu64 ", phase=%" PRIu64
1647 ", nocross=%" PRIu64 ", min=%" PRIu64
1648 ", max=%" PRIu64 "\n",
1649 (uint64_t)sz,
1650 (uint64_t)align,
1651 (uint64_t)phase,
1652 (uint64_t)nocross,
1653 (uint64_t)minaddr,
1654 (uint64_t)maxaddr);
1655 p = vmem_xalloc(vm, sz, align, phase, nocross,
1656 minaddr, maxaddr, strat|VM_SLEEP);
1657 } else {
1658 x = false;
1659 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1660 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1661 }
1662 printf("-> %" PRIu64 "\n", (uint64_t)p);
1663 vmem_dump(vm);
1664 if (p == VMEM_ADDR_NULL) {
1665 if (x) {
1666 continue;
1667 }
1668 break;
1669 }
1670 nreg++;
1671 reg = realloc(reg, sizeof(*reg) * nreg);
1672 			r = &reg[nreg - 1];
1673 r->p = p;
1674 r->sz = sz;
1675 r->x = x;
1676 total += sz;
1677 nalloc++;
1678 } else if (nreg != 0) {
1679 /* free */
1680 			r = &reg[rand() % nreg];
1681 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1682 (uint64_t)r->p, (uint64_t)r->sz);
1683 if (r->x) {
1684 vmem_xfree(vm, r->p, r->sz);
1685 } else {
1686 vmem_free(vm, r->p, r->sz);
1687 }
1688 total -= r->sz;
1689 vmem_dump(vm);
1690 *r = reg[nreg - 1];
1691 nreg--;
1692 nfree++;
1693 }
1694 printf("total=%" PRIu64 "\n", (uint64_t)total);
1695 }
1696 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1697 (uint64_t)total, nalloc, nfree);
1698 exit(EXIT_SUCCESS);
1699 }
1700 #endif /* !defined(_KERNEL) */
1701 #endif /* defined(VMEM_DEBUG) */
1702