subr_vmem.c revision 1.36.2.2 1 /* $NetBSD: subr_vmem.c,v 1.36.2.2 2007/12/13 05:06:01 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2006, 2007 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.36.2.2 2007/12/13 05:06:01 yamt Exp $");
41
42 #define VMEM_DEBUG
43 #if defined(_KERNEL)
44 #include "opt_ddb.h"
45 #define QCACHE
46 #endif /* defined(_KERNEL) */
47
48 #include <sys/param.h>
49 #include <sys/hash.h>
50 #include <sys/queue.h>
51
52 #if defined(_KERNEL)
53 #include <sys/systm.h>
54 #include <sys/kernel.h> /* hz */
55 #include <sys/callout.h>
56 #include <sys/lock.h>
57 #include <sys/once.h>
58 #include <sys/pool.h>
59 #include <sys/proc.h>
60 #include <sys/vmem.h>
61 #include <sys/kmem.h>
62 #include <sys/workqueue.h>
63
64 #include <uvm/uvm_extern.h>
65 #include <uvm/uvm_map.h>
66 #include <uvm/uvm_pdaemon.h>
67 #else /* defined(_KERNEL) */
68 #include "../sys/vmem.h"
69 #endif /* defined(_KERNEL) */
70
/*
 * Locking and assertion glue.
 *
 * In the kernel the arena lock is a kmutex_t.  In a userland build the
 * mutex operations are compiled out.  The two predicates (mutex_tryenter
 * and mutex_owned) expand to a constant true value instead of nothing,
 * because they are used as operands: VMEM_TRYLOCK() is tested in an "if"
 * and VMEM_ASSERT_LOCKED() passes mutex_owned() to KASSERT().
 */
#if defined(_KERNEL)
#define	LOCK_DECL(name)		kmutex_t name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define	KASSERT(a)		assert(a)
#define	LOCK_DECL(name)		/* nothing */
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_exit(a)		/* nothing */
#define	mutex_tryenter(a)	(1)	/* no lock, so it always succeeds */
#define	mutex_owned(a)		(1)	/* must yield a value for KASSERT */
#define	ASSERT_SLEEPABLE(lk, msg) /* nothing */
#define	IPL_VM			0
#endif /* defined(_KERNEL) */
88
89 struct vmem;
90 struct vmem_btag;
91
92 #if defined(VMEM_DEBUG)
93 void vmem_dump(const vmem_t *);
94 void vmem_dump_seglist(const vmem_t *);
95 void vmem_dump_freelist(const vmem_t *);
96 #if defined(QCACHE)
97 void vmem_dump_qc(const vmem_t *);
98 #endif /* defined(QCACHE) */
99 #endif /* defined(VMEM_DEBUG) */
100
/* number of free lists in an arena: one per bit of vmem_size_t */
#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)

/* bounds and initial value for the size of the busy tag hash */
#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	8192	/* XXX */
#define	VMEM_HASHSIZE_INIT	VMEM_HASHSIZE_MIN

/* the allocation strategy bits of vm_flag_t */
#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

/* vm_flag_t (internal uses) */
/* VM_BTPAGE: the request is for a page of boundary tags (see btpage_alloc) */
#define	VM_BTPAGE	0x00008000

/* list head types: segment list, free lists, and busy tag hash buckets */
CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
typedef struct vmem_hashlist vmem_hashlist_t;
116
#if defined(QCACHE)
/* upper bound on the number of quantum caches per arena */
#define	VMEM_QCACHE_IDX_MAX	32

/* size of the buffer holding a quantum cache's pool name */
#define	QC_NAME_MAX	16

/*
 * quantum cache: a pool cache that serves one small allocation size of
 * an arena.
 */
struct qcache {
	pool_cache_t qc_cache;		/* the backing pool cache */
	vmem_t *qc_vmem;		/* arena that pool pages come from */
	char qc_name[QC_NAME_MAX];	/* pool name, "<arena name>-<size>" */
};
typedef struct qcache qcache_t;

/* map a pool back to the qcache recorded in its pr_qcache member */
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)((pool)->pr_qcache))
#endif /* defined(QCACHE) */
130
131 /* vmem arena */
132 struct vmem {
133 LOCK_DECL(vm_lock);
134 vm_flag_t vm_flags;
135 int vm_freetags;
136 vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
137 vm_flag_t);
138 void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
139 vmem_t *vm_source;
140 struct vmem_seglist vm_seglist;
141 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
142 LIST_HEAD(, btpage_header) vm_btpagelist;
143 size_t vm_hashsize;
144 size_t vm_nbusytag;
145 vmem_hashlist_t *vm_hashlist;
146 size_t vm_quantum_mask;
147 int vm_quantum_shift;
148 const char *vm_name;
149 LIST_ENTRY(vmem) vm_alllist;
150
151 #if defined(QCACHE)
152 /* quantum cache */
153 size_t vm_qcache_max;
154 struct pool_allocator vm_qcache_allocator;
155 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
156 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
157 #endif /* defined(QCACHE) */
158 };
159
/*
 * Arena lock wrappers.  The argument is parenthesized so that any
 * expression of type "vmem_t *" can be passed, not just a plain
 * identifier.
 */
#define	VMEM_LOCK(vm)		mutex_enter(&(vm)->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&(vm)->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&(vm)->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	\
	mutex_init(&(vm)->vm_lock, MUTEX_DEFAULT, (ipl))
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&(vm)->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&(vm)->vm_lock))

/* true if the arena was created with VMC_KVA */
#define	vmem_bootstrap_p(vm)	(((vm)->vm_flags & VMC_KVA) != 0)
168
169 /* boundary tag */
170 struct vmem_btag {
171 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
172 union {
173 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
174 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
175 SLIST_ENTRY(vmem_btag) u_sfreelist; /* in btpage_header */
176 SLIST_ENTRY(vmem_btag) u_tmplist; /* temp use in vmem_xfree */
177 } bt_u;
178 #define bt_hashlist bt_u.u_hashlist
179 #define bt_freelist bt_u.u_freelist
180 #define bt_sfreelist bt_u.u_sfreelist
181 #define bt_tmplist bt_u.u_tmplist
182 vmem_addr_t bt_start;
183 vmem_size_t bt_size;
184 int bt_type;
185 };
186
187 #define BT_TYPE_SPAN 1
188 #define BT_TYPE_SPAN_STATIC 2
189 #define BT_TYPE_FREE 3
190 #define BT_TYPE_BUSY 4
191 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
192
193 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)
194
195 typedef struct vmem_btag bt_t;
196
/* ---- misc */

/*
 * VMEM_ALIGNUP: round "addr" up to a multiple of "align".
 * VMEM_CROSS_P: true if "addr1" and "addr2" lie on different sides of a
 * multiple of "boundary"; always false when "boundary" is 0.
 *
 * Both rely on unsigned two's complement negation, so "align" and
 * "boundary" have to be powers of two (or 0).
 */
#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

/* the size 2^order, as a vmem_size_t */
#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
205
206 static int
207 calc_order(vmem_size_t size)
208 {
209 vmem_size_t target;
210 int i;
211
212 KASSERT(size != 0);
213
214 i = 0;
215 target = size >> 1;
216 while (ORDER2SIZE(i) <= target) {
217 i++;
218 }
219
220 KASSERT(ORDER2SIZE(i) <= size);
221 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
222
223 return i;
224 }
225
226 static void *
227 xmalloc(size_t sz, vm_flag_t flags)
228 {
229
230 #if defined(_KERNEL)
231 return kmem_alloc(sz, (flags & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
232 #else /* defined(_KERNEL) */
233 return malloc(sz);
234 #endif /* defined(_KERNEL) */
235 }
236
/*
 * xfree: release memory obtained from xmalloc().
 *
 * "sz" must be the size passed to xmalloc(); the kernel's kmem_free()
 * takes it.  The userland free() does not use it.
 */
static void
xfree(void *p, size_t sz)
{

#if defined(_KERNEL)
	kmem_free(p, sz);
#else /* defined(_KERNEL) */
	/* a void function must not return an expression */
	free(p);
#endif /* defined(_KERNEL) */
}
247
/* ---- static storage for bootstrap */

/*
 * A "static pool" is a fixed array of objects plus an index that counts
 * how many of them have been handed out.  Objects are taken in array
 * order and can only be given back in reverse (LIFO) order.
 *
 * STATIC_POOL_ALLOC and STATIC_POOL_FREE are wrapped in do/while so that
 * each behaves as a single statement, including as the unbraced body of
 * an if/else.
 */
#define	STATIC_POOL_NAME(type)	static_ ## type
#define	STATIC_POOL_IDX(type)	static_ ## type ## _idx
#define	STATIC_POOL_DEFINE(type, n) \
	type STATIC_POOL_NAME(type)[(n)] __unused ; \
	int STATIC_POOL_IDX(type) __unused
#define	STATIC_POOL_ALLOC(var, type) \
	do { \
		(var) = &STATIC_POOL_NAME(type)[STATIC_POOL_IDX(type)++]; \
		KASSERT(STATIC_POOL_ELEM_P(type, var)); \
	} while (/*CONSTCOND*/ 0)
#define	STATIC_POOL_FREE(type, var) \
	do { \
		KASSERT(STATIC_POOL_ELEM_P(type, var)); \
		KASSERT((var) == \
		    &STATIC_POOL_NAME(type)[STATIC_POOL_IDX(type)-1]); \
		STATIC_POOL_IDX(type)--; \
	} while (/*CONSTCOND*/ 0)
/* true if "var" points into the static array of "type" */
#define	STATIC_POOL_ELEM_P(type, var) \
	(&STATIC_POOL_NAME(type)[0] <= (var) && \
	(var) < &STATIC_POOL_NAME(type)[__arraycount(STATIC_POOL_NAME(type))])
265
266 static STATIC_POOL_DEFINE(bt_t, 3);
267 static STATIC_POOL_DEFINE(vmem_t, 2);
268 static STATIC_POOL_DEFINE(vmem_hashlist_t, 2);
269 typedef struct pool_cache vmem_pool_cache_t; /* XXX */
270 static STATIC_POOL_DEFINE(vmem_pool_cache_t, VMEM_QCACHE_IDX_MAX+1);
271
272 /* ---- boundary tag */
273
274 #if defined(_KERNEL)
275 static struct pool_cache bt_cache;
276 #endif /* defined(_KERNEL) */
277
278 struct btpage_header {
279 LIST_ENTRY(btpage_header) bh_q;
280 int bh_nfree;
281 SLIST_HEAD(, vmem_btag) bh_freelist;
282 bt_t bh_bt[];
283 };
284 typedef struct btpage_header btpage_header_t;
285
286 #define BT_PER_PAGE \
287 ((PAGE_SIZE - sizeof(btpage_header_t)) / sizeof(bt_t))
288
289 static int
290 btpage_alloc(vmem_t *vm, vm_flag_t flags)
291 {
292 vmem_addr_t va;
293
294 va = vmem_xalloc(vm, PAGE_SIZE, PAGE_SIZE, 0, 0, 0, 0,
295 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_BTPAGE);
296 if (va == 0) {
297 return ENOMEM;
298 }
299 return 0;
300 }
301
302 static void
303 btpage_init(vmem_t *vm, struct vm_page *pg, vaddr_t va)
304 {
305 btpage_header_t *bh;
306 int i;
307
308 VMEM_ASSERT_LOCKED(vm);
309 KASSERT((va & PAGE_MASK) == 0);
310 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
311 pmap_update(pmap_kernel());
312 bh = (void *)va;
313 SLIST_INIT(&bh->bh_freelist);
314 for (i = 0; i < BT_PER_PAGE; i++) {
315 SLIST_INSERT_HEAD(&bh->bh_freelist, &bh->bh_bt[i],
316 bt_sfreelist);
317 }
318 LIST_INSERT_HEAD(&vm->vm_btpagelist, bh, bh_q);
319 bh->bh_nfree = BT_PER_PAGE;
320 vm->vm_freetags += bh->bh_nfree;
321 }
322
323 static void
324 btpage_free(vmem_t *vm, btpage_header_t *bh)
325 {
326
327 KASSERT(vmem_bootstrap_p(vm));
328 pmap_kremove((vaddr_t)bh, PAGE_SIZE);
329 pmap_update(pmap_kernel());
330 vmem_xfree(vm, (vmem_addr_t)bh, PAGE_SIZE);
331 }
332
333 static btpage_header_t *
334 btpage_lookup(bt_t *bt)
335 {
336
337 return (void *)trunc_page((vaddr_t)bt);
338 }
339
340 static bt_t *
341 bt_alloc_bootstrap(vmem_t *vm)
342 {
343 btpage_header_t *bh;
344 bt_t *bt;
345
346 KASSERT(vmem_bootstrap_p(vm));
347 VMEM_ASSERT_LOCKED(vm);
348 bh = LIST_FIRST(&vm->vm_btpagelist);
349 if (__predict_false(bh == NULL)) {
350 STATIC_POOL_ALLOC(bt, bt_t);
351 return bt;
352 }
353 KASSERT(bh->bh_nfree > 0);
354 bt = SLIST_FIRST(&bh->bh_freelist);
355 KASSERT(bt != NULL);
356 SLIST_REMOVE_HEAD(&bh->bh_freelist, bt_sfreelist);
357 bh->bh_nfree--;
358 vm->vm_freetags--;
359 if (SLIST_EMPTY(&bh->bh_freelist)) {
360 KASSERT(bh->bh_nfree == 0);
361 LIST_REMOVE(bh, bh_q);
362 }
363 return bt;
364 }
365
366 #define BT_MINRESERVE 1
367
368 static bt_t *
369 bt_alloc(vmem_t *vm, vm_flag_t flags)
370 {
371 bt_t *bt;
372
373 #if defined(_KERNEL)
374 if (vmem_bootstrap_p(vm)) {
375 again:
376 VMEM_LOCK(vm);
377 if (vm->vm_freetags <= BT_MINRESERVE &&
378 (flags & VM_BTPAGE) == 0) {
379 VMEM_UNLOCK(vm);
380 if (btpage_alloc(vm, flags)) {
381 return NULL;
382 }
383 goto again;
384 }
385 bt = bt_alloc_bootstrap(vm);
386 VMEM_UNLOCK(vm);
387 } else {
388 bt = pool_cache_get(&bt_cache,
389 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
390 }
391 #else /* defined(_KERNEL) */
392 bt = malloc(sizeof *bt);
393 #endif /* defined(_KERNEL) */
394
395 return bt;
396 }
397
398 static void
399 bt_free(vmem_t *vm, bt_t *bt)
400 {
401
402 KASSERT(bt != NULL);
403 KASSERT(!STATIC_POOL_ELEM_P(bt_t, bt));
404 #if defined(_KERNEL)
405 if (vmem_bootstrap_p(vm)) {
406 btpage_header_t *bh;
407
408 bh = btpage_lookup(bt);
409 VMEM_LOCK(vm);
410 if (SLIST_EMPTY(&bh->bh_freelist)) {
411 KASSERT(bh->bh_nfree == 0);
412 LIST_INSERT_HEAD(&vm->vm_btpagelist, bh, bh_q);
413 }
414 SLIST_INSERT_HEAD(&bh->bh_freelist, bt, bt_sfreelist);
415 bh->bh_nfree++;
416 vm->vm_freetags++;
417 if (vm->vm_freetags >= BT_PER_PAGE + BT_MINRESERVE &&
418 bh->bh_nfree == BT_PER_PAGE) {
419 LIST_REMOVE(bh, bh_q);
420 vm->vm_freetags -= BT_PER_PAGE;
421 VMEM_UNLOCK(vm);
422 btpage_free(vm, bh);
423 } else {
424 VMEM_UNLOCK(vm);
425 }
426 } else {
427 pool_cache_put(&bt_cache, bt);
428 }
429 #else /* defined(_KERNEL) */
430 free(bt);
431 #endif /* defined(_KERNEL) */
432 }
433
434 /*
435 * freelist[0] ... [1, 1]
436 * freelist[1] ... [2, 3]
437 * freelist[2] ... [4, 7]
438 * freelist[3] ... [8, 15]
439 * :
440 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
441 * :
442 */
443
444 static struct vmem_freelist *
445 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
446 {
447 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
448 int idx;
449
450 KASSERT((size & vm->vm_quantum_mask) == 0);
451 KASSERT(size != 0);
452
453 idx = calc_order(qsize);
454 KASSERT(idx >= 0);
455 KASSERT(idx < VMEM_MAXORDER);
456
457 return &vm->vm_freelist[idx];
458 }
459
460 static struct vmem_freelist *
461 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
462 {
463 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
464 int idx;
465
466 KASSERT((size & vm->vm_quantum_mask) == 0);
467 KASSERT(size != 0);
468
469 idx = calc_order(qsize);
470 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
471 idx++;
472 /* check too large request? */
473 }
474 KASSERT(idx >= 0);
475 KASSERT(idx < VMEM_MAXORDER);
476
477 return &vm->vm_freelist[idx];
478 }
479
480 /* ---- boundary tag hash */
481
482 static vmem_hashlist_t *
483 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
484 {
485 vmem_hashlist_t *list;
486 unsigned int hash;
487
488 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
489 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
490
491 return list;
492 }
493
494 static bt_t *
495 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
496 {
497 vmem_hashlist_t *list;
498 bt_t *bt;
499
500 list = bt_hashhead(vm, addr);
501 LIST_FOREACH(bt, list, bt_hashlist) {
502 if (bt->bt_start == addr) {
503 break;
504 }
505 }
506
507 return bt;
508 }
509
510 static void
511 bt_rembusy(vmem_t *vm, bt_t *bt)
512 {
513
514 KASSERT(vm->vm_nbusytag > 0);
515 vm->vm_nbusytag--;
516 LIST_REMOVE(bt, bt_hashlist);
517 }
518
519 static void
520 bt_insbusy(vmem_t *vm, bt_t *bt)
521 {
522 vmem_hashlist_t *list;
523
524 KASSERT(bt->bt_type == BT_TYPE_BUSY);
525
526 list = bt_hashhead(vm, bt->bt_start);
527 LIST_INSERT_HEAD(list, bt, bt_hashlist);
528 vm->vm_nbusytag++;
529 }
530
531 /* ---- boundary tag list */
532
533 static void
534 bt_remseg(vmem_t *vm, bt_t *bt)
535 {
536
537 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
538 }
539
540 static void
541 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
542 {
543
544 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
545 }
546
547 static void
548 bt_insseg_tail(vmem_t *vm, bt_t *bt)
549 {
550
551 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
552 }
553
554 static void
555 bt_remfree(vmem_t *vm, bt_t *bt)
556 {
557
558 KASSERT(bt->bt_type == BT_TYPE_FREE);
559
560 LIST_REMOVE(bt, bt_freelist);
561 }
562
563 static void
564 bt_insfree(vmem_t *vm, bt_t *bt)
565 {
566 struct vmem_freelist *list;
567
568 list = bt_freehead_tofree(vm, bt->bt_size);
569 LIST_INSERT_HEAD(list, bt, bt_freelist);
570 }
571
572 /* ---- vmem internal functions */
573
574 #if defined(_KERNEL)
575 static kmutex_t vmem_list_lock;
576 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
577 #endif /* defined(_KERNEL) */
578
579 #if defined(QCACHE)
580 static inline vm_flag_t
581 prf_to_vmf(int prflags)
582 {
583 vm_flag_t vmflags;
584
585 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
586 if ((prflags & PR_WAITOK) != 0) {
587 vmflags = VM_SLEEP;
588 } else {
589 vmflags = VM_NOSLEEP;
590 }
591 return vmflags;
592 }
593
594 static inline int
595 vmf_to_prf(vm_flag_t vmflags)
596 {
597 int prflags;
598
599 if ((vmflags & VM_SLEEP) != 0) {
600 prflags = PR_WAITOK;
601 } else {
602 prflags = PR_NOWAIT;
603 }
604 return prflags;
605 }
606
/*
 * qc_poolpage_size: choose the pool page size for the quantum caches:
 * the smallest power of two greater than three times "qcache_max".
 */
static size_t
qc_poolpage_size(size_t qcache_max)
{
	int order = 0;

	while (ORDER2SIZE(order) <= qcache_max * 3) {
		order++;
	}
	return ORDER2SIZE(order);
}
617
618 static void *
619 qc_poolpage_alloc(struct pool *pool, int prflags)
620 {
621 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
622 vmem_t *vm = qc->qc_vmem;
623
624 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
625 prf_to_vmf(prflags) | VM_INSTANTFIT);
626 }
627
628 static void
629 qc_poolpage_free(struct pool *pool, void *addr)
630 {
631 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
632 vmem_t *vm = qc->qc_vmem;
633
634 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
635 }
636
637 static void
638 qc_init(vmem_t *vm, size_t qcache_max, int ipl)
639 {
640 qcache_t *prevqc;
641 struct pool_allocator *pa;
642 int qcache_idx_max;
643 int i;
644
645 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
646 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
647 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
648 }
649 vm->vm_qcache_max = qcache_max;
650 pa = &vm->vm_qcache_allocator;
651 memset(pa, 0, sizeof(*pa));
652 pa->pa_alloc = qc_poolpage_alloc;
653 pa->pa_free = qc_poolpage_free;
654 pa->pa_pagesz = qc_poolpage_size(qcache_max);
655
656 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
657 prevqc = NULL;
658 for (i = qcache_idx_max; i > 0; i--) {
659 qcache_t *qc = &vm->vm_qcache_store[i - 1];
660 size_t size = i << vm->vm_quantum_shift;
661 pool_cache_t pc;
662
663 qc->qc_vmem = vm;
664 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
665 vm->vm_name, size);
666 if (!kmem_running_p()) {
667 STATIC_POOL_ALLOC(pc, vmem_pool_cache_t);
668 pool_cache_bootstrap(pc, size,
669 ORDER2SIZE(vm->vm_quantum_shift), 0,
670 PR_NOALIGN | PR_NOTOUCH /* XXX */,
671 qc->qc_name, pa, ipl, NULL, NULL, NULL);
672 } else {
673 pc = pool_cache_init(size,
674 ORDER2SIZE(vm->vm_quantum_shift), 0,
675 PR_NOALIGN | PR_NOTOUCH /* XXX */,
676 qc->qc_name, pa, ipl, NULL, NULL, NULL);
677 }
678 qc->qc_cache = pc;
679 if (prevqc != NULL &&
680 qc->qc_cache->pc_pool.pr_itemsperpage ==
681 prevqc->qc_cache->pc_pool.pr_itemsperpage) {
682 if (!kmem_running_p()) {
683 pool_cache_bootstrap_destroy(pc);
684 STATIC_POOL_FREE(vmem_pool_cache_t, pc);
685 } else {
686 pool_cache_destroy(pc);
687 }
688 vm->vm_qcache[i - 1] = prevqc;
689 continue;
690 }
691 qc->qc_cache->pc_pool.pr_qcache = qc;
692 vm->vm_qcache[i - 1] = qc;
693 prevqc = qc;
694 }
695 }
696
697 static void
698 qc_destroy(vmem_t *vm)
699 {
700 const qcache_t *prevqc;
701 int i;
702 int qcache_idx_max;
703
704 KASSERT(!vmem_bootstrap_p(vm));
705 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
706 prevqc = NULL;
707 for (i = 0; i < qcache_idx_max; i++) {
708 qcache_t *qc = vm->vm_qcache[i];
709
710 if (prevqc == qc) {
711 continue;
712 }
713 pool_cache_destroy(qc->qc_cache);
714 prevqc = qc;
715 }
716 }
717
718 static bool
719 qc_reap(vmem_t *vm)
720 {
721 const qcache_t *prevqc;
722 int i;
723 int qcache_idx_max;
724 bool didsomething = false;
725
726 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
727 prevqc = NULL;
728 for (i = 0; i < qcache_idx_max; i++) {
729 qcache_t *qc = vm->vm_qcache[i];
730
731 if (prevqc == qc) {
732 continue;
733 }
734 if (pool_cache_reclaim(qc->qc_cache) != 0) {
735 didsomething = true;
736 }
737 prevqc = qc;
738 }
739
740 return didsomething;
741 }
742 #endif /* defined(QCACHE) */
743
744 #if defined(_KERNEL)
745 static int
746 vmem_init(void)
747 {
748
749 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_NONE);
750 pool_cache_bootstrap(&bt_cache, sizeof(bt_t), 0, 0, 0, "vmembt",
751 NULL, IPL_VM, NULL, NULL, NULL);
752 return 0;
753 }
754 #endif /* defined(_KERNEL) */
755
756 static vmem_addr_t
757 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
758 int spanbttype)
759 {
760 bt_t *btspan;
761 bt_t *btfree;
762
763 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
764 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
765
766 if ((flags & VMC_KVA) != 0) {
767 KASSERT(vmem_bootstrap_p(vm));
768 KASSERT(CIRCLEQ_EMPTY(&vm->vm_seglist));
769 STATIC_POOL_ALLOC(btspan, bt_t);
770 STATIC_POOL_ALLOC(btfree, bt_t);
771 } else {
772 btspan = bt_alloc(vm, flags);
773 if (btspan == NULL) {
774 return VMEM_ADDR_NULL;
775 }
776 btfree = bt_alloc(vm, flags);
777 if (btfree == NULL) {
778 bt_free(vm, btspan);
779 return VMEM_ADDR_NULL;
780 }
781 }
782
783 btspan->bt_type = spanbttype;
784 btspan->bt_start = addr;
785 btspan->bt_size = size;
786
787 btfree->bt_type = BT_TYPE_FREE;
788 btfree->bt_start = addr;
789 btfree->bt_size = size;
790
791 VMEM_LOCK(vm);
792 bt_insseg_tail(vm, btspan);
793 bt_insseg(vm, btfree, btspan);
794 bt_insfree(vm, btfree);
795 VMEM_UNLOCK(vm);
796
797 if ((flags & VMC_KVA) != 0) {
798 bt_t *bt;
799
800 /*
801 * leak a bt.
802 * this ensure that
803 */
804
805 bt = bt_alloc(vm, VM_NOSLEEP);
806 KASSERT(bt != NULL);
807
808 /*
809 * don't leave "btfree" on the segment list because
810 * bt_free() doesn't expect static tags.
811 */
812
813 bt = bt_alloc(vm, flags);
814 VMEM_LOCK(vm);
815 KASSERT(vm->vm_nbusytag == 1);
816 bt->bt_start = btfree->bt_start;
817 bt->bt_size = btfree->bt_size;
818 bt->bt_type = btfree->bt_type;
819 bt_insfree(vm, bt);
820 bt_insseg(vm, bt, btfree);
821 bt_remseg(vm, btfree);
822 bt_remfree(vm, btfree);
823 VMEM_UNLOCK(vm);
824 }
825
826 return addr;
827 }
828
829 static void
830 vmem_destroy1(vmem_t *vm)
831 {
832
833 KASSERT(!vmem_bootstrap_p(vm));
834
835 #if defined(QCACHE)
836 qc_destroy(vm);
837 #endif /* defined(QCACHE) */
838 if (vm->vm_hashlist != NULL) {
839 int i;
840
841 for (i = 0; i < vm->vm_hashsize; i++) {
842 bt_t *bt;
843
844 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
845 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
846 bt_free(vm, bt);
847 }
848 }
849 xfree(vm->vm_hashlist,
850 sizeof(vmem_hashlist_t *) * vm->vm_hashsize);
851 }
852 VMEM_LOCK_DESTROY(vm);
853 xfree(vm, sizeof(*vm));
854 }
855
856 static int
857 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
858 {
859 vmem_addr_t addr;
860
861 if (vm->vm_allocfn == NULL) {
862 return EINVAL;
863 }
864
865 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
866 if (addr == VMEM_ADDR_NULL) {
867 return ENOMEM;
868 }
869
870 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
871 (*vm->vm_freefn)(vm->vm_source, addr, size);
872 return ENOMEM;
873 }
874
875 return 0;
876 }
877
878 static int
879 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
880 {
881 bt_t *bt;
882 int i;
883 vmem_hashlist_t *newhashlist;
884 vmem_hashlist_t *oldhashlist;
885 size_t oldhashsize;
886
887 KASSERT(newhashsize > 0);
888
889 newhashlist = xmalloc(sizeof(vmem_hashlist_t *) * newhashsize, flags);
890 if (newhashlist == NULL) {
891 return ENOMEM;
892 }
893 for (i = 0; i < newhashsize; i++) {
894 LIST_INIT(&newhashlist[i]);
895 }
896
897 if (!VMEM_TRYLOCK(vm)) {
898 xfree(newhashlist, sizeof(vmem_hashlist_t *) * newhashsize);
899 return EBUSY;
900 }
901 oldhashlist = vm->vm_hashlist;
902 oldhashsize = vm->vm_hashsize;
903 vm->vm_hashlist = newhashlist;
904 vm->vm_hashsize = newhashsize;
905 if (oldhashlist == NULL) {
906 VMEM_UNLOCK(vm);
907 return 0;
908 }
909 for (i = 0; i < oldhashsize; i++) {
910 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
911 bt_rembusy(vm, bt); /* XXX */
912 bt_insbusy(vm, bt);
913 }
914 }
915 VMEM_UNLOCK(vm);
916
917 if (!STATIC_POOL_ELEM_P(vmem_hashlist_t, oldhashlist)) {
918 xfree(oldhashlist, sizeof(vmem_hashlist_t *) * oldhashsize);
919 }
920
921 return 0;
922 }
923
924 /*
925 * vmem_fit: check if a bt can satisfy the given restrictions.
926 */
927
928 static vmem_addr_t
929 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
930 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
931 {
932 vmem_addr_t start;
933 vmem_addr_t end;
934
935 KASSERT(bt->bt_size >= size);
936
937 /*
938 * XXX assumption: vmem_addr_t and vmem_size_t are
939 * unsigned integer of the same size.
940 */
941
942 start = bt->bt_start;
943 if (start < minaddr) {
944 start = minaddr;
945 }
946 end = BT_END(bt);
947 if (end > maxaddr - 1) {
948 end = maxaddr - 1;
949 }
950 if (start >= end) {
951 return VMEM_ADDR_NULL;
952 }
953
954 start = VMEM_ALIGNUP(start - phase, align) + phase;
955 if (start < bt->bt_start) {
956 start += align;
957 }
958 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
959 KASSERT(align < nocross);
960 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
961 }
962 if (start < end && end - start >= size) {
963 KASSERT((start & (align - 1)) == phase);
964 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
965 KASSERT(minaddr <= start);
966 KASSERT(maxaddr == 0 || start + size <= maxaddr);
967 KASSERT(bt->bt_start <= start);
968 KASSERT(start + size <= BT_END(bt));
969 return start;
970 }
971 return VMEM_ADDR_NULL;
972 }
973
974 /* ---- vmem API */
975
976 /*
977 * vmem_create: create an arena.
978 *
979 * => must not be called from interrupt context.
980 */
981
982 vmem_t *
983 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
984 vmem_size_t quantum,
985 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
986 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
987 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags,
988 int ipl)
989 {
990 vmem_t *vm;
991 int i;
992 #if defined(_KERNEL)
993 static ONCE_DECL(control);
994 #endif /* defined(_KERNEL) */
995
996 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
997 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
998
999 #if defined(_KERNEL)
1000 if (RUN_ONCE(&control, vmem_init)) {
1001 return NULL;
1002 }
1003 #endif /* defined(_KERNEL) */
1004 if ((flags & (VMC_KVA|VMC_KMEM)) != 0) {
1005 STATIC_POOL_ALLOC(vm, vmem_t);
1006 } else {
1007 vm = xmalloc(sizeof(*vm), flags);
1008 if (vm == NULL) {
1009 return NULL;
1010 }
1011 }
1012
1013 VMEM_LOCK_INIT(vm, ipl);
1014 vm->vm_name = name;
1015 vm->vm_flags = flags;
1016 vm->vm_freetags = 0;
1017 vm->vm_quantum_mask = quantum - 1;
1018 vm->vm_quantum_shift = calc_order(quantum);
1019 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
1020 vm->vm_allocfn = allocfn;
1021 vm->vm_freefn = freefn;
1022 vm->vm_source = source;
1023 vm->vm_nbusytag = 0;
1024 #if defined(QCACHE)
1025 qc_init(vm, qcache_max, ipl);
1026 #endif /* defined(QCACHE) */
1027
1028 CIRCLEQ_INIT(&vm->vm_seglist);
1029 for (i = 0; i < VMEM_MAXORDER; i++) {
1030 LIST_INIT(&vm->vm_freelist[i]);
1031 }
1032 vm->vm_hashlist = NULL;
1033 if ((flags & (VMC_KVA|VMC_KMEM)) != 0) {
1034 STATIC_POOL_ALLOC(vm->vm_hashlist, vmem_hashlist_t);
1035 LIST_INIT(&vm->vm_hashlist[0]);
1036 vm->vm_hashsize = 1;
1037 } else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
1038 vmem_destroy1(vm);
1039 return NULL;
1040 }
1041
1042 if (size != 0) {
1043 if (vmem_add(vm, base, size, flags) == 0) {
1044 vmem_destroy1(vm);
1045 return NULL;
1046 }
1047 }
1048
1049 #if defined(_KERNEL)
1050 mutex_enter(&vmem_list_lock);
1051 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
1052 mutex_exit(&vmem_list_lock);
1053 #endif /* defined(_KERNEL) */
1054
1055 #if 0
1056 if (vmem_bootstrap_p(vm)) {
1057 vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags);
1058 }
1059 #endif
1060
1061 return vm;
1062 }
1063
1064 void
1065 vmem_destroy(vmem_t *vm)
1066 {
1067
1068 #if defined(_KERNEL)
1069 mutex_enter(&vmem_list_lock);
1070 LIST_REMOVE(vm, vm_alllist);
1071 mutex_exit(&vmem_list_lock);
1072 #endif /* defined(_KERNEL) */
1073
1074 vmem_destroy1(vm);
1075 }
1076
1077 vmem_size_t
1078 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
1079 {
1080
1081 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
1082 }
1083
1084 /*
1085 * vmem_alloc:
1086 *
1087 * => caller must ensure appropriate spl,
1088 * if the arena can be accessed from interrupt context.
1089 */
1090
1091 vmem_addr_t
1092 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
1093 {
1094 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
1095 const vm_flag_t strat __unused = flags & VM_FITMASK;
1096
1097 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1098 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
1099
1100 KASSERT(size0 > 0);
1101 KASSERT(size > 0);
1102 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1103 if ((flags & VM_SLEEP) != 0) {
1104 ASSERT_SLEEPABLE(NULL, __func__);
1105 }
1106
1107 #if defined(QCACHE)
1108 if (size <= vm->vm_qcache_max) {
1109 int qidx = size >> vm->vm_quantum_shift;
1110 qcache_t *qc = vm->vm_qcache[qidx - 1];
1111
1112 return (vmem_addr_t)pool_cache_get(qc->qc_cache,
1113 vmf_to_prf(flags));
1114 }
1115 #endif /* defined(QCACHE) */
1116
1117 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
1118 }
1119
1120 vmem_addr_t
1121 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
1122 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
1123 vm_flag_t flags)
1124 {
1125 struct vmem_freelist *list;
1126 struct vmem_freelist *first;
1127 struct vmem_freelist *end;
1128 bt_t *bt;
1129 bt_t *btnew;
1130 bt_t *btnew2;
1131 const vmem_size_t size = vmem_roundup_size(vm, size0);
1132 vm_flag_t strat = flags & VM_FITMASK;
1133 vmem_addr_t start;
1134 struct vm_page *pg;
1135
1136 KASSERT(size0 > 0);
1137 KASSERT(size > 0);
1138 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
1139 if ((flags & VM_SLEEP) != 0) {
1140 ASSERT_SLEEPABLE(NULL, __func__);
1141 }
1142 KASSERT((align & vm->vm_quantum_mask) == 0);
1143 KASSERT((align & (align - 1)) == 0);
1144 KASSERT((phase & vm->vm_quantum_mask) == 0);
1145 KASSERT((nocross & vm->vm_quantum_mask) == 0);
1146 KASSERT((nocross & (nocross - 1)) == 0);
1147 KASSERT((align == 0 && phase == 0) || phase < align);
1148 KASSERT(nocross == 0 || nocross >= size);
1149 KASSERT(maxaddr == 0 || minaddr < maxaddr);
1150 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
1151
1152 if (align == 0) {
1153 align = vm->vm_quantum_mask + 1;
1154 }
1155 pg = NULL;
1156 if ((flags & VM_BTPAGE) != 0) {
1157 KASSERT(size == PAGE_SIZE);
1158 KASSERT(align == PAGE_SIZE);
1159 while (pg == NULL) {
1160 pg = uvm_pagealloc(NULL, 0, NULL, 0);
1161 if (pg == NULL) {
1162 if ((flags & VM_NOSLEEP) != 0) {
1163 return ENOMEM;
1164 }
1165 uvm_wait("btpage");
1166 }
1167 }
1168 btnew = NULL; /* XXX: gcc */
1169 btnew2 = NULL;
1170 } else {
1171 btnew = bt_alloc(vm, flags);
1172 if (btnew == NULL) {
1173 return VMEM_ADDR_NULL;
1174 }
1175 /* XXX not necessary if no restrictions */
1176 btnew2 = bt_alloc(vm, flags);
1177 if (btnew2 == NULL) {
1178 bt_free(vm, btnew);
1179 return VMEM_ADDR_NULL;
1180 }
1181 }
1182
1183 retry_strat:
1184 first = bt_freehead_toalloc(vm, size, strat);
1185 end = &vm->vm_freelist[VMEM_MAXORDER];
1186 retry:
1187 bt = NULL;
1188 VMEM_LOCK(vm);
1189 if (strat == VM_INSTANTFIT) {
1190 for (list = first; list < end; list++) {
1191 bt = LIST_FIRST(list);
1192 if (bt != NULL) {
1193 start = vmem_fit(bt, size, align, phase,
1194 nocross, minaddr, maxaddr);
1195 if (start != VMEM_ADDR_NULL) {
1196 goto gotit;
1197 }
1198 }
1199 }
1200 } else { /* VM_BESTFIT */
1201 for (list = first; list < end; list++) {
1202 LIST_FOREACH(bt, list, bt_freelist) {
1203 if (bt->bt_size >= size) {
1204 start = vmem_fit(bt, size, align, phase,
1205 nocross, minaddr, maxaddr);
1206 if (start != VMEM_ADDR_NULL) {
1207 goto gotit;
1208 }
1209 }
1210 }
1211 }
1212 }
1213 VMEM_UNLOCK(vm);
1214 #if 1
1215 if (strat == VM_INSTANTFIT) {
1216 strat = VM_BESTFIT;
1217 goto retry_strat;
1218 }
1219 #endif
1220 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
1221 nocross != 0 || minaddr != 0 || maxaddr != 0) {
1222
1223 /*
1224 * XXX should try to import a region large enough to
1225 * satisfy restrictions?
1226 */
1227
1228 goto fail;
1229 }
1230 if (vmem_import(vm, size, flags) == 0) {
1231 goto retry;
1232 }
1233 /* XXX */
1234 fail:
1235 if ((flags & VM_BTPAGE) != 0) {
1236 uvm_pagefree(pg);
1237 } else {
1238 bt_free(vm, btnew);
1239 bt_free(vm, btnew2);
1240 }
1241 return VMEM_ADDR_NULL;
1242
1243 gotit:
1244 #if defined(PMAP_GROWKERNEL)
1245 if ((vm->vm_flags & VMC_KVA) != 0) {
1246 uvm_growkernel(start + size);
1247 }
1248 #endif /* defined(PMAP_GROWKERNEL) */
1249 if ((flags & VM_BTPAGE) != 0) {
1250 vaddr_t va = (vaddr_t)start;
1251
1252 KASSERT(bt->bt_start == start);
1253 btnew = bt_alloc_bootstrap(vm);
1254 btpage_init(vm, pg, va);
1255 }
1256 KASSERT(bt->bt_type == BT_TYPE_FREE);
1257 KASSERT(bt->bt_size >= size);
1258 bt_remfree(vm, bt);
1259 if (bt->bt_start != start) {
1260 btnew2->bt_type = BT_TYPE_FREE;
1261 btnew2->bt_start = bt->bt_start;
1262 btnew2->bt_size = start - bt->bt_start;
1263 bt->bt_start = start;
1264 bt->bt_size -= btnew2->bt_size;
1265 bt_insfree(vm, btnew2);
1266 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
1267 btnew2 = NULL;
1268 }
1269 KASSERT(bt->bt_start == start);
1270 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
1271 /* split */
1272 btnew->bt_type = BT_TYPE_BUSY;
1273 btnew->bt_start = bt->bt_start;
1274 btnew->bt_size = size;
1275 bt->bt_start = bt->bt_start + size;
1276 bt->bt_size -= size;
1277 bt_insfree(vm, bt);
1278 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
1279 bt_insbusy(vm, btnew);
1280 VMEM_UNLOCK(vm);
1281 } else {
1282 bt->bt_type = BT_TYPE_BUSY;
1283 bt_insbusy(vm, bt);
1284 VMEM_UNLOCK(vm);
1285 bt_free(vm, btnew);
1286 btnew = bt;
1287 }
1288 if (btnew2 != NULL) {
1289 bt_free(vm, btnew2);
1290 }
1291 KASSERT(btnew->bt_size >= size);
1292 btnew->bt_type = BT_TYPE_BUSY;
1293
1294 return btnew->bt_start;
1295 }
1296
1297 /*
1298 * vmem_free:
1299 *
1300 * => caller must ensure appropriate spl,
1301 * if the arena can be accessed from interrupt context.
1302 */
1303
1304 void
1305 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1306 {
1307
1308 KASSERT(addr != VMEM_ADDR_NULL);
1309 KASSERT(size > 0);
1310
1311 #if defined(QCACHE)
1312 if (size <= vm->vm_qcache_max) {
1313 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
1314 qcache_t *qc = vm->vm_qcache[qidx - 1];
1315
1316 return pool_cache_put(qc->qc_cache, (void *)addr);
1317 }
1318 #endif /* defined(QCACHE) */
1319
1320 vmem_xfree(vm, addr, size);
1321 }
1322
1323 void
1324 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1325 {
1326 bt_t *bt;
1327 bt_t *t;
1328 SLIST_HEAD(, vmem_btag) tofree;
1329
1330 KASSERT(addr != VMEM_ADDR_NULL);
1331 KASSERT(size > 0);
1332
1333 SLIST_INIT(&tofree);
1334
1335 VMEM_LOCK(vm);
1336
1337 bt = bt_lookupbusy(vm, addr);
1338 KASSERT(bt != NULL);
1339 KASSERT(bt->bt_start == addr);
1340 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
1341 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1342 KASSERT(bt->bt_type == BT_TYPE_BUSY);
1343 bt_rembusy(vm, bt);
1344 bt->bt_type = BT_TYPE_FREE;
1345
1346 /* coalesce */
1347 t = CIRCLEQ_NEXT(bt, bt_seglist);
1348 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1349 KASSERT(BT_END(bt) == t->bt_start);
1350 bt_remfree(vm, t);
1351 bt_remseg(vm, t);
1352 bt->bt_size += t->bt_size;
1353 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1354 }
1355 t = CIRCLEQ_PREV(bt, bt_seglist);
1356 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1357 KASSERT(BT_END(t) == bt->bt_start);
1358 bt_remfree(vm, t);
1359 bt_remseg(vm, t);
1360 bt->bt_size += t->bt_size;
1361 bt->bt_start = t->bt_start;
1362 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1363 }
1364
1365 t = CIRCLEQ_PREV(bt, bt_seglist);
1366 KASSERT(t != NULL);
1367 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1368 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1369 t->bt_size == bt->bt_size) {
1370 vmem_addr_t spanaddr;
1371 vmem_size_t spansize;
1372
1373 KASSERT(t->bt_start == bt->bt_start);
1374 spanaddr = bt->bt_start;
1375 spansize = bt->bt_size;
1376 bt_remseg(vm, bt);
1377 SLIST_INSERT_HEAD(&tofree, bt, bt_tmplist);
1378 bt_remseg(vm, t);
1379 SLIST_INSERT_HEAD(&tofree, t, bt_tmplist);
1380 VMEM_UNLOCK(vm);
1381 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1382 } else {
1383 bt_insfree(vm, bt);
1384 VMEM_UNLOCK(vm);
1385 }
1386 while ((t = SLIST_FIRST(&tofree)) != NULL) {
1387 SLIST_REMOVE_HEAD(&tofree, bt_tmplist);
1388 bt_free(vm, t);
1389 }
1390 }
1391
1392 /*
1393 * vmem_add:
1394 *
1395 * => caller must ensure appropriate spl,
1396 * if the arena can be accessed from interrupt context.
1397 */
1398
1399 vmem_addr_t
1400 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1401 {
1402
1403 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1404 }
1405
1406 /*
1407 * vmem_reap: reap unused resources.
1408 *
1409 * => return true if we successfully reaped something.
1410 */
1411
1412 bool
1413 vmem_reap(vmem_t *vm)
1414 {
1415 bool didsomething = false;
1416
1417 #if defined(QCACHE)
1418 didsomething = qc_reap(vm);
1419 #endif /* defined(QCACHE) */
1420 return didsomething;
1421 }
1422
1423 /* ---- rehash */
1424
1425 #if defined(_KERNEL)
1426 static struct callout vmem_rehash_ch;
1427 static int vmem_rehash_interval;
1428 static struct workqueue *vmem_rehash_wq;
1429 static struct work vmem_rehash_wk;
1430
1431 static void
1432 vmem_rehash_all(struct work *wk, void *dummy)
1433 {
1434 vmem_t *vm;
1435
1436 KASSERT(wk == &vmem_rehash_wk);
1437 mutex_enter(&vmem_list_lock);
1438 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1439 size_t desired;
1440 size_t current;
1441
1442 if (!VMEM_TRYLOCK(vm)) {
1443 continue;
1444 }
1445 desired = vm->vm_nbusytag;
1446 current = vm->vm_hashsize;
1447 VMEM_UNLOCK(vm);
1448
1449 if (desired > VMEM_HASHSIZE_MAX) {
1450 desired = VMEM_HASHSIZE_MAX;
1451 } else if (desired < VMEM_HASHSIZE_MIN) {
1452 desired = VMEM_HASHSIZE_MIN;
1453 }
1454 if (desired > current * 2 || desired * 2 < current) {
1455 vmem_rehash(vm, desired, VM_NOSLEEP);
1456 }
1457 }
1458 mutex_exit(&vmem_list_lock);
1459
1460 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1461 }
1462
1463 static void
1464 vmem_rehash_all_kick(void *dummy)
1465 {
1466
1467 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
1468 }
1469
1470 void
1471 vmem_rehash_start(void)
1472 {
1473 int error;
1474
1475 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
1476 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, 0);
1477 if (error) {
1478 panic("%s: workqueue_create %d\n", __func__, error);
1479 }
1480 callout_init(&vmem_rehash_ch, 0);
1481 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);
1482
1483 vmem_rehash_interval = hz * 10;
1484 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
1485 }
1486 #endif /* defined(_KERNEL) */
1487
1488 /* ---- debug */
1489
1490 #if defined(DDB)
1491 static bt_t *
1492 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
1493 {
1494 int i;
1495
1496 for (i = 0; i < vm->vm_hashsize; i++) {
1497 bt_t *bt;
1498
1499 LIST_FOREACH(bt, &vm->vm_hashlist[i], bt_hashlist) {
1500 if (bt->bt_start <= addr && addr < BT_END(bt)) {
1501 return bt;
1502 }
1503 }
1504 }
1505
1506 return NULL;
1507 }
1508
1509 void
1510 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
1511 {
1512 vmem_t *vm;
1513
1514 LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1515 bt_t *bt;
1516
1517 bt = vmem_whatis_lookup(vm, addr);
1518 if (bt == NULL) {
1519 continue;
1520 }
1521 (*pr)("%p is %p+%zu from VMEM '%s'\n",
1522 (void *)addr, (void *)bt->bt_start,
1523 (size_t)(addr - bt->bt_start), vm->vm_name);
1524 }
1525 }
1526 #endif /* defined(DDB) */
1527
1528 #if defined(VMEM_DEBUG)
1529
1530 #if !defined(_KERNEL)
1531 #include <stdio.h>
1532 #endif /* !defined(_KERNEL) */
1533
1534 void bt_dump(const bt_t *);
1535
1536 void
1537 bt_dump(const bt_t *bt)
1538 {
1539
1540 printf("\t%p: %" PRIu64 "(0x%" PRIx64 "), %" PRIu64 "(0x%" PRIx64
1541 "), %d\n",
1542 bt,
1543 (uint64_t)bt->bt_start, (uint64_t)bt->bt_start,
1544 (uint64_t)bt->bt_size, (uint64_t)bt->bt_size,
1545 bt->bt_type);
1546 }
1547
1548 void
1549 vmem_dump_seglist(const vmem_t *vm)
1550 {
1551 const bt_t *bt;
1552
1553 printf("vmem %p '%s' SEGLIST\n", vm, vm->vm_name);
1554
1555 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1556 bt_dump(bt);
1557 }
1558 }
1559
1560 void
1561 vmem_dump_freelist(const vmem_t *vm)
1562 {
1563 const bt_t *bt;
1564 int i;
1565
1566 printf("vmem %p '%s' FREELIST\n", vm, vm->vm_name);
1567
1568 for (i = 0; i < VMEM_MAXORDER; i++) {
1569 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1570
1571 if (LIST_EMPTY(fl)) {
1572 continue;
1573 }
1574
1575 printf("freelist[%d]\n", i);
1576 LIST_FOREACH(bt, fl, bt_freelist) {
1577 bt_dump(bt);
1578 }
1579 }
1580 }
1581
1582 #if defined(QCACHE)
1583 void
1584 vmem_dump_qc(const vmem_t *vm)
1585 {
1586 int qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
1587 int i;
1588 const qcache_t *prevqc;
1589
1590 printf("qcache_max=%zu\n", vm->vm_qcache_max);
1591
1592 prevqc = NULL;
1593 for (i = 0; i < qcache_idx_max; i++) {
1594 const qcache_t *qc;
1595
1596 qc = vm->vm_qcache[i];
1597 if (prevqc != qc) {
1598 printf("CACHE[%d] (%zu-) %p\n",
1599 i, (size_t)i << vm->vm_quantum_shift, qc->qc_cache);
1600 }
1601 prevqc = qc;
1602 }
1603 }
1604 #endif /* defined(QCACHE) */
1605
1606 void
1607 vmem_dump(const vmem_t *vm)
1608 {
1609
1610 printf("vmem %p '%s'\n", vm, vm->vm_name);
1611 vmem_dump_seglist(vm);
1612 vmem_dump_freelist(vm);
1613 #if defined(QCACHE)
1614 vmem_dump_qc(vm);
1615 #endif /* defined(QCACHE) */
1616 }
1617
1618 #if !defined(_KERNEL)
1619
1620 int
1621 main()
1622 {
1623 vmem_t *vm;
1624 vmem_addr_t p;
1625 struct reg {
1626 vmem_addr_t p;
1627 vmem_size_t sz;
1628 bool x;
1629 } *reg = NULL;
1630 int nreg = 0;
1631 int nalloc = 0;
1632 int nfree = 0;
1633 vmem_size_t total = 0;
1634 #if 1
1635 vm_flag_t strat = VM_INSTANTFIT;
1636 #else
1637 vm_flag_t strat = VM_BESTFIT;
1638 #endif
1639
1640 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1641 NULL, NULL, NULL, 0, VM_SLEEP);
1642 if (vm == NULL) {
1643 printf("vmem_create\n");
1644 exit(EXIT_FAILURE);
1645 }
1646 vmem_dump(vm);
1647
1648 p = vmem_add(vm, 100, 200, VM_SLEEP);
1649 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1650 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1651 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1652 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1653 vmem_dump(vm);
1654 for (;;) {
1655 struct reg *r;
1656 int t = rand() % 100;
1657
1658 if (t > 45) {
1659 /* alloc */
1660 vmem_size_t sz = rand() % 500 + 1;
1661 bool x;
1662 vmem_size_t align, phase, nocross;
1663 vmem_addr_t minaddr, maxaddr;
1664
1665 if (t > 70) {
1666 x = true;
1667 /* XXX */
1668 align = 1 << (rand() % 15);
1669 phase = rand() % 65536;
1670 nocross = 1 << (rand() % 15);
1671 if (align <= phase) {
1672 phase = 0;
1673 }
1674 if (VMEM_CROSS_P(phase, phase + sz - 1,
1675 nocross)) {
1676 nocross = 0;
1677 }
1678 minaddr = rand() % 50000;
1679 maxaddr = rand() % 70000;
1680 if (minaddr > maxaddr) {
1681 minaddr = 0;
1682 maxaddr = 0;
1683 }
1684 printf("=== xalloc %" PRIu64
1685 " align=%" PRIu64 ", phase=%" PRIu64
1686 ", nocross=%" PRIu64 ", min=%" PRIu64
1687 ", max=%" PRIu64 "\n",
1688 (uint64_t)sz,
1689 (uint64_t)align,
1690 (uint64_t)phase,
1691 (uint64_t)nocross,
1692 (uint64_t)minaddr,
1693 (uint64_t)maxaddr);
1694 p = vmem_xalloc(vm, sz, align, phase, nocross,
1695 minaddr, maxaddr, strat|VM_SLEEP);
1696 } else {
1697 x = false;
1698 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1699 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1700 }
1701 printf("-> %" PRIu64 "\n", (uint64_t)p);
1702 vmem_dump(vm);
1703 if (p == VMEM_ADDR_NULL) {
1704 if (x) {
1705 continue;
1706 }
1707 break;
1708 }
1709 nreg++;
1710 reg = realloc(reg, sizeof(*reg) * nreg);
1711 r = ®[nreg - 1];
1712 r->p = p;
1713 r->sz = sz;
1714 r->x = x;
1715 total += sz;
1716 nalloc++;
1717 } else if (nreg != 0) {
1718 /* free */
1719 r = ®[rand() % nreg];
1720 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1721 (uint64_t)r->p, (uint64_t)r->sz);
1722 if (r->x) {
1723 vmem_xfree(vm, r->p, r->sz);
1724 } else {
1725 vmem_free(vm, r->p, r->sz);
1726 }
1727 total -= r->sz;
1728 vmem_dump(vm);
1729 *r = reg[nreg - 1];
1730 nreg--;
1731 nfree++;
1732 }
1733 printf("total=%" PRIu64 "\n", (uint64_t)total);
1734 }
1735 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1736 (uint64_t)total, nalloc, nfree);
1737 exit(EXIT_SUCCESS);
1738 }
1739 #endif /* !defined(_KERNEL) */
1740 #endif /* defined(VMEM_DEBUG) */
1741