subr_vmem.c revision 1.27.2.2 1 /* $NetBSD: subr_vmem.c,v 1.27.2.2 2007/03/13 17:50:58 ad Exp $ */
2
3 /*-
4 * Copyright (c)2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 * - don't rely on malloc(9).
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.27.2.2 2007/03/13 17:50:58 ad Exp $");
42
/* Always compile the debugging helpers guarded by VMEM_DEBUG (vmem_dump etc.). */
43 #define VMEM_DEBUG
44 #if defined(_KERNEL)
/* The quantum cache code is only compiled into kernel builds. */
45 #define QCACHE
46 #endif /* defined(_KERNEL) */
47
48 #include <sys/param.h>
49 #include <sys/hash.h>
50 #include <sys/queue.h>
51
52 #if defined(_KERNEL)
53 #include <sys/systm.h>
54 #include <sys/mutex.h>
55 #include <sys/malloc.h>
56 #include <sys/once.h>
57 #include <sys/pool.h>
58 #include <sys/proc.h>
59 #include <sys/vmem.h>
60 #else /* defined(_KERNEL) */
61 #include "../sys/vmem.h"
62 #endif /* defined(_KERNEL) */
63
/*
 * Locking and assertion shims.
 *
 * Kernel builds embed a kmutex_t in the arena.  The standalone (non-kernel)
 * build has no lock at all: LOCK_DECL and every mutex operation expand to
 * nothing, and KASSERT maps onto assert(3).
 *
 * NOTE(review): mutex_owned() expands to nothing here, so it cannot be used
 * inside an expression in the standalone build.
 */
64 #if defined(_KERNEL)
65 #define LOCK_DECL(name) kmutex_t name
66 #else /* defined(_KERNEL) */
67 #include <errno.h>
68 #include <assert.h>
69 #include <stdlib.h>
70
71 #define KASSERT(a) assert(a)
72 #define LOCK_DECL(name) /* nothing */
73 #define mutex_init(a, b, c) /* nothing */
74 #define mutex_destroy(a) /* nothing */
75 #define mutex_enter(a) /* nothing */
76 #define mutex_exit(a) /* nothing */
77 #define mutex_owned(a) /* nothing */
78 #define ASSERT_SLEEPABLE(lk, msg) /* nothing */
79 #endif /* defined(_KERNEL) */
80
81 struct vmem;
82 struct vmem_btag;
83
84 #if defined(VMEM_DEBUG)
85 void vmem_dump(const vmem_t *);
86 #endif /* defined(VMEM_DEBUG) */
87
/* Number of free lists per arena: one per bit of vmem_size_t. */
88 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
/* Bucket count used for the busy-tag hash when an arena is created. */
89 #define VMEM_HASHSIZE_INIT 4096 /* XXX */
90
/* Mask selecting the allocation-strategy bits out of a vm_flag_t. */
91 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)
92
/* List head types: segment list (circular), free lists and hash buckets. */
93 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
94 LIST_HEAD(vmem_freelist, vmem_btag);
95 LIST_HEAD(vmem_hashlist, vmem_btag);
96
97 #if defined(QCACHE)
/* Upper bound on the number of quantum-cache size classes per arena. */
98 #define VMEM_QCACHE_IDX_MAX 32
99
/* Size of the buffer that holds each quantum cache's pool name. */
100 #define QC_NAME_MAX 16
101
/*
 * One quantum cache: a pool and its pool cache, tied to the arena that
 * supplies the pool's pages.
 *
 * qc_pool must remain the first member: QC_POOL_TO_QCACHE() converts a
 * struct pool pointer back to the enclosing qcache with a plain cast.
 */
102 struct qcache {
103 struct pool qc_pool;
104 struct pool_cache qc_cache;
105 vmem_t *qc_vmem;	/* arena this cache allocates from */
106 char qc_name[QC_NAME_MAX];	/* "<arena name>-<object size>", see qc_init */
107 };
108 typedef struct qcache qcache_t;
109 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool))
110 #endif /* defined(QCACHE) */
111
112 /* vmem arena */
113 struct vmem {
114 LOCK_DECL(vm_lock);	/* held around segment/free/hash list updates */
/* import hook: called by vmem_import() with vm_source as its first argument */
115 vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
116 vm_flag_t);
/* release hook: called with vm_source; may be NULL (checked in vmem_xfree) */
117 void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
118 vmem_t *vm_source;	/* arena handed to vm_allocfn/vm_freefn */
119 struct vmem_seglist vm_seglist;	/* every boundary tag, spans included */
120 struct vmem_freelist vm_freelist[VMEM_MAXORDER];	/* free tags by size order */
121 size_t vm_hashsize;	/* number of buckets in vm_hashlist */
122 size_t vm_nbusytag;	/* tags currently linked into vm_hashlist */
123 struct vmem_hashlist *vm_hashlist;	/* busy tags hashed by start address */
124 size_t vm_quantum_mask;	/* quantum - 1 */
125 int vm_quantum_shift;	/* calc_order(quantum) */
126 const char *vm_name;	/* caller's pointer, not copied */
127
128 #if defined(QCACHE)
129 /* quantum cache */
130 size_t vm_qcache_max;	/* largest size served from the quantum caches */
131 struct pool_allocator vm_qcache_allocator;	/* page allocator shared by all caches */
132 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];	/* backing storage */
133 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];	/* [size in quanta - 1] -> cache */
134 #endif /* defined(QCACHE) */
135 };
136
137 /* boundary tag */
/*
 * One tag describes one segment [bt_start, bt_start + bt_size).  Every tag is
 * linked on the arena's segment list; in addition a free tag sits on a free
 * list and a busy tag sits in a hash bucket, so those two linkages share
 * storage in bt_u.
 */
138 struct vmem_btag {
139 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
140 union {
141 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
142 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
143 } bt_u;
144 #define bt_hashlist bt_u.u_hashlist
145 #define bt_freelist bt_u.u_freelist
146 vmem_addr_t bt_start;
147 vmem_size_t bt_size;
148 int bt_type;	/* one of the BT_TYPE_* values below */
149 };
150
151 #define BT_TYPE_SPAN 1
152 #define BT_TYPE_SPAN_STATIC 2
153 #define BT_TYPE_FREE 3
154 #define BT_TYPE_BUSY 4
/* True for both span types; relies on the span values being the two smallest. */
155 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
156
/* One past the last address covered by the tag. */
157 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)
158
159 typedef struct vmem_btag bt_t;
160
161 /* ---- misc */
162
/*
 * VMEM_ALIGNUP: round addr up to a multiple of align.  The arithmetic only
 * works for unsigned operands and a power-of-two align.
 */
163 #define VMEM_ALIGNUP(addr, align) \
164 (-(-(addr) & -(align)))
/*
 * VMEM_CROSS_P: true if addr1 and addr2 differ in a bit at or above the
 * boundary bit, i.e. they do not share one boundary-aligned window.
 * A boundary of 0 yields a zero mask, so the result is always false.
 */
165 #define VMEM_CROSS_P(addr1, addr2, boundary) \
166 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
167
/* 2^order as a vmem_size_t. */
168 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
169
170 static int
171 calc_order(vmem_size_t size)
172 {
173 vmem_size_t target;
174 int i;
175
176 KASSERT(size != 0);
177
178 i = 0;
179 target = size >> 1;
180 while (ORDER2SIZE(i) <= target) {
181 i++;
182 }
183
184 KASSERT(ORDER2SIZE(i) <= size);
185 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
186
187 return i;
188 }
189
190 #if defined(_KERNEL)
191 static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
192 #endif /* defined(_KERNEL) */
193
194 static void *
195 xmalloc(size_t sz, vm_flag_t flags)
196 {
197
198 #if defined(_KERNEL)
199 return malloc(sz, M_VMEM,
200 M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
201 #else /* defined(_KERNEL) */
202 return malloc(sz);
203 #endif /* defined(_KERNEL) */
204 }
205
/*
 * xfree: release memory obtained from xmalloc().
 *
 * free() returns no value, so it is called as a plain statement; ISO C does
 * not allow "return expr;" in a function whose return type is void.
 */
static void
xfree(void *p)
{

#if defined(_KERNEL)
	free(p, M_VMEM);
#else /* defined(_KERNEL) */
	free(p);
#endif /* defined(_KERNEL) */
}
216
217 /* ---- boundary tag */
218
219 #if defined(_KERNEL)
220 static struct pool_cache bt_poolcache;
221 static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL, IPL_VM);
222 #endif /* defined(_KERNEL) */
223
224 static bt_t *
225 bt_alloc(vmem_t *vm, vm_flag_t flags)
226 {
227 bt_t *bt;
228
229 #if defined(_KERNEL)
230 /* XXX bootstrap */
231 bt = pool_cache_get(&bt_poolcache,
232 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
233 #else /* defined(_KERNEL) */
234 bt = malloc(sizeof *bt);
235 #endif /* defined(_KERNEL) */
236
237 return bt;
238 }
239
240 static void
241 bt_free(vmem_t *vm, bt_t *bt)
242 {
243
244 #if defined(_KERNEL)
245 int s;
246
247 /* XXX bootstrap */
248 s = splvm();
249 pool_cache_put(&bt_poolcache, bt);
250 splx(s);
251 #else /* defined(_KERNEL) */
252 free(bt);
253 #endif /* defined(_KERNEL) */
254 }
255
256 /*
257 * freelist[0] ... [1, 1]
258 * freelist[1] ... [2, 3]
259 * freelist[2] ... [4, 7]
260 * freelist[3] ... [8, 15]
261 * :
262 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
263 * :
264 */
265
266 static struct vmem_freelist *
267 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
268 {
269 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
270 int idx;
271
272 KASSERT((size & vm->vm_quantum_mask) == 0);
273 KASSERT(size != 0);
274
275 idx = calc_order(qsize);
276 KASSERT(idx >= 0);
277 KASSERT(idx < VMEM_MAXORDER);
278
279 return &vm->vm_freelist[idx];
280 }
281
282 static struct vmem_freelist *
283 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
284 {
285 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
286 int idx;
287
288 KASSERT((size & vm->vm_quantum_mask) == 0);
289 KASSERT(size != 0);
290
291 idx = calc_order(qsize);
292 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
293 idx++;
294 /* check too large request? */
295 }
296 KASSERT(idx >= 0);
297 KASSERT(idx < VMEM_MAXORDER);
298
299 return &vm->vm_freelist[idx];
300 }
301
302 /* ---- boundary tag hash */
303
304 static struct vmem_hashlist *
305 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
306 {
307 struct vmem_hashlist *list;
308 unsigned int hash;
309
310 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
311 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
312
313 return list;
314 }
315
316 static bt_t *
317 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
318 {
319 struct vmem_hashlist *list;
320 bt_t *bt;
321
322 list = bt_hashhead(vm, addr);
323 LIST_FOREACH(bt, list, bt_hashlist) {
324 if (bt->bt_start == addr) {
325 break;
326 }
327 }
328
329 return bt;
330 }
331
332 static void
333 bt_rembusy(vmem_t *vm, bt_t *bt)
334 {
335
336 KASSERT(vm->vm_nbusytag > 0);
337 vm->vm_nbusytag--;
338 LIST_REMOVE(bt, bt_hashlist);
339 }
340
341 static void
342 bt_insbusy(vmem_t *vm, bt_t *bt)
343 {
344 struct vmem_hashlist *list;
345
346 KASSERT(bt->bt_type == BT_TYPE_BUSY);
347
348 list = bt_hashhead(vm, bt->bt_start);
349 LIST_INSERT_HEAD(list, bt, bt_hashlist);
350 vm->vm_nbusytag++;
351 }
352
353 /* ---- boundary tag list */
354
355 static void
356 bt_remseg(vmem_t *vm, bt_t *bt)
357 {
358
359 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
360 }
361
362 static void
363 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
364 {
365
366 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
367 }
368
369 static void
370 bt_insseg_tail(vmem_t *vm, bt_t *bt)
371 {
372
373 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
374 }
375
376 static void
377 bt_remfree(vmem_t *vm, bt_t *bt)
378 {
379
380 KASSERT(bt->bt_type == BT_TYPE_FREE);
381
382 LIST_REMOVE(bt, bt_freelist);
383 }
384
385 static void
386 bt_insfree(vmem_t *vm, bt_t *bt)
387 {
388 struct vmem_freelist *list;
389
390 list = bt_freehead_tofree(vm, bt->bt_size);
391 LIST_INSERT_HEAD(list, bt, bt_freelist);
392 }
393
394 /* ---- vmem internal functions */
395
396 #if defined(QCACHE)
397 static inline vm_flag_t
398 prf_to_vmf(int prflags)
399 {
400 vm_flag_t vmflags;
401
402 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
403 if ((prflags & PR_WAITOK) != 0) {
404 vmflags = VM_SLEEP;
405 } else {
406 vmflags = VM_NOSLEEP;
407 }
408 return vmflags;
409 }
410
411 static inline int
412 vmf_to_prf(vm_flag_t vmflags)
413 {
414 int prflags;
415
416 if ((vmflags & VM_SLEEP) != 0) {
417 prflags = PR_WAITOK;
418 } else {
419 prflags = PR_NOWAIT;
420 }
421 return prflags;
422 }
423
/*
 * qc_poolpage_size: choose the pool page size for the quantum caches.
 *
 * Returns the smallest power of two strictly greater than qcache_max * 3.
 */
static size_t
qc_poolpage_size(size_t qcache_max)
{
	const size_t threshold = qcache_max * 3;
	int order = 0;

	while (ORDER2SIZE(order) <= threshold) {
		order++;
	}

	return ORDER2SIZE(order);
}
434
435 static void *
436 qc_poolpage_alloc(struct pool *pool, int prflags)
437 {
438 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
439 vmem_t *vm = qc->qc_vmem;
440
441 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
442 prf_to_vmf(prflags) | VM_INSTANTFIT);
443 }
444
445 static void
446 qc_poolpage_free(struct pool *pool, void *addr)
447 {
448 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
449 vmem_t *vm = qc->qc_vmem;
450
451 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
452 }
453
454 static void
455 qc_init(vmem_t *vm, size_t qcache_max)
456 {
457 qcache_t *prevqc;
458 struct pool_allocator *pa;
459 int qcache_idx_max;
460 int i;
461
462 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
463 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
464 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
465 }
466 vm->vm_qcache_max = qcache_max;
467 pa = &vm->vm_qcache_allocator;
468 memset(pa, 0, sizeof(*pa));
469 pa->pa_alloc = qc_poolpage_alloc;
470 pa->pa_free = qc_poolpage_free;
471 pa->pa_pagesz = qc_poolpage_size(qcache_max);
472
473 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
474 prevqc = NULL;
475 for (i = qcache_idx_max; i > 0; i--) {
476 qcache_t *qc = &vm->vm_qcache_store[i - 1];
477 size_t size = i << vm->vm_quantum_shift;
478
479 qc->qc_vmem = vm;
480 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
481 vm->vm_name, size);
482 pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
483 0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa,
484 IPL_NONE);
485 if (prevqc != NULL &&
486 qc->qc_pool.pr_itemsperpage ==
487 prevqc->qc_pool.pr_itemsperpage) {
488 pool_destroy(&qc->qc_pool);
489 vm->vm_qcache[i - 1] = prevqc;
490 continue;
491 }
492 pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
493 vm->vm_qcache[i - 1] = qc;
494 prevqc = qc;
495 }
496 }
497
498 static void
499 qc_destroy(vmem_t *vm)
500 {
501 const qcache_t *prevqc;
502 int i;
503 int qcache_idx_max;
504
505 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
506 prevqc = NULL;
507 for (i = 0; i < qcache_idx_max; i++) {
508 qcache_t *qc = vm->vm_qcache[i];
509
510 if (prevqc == qc) {
511 continue;
512 }
513 pool_cache_destroy(&qc->qc_cache);
514 pool_destroy(&qc->qc_pool);
515 prevqc = qc;
516 }
517 }
518
519 static bool
520 qc_reap(vmem_t *vm)
521 {
522 const qcache_t *prevqc;
523 int i;
524 int qcache_idx_max;
525 bool didsomething = false;
526
527 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
528 prevqc = NULL;
529 for (i = 0; i < qcache_idx_max; i++) {
530 qcache_t *qc = vm->vm_qcache[i];
531
532 if (prevqc == qc) {
533 continue;
534 }
535 if (pool_reclaim(&qc->qc_pool) != 0) {
536 didsomething = true;
537 }
538 prevqc = qc;
539 }
540
541 return didsomething;
542 }
543 #endif /* defined(QCACHE) */
544
#if defined(_KERNEL)
/*
 * vmem_init: one-time setup, run through RUN_ONCE() from vmem_create().
 *
 * Attaches the boundary tag pool cache to bt_pool.  Always returns 0.
 */
static int
vmem_init(void)
{
	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);

	return 0;
}
#endif /* defined(_KERNEL) */
554
555 static vmem_addr_t
556 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
557 int spanbttype)
558 {
559 bt_t *btspan;
560 bt_t *btfree;
561
562 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
563 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
564
565 btspan = bt_alloc(vm, flags);
566 if (btspan == NULL) {
567 return VMEM_ADDR_NULL;
568 }
569 btfree = bt_alloc(vm, flags);
570 if (btfree == NULL) {
571 bt_free(vm, btspan);
572 return VMEM_ADDR_NULL;
573 }
574
575 btspan->bt_type = spanbttype;
576 btspan->bt_start = addr;
577 btspan->bt_size = size;
578
579 btfree->bt_type = BT_TYPE_FREE;
580 btfree->bt_start = addr;
581 btfree->bt_size = size;
582
583 mutex_enter(&vm->vm_lock);
584 bt_insseg_tail(vm, btspan);
585 bt_insseg(vm, btfree, btspan);
586 bt_insfree(vm, btfree);
587 mutex_exit(&vm->vm_lock);
588
589 return addr;
590 }
591
592 static int
593 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
594 {
595 vmem_addr_t addr;
596
597 if (vm->vm_allocfn == NULL) {
598 return EINVAL;
599 }
600
601 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
602 if (addr == VMEM_ADDR_NULL) {
603 return ENOMEM;
604 }
605
606 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
607 (*vm->vm_freefn)(vm->vm_source, addr, size);
608 return ENOMEM;
609 }
610
611 return 0;
612 }
613
614 static int
615 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
616 {
617 bt_t *bt;
618 int i;
619 struct vmem_hashlist *newhashlist;
620 struct vmem_hashlist *oldhashlist;
621 size_t oldhashsize;
622
623 KASSERT(newhashsize > 0);
624
625 newhashlist =
626 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
627 if (newhashlist == NULL) {
628 return ENOMEM;
629 }
630 for (i = 0; i < newhashsize; i++) {
631 LIST_INIT(&newhashlist[i]);
632 }
633
634 mutex_enter(&vm->vm_lock);
635 oldhashlist = vm->vm_hashlist;
636 oldhashsize = vm->vm_hashsize;
637 vm->vm_hashlist = newhashlist;
638 vm->vm_hashsize = newhashsize;
639 if (oldhashlist == NULL) {
640 mutex_exit(&vm->vm_lock);
641 return 0;
642 }
643 for (i = 0; i < oldhashsize; i++) {
644 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
645 bt_rembusy(vm, bt); /* XXX */
646 bt_insbusy(vm, bt);
647 }
648 }
649 mutex_exit(&vm->vm_lock);
650
651 xfree(oldhashlist);
652
653 return 0;
654 }
655
656 /*
657 * vmem_fit: check if a bt can satisfy the given restrictions.
658 */
659
660 static vmem_addr_t
661 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
662 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
663 {
664 vmem_addr_t start;
665 vmem_addr_t end;
666
667 KASSERT(bt->bt_size >= size);
668
669 /*
670 * XXX assumption: vmem_addr_t and vmem_size_t are
671 * unsigned integer of the same size.
672 */
673
674 start = bt->bt_start;
675 if (start < minaddr) {
676 start = minaddr;
677 }
678 end = BT_END(bt);
679 if (end > maxaddr - 1) {
680 end = maxaddr - 1;
681 }
682 if (start >= end) {
683 return VMEM_ADDR_NULL;
684 }
685
686 start = VMEM_ALIGNUP(start - phase, align) + phase;
687 if (start < bt->bt_start) {
688 start += align;
689 }
690 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
691 KASSERT(align < nocross);
692 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
693 }
694 if (start < end && end - start >= size) {
695 KASSERT((start & (align - 1)) == phase);
696 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
697 KASSERT(minaddr <= start);
698 KASSERT(maxaddr == 0 || start + size <= maxaddr);
699 KASSERT(bt->bt_start <= start);
700 KASSERT(start + size <= BT_END(bt));
701 return start;
702 }
703 return VMEM_ADDR_NULL;
704 }
705
706 /* ---- vmem API */
707
708 /*
709 * vmem_create: create an arena.
710 *
711 * => must not be called from interrupt context.
712 */
713
714 vmem_t *
715 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
716 vmem_size_t quantum,
717 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
718 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
719 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
720 {
721 vmem_t *vm;
722 int i;
723 #if defined(_KERNEL)
724 static ONCE_DECL(control);
725 #endif /* defined(_KERNEL) */
726
727 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
728 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
729
730 #if defined(_KERNEL)
731 if (RUN_ONCE(&control, vmem_init)) {
732 return NULL;
733 }
734 #endif /* defined(_KERNEL) */
735 vm = xmalloc(sizeof(*vm), flags);
736 if (vm == NULL) {
737 return NULL;
738 }
739
740 if ((flags & VM_NOSLEEP) != 0) {
741 mutex_init(&vm->vm_lock, MUTEX_DRIVER, IPL_VM);
742 } else {
743 mutex_init(&vm->vm_lock, MUTEX_DRIVER, IPL_NONE);
744 }
745
746 vm->vm_name = name;
747 vm->vm_quantum_mask = quantum - 1;
748 vm->vm_quantum_shift = calc_order(quantum);
749 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
750 vm->vm_allocfn = allocfn;
751 vm->vm_freefn = freefn;
752 vm->vm_source = source;
753 vm->vm_nbusytag = 0;
754 #if defined(QCACHE)
755 qc_init(vm, qcache_max);
756 #endif /* defined(QCACHE) */
757
758 CIRCLEQ_INIT(&vm->vm_seglist);
759 for (i = 0; i < VMEM_MAXORDER; i++) {
760 LIST_INIT(&vm->vm_freelist[i]);
761 }
762 vm->vm_hashlist = NULL;
763 if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
764 vmem_destroy(vm);
765 return NULL;
766 }
767
768 if (size != 0) {
769 if (vmem_add(vm, base, size, flags) == 0) {
770 vmem_destroy(vm);
771 return NULL;
772 }
773 }
774
775 return vm;
776 }
777
778 void
779 vmem_destroy(vmem_t *vm)
780 {
781
782 #if defined(QCACHE)
783 qc_destroy(vm);
784 #endif /* defined(QCACHE) */
785 if (vm->vm_hashlist != NULL) {
786 int i;
787
788 for (i = 0; i < vm->vm_hashsize; i++) {
789 bt_t *bt;
790
791 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
792 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
793 bt_free(vm, bt);
794 }
795 }
796 xfree(vm->vm_hashlist);
797 }
798 mutex_destroy(&vm->vm_lock);
799 xfree(vm);
800 }
801
802 vmem_size_t
803 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
804 {
805
806 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
807 }
808
809 /*
810 * vmem_alloc:
811 *
812 * => caller must ensure appropriate spl,
813 * if the arena can be accessed from interrupt context.
814 */
815
816 vmem_addr_t
817 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
818 {
819 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
820 const vm_flag_t strat __unused = flags & VM_FITMASK;
821
822 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
823 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
824
825 KASSERT(size0 > 0);
826 KASSERT(size > 0);
827 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
828 if ((flags & VM_SLEEP) != 0) {
829 ASSERT_SLEEPABLE(NULL, __func__);
830 }
831
832 #if defined(QCACHE)
833 if (size <= vm->vm_qcache_max) {
834 int qidx = size >> vm->vm_quantum_shift;
835 qcache_t *qc = vm->vm_qcache[qidx - 1];
836
837 return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
838 vmf_to_prf(flags));
839 }
840 #endif /* defined(QCACHE) */
841
842 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
843 }
844
/*
 * vmem_xalloc: allocate size0 bytes (rounded up to the quantum) subject to
 * placement restrictions.
 *
 * align	required alignment of (address - phase); 0 means the quantum.
 * phase	offset from the alignment boundary; must be less than align.
 * nocross	the allocation must not cross a multiple of this; 0 disables.
 * minaddr,
 * maxaddr	address window; 0/0 means unrestricted.
 * flags	sleep flag plus VM_BESTFIT or VM_INSTANTFIT.
 *
 * Returns the start address, or VMEM_ADDR_NULL on failure.
 *
 * Both boundary tags that a split may need are allocated up front, so no
 * tag allocation happens while vm_lock is held.  Unused ones are freed
 * before returning.
 */
845 vmem_addr_t
846 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
847 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
848 vm_flag_t flags)
849 {
850 struct vmem_freelist *list;
851 struct vmem_freelist *first;
852 struct vmem_freelist *end;
853 bt_t *bt;
854 bt_t *btnew;
855 bt_t *btnew2;
856 const vmem_size_t size = vmem_roundup_size(vm, size0);
857 vm_flag_t strat = flags & VM_FITMASK;
858 vmem_addr_t start;
859
860 KASSERT(size0 > 0);
861 KASSERT(size > 0);
862 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
863 if ((flags & VM_SLEEP) != 0) {
864 ASSERT_SLEEPABLE(NULL, __func__);
865 }
866 KASSERT((align & vm->vm_quantum_mask) == 0);
867 KASSERT((align & (align - 1)) == 0);
868 KASSERT((phase & vm->vm_quantum_mask) == 0);
869 KASSERT((nocross & vm->vm_quantum_mask) == 0);
870 KASSERT((nocross & (nocross - 1)) == 0);
871 KASSERT((align == 0 && phase == 0) || phase < align);
872 KASSERT(nocross == 0 || nocross >= size);
873 KASSERT(maxaddr == 0 || minaddr < maxaddr);
874 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
875
/* No alignment requested: fall back to the arena's quantum. */
876 if (align == 0) {
877 align = vm->vm_quantum_mask + 1;
878 }
/* btnew: the busy tag if the segment is split; btnew2: a leading free part. */
879 btnew = bt_alloc(vm, flags);
880 if (btnew == NULL) {
881 return VMEM_ADDR_NULL;
882 }
883 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
884 if (btnew2 == NULL) {
885 bt_free(vm, btnew);
886 return VMEM_ADDR_NULL;
887 }
888
/* Re-entered with strat switched to VM_BESTFIT after instant fit fails. */
889 retry_strat:
890 first = bt_freehead_toalloc(vm, size, strat);
891 end = &vm->vm_freelist[VMEM_MAXORDER];
/* Re-entered after a successful import; the lock is not held here. */
892 retry:
893 bt = NULL;
894 mutex_enter(&vm->vm_lock);
895 if (strat == VM_INSTANTFIT) {
/* Only the first tag of each list is examined. */
896 for (list = first; list < end; list++) {
897 bt = LIST_FIRST(list);
898 if (bt != NULL) {
899 start = vmem_fit(bt, size, align, phase,
900 nocross, minaddr, maxaddr);
901 if (start != VMEM_ADDR_NULL) {
902 goto gotit;
903 }
904 }
905 }
906 } else { /* VM_BESTFIT */
/* Every tag on every list is examined; the first one that fits wins. */
907 for (list = first; list < end; list++) {
908 LIST_FOREACH(bt, list, bt_freelist) {
909 if (bt->bt_size >= size) {
910 start = vmem_fit(bt, size, align, phase,
911 nocross, minaddr, maxaddr);
912 if (start != VMEM_ADDR_NULL) {
913 goto gotit;
914 }
915 }
916 }
917 }
918 }
919 mutex_exit(&vm->vm_lock);
920 #if 1
921 if (strat == VM_INSTANTFIT) {
922 strat = VM_BESTFIT;
923 goto retry_strat;
924 }
925 #endif
/* Importing is only attempted for requests without any restriction. */
926 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
927 nocross != 0 || minaddr != 0 || maxaddr != 0) {
928
929 /*
930 * XXX should try to import a region large enough to
931 * satisfy restrictions?
932 */
933
934 goto fail;
935 }
936 if (vmem_import(vm, size, flags) == 0) {
937 goto retry;
938 }
939 /* XXX */
940 fail:
941 bt_free(vm, btnew);
942 bt_free(vm, btnew2);
943 return VMEM_ADDR_NULL;
944
/* Reached with vm_lock held; bt is the chosen free tag, start the address. */
945 gotit:
946 KASSERT(bt->bt_type == BT_TYPE_FREE);
947 KASSERT(bt->bt_size >= size);
948 bt_remfree(vm, bt);
/* Carve off the part of bt in front of start as its own free segment. */
949 if (bt->bt_start != start) {
950 btnew2->bt_type = BT_TYPE_FREE;
951 btnew2->bt_start = bt->bt_start;
952 btnew2->bt_size = start - bt->bt_start;
953 bt->bt_start = start;
954 bt->bt_size -= btnew2->bt_size;
955 bt_insfree(vm, btnew2);
956 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
957 btnew2 = NULL;
958 }
959 KASSERT(bt->bt_start == start);
/* Split only when what remains behind the allocation exceeds vm_quantum_mask. */
960 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
961 /* split */
962 btnew->bt_type = BT_TYPE_BUSY;
963 btnew->bt_start = bt->bt_start;
964 btnew->bt_size = size;
965 bt->bt_start = bt->bt_start + size;
966 bt->bt_size -= size;
967 bt_insfree(vm, bt);
968 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
969 bt_insbusy(vm, btnew);
970 mutex_exit(&vm->vm_lock);
971 } else {
/* Hand out the whole tag; the preallocated btnew is not needed. */
972 bt->bt_type = BT_TYPE_BUSY;
973 bt_insbusy(vm, bt);
974 mutex_exit(&vm->vm_lock);
975 bt_free(vm, btnew);
976 btnew = bt;
977 }
/* btnew2 is still set if no leading part had to be carved off. */
978 if (btnew2 != NULL) {
979 bt_free(vm, btnew2);
980 }
981 KASSERT(btnew->bt_size >= size);
982 btnew->bt_type = BT_TYPE_BUSY;
983
984 return btnew->bt_start;
985 }
986
987 /*
988 * vmem_free:
989 *
990 * => caller must ensure appropriate spl,
991 * if the arena can be accessed from interrupt context.
992 */
993
994 void
995 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
996 {
997
998 KASSERT(addr != VMEM_ADDR_NULL);
999 KASSERT(size > 0);
1000
1001 #if defined(QCACHE)
1002 if (size <= vm->vm_qcache_max) {
1003 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
1004 qcache_t *qc = vm->vm_qcache[qidx - 1];
1005
1006 return pool_cache_put(&qc->qc_cache, (void *)addr);
1007 }
1008 #endif /* defined(QCACHE) */
1009
1010 vmem_xfree(vm, addr, size);
1011 }
1012
1013 void
1014 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1015 {
1016 bt_t *bt;
1017 bt_t *t;
1018
1019 KASSERT(addr != VMEM_ADDR_NULL);
1020 KASSERT(size > 0);
1021
1022 mutex_enter(&vm->vm_lock);
1023
1024 bt = bt_lookupbusy(vm, addr);
1025 KASSERT(bt != NULL);
1026 KASSERT(bt->bt_start == addr);
1027 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
1028 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1029 KASSERT(bt->bt_type == BT_TYPE_BUSY);
1030 bt_rembusy(vm, bt);
1031 bt->bt_type = BT_TYPE_FREE;
1032
1033 /* coalesce */
1034 t = CIRCLEQ_NEXT(bt, bt_seglist);
1035 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1036 KASSERT(BT_END(bt) == t->bt_start);
1037 bt_remfree(vm, t);
1038 bt_remseg(vm, t);
1039 bt->bt_size += t->bt_size;
1040 bt_free(vm, t);
1041 }
1042 t = CIRCLEQ_PREV(bt, bt_seglist);
1043 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1044 KASSERT(BT_END(t) == bt->bt_start);
1045 bt_remfree(vm, t);
1046 bt_remseg(vm, t);
1047 bt->bt_size += t->bt_size;
1048 bt->bt_start = t->bt_start;
1049 bt_free(vm, t);
1050 }
1051
1052 t = CIRCLEQ_PREV(bt, bt_seglist);
1053 KASSERT(t != NULL);
1054 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1055 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1056 t->bt_size == bt->bt_size) {
1057 vmem_addr_t spanaddr;
1058 vmem_size_t spansize;
1059
1060 KASSERT(t->bt_start == bt->bt_start);
1061 spanaddr = bt->bt_start;
1062 spansize = bt->bt_size;
1063 bt_remseg(vm, bt);
1064 bt_free(vm, bt);
1065 bt_remseg(vm, t);
1066 bt_free(vm, t);
1067 mutex_exit(&vm->vm_lock);
1068 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1069 } else {
1070 bt_insfree(vm, bt);
1071 mutex_exit(&vm->vm_lock);
1072 }
1073 }
1074
1075 /*
1076 * vmem_add:
1077 *
1078 * => caller must ensure appropriate spl,
1079 * if the arena can be accessed from interrupt context.
1080 */
1081
1082 vmem_addr_t
1083 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1084 {
1085
1086 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1087 }
1088
1089 /*
1090 * vmem_reap: reap unused resources.
1091 *
1092 * => return true if we successfully reaped something.
1093 */
1094
1095 bool
1096 vmem_reap(vmem_t *vm)
1097 {
1098 bool didsomething = false;
1099
1100 #if defined(QCACHE)
1101 didsomething = qc_reap(vm);
1102 #endif /* defined(QCACHE) */
1103 return didsomething;
1104 }
1105
1106 /* ---- debug */
1107
1108 #if defined(VMEM_DEBUG)
1109
1110 #if !defined(_KERNEL)
1111 #include <stdio.h>
1112 #endif /* !defined(_KERNEL) */
1113
1114 void bt_dump(const bt_t *);
1115
1116 void
1117 bt_dump(const bt_t *bt)
1118 {
1119
1120 printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
1121 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1122 bt->bt_type);
1123 }
1124
1125 void
1126 vmem_dump(const vmem_t *vm)
1127 {
1128 const bt_t *bt;
1129 int i;
1130
1131 printf("vmem %p '%s'\n", vm, vm->vm_name);
1132 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1133 bt_dump(bt);
1134 }
1135
1136 for (i = 0; i < VMEM_MAXORDER; i++) {
1137 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1138
1139 if (LIST_EMPTY(fl)) {
1140 continue;
1141 }
1142
1143 printf("freelist[%d]\n", i);
1144 LIST_FOREACH(bt, fl, bt_freelist) {
1145 bt_dump(bt);
1146 if (bt->bt_size) {
1147 }
1148 }
1149 }
1150 }
1151
1152 #if !defined(_KERNEL)
1153
1154 #include <stdlib.h>
1155
1156 int
1157 main()
1158 {
1159 vmem_t *vm;
1160 vmem_addr_t p;
1161 struct reg {
1162 vmem_addr_t p;
1163 vmem_size_t sz;
1164 bool x;
1165 } *reg = NULL;
1166 int nreg = 0;
1167 int nalloc = 0;
1168 int nfree = 0;
1169 vmem_size_t total = 0;
1170 #if 1
1171 vm_flag_t strat = VM_INSTANTFIT;
1172 #else
1173 vm_flag_t strat = VM_BESTFIT;
1174 #endif
1175
1176 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1177 NULL, NULL, NULL, 0, VM_NOSLEEP);
1178 if (vm == NULL) {
1179 printf("vmem_create\n");
1180 exit(EXIT_FAILURE);
1181 }
1182 vmem_dump(vm);
1183
1184 p = vmem_add(vm, 100, 200, VM_SLEEP);
1185 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1186 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1187 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1188 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1189 vmem_dump(vm);
1190 for (;;) {
1191 struct reg *r;
1192 int t = rand() % 100;
1193
1194 if (t > 45) {
1195 /* alloc */
1196 vmem_size_t sz = rand() % 500 + 1;
1197 bool x;
1198 vmem_size_t align, phase, nocross;
1199 vmem_addr_t minaddr, maxaddr;
1200
1201 if (t > 70) {
1202 x = true;
1203 /* XXX */
1204 align = 1 << (rand() % 15);
1205 phase = rand() % 65536;
1206 nocross = 1 << (rand() % 15);
1207 if (align <= phase) {
1208 phase = 0;
1209 }
1210 if (VMEM_CROSS_P(phase, phase + sz - 1,
1211 nocross)) {
1212 nocross = 0;
1213 }
1214 minaddr = rand() % 50000;
1215 maxaddr = rand() % 70000;
1216 if (minaddr > maxaddr) {
1217 minaddr = 0;
1218 maxaddr = 0;
1219 }
1220 printf("=== xalloc %" PRIu64
1221 " align=%" PRIu64 ", phase=%" PRIu64
1222 ", nocross=%" PRIu64 ", min=%" PRIu64
1223 ", max=%" PRIu64 "\n",
1224 (uint64_t)sz,
1225 (uint64_t)align,
1226 (uint64_t)phase,
1227 (uint64_t)nocross,
1228 (uint64_t)minaddr,
1229 (uint64_t)maxaddr);
1230 p = vmem_xalloc(vm, sz, align, phase, nocross,
1231 minaddr, maxaddr, strat|VM_SLEEP);
1232 } else {
1233 x = false;
1234 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1235 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1236 }
1237 printf("-> %" PRIu64 "\n", (uint64_t)p);
1238 vmem_dump(vm);
1239 if (p == VMEM_ADDR_NULL) {
1240 if (x) {
1241 continue;
1242 }
1243 break;
1244 }
1245 nreg++;
1246 reg = realloc(reg, sizeof(*reg) * nreg);
1247 r = ®[nreg - 1];
1248 r->p = p;
1249 r->sz = sz;
1250 r->x = x;
1251 total += sz;
1252 nalloc++;
1253 } else if (nreg != 0) {
1254 /* free */
1255 r = ®[rand() % nreg];
1256 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1257 (uint64_t)r->p, (uint64_t)r->sz);
1258 if (r->x) {
1259 vmem_xfree(vm, r->p, r->sz);
1260 } else {
1261 vmem_free(vm, r->p, r->sz);
1262 }
1263 total -= r->sz;
1264 vmem_dump(vm);
1265 *r = reg[nreg - 1];
1266 nreg--;
1267 nfree++;
1268 }
1269 printf("total=%" PRIu64 "\n", (uint64_t)total);
1270 }
1271 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1272 (uint64_t)total, nalloc, nfree);
1273 exit(EXIT_SUCCESS);
1274 }
1275 #endif /* !defined(_KERNEL) */
1276 #endif /* defined(VMEM_DEBUG) */
1277