subr_vmem.c revision 1.20 1 /* $NetBSD: subr_vmem.c,v 1.20 2006/11/09 10:08:53 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 * - don't rely on malloc(9).
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.20 2006/11/09 10:08:53 yamt Exp $");
42
43 #define VMEM_DEBUG
44 #if defined(_KERNEL)
45 #define QCACHE
46 #endif /* defined(_KERNEL) */
47
48 #include <sys/param.h>
49 #include <sys/hash.h>
50 #include <sys/queue.h>
51
52 #if defined(_KERNEL)
53 #include <sys/systm.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/once.h>
57 #include <sys/pool.h>
58 #include <sys/proc.h>
59 #include <sys/vmem.h>
60 #else /* defined(_KERNEL) */
61 #include "../sys/vmem.h"
62 #endif /* defined(_KERNEL) */
63
64 #if defined(_KERNEL)
65 #define SIMPLELOCK_DECL(name) struct simplelock name
66 #else /* defined(_KERNEL) */
67 #include <errno.h>
68 #include <assert.h>
69 #include <stdlib.h>
70
71 #define KASSERT(a) assert(a)
72 #define SIMPLELOCK_DECL(name) /* nothing */
73 #define LOCK_ASSERT(a) /* nothing */
74 #define simple_lock_init(a) /* nothing */
75 #define simple_lock(a) /* nothing */
76 #define simple_unlock(a) /* nothing */
77 #define ASSERT_SLEEPABLE(lk, msg) /* nothing */
78 #endif /* defined(_KERNEL) */
79
80 struct vmem;
81 struct vmem_btag;
82
83 #if defined(VMEM_DEBUG)
84 void vmem_dump(const vmem_t *);
85 #endif /* defined(VMEM_DEBUG) */
86
87 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
88 #define VMEM_HASHSIZE_INIT 4096 /* XXX */
89
90 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)
91
92 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
93 LIST_HEAD(vmem_freelist, vmem_btag);
94 LIST_HEAD(vmem_hashlist, vmem_btag);
95
96 #if defined(QCACHE)
97 #define VMEM_QCACHE_IDX_MAX 32
98
99 #define QC_NAME_MAX 16
100
101 struct qcache {
102 struct pool qc_pool;
103 struct pool_cache qc_cache;
104 vmem_t *qc_vmem;
105 char qc_name[QC_NAME_MAX];
106 };
107 typedef struct qcache qcache_t;
108 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool))
109 #endif /* defined(QCACHE) */
110
111 /* vmem arena */
112 struct vmem {
113 SIMPLELOCK_DECL(vm_lock);
114 vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
115 vm_flag_t);
116 void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
117 vmem_t *vm_source;
118 struct vmem_seglist vm_seglist;
119 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
120 size_t vm_hashsize;
121 size_t vm_nbusytag;
122 struct vmem_hashlist *vm_hashlist;
123 size_t vm_quantum_mask;
124 int vm_quantum_shift;
125 const char *vm_name;
126
127 #if defined(QCACHE)
128 /* quantum cache */
129 size_t vm_qcache_max;
130 struct pool_allocator vm_qcache_allocator;
131 qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
132 #endif /* defined(QCACHE) */
133 };
134
135 #define VMEM_LOCK(vm) simple_lock(&vm->vm_lock)
136 #define VMEM_UNLOCK(vm) simple_unlock(&vm->vm_lock)
137 #define VMEM_LOCK_INIT(vm) simple_lock_init(&vm->vm_lock);
138 #define VMEM_ASSERT_LOCKED(vm) \
139 LOCK_ASSERT(simple_lock_held(&vm->vm_lock))
140 #define VMEM_ASSERT_UNLOCKED(vm) \
141 LOCK_ASSERT(!simple_lock_held(&vm->vm_lock))
142
143 /* boundary tag */
144 struct vmem_btag {
145 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
146 union {
147 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
148 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
149 } bt_u;
150 #define bt_hashlist bt_u.u_hashlist
151 #define bt_freelist bt_u.u_freelist
152 vmem_addr_t bt_start;
153 vmem_size_t bt_size;
154 int bt_type;
155 };
156
157 #define BT_TYPE_SPAN 1
158 #define BT_TYPE_SPAN_STATIC 2
159 #define BT_TYPE_FREE 3
160 #define BT_TYPE_BUSY 4
161 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
162
163 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)
164
165 typedef struct vmem_btag bt_t;
166
167 /* ---- misc */
168
169 #define VMEM_ALIGNUP(addr, align) \
170 (-(-(addr) & -(align)))
171 #define VMEM_CROSS_P(addr1, addr2, boundary) \
172 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
173
174 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
175
176 static int
177 calc_order(vmem_size_t size)
178 {
179 vmem_size_t target;
180 int i;
181
182 KASSERT(size != 0);
183
184 i = 0;
185 target = size >> 1;
186 while (ORDER2SIZE(i) <= target) {
187 i++;
188 }
189
190 KASSERT(ORDER2SIZE(i) <= size);
191 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
192
193 return i;
194 }
195
196 #if defined(_KERNEL)
197 static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
198 #endif /* defined(_KERNEL) */
199
200 static void *
201 xmalloc(size_t sz, vm_flag_t flags)
202 {
203
204 #if defined(_KERNEL)
205 return malloc(sz, M_VMEM,
206 M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
207 #else /* defined(_KERNEL) */
208 return malloc(sz);
209 #endif /* defined(_KERNEL) */
210 }
211
212 static void
213 xfree(void *p)
214 {
215
216 #if defined(_KERNEL)
217 return free(p, M_VMEM);
218 #else /* defined(_KERNEL) */
219 return free(p);
220 #endif /* defined(_KERNEL) */
221 }
222
223 /* ---- boundary tag */
224
225 #if defined(_KERNEL)
226 static struct pool_cache bt_poolcache;
227 static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
228 #endif /* defined(_KERNEL) */
229
230 static bt_t *
231 bt_alloc(vmem_t *vm, vm_flag_t flags)
232 {
233 bt_t *bt;
234
235 #if defined(_KERNEL)
236 /* XXX bootstrap */
237 bt = pool_cache_get(&bt_poolcache,
238 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
239 #else /* defined(_KERNEL) */
240 bt = malloc(sizeof *bt);
241 #endif /* defined(_KERNEL) */
242
243 return bt;
244 }
245
246 static void
247 bt_free(vmem_t *vm, bt_t *bt)
248 {
249
250 #if defined(_KERNEL)
251 /* XXX bootstrap */
252 pool_cache_put(&bt_poolcache, bt);
253 #else /* defined(_KERNEL) */
254 free(bt);
255 #endif /* defined(_KERNEL) */
256 }
257
258 /*
259 * freelist[0] ... [1, 1]
260 * freelist[1] ... [2, 3]
261 * freelist[2] ... [4, 7]
262 * freelist[3] ... [8, 15]
263 * :
264 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
265 * :
266 */
267
268 static struct vmem_freelist *
269 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
270 {
271 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
272 int idx;
273
274 KASSERT((size & vm->vm_quantum_mask) == 0);
275 KASSERT(size != 0);
276
277 idx = calc_order(qsize);
278 KASSERT(idx >= 0);
279 KASSERT(idx < VMEM_MAXORDER);
280
281 return &vm->vm_freelist[idx];
282 }
283
284 static struct vmem_freelist *
285 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
286 {
287 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
288 int idx;
289
290 KASSERT((size & vm->vm_quantum_mask) == 0);
291 KASSERT(size != 0);
292
293 idx = calc_order(qsize);
294 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
295 idx++;
296 /* check too large request? */
297 }
298 KASSERT(idx >= 0);
299 KASSERT(idx < VMEM_MAXORDER);
300
301 return &vm->vm_freelist[idx];
302 }
303
304 /* ---- boundary tag hash */
305
306 static struct vmem_hashlist *
307 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
308 {
309 struct vmem_hashlist *list;
310 unsigned int hash;
311
312 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
313 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
314
315 return list;
316 }
317
318 static bt_t *
319 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
320 {
321 struct vmem_hashlist *list;
322 bt_t *bt;
323
324 list = bt_hashhead(vm, addr);
325 LIST_FOREACH(bt, list, bt_hashlist) {
326 if (bt->bt_start == addr) {
327 break;
328 }
329 }
330
331 return bt;
332 }
333
334 static void
335 bt_rembusy(vmem_t *vm, bt_t *bt)
336 {
337
338 KASSERT(vm->vm_nbusytag > 0);
339 vm->vm_nbusytag--;
340 LIST_REMOVE(bt, bt_hashlist);
341 }
342
343 static void
344 bt_insbusy(vmem_t *vm, bt_t *bt)
345 {
346 struct vmem_hashlist *list;
347
348 KASSERT(bt->bt_type == BT_TYPE_BUSY);
349
350 list = bt_hashhead(vm, bt->bt_start);
351 LIST_INSERT_HEAD(list, bt, bt_hashlist);
352 vm->vm_nbusytag++;
353 }
354
355 /* ---- boundary tag list */
356
357 static void
358 bt_remseg(vmem_t *vm, bt_t *bt)
359 {
360
361 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
362 }
363
364 static void
365 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
366 {
367
368 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
369 }
370
371 static void
372 bt_insseg_tail(vmem_t *vm, bt_t *bt)
373 {
374
375 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
376 }
377
378 static void
379 bt_remfree(vmem_t *vm, bt_t *bt)
380 {
381
382 KASSERT(bt->bt_type == BT_TYPE_FREE);
383
384 LIST_REMOVE(bt, bt_freelist);
385 }
386
387 static void
388 bt_insfree(vmem_t *vm, bt_t *bt)
389 {
390 struct vmem_freelist *list;
391
392 list = bt_freehead_tofree(vm, bt->bt_size);
393 LIST_INSERT_HEAD(list, bt, bt_freelist);
394 }
395
396 /* ---- vmem internal functions */
397
398 #if defined(QCACHE)
399 static inline vm_flag_t
400 prf_to_vmf(int prflags)
401 {
402 vm_flag_t vmflags;
403
404 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
405 if ((prflags & PR_WAITOK) != 0) {
406 vmflags = VM_SLEEP;
407 } else {
408 vmflags = VM_NOSLEEP;
409 }
410 return vmflags;
411 }
412
413 static inline int
414 vmf_to_prf(vm_flag_t vmflags)
415 {
416 int prflags;
417
418 if ((vmflags & VM_SLEEP) != 0) {
419 prflags = PR_WAITOK;
420 } else {
421 prflags = PR_NOWAIT;
422 }
423 return prflags;
424 }
425
426 static size_t
427 qc_poolpage_size(size_t qcache_max)
428 {
429 int i;
430
431 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
432 /* nothing */
433 }
434 return ORDER2SIZE(i);
435 }
436
437 static void *
438 qc_poolpage_alloc(struct pool *pool, int prflags)
439 {
440 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
441 vmem_t *vm = qc->qc_vmem;
442
443 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
444 prf_to_vmf(prflags) | VM_INSTANTFIT);
445 }
446
447 static void
448 qc_poolpage_free(struct pool *pool, void *addr)
449 {
450 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
451 vmem_t *vm = qc->qc_vmem;
452
453 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
454 }
455
456 static void
457 qc_init(vmem_t *vm, size_t qcache_max)
458 {
459 struct pool_allocator *pa;
460 int qcache_idx_max;
461 int i;
462
463 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
464 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
465 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
466 }
467 vm->vm_qcache_max = qcache_max;
468 pa = &vm->vm_qcache_allocator;
469 memset(pa, 0, sizeof(*pa));
470 pa->pa_alloc = qc_poolpage_alloc;
471 pa->pa_free = qc_poolpage_free;
472 pa->pa_pagesz = qc_poolpage_size(qcache_max);
473
474 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
475 for (i = 1; i <= qcache_idx_max; i++) {
476 qcache_t *qc = &vm->vm_qcache[i - 1];
477 size_t size = i << vm->vm_quantum_shift;
478
479 qc->qc_vmem = vm;
480 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
481 vm->vm_name, size);
482 pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
483 0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
484 pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
485 }
486 }
487
488 static boolean_t
489 qc_reap(vmem_t *vm)
490 {
491 int i;
492 int qcache_idx_max;
493 boolean_t didsomething = FALSE;
494
495 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
496 for (i = 1; i <= qcache_idx_max; i++) {
497 qcache_t *qc = &vm->vm_qcache[i - 1];
498
499 if (pool_reclaim(&qc->qc_pool) != 0) {
500 didsomething = TRUE;
501 }
502 }
503
504 return didsomething;
505 }
506 #endif /* defined(QCACHE) */
507
508 #if defined(_KERNEL)
509 static int
510 vmem_init(void)
511 {
512
513 pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
514 return 0;
515 }
516 #endif /* defined(_KERNEL) */
517
518 static vmem_addr_t
519 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
520 int spanbttype)
521 {
522 bt_t *btspan;
523 bt_t *btfree;
524
525 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
526 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
527 VMEM_ASSERT_UNLOCKED(vm);
528
529 btspan = bt_alloc(vm, flags);
530 if (btspan == NULL) {
531 return VMEM_ADDR_NULL;
532 }
533 btfree = bt_alloc(vm, flags);
534 if (btfree == NULL) {
535 bt_free(vm, btspan);
536 return VMEM_ADDR_NULL;
537 }
538
539 btspan->bt_type = spanbttype;
540 btspan->bt_start = addr;
541 btspan->bt_size = size;
542
543 btfree->bt_type = BT_TYPE_FREE;
544 btfree->bt_start = addr;
545 btfree->bt_size = size;
546
547 VMEM_LOCK(vm);
548 bt_insseg_tail(vm, btspan);
549 bt_insseg(vm, btfree, btspan);
550 bt_insfree(vm, btfree);
551 VMEM_UNLOCK(vm);
552
553 return addr;
554 }
555
556 static int
557 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
558 {
559 vmem_addr_t addr;
560
561 VMEM_ASSERT_UNLOCKED(vm);
562
563 if (vm->vm_allocfn == NULL) {
564 return EINVAL;
565 }
566
567 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
568 if (addr == VMEM_ADDR_NULL) {
569 return ENOMEM;
570 }
571
572 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
573 (*vm->vm_freefn)(vm->vm_source, addr, size);
574 return ENOMEM;
575 }
576
577 return 0;
578 }
579
580 static int
581 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
582 {
583 bt_t *bt;
584 int i;
585 struct vmem_hashlist *newhashlist;
586 struct vmem_hashlist *oldhashlist;
587 size_t oldhashsize;
588
589 KASSERT(newhashsize > 0);
590 VMEM_ASSERT_UNLOCKED(vm);
591
592 newhashlist =
593 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
594 if (newhashlist == NULL) {
595 return ENOMEM;
596 }
597 for (i = 0; i < newhashsize; i++) {
598 LIST_INIT(&newhashlist[i]);
599 }
600
601 VMEM_LOCK(vm);
602 oldhashlist = vm->vm_hashlist;
603 oldhashsize = vm->vm_hashsize;
604 vm->vm_hashlist = newhashlist;
605 vm->vm_hashsize = newhashsize;
606 if (oldhashlist == NULL) {
607 VMEM_UNLOCK(vm);
608 return 0;
609 }
610 for (i = 0; i < oldhashsize; i++) {
611 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
612 bt_rembusy(vm, bt); /* XXX */
613 bt_insbusy(vm, bt);
614 }
615 }
616 VMEM_UNLOCK(vm);
617
618 xfree(oldhashlist);
619
620 return 0;
621 }
622
623 /*
624 * vmem_fit: check if a bt can satisfy the given restrictions.
625 */
626
627 static vmem_addr_t
628 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
629 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
630 {
631 vmem_addr_t start;
632 vmem_addr_t end;
633
634 KASSERT(bt->bt_size >= size);
635
636 /*
637 * XXX assumption: vmem_addr_t and vmem_size_t are
638 * unsigned integer of the same size.
639 */
640
641 start = bt->bt_start;
642 if (start < minaddr) {
643 start = minaddr;
644 }
645 end = BT_END(bt);
646 if (end > maxaddr - 1) {
647 end = maxaddr - 1;
648 }
649 if (start >= end) {
650 return VMEM_ADDR_NULL;
651 }
652
653 start = VMEM_ALIGNUP(start - phase, align) + phase;
654 if (start < bt->bt_start) {
655 start += align;
656 }
657 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
658 KASSERT(align < nocross);
659 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
660 }
661 if (start < end && end - start >= size) {
662 KASSERT((start & (align - 1)) == phase);
663 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
664 KASSERT(minaddr <= start);
665 KASSERT(maxaddr == 0 || start + size <= maxaddr);
666 KASSERT(bt->bt_start <= start);
667 KASSERT(start + size <= BT_END(bt));
668 return start;
669 }
670 return VMEM_ADDR_NULL;
671 }
672
673 /* ---- vmem API */
674
675 /*
676 * vmem_create: create an arena.
677 *
678 * => must not be called from interrupt context.
679 */
680
681 vmem_t *
682 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
683 vmem_size_t quantum,
684 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
685 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
686 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
687 {
688 vmem_t *vm;
689 int i;
690 #if defined(_KERNEL)
691 static ONCE_DECL(control);
692 #endif /* defined(_KERNEL) */
693
694 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
695 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
696
697 #if defined(_KERNEL)
698 if (RUN_ONCE(&control, vmem_init)) {
699 return NULL;
700 }
701 #endif /* defined(_KERNEL) */
702 vm = xmalloc(sizeof(*vm), flags);
703 if (vm == NULL) {
704 return NULL;
705 }
706
707 VMEM_LOCK_INIT(vm);
708 vm->vm_name = name;
709 vm->vm_quantum_mask = quantum - 1;
710 vm->vm_quantum_shift = calc_order(quantum);
711 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
712 vm->vm_allocfn = allocfn;
713 vm->vm_freefn = freefn;
714 vm->vm_source = source;
715 vm->vm_nbusytag = 0;
716 #if defined(QCACHE)
717 qc_init(vm, qcache_max);
718 #endif /* defined(QCACHE) */
719
720 CIRCLEQ_INIT(&vm->vm_seglist);
721 for (i = 0; i < VMEM_MAXORDER; i++) {
722 LIST_INIT(&vm->vm_freelist[i]);
723 }
724 vm->vm_hashlist = NULL;
725 if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
726 vmem_destroy(vm);
727 return NULL;
728 }
729
730 if (size != 0) {
731 if (vmem_add(vm, base, size, flags) == 0) {
732 vmem_destroy(vm);
733 return NULL;
734 }
735 }
736
737 return vm;
738 }
739
740 void
741 vmem_destroy(vmem_t *vm)
742 {
743
744 VMEM_ASSERT_UNLOCKED(vm);
745
746 if (vm->vm_hashlist != NULL) {
747 int i;
748
749 for (i = 0; i < vm->vm_hashsize; i++) {
750 bt_t *bt;
751
752 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
753 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
754 bt_free(vm, bt);
755 }
756 }
757 xfree(vm->vm_hashlist);
758 }
759 xfree(vm);
760 }
761
762 vmem_size_t
763 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
764 {
765
766 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
767 }
768
769 /*
770 * vmem_alloc:
771 *
772 * => caller must ensure appropriate spl,
773 * if the arena can be accessed from interrupt context.
774 */
775
776 vmem_addr_t
777 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
778 {
779 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
780 const vm_flag_t strat __unused = flags & VM_FITMASK;
781
782 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
783 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
784 VMEM_ASSERT_UNLOCKED(vm);
785
786 KASSERT(size0 > 0);
787 KASSERT(size > 0);
788 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
789 if ((flags & VM_SLEEP) != 0) {
790 ASSERT_SLEEPABLE(NULL, __func__);
791 }
792
793 #if defined(QCACHE)
794 if (size <= vm->vm_qcache_max) {
795 int qidx = size >> vm->vm_quantum_shift;
796 qcache_t *qc = &vm->vm_qcache[qidx - 1];
797
798 return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
799 vmf_to_prf(flags));
800 }
801 #endif /* defined(QCACHE) */
802
803 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
804 }
805
806 vmem_addr_t
807 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
808 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
809 vm_flag_t flags)
810 {
811 struct vmem_freelist *list;
812 struct vmem_freelist *first;
813 struct vmem_freelist *end;
814 bt_t *bt;
815 bt_t *btnew;
816 bt_t *btnew2;
817 const vmem_size_t size = vmem_roundup_size(vm, size0);
818 vm_flag_t strat = flags & VM_FITMASK;
819 vmem_addr_t start;
820
821 KASSERT(size0 > 0);
822 KASSERT(size > 0);
823 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
824 if ((flags & VM_SLEEP) != 0) {
825 ASSERT_SLEEPABLE(NULL, __func__);
826 }
827 KASSERT((align & vm->vm_quantum_mask) == 0);
828 KASSERT((align & (align - 1)) == 0);
829 KASSERT((phase & vm->vm_quantum_mask) == 0);
830 KASSERT((nocross & vm->vm_quantum_mask) == 0);
831 KASSERT((nocross & (nocross - 1)) == 0);
832 KASSERT((align == 0 && phase == 0) || phase < align);
833 KASSERT(nocross == 0 || nocross >= size);
834 KASSERT(maxaddr == 0 || minaddr < maxaddr);
835 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
836
837 if (align == 0) {
838 align = vm->vm_quantum_mask + 1;
839 }
840 btnew = bt_alloc(vm, flags);
841 if (btnew == NULL) {
842 return VMEM_ADDR_NULL;
843 }
844 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
845 if (btnew2 == NULL) {
846 bt_free(vm, btnew);
847 return VMEM_ADDR_NULL;
848 }
849
850 retry_strat:
851 first = bt_freehead_toalloc(vm, size, strat);
852 end = &vm->vm_freelist[VMEM_MAXORDER];
853 retry:
854 bt = NULL;
855 VMEM_LOCK(vm);
856 if (strat == VM_INSTANTFIT) {
857 for (list = first; list < end; list++) {
858 bt = LIST_FIRST(list);
859 if (bt != NULL) {
860 start = vmem_fit(bt, size, align, phase,
861 nocross, minaddr, maxaddr);
862 if (start != VMEM_ADDR_NULL) {
863 goto gotit;
864 }
865 }
866 }
867 } else { /* VM_BESTFIT */
868 for (list = first; list < end; list++) {
869 LIST_FOREACH(bt, list, bt_freelist) {
870 if (bt->bt_size >= size) {
871 start = vmem_fit(bt, size, align, phase,
872 nocross, minaddr, maxaddr);
873 if (start != VMEM_ADDR_NULL) {
874 goto gotit;
875 }
876 }
877 }
878 }
879 }
880 VMEM_UNLOCK(vm);
881 #if 1
882 if (strat == VM_INSTANTFIT) {
883 strat = VM_BESTFIT;
884 goto retry_strat;
885 }
886 #endif
887 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
888 nocross != 0 || minaddr != 0 || maxaddr != 0) {
889
890 /*
891 * XXX should try to import a region large enough to
892 * satisfy restrictions?
893 */
894
895 goto fail;
896 }
897 if (vmem_import(vm, size, flags) == 0) {
898 goto retry;
899 }
900 /* XXX */
901 fail:
902 bt_free(vm, btnew);
903 bt_free(vm, btnew2);
904 return VMEM_ADDR_NULL;
905
906 gotit:
907 KASSERT(bt->bt_type == BT_TYPE_FREE);
908 KASSERT(bt->bt_size >= size);
909 bt_remfree(vm, bt);
910 if (bt->bt_start != start) {
911 btnew2->bt_type = BT_TYPE_FREE;
912 btnew2->bt_start = bt->bt_start;
913 btnew2->bt_size = start - bt->bt_start;
914 bt->bt_start = start;
915 bt->bt_size -= btnew2->bt_size;
916 bt_insfree(vm, btnew2);
917 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
918 btnew2 = NULL;
919 }
920 KASSERT(bt->bt_start == start);
921 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
922 /* split */
923 btnew->bt_type = BT_TYPE_BUSY;
924 btnew->bt_start = bt->bt_start;
925 btnew->bt_size = size;
926 bt->bt_start = bt->bt_start + size;
927 bt->bt_size -= size;
928 bt_insfree(vm, bt);
929 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
930 bt_insbusy(vm, btnew);
931 VMEM_UNLOCK(vm);
932 } else {
933 bt->bt_type = BT_TYPE_BUSY;
934 bt_insbusy(vm, bt);
935 VMEM_UNLOCK(vm);
936 bt_free(vm, btnew);
937 btnew = bt;
938 }
939 if (btnew2 != NULL) {
940 bt_free(vm, btnew2);
941 }
942 KASSERT(btnew->bt_size >= size);
943 btnew->bt_type = BT_TYPE_BUSY;
944
945 return btnew->bt_start;
946 }
947
948 /*
949 * vmem_free:
950 *
951 * => caller must ensure appropriate spl,
952 * if the arena can be accessed from interrupt context.
953 */
954
955 void
956 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
957 {
958
959 VMEM_ASSERT_UNLOCKED(vm);
960 KASSERT(addr != VMEM_ADDR_NULL);
961 KASSERT(size > 0);
962
963 #if defined(QCACHE)
964 if (size <= vm->vm_qcache_max) {
965 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
966 qcache_t *qc = &vm->vm_qcache[qidx - 1];
967
968 return pool_cache_put(&qc->qc_cache, (void *)addr);
969 }
970 #endif /* defined(QCACHE) */
971
972 vmem_xfree(vm, addr, size);
973 }
974
975 void
976 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
977 {
978 bt_t *bt;
979 bt_t *t;
980
981 VMEM_ASSERT_UNLOCKED(vm);
982 KASSERT(addr != VMEM_ADDR_NULL);
983 KASSERT(size > 0);
984
985 VMEM_LOCK(vm);
986
987 bt = bt_lookupbusy(vm, addr);
988 KASSERT(bt != NULL);
989 KASSERT(bt->bt_start == addr);
990 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
991 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
992 KASSERT(bt->bt_type == BT_TYPE_BUSY);
993 bt_rembusy(vm, bt);
994 bt->bt_type = BT_TYPE_FREE;
995
996 /* coalesce */
997 t = CIRCLEQ_NEXT(bt, bt_seglist);
998 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
999 KASSERT(BT_END(bt) == t->bt_start);
1000 bt_remfree(vm, t);
1001 bt_remseg(vm, t);
1002 bt->bt_size += t->bt_size;
1003 bt_free(vm, t);
1004 }
1005 t = CIRCLEQ_PREV(bt, bt_seglist);
1006 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1007 KASSERT(BT_END(t) == bt->bt_start);
1008 bt_remfree(vm, t);
1009 bt_remseg(vm, t);
1010 bt->bt_size += t->bt_size;
1011 bt->bt_start = t->bt_start;
1012 bt_free(vm, t);
1013 }
1014
1015 t = CIRCLEQ_PREV(bt, bt_seglist);
1016 KASSERT(t != NULL);
1017 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1018 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1019 t->bt_size == bt->bt_size) {
1020 vmem_addr_t spanaddr;
1021 vmem_size_t spansize;
1022
1023 KASSERT(t->bt_start == bt->bt_start);
1024 spanaddr = bt->bt_start;
1025 spansize = bt->bt_size;
1026 bt_remseg(vm, bt);
1027 bt_free(vm, bt);
1028 bt_remseg(vm, t);
1029 bt_free(vm, t);
1030 VMEM_UNLOCK(vm);
1031 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1032 } else {
1033 bt_insfree(vm, bt);
1034 VMEM_UNLOCK(vm);
1035 }
1036 }
1037
1038 /*
1039 * vmem_add:
1040 *
1041 * => caller must ensure appropriate spl,
1042 * if the arena can be accessed from interrupt context.
1043 */
1044
1045 vmem_addr_t
1046 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1047 {
1048
1049 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1050 }
1051
1052 /*
1053 * vmem_reap: reap unused resources.
1054 *
1055 * => return TRUE if we successfully reaped something.
1056 */
1057
1058 boolean_t
1059 vmem_reap(vmem_t *vm)
1060 {
1061 boolean_t didsomething = FALSE;
1062
1063 VMEM_ASSERT_UNLOCKED(vm);
1064
1065 #if defined(QCACHE)
1066 didsomething = qc_reap(vm);
1067 #endif /* defined(QCACHE) */
1068 return didsomething;
1069 }
1070
1071 /* ---- debug */
1072
1073 #if defined(VMEM_DEBUG)
1074
1075 #if !defined(_KERNEL)
1076 #include <stdio.h>
1077 #endif /* !defined(_KERNEL) */
1078
1079 void bt_dump(const bt_t *);
1080
1081 void
1082 bt_dump(const bt_t *bt)
1083 {
1084
1085 printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
1086 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1087 bt->bt_type);
1088 }
1089
1090 void
1091 vmem_dump(const vmem_t *vm)
1092 {
1093 const bt_t *bt;
1094 int i;
1095
1096 printf("vmem %p '%s'\n", vm, vm->vm_name);
1097 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1098 bt_dump(bt);
1099 }
1100
1101 for (i = 0; i < VMEM_MAXORDER; i++) {
1102 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1103
1104 if (LIST_EMPTY(fl)) {
1105 continue;
1106 }
1107
1108 printf("freelist[%d]\n", i);
1109 LIST_FOREACH(bt, fl, bt_freelist) {
1110 bt_dump(bt);
1111 if (bt->bt_size) {
1112 }
1113 }
1114 }
1115 }
1116
1117 #if !defined(_KERNEL)
1118
1119 #include <stdlib.h>
1120
1121 int
1122 main()
1123 {
1124 vmem_t *vm;
1125 vmem_addr_t p;
1126 struct reg {
1127 vmem_addr_t p;
1128 vmem_size_t sz;
1129 boolean_t x;
1130 } *reg = NULL;
1131 int nreg = 0;
1132 int nalloc = 0;
1133 int nfree = 0;
1134 vmem_size_t total = 0;
1135 #if 1
1136 vm_flag_t strat = VM_INSTANTFIT;
1137 #else
1138 vm_flag_t strat = VM_BESTFIT;
1139 #endif
1140
1141 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1142 NULL, NULL, NULL, 0, VM_NOSLEEP);
1143 if (vm == NULL) {
1144 printf("vmem_create\n");
1145 exit(EXIT_FAILURE);
1146 }
1147 vmem_dump(vm);
1148
1149 p = vmem_add(vm, 100, 200, VM_SLEEP);
1150 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1151 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1152 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1153 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1154 vmem_dump(vm);
1155 for (;;) {
1156 struct reg *r;
1157 int t = rand() % 100;
1158
1159 if (t > 45) {
1160 /* alloc */
1161 vmem_size_t sz = rand() % 500 + 1;
1162 boolean_t x;
1163 vmem_size_t align, phase, nocross;
1164 vmem_addr_t minaddr, maxaddr;
1165
1166 if (t > 70) {
1167 x = TRUE;
1168 /* XXX */
1169 align = 1 << (rand() % 15);
1170 phase = rand() % 65536;
1171 nocross = 1 << (rand() % 15);
1172 if (align <= phase) {
1173 phase = 0;
1174 }
1175 if (VMEM_CROSS_P(phase, phase + sz - 1,
1176 nocross)) {
1177 nocross = 0;
1178 }
1179 minaddr = rand() % 50000;
1180 maxaddr = rand() % 70000;
1181 if (minaddr > maxaddr) {
1182 minaddr = 0;
1183 maxaddr = 0;
1184 }
1185 printf("=== xalloc %" PRIu64
1186 " align=%" PRIu64 ", phase=%" PRIu64
1187 ", nocross=%" PRIu64 ", min=%" PRIu64
1188 ", max=%" PRIu64 "\n",
1189 (uint64_t)sz,
1190 (uint64_t)align,
1191 (uint64_t)phase,
1192 (uint64_t)nocross,
1193 (uint64_t)minaddr,
1194 (uint64_t)maxaddr);
1195 p = vmem_xalloc(vm, sz, align, phase, nocross,
1196 minaddr, maxaddr, strat|VM_SLEEP);
1197 } else {
1198 x = FALSE;
1199 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1200 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1201 }
1202 printf("-> %" PRIu64 "\n", (uint64_t)p);
1203 vmem_dump(vm);
1204 if (p == VMEM_ADDR_NULL) {
1205 if (x) {
1206 continue;
1207 }
1208 break;
1209 }
1210 nreg++;
1211 reg = realloc(reg, sizeof(*reg) * nreg);
1212 r = ®[nreg - 1];
1213 r->p = p;
1214 r->sz = sz;
1215 r->x = x;
1216 total += sz;
1217 nalloc++;
1218 } else if (nreg != 0) {
1219 /* free */
1220 r = ®[rand() % nreg];
1221 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1222 (uint64_t)r->p, (uint64_t)r->sz);
1223 if (r->x) {
1224 vmem_xfree(vm, r->p, r->sz);
1225 } else {
1226 vmem_free(vm, r->p, r->sz);
1227 }
1228 total -= r->sz;
1229 vmem_dump(vm);
1230 *r = reg[nreg - 1];
1231 nreg--;
1232 nfree++;
1233 }
1234 printf("total=%" PRIu64 "\n", (uint64_t)total);
1235 }
1236 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1237 (uint64_t)total, nalloc, nfree);
1238 exit(EXIT_SUCCESS);
1239 }
1240 #endif /* !defined(_KERNEL) */
1241 #endif /* defined(VMEM_DEBUG) */
1242