subr_vmem.c revision 1.21 1 /* $NetBSD: subr_vmem.c,v 1.21 2006/11/12 22:28:17 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 * - don't rely on malloc(9).
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.21 2006/11/12 22:28:17 yamt Exp $");
42
43 #define VMEM_DEBUG
44 #if defined(_KERNEL)
45 #define QCACHE
46 #endif /* defined(_KERNEL) */
47
48 #include <sys/param.h>
49 #include <sys/hash.h>
50 #include <sys/queue.h>
51
52 #if defined(_KERNEL)
53 #include <sys/systm.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/once.h>
57 #include <sys/pool.h>
58 #include <sys/proc.h>
59 #include <sys/vmem.h>
60 #else /* defined(_KERNEL) */
61 #include "../sys/vmem.h"
62 #endif /* defined(_KERNEL) */
63
64 #if defined(_KERNEL)
65 #define SIMPLELOCK_DECL(name) struct simplelock name
66 #else /* defined(_KERNEL) */
67 #include <errno.h>
68 #include <assert.h>
69 #include <stdlib.h>
70
71 #define KASSERT(a) assert(a)
72 #define SIMPLELOCK_DECL(name) /* nothing */
73 #define LOCK_ASSERT(a) /* nothing */
74 #define simple_lock_init(a) /* nothing */
75 #define simple_lock(a) /* nothing */
76 #define simple_unlock(a) /* nothing */
77 #define ASSERT_SLEEPABLE(lk, msg) /* nothing */
78 #endif /* defined(_KERNEL) */
79
80 struct vmem;
81 struct vmem_btag;
82
83 #if defined(VMEM_DEBUG)
84 void vmem_dump(const vmem_t *);
85 #endif /* defined(VMEM_DEBUG) */
86
/* Number of free-list size classes: one per bit of vmem_size_t. */
87 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
/* Bucket count requested by the first vmem_rehash() call in vmem_create(). */
88 #define VMEM_HASHSIZE_INIT 4096 /* XXX */
89
/* Mask that extracts the allocation-strategy bits from a vm_flag_t. */
90 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)
91
/*
 * List head types for boundary tags: the per-arena segment list, the
 * per-size-class free lists and the busy-tag hash buckets.
 */
92 CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
93 LIST_HEAD(vmem_freelist, vmem_btag);
94 LIST_HEAD(vmem_hashlist, vmem_btag);
95
96 #if defined(QCACHE)
97 #define VMEM_QCACHE_IDX_MAX 32
98
99 #define QC_NAME_MAX 16
100
101 struct qcache {
102 struct pool qc_pool;
103 struct pool_cache qc_cache;
104 vmem_t *qc_vmem;
105 char qc_name[QC_NAME_MAX];
106 };
107 typedef struct qcache qcache_t;
108 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool))
109 #endif /* defined(QCACHE) */
110
111 /* vmem arena */
112 struct vmem {
113 SIMPLELOCK_DECL(vm_lock);
114 vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
115 vm_flag_t);
116 void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
117 vmem_t *vm_source;
118 struct vmem_seglist vm_seglist;
119 struct vmem_freelist vm_freelist[VMEM_MAXORDER];
120 size_t vm_hashsize;
121 size_t vm_nbusytag;
122 struct vmem_hashlist *vm_hashlist;
123 size_t vm_quantum_mask;
124 int vm_quantum_shift;
125 const char *vm_name;
126
127 #if defined(QCACHE)
128 /* quantum cache */
129 size_t vm_qcache_max;
130 struct pool_allocator vm_qcache_allocator;
131 qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
132 #endif /* defined(QCACHE) */
133 };
134
/*
 * Arena lock helpers.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed, and no macro carries its own trailing semicolon, so each one
 * behaves as a single statement (e.g. as the body of an if/else).
 */
#define	VMEM_LOCK(vm)		simple_lock(&(vm)->vm_lock)
#define	VMEM_UNLOCK(vm)		simple_unlock(&(vm)->vm_lock)
#define	VMEM_LOCK_INIT(vm)	simple_lock_init(&(vm)->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&(vm)->vm_lock))
#define	VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&(vm)->vm_lock))
142
143 /* boundary tag */
144 struct vmem_btag {
145 CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
146 union {
147 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
148 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
149 } bt_u;
150 #define bt_hashlist bt_u.u_hashlist
151 #define bt_freelist bt_u.u_freelist
152 vmem_addr_t bt_start;
153 vmem_size_t bt_size;
154 int bt_type;
155 };
156
157 #define BT_TYPE_SPAN 1
158 #define BT_TYPE_SPAN_STATIC 2
159 #define BT_TYPE_FREE 3
160 #define BT_TYPE_BUSY 4
161 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
162
163 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)
164
165 typedef struct vmem_btag bt_t;
166
167 /* ---- misc */
168
/*
 * Round addr up to a multiple of align using two's-complement negation.
 * NOTE(review): presumably both operands are unsigned and align is a
 * power of two, as vmem_xalloc() asserts for its align argument.
 */
169 #define VMEM_ALIGNUP(addr, align) \
170 (-(-(addr) & -(align)))
/*
 * True when addr1 and addr2 differ in any bit at or above the boundary
 * bit, i.e. they lie in different boundary-sized windows.  A boundary of
 * 0 gives a mask of 0, so the test is then always false.
 */
171 #define VMEM_CROSS_P(addr1, addr2, boundary) \
172 ((((addr1) ^ (addr2)) & -(boundary)) != 0)
173
/* 2^order as a vmem_size_t. */
174 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
175
176 static int
177 calc_order(vmem_size_t size)
178 {
179 vmem_size_t target;
180 int i;
181
182 KASSERT(size != 0);
183
184 i = 0;
185 target = size >> 1;
186 while (ORDER2SIZE(i) <= target) {
187 i++;
188 }
189
190 KASSERT(ORDER2SIZE(i) <= size);
191 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
192
193 return i;
194 }
195
196 #if defined(_KERNEL)
/* malloc(9) type passed by xmalloc() and xfree() in kernel builds. */
197 static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
198 #endif /* defined(_KERNEL) */
199
200 static void *
201 xmalloc(size_t sz, vm_flag_t flags)
202 {
203
204 #if defined(_KERNEL)
205 return malloc(sz, M_VMEM,
206 M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
207 #else /* defined(_KERNEL) */
208 return malloc(sz);
209 #endif /* defined(_KERNEL) */
210 }
211
/*
 * xfree: release memory obtained from xmalloc().
 *
 * The underlying free routine is called as a plain statement: a
 * "return <expression>;" is not permitted in a function returning void.
 */
static void
xfree(void *p)
{

#if defined(_KERNEL)
	free(p, M_VMEM);
#else /* defined(_KERNEL) */
	free(p);
#endif /* defined(_KERNEL) */
}
222
223 /* ---- boundary tag */
224
225 #if defined(_KERNEL)
/* Cache that bt_alloc()/bt_free() draw from; set up by vmem_init(). */
226 static struct pool_cache bt_poolcache;
/* Pool of bt_t-sized items underneath bt_poolcache. */
227 static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
228 #endif /* defined(_KERNEL) */
229
230 static bt_t *
231 bt_alloc(vmem_t *vm, vm_flag_t flags)
232 {
233 bt_t *bt;
234
235 #if defined(_KERNEL)
236 int s;
237
238 /* XXX bootstrap */
239 s = splvm();
240 bt = pool_cache_get(&bt_poolcache,
241 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
242 splx(s);
243 #else /* defined(_KERNEL) */
244 bt = malloc(sizeof *bt);
245 #endif /* defined(_KERNEL) */
246
247 return bt;
248 }
249
250 static void
251 bt_free(vmem_t *vm, bt_t *bt)
252 {
253
254 #if defined(_KERNEL)
255 int s;
256
257 /* XXX bootstrap */
258 s = splvm();
259 pool_cache_put(&bt_poolcache, bt);
260 splx(s);
261 #else /* defined(_KERNEL) */
262 free(bt);
263 #endif /* defined(_KERNEL) */
264 }
265
266 /*
267 * freelist[0] ... [1, 1]
268 * freelist[1] ... [2, 3]
269 * freelist[2] ... [4, 7]
270 * freelist[3] ... [8, 15]
271 * :
272 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
273 * :
274 */
275
276 static struct vmem_freelist *
277 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
278 {
279 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
280 int idx;
281
282 KASSERT((size & vm->vm_quantum_mask) == 0);
283 KASSERT(size != 0);
284
285 idx = calc_order(qsize);
286 KASSERT(idx >= 0);
287 KASSERT(idx < VMEM_MAXORDER);
288
289 return &vm->vm_freelist[idx];
290 }
291
292 static struct vmem_freelist *
293 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
294 {
295 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
296 int idx;
297
298 KASSERT((size & vm->vm_quantum_mask) == 0);
299 KASSERT(size != 0);
300
301 idx = calc_order(qsize);
302 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
303 idx++;
304 /* check too large request? */
305 }
306 KASSERT(idx >= 0);
307 KASSERT(idx < VMEM_MAXORDER);
308
309 return &vm->vm_freelist[idx];
310 }
311
312 /* ---- boundary tag hash */
313
314 static struct vmem_hashlist *
315 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
316 {
317 struct vmem_hashlist *list;
318 unsigned int hash;
319
320 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
321 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
322
323 return list;
324 }
325
326 static bt_t *
327 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
328 {
329 struct vmem_hashlist *list;
330 bt_t *bt;
331
332 list = bt_hashhead(vm, addr);
333 LIST_FOREACH(bt, list, bt_hashlist) {
334 if (bt->bt_start == addr) {
335 break;
336 }
337 }
338
339 return bt;
340 }
341
342 static void
343 bt_rembusy(vmem_t *vm, bt_t *bt)
344 {
345
346 KASSERT(vm->vm_nbusytag > 0);
347 vm->vm_nbusytag--;
348 LIST_REMOVE(bt, bt_hashlist);
349 }
350
351 static void
352 bt_insbusy(vmem_t *vm, bt_t *bt)
353 {
354 struct vmem_hashlist *list;
355
356 KASSERT(bt->bt_type == BT_TYPE_BUSY);
357
358 list = bt_hashhead(vm, bt->bt_start);
359 LIST_INSERT_HEAD(list, bt, bt_hashlist);
360 vm->vm_nbusytag++;
361 }
362
363 /* ---- boundary tag list */
364
365 static void
366 bt_remseg(vmem_t *vm, bt_t *bt)
367 {
368
369 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
370 }
371
372 static void
373 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
374 {
375
376 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
377 }
378
379 static void
380 bt_insseg_tail(vmem_t *vm, bt_t *bt)
381 {
382
383 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
384 }
385
386 static void
387 bt_remfree(vmem_t *vm, bt_t *bt)
388 {
389
390 KASSERT(bt->bt_type == BT_TYPE_FREE);
391
392 LIST_REMOVE(bt, bt_freelist);
393 }
394
395 static void
396 bt_insfree(vmem_t *vm, bt_t *bt)
397 {
398 struct vmem_freelist *list;
399
400 list = bt_freehead_tofree(vm, bt->bt_size);
401 LIST_INSERT_HEAD(list, bt, bt_freelist);
402 }
403
404 /* ---- vmem internal functions */
405
406 #if defined(QCACHE)
407 static inline vm_flag_t
408 prf_to_vmf(int prflags)
409 {
410 vm_flag_t vmflags;
411
412 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
413 if ((prflags & PR_WAITOK) != 0) {
414 vmflags = VM_SLEEP;
415 } else {
416 vmflags = VM_NOSLEEP;
417 }
418 return vmflags;
419 }
420
421 static inline int
422 vmf_to_prf(vm_flag_t vmflags)
423 {
424 int prflags;
425
426 if ((vmflags & VM_SLEEP) != 0) {
427 prflags = PR_WAITOK;
428 } else {
429 prflags = PR_NOWAIT;
430 }
431 return prflags;
432 }
433
434 static size_t
435 qc_poolpage_size(size_t qcache_max)
436 {
437 int i;
438
439 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
440 /* nothing */
441 }
442 return ORDER2SIZE(i);
443 }
444
445 static void *
446 qc_poolpage_alloc(struct pool *pool, int prflags)
447 {
448 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
449 vmem_t *vm = qc->qc_vmem;
450
451 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
452 prf_to_vmf(prflags) | VM_INSTANTFIT);
453 }
454
455 static void
456 qc_poolpage_free(struct pool *pool, void *addr)
457 {
458 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
459 vmem_t *vm = qc->qc_vmem;
460
461 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
462 }
463
464 static void
465 qc_init(vmem_t *vm, size_t qcache_max)
466 {
467 struct pool_allocator *pa;
468 int qcache_idx_max;
469 int i;
470
471 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
472 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
473 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
474 }
475 vm->vm_qcache_max = qcache_max;
476 pa = &vm->vm_qcache_allocator;
477 memset(pa, 0, sizeof(*pa));
478 pa->pa_alloc = qc_poolpage_alloc;
479 pa->pa_free = qc_poolpage_free;
480 pa->pa_pagesz = qc_poolpage_size(qcache_max);
481
482 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
483 for (i = 1; i <= qcache_idx_max; i++) {
484 qcache_t *qc = &vm->vm_qcache[i - 1];
485 size_t size = i << vm->vm_quantum_shift;
486
487 qc->qc_vmem = vm;
488 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
489 vm->vm_name, size);
490 pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
491 0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
492 pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
493 }
494 }
495
496 static boolean_t
497 qc_reap(vmem_t *vm)
498 {
499 int i;
500 int qcache_idx_max;
501 boolean_t didsomething = FALSE;
502
503 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
504 for (i = 1; i <= qcache_idx_max; i++) {
505 qcache_t *qc = &vm->vm_qcache[i - 1];
506
507 if (pool_reclaim(&qc->qc_pool) != 0) {
508 didsomething = TRUE;
509 }
510 }
511
512 return didsomething;
513 }
514 #endif /* defined(QCACHE) */
515
516 #if defined(_KERNEL)
517 static int
518 vmem_init(void)
519 {
520
521 pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
522 return 0;
523 }
524 #endif /* defined(_KERNEL) */
525
526 static vmem_addr_t
527 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
528 int spanbttype)
529 {
530 bt_t *btspan;
531 bt_t *btfree;
532
533 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
534 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
535 VMEM_ASSERT_UNLOCKED(vm);
536
537 btspan = bt_alloc(vm, flags);
538 if (btspan == NULL) {
539 return VMEM_ADDR_NULL;
540 }
541 btfree = bt_alloc(vm, flags);
542 if (btfree == NULL) {
543 bt_free(vm, btspan);
544 return VMEM_ADDR_NULL;
545 }
546
547 btspan->bt_type = spanbttype;
548 btspan->bt_start = addr;
549 btspan->bt_size = size;
550
551 btfree->bt_type = BT_TYPE_FREE;
552 btfree->bt_start = addr;
553 btfree->bt_size = size;
554
555 VMEM_LOCK(vm);
556 bt_insseg_tail(vm, btspan);
557 bt_insseg(vm, btfree, btspan);
558 bt_insfree(vm, btfree);
559 VMEM_UNLOCK(vm);
560
561 return addr;
562 }
563
564 static int
565 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
566 {
567 vmem_addr_t addr;
568
569 VMEM_ASSERT_UNLOCKED(vm);
570
571 if (vm->vm_allocfn == NULL) {
572 return EINVAL;
573 }
574
575 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
576 if (addr == VMEM_ADDR_NULL) {
577 return ENOMEM;
578 }
579
580 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
581 (*vm->vm_freefn)(vm->vm_source, addr, size);
582 return ENOMEM;
583 }
584
585 return 0;
586 }
587
588 static int
589 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
590 {
591 bt_t *bt;
592 int i;
593 struct vmem_hashlist *newhashlist;
594 struct vmem_hashlist *oldhashlist;
595 size_t oldhashsize;
596
597 KASSERT(newhashsize > 0);
598 VMEM_ASSERT_UNLOCKED(vm);
599
600 newhashlist =
601 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
602 if (newhashlist == NULL) {
603 return ENOMEM;
604 }
605 for (i = 0; i < newhashsize; i++) {
606 LIST_INIT(&newhashlist[i]);
607 }
608
609 VMEM_LOCK(vm);
610 oldhashlist = vm->vm_hashlist;
611 oldhashsize = vm->vm_hashsize;
612 vm->vm_hashlist = newhashlist;
613 vm->vm_hashsize = newhashsize;
614 if (oldhashlist == NULL) {
615 VMEM_UNLOCK(vm);
616 return 0;
617 }
618 for (i = 0; i < oldhashsize; i++) {
619 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
620 bt_rembusy(vm, bt); /* XXX */
621 bt_insbusy(vm, bt);
622 }
623 }
624 VMEM_UNLOCK(vm);
625
626 xfree(oldhashlist);
627
628 return 0;
629 }
630
631 /*
632 * vmem_fit: check if a bt can satisfy the given restrictions.
633 */
634
635 static vmem_addr_t
636 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
637 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
638 {
639 vmem_addr_t start;
640 vmem_addr_t end;
641
642 KASSERT(bt->bt_size >= size);
643
644 /*
645 * XXX assumption: vmem_addr_t and vmem_size_t are
646 * unsigned integer of the same size.
647 */
648
649 start = bt->bt_start;
650 if (start < minaddr) {
651 start = minaddr;
652 }
653 end = BT_END(bt);
654 if (end > maxaddr - 1) {
655 end = maxaddr - 1;
656 }
657 if (start >= end) {
658 return VMEM_ADDR_NULL;
659 }
660
661 start = VMEM_ALIGNUP(start - phase, align) + phase;
662 if (start < bt->bt_start) {
663 start += align;
664 }
665 if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
666 KASSERT(align < nocross);
667 start = VMEM_ALIGNUP(start - phase, nocross) + phase;
668 }
669 if (start < end && end - start >= size) {
670 KASSERT((start & (align - 1)) == phase);
671 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
672 KASSERT(minaddr <= start);
673 KASSERT(maxaddr == 0 || start + size <= maxaddr);
674 KASSERT(bt->bt_start <= start);
675 KASSERT(start + size <= BT_END(bt));
676 return start;
677 }
678 return VMEM_ADDR_NULL;
679 }
680
681 /* ---- vmem API */
682
683 /*
684 * vmem_create: create an arena.
685 *
686 * => must not be called from interrupt context.
687 */
688
689 vmem_t *
690 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
691 vmem_size_t quantum,
692 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
693 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
694 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
695 {
696 vmem_t *vm;
697 int i;
698 #if defined(_KERNEL)
699 static ONCE_DECL(control);
700 #endif /* defined(_KERNEL) */
701
702 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
703 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
704
705 #if defined(_KERNEL)
706 if (RUN_ONCE(&control, vmem_init)) {
707 return NULL;
708 }
709 #endif /* defined(_KERNEL) */
710 vm = xmalloc(sizeof(*vm), flags);
711 if (vm == NULL) {
712 return NULL;
713 }
714
715 VMEM_LOCK_INIT(vm);
716 vm->vm_name = name;
717 vm->vm_quantum_mask = quantum - 1;
718 vm->vm_quantum_shift = calc_order(quantum);
719 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
720 vm->vm_allocfn = allocfn;
721 vm->vm_freefn = freefn;
722 vm->vm_source = source;
723 vm->vm_nbusytag = 0;
724 #if defined(QCACHE)
725 qc_init(vm, qcache_max);
726 #endif /* defined(QCACHE) */
727
728 CIRCLEQ_INIT(&vm->vm_seglist);
729 for (i = 0; i < VMEM_MAXORDER; i++) {
730 LIST_INIT(&vm->vm_freelist[i]);
731 }
732 vm->vm_hashlist = NULL;
733 if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
734 vmem_destroy(vm);
735 return NULL;
736 }
737
738 if (size != 0) {
739 if (vmem_add(vm, base, size, flags) == 0) {
740 vmem_destroy(vm);
741 return NULL;
742 }
743 }
744
745 return vm;
746 }
747
748 void
749 vmem_destroy(vmem_t *vm)
750 {
751
752 VMEM_ASSERT_UNLOCKED(vm);
753
754 if (vm->vm_hashlist != NULL) {
755 int i;
756
757 for (i = 0; i < vm->vm_hashsize; i++) {
758 bt_t *bt;
759
760 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
761 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
762 bt_free(vm, bt);
763 }
764 }
765 xfree(vm->vm_hashlist);
766 }
767 xfree(vm);
768 }
769
770 vmem_size_t
771 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
772 {
773
774 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
775 }
776
777 /*
778 * vmem_alloc:
779 *
780 * => caller must ensure appropriate spl,
781 * if the arena can be accessed from interrupt context.
782 */
783
784 vmem_addr_t
785 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
786 {
787 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
788 const vm_flag_t strat __unused = flags & VM_FITMASK;
789
790 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
791 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
792 VMEM_ASSERT_UNLOCKED(vm);
793
794 KASSERT(size0 > 0);
795 KASSERT(size > 0);
796 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
797 if ((flags & VM_SLEEP) != 0) {
798 ASSERT_SLEEPABLE(NULL, __func__);
799 }
800
801 #if defined(QCACHE)
802 if (size <= vm->vm_qcache_max) {
803 int qidx = size >> vm->vm_quantum_shift;
804 qcache_t *qc = &vm->vm_qcache[qidx - 1];
805
806 return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
807 vmf_to_prf(flags));
808 }
809 #endif /* defined(QCACHE) */
810
811 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
812 }
813
814 vmem_addr_t
815 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
816 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
817 vm_flag_t flags)
818 {
819 struct vmem_freelist *list;
820 struct vmem_freelist *first;
821 struct vmem_freelist *end;
822 bt_t *bt;
823 bt_t *btnew;
824 bt_t *btnew2;
825 const vmem_size_t size = vmem_roundup_size(vm, size0);
826 vm_flag_t strat = flags & VM_FITMASK;
827 vmem_addr_t start;
828
829 KASSERT(size0 > 0);
830 KASSERT(size > 0);
831 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
832 if ((flags & VM_SLEEP) != 0) {
833 ASSERT_SLEEPABLE(NULL, __func__);
834 }
835 KASSERT((align & vm->vm_quantum_mask) == 0);
836 KASSERT((align & (align - 1)) == 0);
837 KASSERT((phase & vm->vm_quantum_mask) == 0);
838 KASSERT((nocross & vm->vm_quantum_mask) == 0);
839 KASSERT((nocross & (nocross - 1)) == 0);
840 KASSERT((align == 0 && phase == 0) || phase < align);
841 KASSERT(nocross == 0 || nocross >= size);
842 KASSERT(maxaddr == 0 || minaddr < maxaddr);
843 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
844
845 if (align == 0) {
846 align = vm->vm_quantum_mask + 1;
847 }
848 btnew = bt_alloc(vm, flags);
849 if (btnew == NULL) {
850 return VMEM_ADDR_NULL;
851 }
852 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
853 if (btnew2 == NULL) {
854 bt_free(vm, btnew);
855 return VMEM_ADDR_NULL;
856 }
857
858 retry_strat:
859 first = bt_freehead_toalloc(vm, size, strat);
860 end = &vm->vm_freelist[VMEM_MAXORDER];
861 retry:
862 bt = NULL;
863 VMEM_LOCK(vm);
864 if (strat == VM_INSTANTFIT) {
865 for (list = first; list < end; list++) {
866 bt = LIST_FIRST(list);
867 if (bt != NULL) {
868 start = vmem_fit(bt, size, align, phase,
869 nocross, minaddr, maxaddr);
870 if (start != VMEM_ADDR_NULL) {
871 goto gotit;
872 }
873 }
874 }
875 } else { /* VM_BESTFIT */
876 for (list = first; list < end; list++) {
877 LIST_FOREACH(bt, list, bt_freelist) {
878 if (bt->bt_size >= size) {
879 start = vmem_fit(bt, size, align, phase,
880 nocross, minaddr, maxaddr);
881 if (start != VMEM_ADDR_NULL) {
882 goto gotit;
883 }
884 }
885 }
886 }
887 }
888 VMEM_UNLOCK(vm);
889 #if 1
890 if (strat == VM_INSTANTFIT) {
891 strat = VM_BESTFIT;
892 goto retry_strat;
893 }
894 #endif
895 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
896 nocross != 0 || minaddr != 0 || maxaddr != 0) {
897
898 /*
899 * XXX should try to import a region large enough to
900 * satisfy restrictions?
901 */
902
903 goto fail;
904 }
905 if (vmem_import(vm, size, flags) == 0) {
906 goto retry;
907 }
908 /* XXX */
909 fail:
910 bt_free(vm, btnew);
911 bt_free(vm, btnew2);
912 return VMEM_ADDR_NULL;
913
914 gotit:
915 KASSERT(bt->bt_type == BT_TYPE_FREE);
916 KASSERT(bt->bt_size >= size);
917 bt_remfree(vm, bt);
918 if (bt->bt_start != start) {
919 btnew2->bt_type = BT_TYPE_FREE;
920 btnew2->bt_start = bt->bt_start;
921 btnew2->bt_size = start - bt->bt_start;
922 bt->bt_start = start;
923 bt->bt_size -= btnew2->bt_size;
924 bt_insfree(vm, btnew2);
925 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
926 btnew2 = NULL;
927 }
928 KASSERT(bt->bt_start == start);
929 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
930 /* split */
931 btnew->bt_type = BT_TYPE_BUSY;
932 btnew->bt_start = bt->bt_start;
933 btnew->bt_size = size;
934 bt->bt_start = bt->bt_start + size;
935 bt->bt_size -= size;
936 bt_insfree(vm, bt);
937 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
938 bt_insbusy(vm, btnew);
939 VMEM_UNLOCK(vm);
940 } else {
941 bt->bt_type = BT_TYPE_BUSY;
942 bt_insbusy(vm, bt);
943 VMEM_UNLOCK(vm);
944 bt_free(vm, btnew);
945 btnew = bt;
946 }
947 if (btnew2 != NULL) {
948 bt_free(vm, btnew2);
949 }
950 KASSERT(btnew->bt_size >= size);
951 btnew->bt_type = BT_TYPE_BUSY;
952
953 return btnew->bt_start;
954 }
955
956 /*
957 * vmem_free:
958 *
959 * => caller must ensure appropriate spl,
960 * if the arena can be accessed from interrupt context.
961 */
962
963 void
964 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
965 {
966
967 VMEM_ASSERT_UNLOCKED(vm);
968 KASSERT(addr != VMEM_ADDR_NULL);
969 KASSERT(size > 0);
970
971 #if defined(QCACHE)
972 if (size <= vm->vm_qcache_max) {
973 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
974 qcache_t *qc = &vm->vm_qcache[qidx - 1];
975
976 return pool_cache_put(&qc->qc_cache, (void *)addr);
977 }
978 #endif /* defined(QCACHE) */
979
980 vmem_xfree(vm, addr, size);
981 }
982
983 void
984 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
985 {
986 bt_t *bt;
987 bt_t *t;
988
989 VMEM_ASSERT_UNLOCKED(vm);
990 KASSERT(addr != VMEM_ADDR_NULL);
991 KASSERT(size > 0);
992
993 VMEM_LOCK(vm);
994
995 bt = bt_lookupbusy(vm, addr);
996 KASSERT(bt != NULL);
997 KASSERT(bt->bt_start == addr);
998 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
999 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1000 KASSERT(bt->bt_type == BT_TYPE_BUSY);
1001 bt_rembusy(vm, bt);
1002 bt->bt_type = BT_TYPE_FREE;
1003
1004 /* coalesce */
1005 t = CIRCLEQ_NEXT(bt, bt_seglist);
1006 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1007 KASSERT(BT_END(bt) == t->bt_start);
1008 bt_remfree(vm, t);
1009 bt_remseg(vm, t);
1010 bt->bt_size += t->bt_size;
1011 bt_free(vm, t);
1012 }
1013 t = CIRCLEQ_PREV(bt, bt_seglist);
1014 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1015 KASSERT(BT_END(t) == bt->bt_start);
1016 bt_remfree(vm, t);
1017 bt_remseg(vm, t);
1018 bt->bt_size += t->bt_size;
1019 bt->bt_start = t->bt_start;
1020 bt_free(vm, t);
1021 }
1022
1023 t = CIRCLEQ_PREV(bt, bt_seglist);
1024 KASSERT(t != NULL);
1025 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1026 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1027 t->bt_size == bt->bt_size) {
1028 vmem_addr_t spanaddr;
1029 vmem_size_t spansize;
1030
1031 KASSERT(t->bt_start == bt->bt_start);
1032 spanaddr = bt->bt_start;
1033 spansize = bt->bt_size;
1034 bt_remseg(vm, bt);
1035 bt_free(vm, bt);
1036 bt_remseg(vm, t);
1037 bt_free(vm, t);
1038 VMEM_UNLOCK(vm);
1039 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1040 } else {
1041 bt_insfree(vm, bt);
1042 VMEM_UNLOCK(vm);
1043 }
1044 }
1045
1046 /*
1047 * vmem_add:
1048 *
1049 * => caller must ensure appropriate spl,
1050 * if the arena can be accessed from interrupt context.
1051 */
1052
1053 vmem_addr_t
1054 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1055 {
1056
1057 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1058 }
1059
1060 /*
1061 * vmem_reap: reap unused resources.
1062 *
1063 * => return TRUE if we successfully reaped something.
1064 */
1065
1066 boolean_t
1067 vmem_reap(vmem_t *vm)
1068 {
1069 boolean_t didsomething = FALSE;
1070
1071 VMEM_ASSERT_UNLOCKED(vm);
1072
1073 #if defined(QCACHE)
1074 didsomething = qc_reap(vm);
1075 #endif /* defined(QCACHE) */
1076 return didsomething;
1077 }
1078
1079 /* ---- debug */
1080
1081 #if defined(VMEM_DEBUG)
1082
1083 #if !defined(_KERNEL)
1084 #include <stdio.h>
1085 #endif /* !defined(_KERNEL) */
1086
1087 void bt_dump(const bt_t *);
1088
1089 void
1090 bt_dump(const bt_t *bt)
1091 {
1092
1093 printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
1094 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1095 bt->bt_type);
1096 }
1097
1098 void
1099 vmem_dump(const vmem_t *vm)
1100 {
1101 const bt_t *bt;
1102 int i;
1103
1104 printf("vmem %p '%s'\n", vm, vm->vm_name);
1105 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1106 bt_dump(bt);
1107 }
1108
1109 for (i = 0; i < VMEM_MAXORDER; i++) {
1110 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1111
1112 if (LIST_EMPTY(fl)) {
1113 continue;
1114 }
1115
1116 printf("freelist[%d]\n", i);
1117 LIST_FOREACH(bt, fl, bt_freelist) {
1118 bt_dump(bt);
1119 if (bt->bt_size) {
1120 }
1121 }
1122 }
1123 }
1124
1125 #if !defined(_KERNEL)
1126
1127 #include <stdlib.h>
1128
1129 int
1130 main()
1131 {
1132 vmem_t *vm;
1133 vmem_addr_t p;
1134 struct reg {
1135 vmem_addr_t p;
1136 vmem_size_t sz;
1137 boolean_t x;
1138 } *reg = NULL;
1139 int nreg = 0;
1140 int nalloc = 0;
1141 int nfree = 0;
1142 vmem_size_t total = 0;
1143 #if 1
1144 vm_flag_t strat = VM_INSTANTFIT;
1145 #else
1146 vm_flag_t strat = VM_BESTFIT;
1147 #endif
1148
1149 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1150 NULL, NULL, NULL, 0, VM_NOSLEEP);
1151 if (vm == NULL) {
1152 printf("vmem_create\n");
1153 exit(EXIT_FAILURE);
1154 }
1155 vmem_dump(vm);
1156
1157 p = vmem_add(vm, 100, 200, VM_SLEEP);
1158 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1159 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1160 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1161 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1162 vmem_dump(vm);
1163 for (;;) {
1164 struct reg *r;
1165 int t = rand() % 100;
1166
1167 if (t > 45) {
1168 /* alloc */
1169 vmem_size_t sz = rand() % 500 + 1;
1170 boolean_t x;
1171 vmem_size_t align, phase, nocross;
1172 vmem_addr_t minaddr, maxaddr;
1173
1174 if (t > 70) {
1175 x = TRUE;
1176 /* XXX */
1177 align = 1 << (rand() % 15);
1178 phase = rand() % 65536;
1179 nocross = 1 << (rand() % 15);
1180 if (align <= phase) {
1181 phase = 0;
1182 }
1183 if (VMEM_CROSS_P(phase, phase + sz - 1,
1184 nocross)) {
1185 nocross = 0;
1186 }
1187 minaddr = rand() % 50000;
1188 maxaddr = rand() % 70000;
1189 if (minaddr > maxaddr) {
1190 minaddr = 0;
1191 maxaddr = 0;
1192 }
1193 printf("=== xalloc %" PRIu64
1194 " align=%" PRIu64 ", phase=%" PRIu64
1195 ", nocross=%" PRIu64 ", min=%" PRIu64
1196 ", max=%" PRIu64 "\n",
1197 (uint64_t)sz,
1198 (uint64_t)align,
1199 (uint64_t)phase,
1200 (uint64_t)nocross,
1201 (uint64_t)minaddr,
1202 (uint64_t)maxaddr);
1203 p = vmem_xalloc(vm, sz, align, phase, nocross,
1204 minaddr, maxaddr, strat|VM_SLEEP);
1205 } else {
1206 x = FALSE;
1207 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1208 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1209 }
1210 printf("-> %" PRIu64 "\n", (uint64_t)p);
1211 vmem_dump(vm);
1212 if (p == VMEM_ADDR_NULL) {
1213 if (x) {
1214 continue;
1215 }
1216 break;
1217 }
1218 nreg++;
1219 reg = realloc(reg, sizeof(*reg) * nreg);
1220 r = ®[nreg - 1];
1221 r->p = p;
1222 r->sz = sz;
1223 r->x = x;
1224 total += sz;
1225 nalloc++;
1226 } else if (nreg != 0) {
1227 /* free */
1228 r = ®[rand() % nreg];
1229 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1230 (uint64_t)r->p, (uint64_t)r->sz);
1231 if (r->x) {
1232 vmem_xfree(vm, r->p, r->sz);
1233 } else {
1234 vmem_free(vm, r->p, r->sz);
1235 }
1236 total -= r->sz;
1237 vmem_dump(vm);
1238 *r = reg[nreg - 1];
1239 nreg--;
1240 nfree++;
1241 }
1242 printf("total=%" PRIu64 "\n", (uint64_t)total);
1243 }
1244 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1245 (uint64_t)total, nalloc, nfree);
1246 exit(EXIT_SUCCESS);
1247 }
1248 #endif /* !defined(_KERNEL) */
1249 #endif /* defined(VMEM_DEBUG) */
1250