subr_vmem.c revision 1.18 1 /* $NetBSD: subr_vmem.c,v 1.18 2006/11/04 13:25:52 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * todo:
36 * - decide how to import segments for vmem_xalloc.
37 * - don't rely on malloc(9).
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.18 2006/11/04 13:25:52 yamt Exp $");
42
/*
 * Build-time switches.
 *
 * VMEM_DEBUG guards the dump helpers at the end of this file (and, for
 * non-kernel builds, the test driver).
 * QCACHE guards the quantum cache code and is only defined for _KERNEL
 * builds.
 */
#define VMEM_DEBUG
#if defined(_KERNEL)
#define QCACHE
#endif /* defined(_KERNEL) */
47
48 #include <sys/param.h>
49 #include <sys/hash.h>
50 #include <sys/queue.h>
51
52 #if defined(_KERNEL)
53 #include <sys/systm.h>
54 #include <sys/lock.h>
55 #include <sys/malloc.h>
56 #include <sys/once.h>
57 #include <sys/pool.h>
58 #include <sys/proc.h>
59 #include <sys/vmem.h>
60 #else /* defined(_KERNEL) */
61 #include "../sys/vmem.h"
62 #endif /* defined(_KERNEL) */
63
/*
 * Kernel/userland glue.
 *
 * In the kernel the arena lock is a struct simplelock.  Outside the kernel
 * KASSERT maps to assert() and every locking/sleep-check macro expands to
 * nothing, so the code below is compiled without any locking at all.
 */
#if defined(_KERNEL)
#define SIMPLELOCK_DECL(name) struct simplelock name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define KASSERT(a) assert(a)
#define SIMPLELOCK_DECL(name) /* nothing */
#define LOCK_ASSERT(a) /* nothing */
#define simple_lock_init(a) /* nothing */
#define simple_lock(a) /* nothing */
#define simple_unlock(a) /* nothing */
#define ASSERT_SLEEPABLE(lk, msg) /* nothing */
#endif /* defined(_KERNEL) */
79
/* Forward declarations of the arena and boundary tag structures. */
struct vmem;
struct vmem_btag;

#if defined(VMEM_DEBUG)
/* Debug helper defined at the end of this file. */
void vmem_dump(const vmem_t *);
#endif /* defined(VMEM_DEBUG) */
86
/* Number of free lists per arena: one per bit of vmem_size_t. */
#define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
/* Number of hash buckets requested by vmem_create(). */
#define VMEM_HASHSIZE_INIT 4096 /* XXX */

/* Mask selecting the allocation strategy bits out of a vm_flag_t. */
#define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)

/* List head types: segment list, per-order free lists, busy-tag hash chains. */
CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
95
#if defined(QCACHE)
/* Number of qcache slots embedded in each arena (see struct vmem). */
#define VMEM_QCACHE_IDX_MAX 32

/* Size of the buffer holding a qcache pool name. */
#define QC_NAME_MAX 16

/*
 * Quantum cache: one pool (plus pool cache) per small allocation size.
 * qc_init() sets up slot i-1 for objects of i quanta.
 */
struct qcache {
	struct pool qc_pool;		/* must stay first: see QC_POOL_TO_QCACHE */
	struct pool_cache qc_cache;	/* cache layered on qc_pool */
	vmem_t *qc_vmem;		/* arena the pool pages come from */
	char qc_name[QC_NAME_MAX];	/* pool name, "<arena name>-<size>" */
};
typedef struct qcache qcache_t;
/*
 * Recover the qcache from a pointer to its pool.  This is a plain cast, so
 * it relies on qc_pool being the first member of struct qcache.
 */
#define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool))
#endif /* defined(QCACHE) */
110
/*
 * vmem arena
 *
 * Holds the boundary tags describing the arena's address ranges and the
 * optional hooks used to import spans from a source arena.
 */
struct vmem {
	SIMPLELOCK_DECL(vm_lock);	/* taken via VMEM_LOCK()/VMEM_UNLOCK() */
	/* import hook: called by vmem_import() with vm_source; may be NULL */
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	/* release hook: hands an imported span back to vm_source; may be NULL */
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;		/* first argument for the two hooks */
	struct vmem_seglist vm_seglist;	/* all tags: spans, free and busy */
	/* free tags, bucketed by calc_order(size in quanta) */
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;		/* number of buckets in vm_hashlist */
	size_t vm_nbusytag;		/* number of tags on the hash */
	struct vmem_hashlist *vm_hashlist; /* busy tags, hashed by bt_start */
	size_t vm_quantum_mask;		/* quantum - 1 */
	int vm_quantum_shift;		/* calc_order(quantum) */
	const char *vm_name;		/* caller's string; pointer is stored */

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;		/* largest size served by the qcaches */
	struct pool_allocator vm_qcache_allocator; /* page backend of the pools */
	qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX]; /* slot i-1 serves i quanta */
#endif /* defined(QCACHE) */
};
134
/*
 * Arena lock helpers.
 *
 * The argument is parenthesized so that any pointer expression may be
 * passed, and none of the macros carries its own trailing semicolon, so
 * each one behaves like an ordinary statement at the call site.
 * In non-kernel builds the underlying primitives expand to nothing.
 */
#define VMEM_LOCK(vm) simple_lock(&(vm)->vm_lock)
#define VMEM_UNLOCK(vm) simple_unlock(&(vm)->vm_lock)
#define VMEM_LOCK_INIT(vm) simple_lock_init(&(vm)->vm_lock)
#define VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&(vm)->vm_lock))
#define VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&(vm)->vm_lock))
142
/*
 * boundary tag
 *
 * Describes one address range [bt_start, bt_start + bt_size) of an arena.
 * Every tag is linked on the arena's segment list; in addition a free tag
 * sits on one of the free lists and a busy tag sits on the hash, which is
 * why the two list entries can share storage.
 */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define bt_hashlist bt_u.u_hashlist
#define bt_freelist bt_u.u_freelist
	vmem_addr_t bt_start;	/* first address of the range */
	vmem_size_t bt_size;	/* length of the range */
	int bt_type;		/* one of BT_TYPE_* below */
};

/* Tag types.  The two span types are numerically lowest: see BT_ISSPAN_P. */
#define BT_TYPE_SPAN 1		/* span added by vmem_import() */
#define BT_TYPE_SPAN_STATIC 2	/* span added by vmem_add() */
#define BT_TYPE_FREE 3
#define BT_TYPE_BUSY 4
#define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

/* Address one past the end of the range described by bt. */
#define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;
166
167 /* ---- misc */
168
169 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
170
171 static int
172 calc_order(vmem_size_t size)
173 {
174 vmem_size_t target;
175 int i;
176
177 KASSERT(size != 0);
178
179 i = 0;
180 target = size >> 1;
181 while (ORDER2SIZE(i) <= target) {
182 i++;
183 }
184
185 KASSERT(ORDER2SIZE(i) <= size);
186 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
187
188 return i;
189 }
190
191 #if defined(_KERNEL)
192 static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
193 #endif /* defined(_KERNEL) */
194
195 static void *
196 xmalloc(size_t sz, vm_flag_t flags)
197 {
198
199 #if defined(_KERNEL)
200 return malloc(sz, M_VMEM,
201 M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
202 #else /* defined(_KERNEL) */
203 return malloc(sz);
204 #endif /* defined(_KERNEL) */
205 }
206
/*
 * xfree: release memory obtained from xmalloc().
 *
 * Written as a plain call: free() yields no value, and ISO C does not allow
 * "return expr;" in a function returning void.
 */
static void
xfree(void *p)
{

#if defined(_KERNEL)
	free(p, M_VMEM);
#else /* defined(_KERNEL) */
	free(p);
#endif /* defined(_KERNEL) */
}
217
218 /* ---- boundary tag */
219
220 #if defined(_KERNEL)
221 static struct pool_cache bt_poolcache;
222 static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
223 #endif /* defined(_KERNEL) */
224
225 static bt_t *
226 bt_alloc(vmem_t *vm, vm_flag_t flags)
227 {
228 bt_t *bt;
229
230 #if defined(_KERNEL)
231 /* XXX bootstrap */
232 bt = pool_cache_get(&bt_poolcache,
233 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
234 #else /* defined(_KERNEL) */
235 bt = malloc(sizeof *bt);
236 #endif /* defined(_KERNEL) */
237
238 return bt;
239 }
240
/*
 * bt_free: release a boundary tag obtained from bt_alloc().
 *
 * The tag is not unlinked from any list here; callers do that first.
 * vm is unused in both build variants.
 */
static void
bt_free(vmem_t *vm, bt_t *bt)
{

#if defined(_KERNEL)
	/* XXX bootstrap */
	pool_cache_put(&bt_poolcache, bt);
#else /* defined(_KERNEL) */
	free(bt);
#endif /* defined(_KERNEL) */
}
252
253 /*
254 * freelist[0] ... [1, 1]
255 * freelist[1] ... [2, 3]
256 * freelist[2] ... [4, 7]
257 * freelist[3] ... [8, 15]
258 * :
259 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
260 * :
261 */
262
263 static struct vmem_freelist *
264 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
265 {
266 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
267 int idx;
268
269 KASSERT((size & vm->vm_quantum_mask) == 0);
270 KASSERT(size != 0);
271
272 idx = calc_order(qsize);
273 KASSERT(idx >= 0);
274 KASSERT(idx < VMEM_MAXORDER);
275
276 return &vm->vm_freelist[idx];
277 }
278
279 static struct vmem_freelist *
280 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
281 {
282 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
283 int idx;
284
285 KASSERT((size & vm->vm_quantum_mask) == 0);
286 KASSERT(size != 0);
287
288 idx = calc_order(qsize);
289 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
290 idx++;
291 /* check too large request? */
292 }
293 KASSERT(idx >= 0);
294 KASSERT(idx < VMEM_MAXORDER);
295
296 return &vm->vm_freelist[idx];
297 }
298
299 /* ---- boundary tag hash */
300
301 static struct vmem_hashlist *
302 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
303 {
304 struct vmem_hashlist *list;
305 unsigned int hash;
306
307 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
308 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
309
310 return list;
311 }
312
313 static bt_t *
314 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
315 {
316 struct vmem_hashlist *list;
317 bt_t *bt;
318
319 list = bt_hashhead(vm, addr);
320 LIST_FOREACH(bt, list, bt_hashlist) {
321 if (bt->bt_start == addr) {
322 break;
323 }
324 }
325
326 return bt;
327 }
328
329 static void
330 bt_rembusy(vmem_t *vm, bt_t *bt)
331 {
332
333 KASSERT(vm->vm_nbusytag > 0);
334 vm->vm_nbusytag--;
335 LIST_REMOVE(bt, bt_hashlist);
336 }
337
338 static void
339 bt_insbusy(vmem_t *vm, bt_t *bt)
340 {
341 struct vmem_hashlist *list;
342
343 KASSERT(bt->bt_type == BT_TYPE_BUSY);
344
345 list = bt_hashhead(vm, bt->bt_start);
346 LIST_INSERT_HEAD(list, bt, bt_hashlist);
347 vm->vm_nbusytag++;
348 }
349
350 /* ---- boundary tag list */
351
/* bt_remseg: unlink bt from the arena's segment list. */
static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}
358
/* bt_insseg: link bt on the arena's segment list right after prev. */
static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}
365
/* bt_insseg_tail: link bt at the end of the arena's segment list. */
static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}
372
/*
 * bt_remfree: unlink a free tag from whichever free list it is on.
 * vm is not used.
 */
static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}
381
382 static void
383 bt_insfree(vmem_t *vm, bt_t *bt)
384 {
385 struct vmem_freelist *list;
386
387 list = bt_freehead_tofree(vm, bt->bt_size);
388 LIST_INSERT_HEAD(list, bt, bt_freelist);
389 }
390
391 /* ---- vmem internal functions */
392
393 #if defined(QCACHE)
394 static inline vm_flag_t
395 prf_to_vmf(int prflags)
396 {
397 vm_flag_t vmflags;
398
399 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
400 if ((prflags & PR_WAITOK) != 0) {
401 vmflags = VM_SLEEP;
402 } else {
403 vmflags = VM_NOSLEEP;
404 }
405 return vmflags;
406 }
407
408 static inline int
409 vmf_to_prf(vm_flag_t vmflags)
410 {
411 int prflags;
412
413 if ((vmflags & VM_SLEEP) != 0) {
414 prflags = PR_WAITOK;
415 } else {
416 prflags = PR_NOWAIT;
417 }
418 return prflags;
419 }
420
/*
 * qc_poolpage_size: page size for the qcache pools, namely the smallest
 * power of two strictly greater than qcache_max * 3.
 */
static size_t
qc_poolpage_size(size_t qcache_max)
{
	const size_t threshold = qcache_max * 3;
	int order = 0;

	while (ORDER2SIZE(order) <= threshold) {
		order++;
	}
	return ORDER2SIZE(order);
}
431
432 static void *
433 qc_poolpage_alloc(struct pool *pool, int prflags)
434 {
435 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
436 vmem_t *vm = qc->qc_vmem;
437
438 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
439 prf_to_vmf(prflags) | VM_INSTANTFIT);
440 }
441
442 static void
443 qc_poolpage_free(struct pool *pool, void *addr)
444 {
445 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
446 vmem_t *vm = qc->qc_vmem;
447
448 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
449 }
450
451 static void
452 qc_init(vmem_t *vm, size_t qcache_max)
453 {
454 struct pool_allocator *pa;
455 int qcache_idx_max;
456 int i;
457
458 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
459 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
460 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
461 }
462 vm->vm_qcache_max = qcache_max;
463 pa = &vm->vm_qcache_allocator;
464 memset(pa, 0, sizeof(*pa));
465 pa->pa_alloc = qc_poolpage_alloc;
466 pa->pa_free = qc_poolpage_free;
467 pa->pa_pagesz = qc_poolpage_size(qcache_max);
468
469 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
470 for (i = 1; i <= qcache_idx_max; i++) {
471 qcache_t *qc = &vm->vm_qcache[i - 1];
472 size_t size = i << vm->vm_quantum_shift;
473
474 qc->qc_vmem = vm;
475 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
476 vm->vm_name, size);
477 pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
478 0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
479 pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
480 }
481 }
482
483 static boolean_t
484 qc_reap(vmem_t *vm)
485 {
486 int i;
487 int qcache_idx_max;
488 boolean_t didsomething = FALSE;
489
490 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
491 for (i = 1; i <= qcache_idx_max; i++) {
492 qcache_t *qc = &vm->vm_qcache[i - 1];
493
494 if (pool_reclaim(&qc->qc_pool) != 0) {
495 didsomething = TRUE;
496 }
497 }
498
499 return didsomething;
500 }
501 #endif /* defined(QCACHE) */
502
#if defined(_KERNEL)
/*
 * vmem_init: one-time initialization, run through RUN_ONCE from
 * vmem_create().  Sets up the pool cache that boundary tags are allocated
 * from.  Always returns 0.
 */
static int
vmem_init(void)
{

	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
	return 0;
}
#endif /* defined(_KERNEL) */
512
513 static vmem_addr_t
514 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
515 int spanbttype)
516 {
517 bt_t *btspan;
518 bt_t *btfree;
519
520 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
521 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
522 VMEM_ASSERT_UNLOCKED(vm);
523
524 btspan = bt_alloc(vm, flags);
525 if (btspan == NULL) {
526 return VMEM_ADDR_NULL;
527 }
528 btfree = bt_alloc(vm, flags);
529 if (btfree == NULL) {
530 bt_free(vm, btspan);
531 return VMEM_ADDR_NULL;
532 }
533
534 btspan->bt_type = spanbttype;
535 btspan->bt_start = addr;
536 btspan->bt_size = size;
537
538 btfree->bt_type = BT_TYPE_FREE;
539 btfree->bt_start = addr;
540 btfree->bt_size = size;
541
542 VMEM_LOCK(vm);
543 bt_insseg_tail(vm, btspan);
544 bt_insseg(vm, btfree, btspan);
545 bt_insfree(vm, btfree);
546 VMEM_UNLOCK(vm);
547
548 return addr;
549 }
550
551 static int
552 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
553 {
554 vmem_addr_t addr;
555
556 VMEM_ASSERT_UNLOCKED(vm);
557
558 if (vm->vm_allocfn == NULL) {
559 return EINVAL;
560 }
561
562 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
563 if (addr == VMEM_ADDR_NULL) {
564 return ENOMEM;
565 }
566
567 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
568 (*vm->vm_freefn)(vm->vm_source, addr, size);
569 return ENOMEM;
570 }
571
572 return 0;
573 }
574
575 static int
576 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
577 {
578 bt_t *bt;
579 int i;
580 struct vmem_hashlist *newhashlist;
581 struct vmem_hashlist *oldhashlist;
582 size_t oldhashsize;
583
584 KASSERT(newhashsize > 0);
585 VMEM_ASSERT_UNLOCKED(vm);
586
587 newhashlist =
588 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
589 if (newhashlist == NULL) {
590 return ENOMEM;
591 }
592 for (i = 0; i < newhashsize; i++) {
593 LIST_INIT(&newhashlist[i]);
594 }
595
596 VMEM_LOCK(vm);
597 oldhashlist = vm->vm_hashlist;
598 oldhashsize = vm->vm_hashsize;
599 vm->vm_hashlist = newhashlist;
600 vm->vm_hashsize = newhashsize;
601 if (oldhashlist == NULL) {
602 VMEM_UNLOCK(vm);
603 return 0;
604 }
605 for (i = 0; i < oldhashsize; i++) {
606 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
607 bt_rembusy(vm, bt); /* XXX */
608 bt_insbusy(vm, bt);
609 }
610 }
611 VMEM_UNLOCK(vm);
612
613 xfree(oldhashlist);
614
615 return 0;
616 }
617
/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * => bt must be at least size long.
 * => returns the start address of a suitable range inside bt, or
 *    VMEM_ADDR_NULL if bt cannot satisfy the request.
 * => the arithmetic below treats align and nocross as powers of two;
 *    vmem_xalloc() asserts this before calling.
 */

static vmem_addr_t
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integers of the same size.
	 */

	/* clip the candidate window to [minaddr, maxaddr - 1] */
	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	/*
	 * maxaddr == 0 is the "no limit" value used by the callers; with
	 * unsigned arithmetic maxaddr - 1 then wraps to the largest value, so
	 * end is left alone.
	 */
	if (end > maxaddr - 1) {
		end = maxaddr - 1;
	}
	if (start >= end) {
		return VMEM_ADDR_NULL;
	}
	/* round start up so that (start - phase) is a multiple of align */
	start = -(-(start - phase) & -align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	/*
	 * If [start, start + size) would straddle a nocross boundary, move
	 * start up to the next boundary (plus phase).
	 */
	if (((start ^ (start + size - 1)) & -nocross) != 0) {
		KASSERT(align < nocross);
		start = -(-(start - phase) & -nocross) + phase;
	}
	if (start < end && end - start >= size) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(((start ^ (start + size - 1)) & -nocross) == 0);
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));
		return start;
	}
	return VMEM_ADDR_NULL;
}
666
667 /* ---- vmem API */
668
669 /*
670 * vmem_create: create an arena.
671 *
672 * => must not be called from interrupt context.
673 */
674
675 vmem_t *
676 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
677 vmem_size_t quantum,
678 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
679 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
680 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
681 {
682 vmem_t *vm;
683 int i;
684 #if defined(_KERNEL)
685 static ONCE_DECL(control);
686 #endif /* defined(_KERNEL) */
687
688 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
689 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
690
691 #if defined(_KERNEL)
692 if (RUN_ONCE(&control, vmem_init)) {
693 return NULL;
694 }
695 #endif /* defined(_KERNEL) */
696 vm = xmalloc(sizeof(*vm), flags);
697 if (vm == NULL) {
698 return NULL;
699 }
700
701 VMEM_LOCK_INIT(vm);
702 vm->vm_name = name;
703 vm->vm_quantum_mask = quantum - 1;
704 vm->vm_quantum_shift = calc_order(quantum);
705 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
706 vm->vm_allocfn = allocfn;
707 vm->vm_freefn = freefn;
708 vm->vm_source = source;
709 vm->vm_nbusytag = 0;
710 #if defined(QCACHE)
711 qc_init(vm, qcache_max);
712 #endif /* defined(QCACHE) */
713
714 CIRCLEQ_INIT(&vm->vm_seglist);
715 for (i = 0; i < VMEM_MAXORDER; i++) {
716 LIST_INIT(&vm->vm_freelist[i]);
717 }
718 vm->vm_hashlist = NULL;
719 if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
720 vmem_destroy(vm);
721 return NULL;
722 }
723
724 if (size != 0) {
725 if (vmem_add(vm, base, size, flags) == 0) {
726 vmem_destroy(vm);
727 return NULL;
728 }
729 }
730
731 return vm;
732 }
733
734 void
735 vmem_destroy(vmem_t *vm)
736 {
737
738 VMEM_ASSERT_UNLOCKED(vm);
739
740 if (vm->vm_hashlist != NULL) {
741 int i;
742
743 for (i = 0; i < vm->vm_hashsize; i++) {
744 bt_t *bt;
745
746 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
747 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
748 bt_free(vm, bt);
749 }
750 }
751 xfree(vm->vm_hashlist);
752 }
753 xfree(vm);
754 }
755
756 vmem_size_t
757 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
758 {
759
760 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
761 }
762
763 /*
764 * vmem_alloc:
765 *
766 * => caller must ensure appropriate spl,
767 * if the arena can be accessed from interrupt context.
768 */
769
770 vmem_addr_t
771 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
772 {
773 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
774 const vm_flag_t strat __unused = flags & VM_FITMASK;
775
776 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
777 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
778 VMEM_ASSERT_UNLOCKED(vm);
779
780 KASSERT(size0 > 0);
781 KASSERT(size > 0);
782 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
783 if ((flags & VM_SLEEP) != 0) {
784 ASSERT_SLEEPABLE(NULL, __func__);
785 }
786
787 #if defined(QCACHE)
788 if (size <= vm->vm_qcache_max) {
789 int qidx = size >> vm->vm_quantum_shift;
790 qcache_t *qc = &vm->vm_qcache[qidx - 1];
791
792 return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
793 vmf_to_prf(flags));
794 }
795 #endif /* defined(QCACHE) */
796
797 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
798 }
799
800 vmem_addr_t
801 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
802 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
803 vm_flag_t flags)
804 {
805 struct vmem_freelist *list;
806 struct vmem_freelist *first;
807 struct vmem_freelist *end;
808 bt_t *bt;
809 bt_t *btnew;
810 bt_t *btnew2;
811 const vmem_size_t size = vmem_roundup_size(vm, size0);
812 vm_flag_t strat = flags & VM_FITMASK;
813 vmem_addr_t start;
814
815 KASSERT(size0 > 0);
816 KASSERT(size > 0);
817 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
818 if ((flags & VM_SLEEP) != 0) {
819 ASSERT_SLEEPABLE(NULL, __func__);
820 }
821 KASSERT((align & vm->vm_quantum_mask) == 0);
822 KASSERT((align & (align - 1)) == 0);
823 KASSERT((phase & vm->vm_quantum_mask) == 0);
824 KASSERT((nocross & vm->vm_quantum_mask) == 0);
825 KASSERT((nocross & (nocross - 1)) == 0);
826 KASSERT((align == 0 && phase == 0) || phase < align);
827 KASSERT(nocross == 0 || nocross >= size);
828 KASSERT(maxaddr == 0 || minaddr < maxaddr);
829 KASSERT(((phase ^ (phase + size - 1)) & -nocross) == 0);
830
831 if (align == 0) {
832 align = vm->vm_quantum_mask + 1;
833 }
834 btnew = bt_alloc(vm, flags);
835 if (btnew == NULL) {
836 return VMEM_ADDR_NULL;
837 }
838 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
839 if (btnew2 == NULL) {
840 bt_free(vm, btnew);
841 return VMEM_ADDR_NULL;
842 }
843
844 retry_strat:
845 first = bt_freehead_toalloc(vm, size, strat);
846 end = &vm->vm_freelist[VMEM_MAXORDER];
847 retry:
848 bt = NULL;
849 VMEM_LOCK(vm);
850 if (strat == VM_INSTANTFIT) {
851 for (list = first; list < end; list++) {
852 bt = LIST_FIRST(list);
853 if (bt != NULL) {
854 start = vmem_fit(bt, size, align, phase,
855 nocross, minaddr, maxaddr);
856 if (start != VMEM_ADDR_NULL) {
857 goto gotit;
858 }
859 }
860 }
861 } else { /* VM_BESTFIT */
862 for (list = first; list < end; list++) {
863 LIST_FOREACH(bt, list, bt_freelist) {
864 if (bt->bt_size >= size) {
865 start = vmem_fit(bt, size, align, phase,
866 nocross, minaddr, maxaddr);
867 if (start != VMEM_ADDR_NULL) {
868 goto gotit;
869 }
870 }
871 }
872 }
873 }
874 VMEM_UNLOCK(vm);
875 #if 1
876 if (strat == VM_INSTANTFIT) {
877 strat = VM_BESTFIT;
878 goto retry_strat;
879 }
880 #endif
881 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
882 nocross != 0 || minaddr != 0 || maxaddr != 0) {
883
884 /*
885 * XXX should try to import a region large enough to
886 * satisfy restrictions?
887 */
888
889 return VMEM_ADDR_NULL;
890 }
891 if (vmem_import(vm, size, flags) == 0) {
892 goto retry;
893 }
894 /* XXX */
895 return VMEM_ADDR_NULL;
896
897 gotit:
898 KASSERT(bt->bt_type == BT_TYPE_FREE);
899 KASSERT(bt->bt_size >= size);
900 bt_remfree(vm, bt);
901 if (bt->bt_start != start) {
902 btnew2->bt_type = BT_TYPE_FREE;
903 btnew2->bt_start = bt->bt_start;
904 btnew2->bt_size = start - bt->bt_start;
905 bt->bt_start = start;
906 bt->bt_size -= btnew2->bt_size;
907 bt_insfree(vm, btnew2);
908 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
909 btnew2 = NULL;
910 }
911 KASSERT(bt->bt_start == start);
912 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
913 /* split */
914 btnew->bt_type = BT_TYPE_BUSY;
915 btnew->bt_start = bt->bt_start;
916 btnew->bt_size = size;
917 bt->bt_start = bt->bt_start + size;
918 bt->bt_size -= size;
919 bt_insfree(vm, bt);
920 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
921 bt_insbusy(vm, btnew);
922 VMEM_UNLOCK(vm);
923 } else {
924 bt->bt_type = BT_TYPE_BUSY;
925 bt_insbusy(vm, bt);
926 VMEM_UNLOCK(vm);
927 bt_free(vm, btnew);
928 btnew = bt;
929 }
930 if (btnew2 != NULL) {
931 bt_free(vm, btnew2);
932 }
933 KASSERT(btnew->bt_size >= size);
934 btnew->bt_type = BT_TYPE_BUSY;
935
936 return btnew->bt_start;
937 }
938
939 /*
940 * vmem_free:
941 *
942 * => caller must ensure appropriate spl,
943 * if the arena can be accessed from interrupt context.
944 */
945
946 void
947 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
948 {
949
950 VMEM_ASSERT_UNLOCKED(vm);
951 KASSERT(addr != VMEM_ADDR_NULL);
952 KASSERT(size > 0);
953
954 #if defined(QCACHE)
955 if (size <= vm->vm_qcache_max) {
956 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
957 qcache_t *qc = &vm->vm_qcache[qidx - 1];
958
959 return pool_cache_put(&qc->qc_cache, (void *)addr);
960 }
961 #endif /* defined(QCACHE) */
962
963 vmem_xfree(vm, addr, size);
964 }
965
966 void
967 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
968 {
969 bt_t *bt;
970 bt_t *t;
971
972 VMEM_ASSERT_UNLOCKED(vm);
973 KASSERT(addr != VMEM_ADDR_NULL);
974 KASSERT(size > 0);
975
976 VMEM_LOCK(vm);
977
978 bt = bt_lookupbusy(vm, addr);
979 KASSERT(bt != NULL);
980 KASSERT(bt->bt_start == addr);
981 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
982 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
983 KASSERT(bt->bt_type == BT_TYPE_BUSY);
984 bt_rembusy(vm, bt);
985 bt->bt_type = BT_TYPE_FREE;
986
987 /* coalesce */
988 t = CIRCLEQ_NEXT(bt, bt_seglist);
989 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
990 KASSERT(BT_END(bt) == t->bt_start);
991 bt_remfree(vm, t);
992 bt_remseg(vm, t);
993 bt->bt_size += t->bt_size;
994 bt_free(vm, t);
995 }
996 t = CIRCLEQ_PREV(bt, bt_seglist);
997 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
998 KASSERT(BT_END(t) == bt->bt_start);
999 bt_remfree(vm, t);
1000 bt_remseg(vm, t);
1001 bt->bt_size += t->bt_size;
1002 bt->bt_start = t->bt_start;
1003 bt_free(vm, t);
1004 }
1005
1006 t = CIRCLEQ_PREV(bt, bt_seglist);
1007 KASSERT(t != NULL);
1008 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1009 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1010 t->bt_size == bt->bt_size) {
1011 vmem_addr_t spanaddr;
1012 vmem_size_t spansize;
1013
1014 KASSERT(t->bt_start == bt->bt_start);
1015 spanaddr = bt->bt_start;
1016 spansize = bt->bt_size;
1017 bt_remseg(vm, bt);
1018 bt_free(vm, bt);
1019 bt_remseg(vm, t);
1020 bt_free(vm, t);
1021 VMEM_UNLOCK(vm);
1022 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1023 } else {
1024 bt_insfree(vm, bt);
1025 VMEM_UNLOCK(vm);
1026 }
1027 }
1028
/*
 * vmem_add: add the range [addr, addr + size) to the arena as a static
 * span (BT_TYPE_SPAN_STATIC).
 *
 * => flags must contain exactly one of VM_SLEEP and VM_NOSLEEP.
 * => returns addr on success, VMEM_ADDR_NULL on failure.
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}
1042
1043 /*
1044 * vmem_reap: reap unused resources.
1045 *
1046 * => return TRUE if we successfully reaped something.
1047 */
1048
1049 boolean_t
1050 vmem_reap(vmem_t *vm)
1051 {
1052 boolean_t didsomething = FALSE;
1053
1054 VMEM_ASSERT_UNLOCKED(vm);
1055
1056 #if defined(QCACHE)
1057 didsomething = qc_reap(vm);
1058 #endif /* defined(QCACHE) */
1059 return didsomething;
1060 }
1061
1062 /* ---- debug */
1063
1064 #if defined(VMEM_DEBUG)
1065
1066 #if !defined(_KERNEL)
1067 #include <stdio.h>
1068 #endif /* !defined(_KERNEL) */
1069
1070 void bt_dump(const bt_t *);
1071
1072 void
1073 bt_dump(const bt_t *bt)
1074 {
1075
1076 printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
1077 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1078 bt->bt_type);
1079 }
1080
1081 void
1082 vmem_dump(const vmem_t *vm)
1083 {
1084 const bt_t *bt;
1085 int i;
1086
1087 printf("vmem %p '%s'\n", vm, vm->vm_name);
1088 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1089 bt_dump(bt);
1090 }
1091
1092 for (i = 0; i < VMEM_MAXORDER; i++) {
1093 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1094
1095 if (LIST_EMPTY(fl)) {
1096 continue;
1097 }
1098
1099 printf("freelist[%d]\n", i);
1100 LIST_FOREACH(bt, fl, bt_freelist) {
1101 bt_dump(bt);
1102 if (bt->bt_size) {
1103 }
1104 }
1105 }
1106 }
1107
1108 #if !defined(_KERNEL)
1109
1110 #include <stdlib.h>
1111
1112 int
1113 main()
1114 {
1115 vmem_t *vm;
1116 vmem_addr_t p;
1117 struct reg {
1118 vmem_addr_t p;
1119 vmem_size_t sz;
1120 boolean_t x;
1121 } *reg = NULL;
1122 int nreg = 0;
1123 int nalloc = 0;
1124 int nfree = 0;
1125 vmem_size_t total = 0;
1126 #if 1
1127 vm_flag_t strat = VM_INSTANTFIT;
1128 #else
1129 vm_flag_t strat = VM_BESTFIT;
1130 #endif
1131
1132 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1133 NULL, NULL, NULL, 0, VM_NOSLEEP);
1134 if (vm == NULL) {
1135 printf("vmem_create\n");
1136 exit(EXIT_FAILURE);
1137 }
1138 vmem_dump(vm);
1139
1140 p = vmem_add(vm, 100, 200, VM_SLEEP);
1141 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1142 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1143 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1144 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1145 vmem_dump(vm);
1146 for (;;) {
1147 struct reg *r;
1148 int t = rand() % 100;
1149
1150 if (t > 45) {
1151 /* alloc */
1152 vmem_size_t sz = rand() % 500 + 1;
1153 boolean_t x;
1154 vmem_size_t align, phase, nocross;
1155 vmem_addr_t minaddr, maxaddr;
1156
1157 if (t > 70) {
1158 x = TRUE;
1159 /* XXX */
1160 align = 1 << (rand() % 15);
1161 phase = rand() % 65536;
1162 nocross = 1 << (rand() % 15);
1163 if (align <= phase) {
1164 phase = 0;
1165 }
1166 if (((phase ^ (phase + sz)) & -nocross) != 0) {
1167 nocross = 0;
1168 }
1169 minaddr = rand() % 50000;
1170 maxaddr = rand() % 70000;
1171 if (minaddr > maxaddr) {
1172 minaddr = 0;
1173 maxaddr = 0;
1174 }
1175 printf("=== xalloc %" PRIu64
1176 " align=%" PRIu64 ", phase=%" PRIu64
1177 ", nocross=%" PRIu64 ", min=%" PRIu64
1178 ", max=%" PRIu64 "\n",
1179 (uint64_t)sz,
1180 (uint64_t)align,
1181 (uint64_t)phase,
1182 (uint64_t)nocross,
1183 (uint64_t)minaddr,
1184 (uint64_t)maxaddr);
1185 p = vmem_xalloc(vm, sz, align, phase, nocross,
1186 minaddr, maxaddr, strat|VM_SLEEP);
1187 } else {
1188 x = FALSE;
1189 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1190 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1191 }
1192 printf("-> %" PRIu64 "\n", (uint64_t)p);
1193 vmem_dump(vm);
1194 if (p == VMEM_ADDR_NULL) {
1195 if (x) {
1196 continue;
1197 }
1198 break;
1199 }
1200 nreg++;
1201 reg = realloc(reg, sizeof(*reg) * nreg);
1202 r = ®[nreg - 1];
1203 r->p = p;
1204 r->sz = sz;
1205 r->x = x;
1206 total += sz;
1207 nalloc++;
1208 } else if (nreg != 0) {
1209 /* free */
1210 r = ®[rand() % nreg];
1211 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1212 (uint64_t)r->p, (uint64_t)r->sz);
1213 if (r->x) {
1214 vmem_xfree(vm, r->p, r->sz);
1215 } else {
1216 vmem_free(vm, r->p, r->sz);
1217 }
1218 total -= r->sz;
1219 vmem_dump(vm);
1220 *r = reg[nreg - 1];
1221 nreg--;
1222 nfree++;
1223 }
1224 printf("total=%" PRIu64 "\n", (uint64_t)total);
1225 }
1226 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1227 (uint64_t)total, nalloc, nfree);
1228 exit(EXIT_SUCCESS);
1229 }
1230 #endif /* !defined(_KERNEL) */
1231 #endif /* defined(VMEM_DEBUG) */
1232