/*	$NetBSD: subr_vmem.c,v 1.16 2006/10/27 15:05:16 yamt Exp $	*/

/*-
 * Copyright (c)2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.16 2006/10/27 15:05:16 yamt Exp $");

#define	VMEM_DEBUG
#if defined(_KERNEL)
#define	QCACHE
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/once.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/vmem.h>
#else /* defined(_KERNEL) */
#include "../sys/vmem.h"
#endif /* defined(_KERNEL) */

#if defined(_KERNEL)
#define	SIMPLELOCK_DECL(name)	struct simplelock name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define	KASSERT(a)		assert(a)
#define	SIMPLELOCK_DECL(name)	/* nothing */
#define	LOCK_ASSERT(a)		/* nothing */
#define	simple_lock_init(a)	/* nothing */
#define	simple_lock(a)		/* nothing */
#define	simple_unlock(a)	/* nothing */
#define	ASSERT_SLEEPABLE(lk, msg) /* nothing */
#endif /* defined(_KERNEL) */

struct vmem;
struct vmem_btag;

#if defined(VMEM_DEBUG)
void vmem_dump(const vmem_t *);
#endif /* defined(VMEM_DEBUG) */

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)
#define	VMEM_HASHSIZE_INIT	4096	/* XXX */

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

#if defined(QCACHE)
#define	VMEM_QCACHE_IDX_MAX	32

#define	QC_NAME_MAX	16

struct qcache {
	struct pool qc_pool;
	struct pool_cache qc_cache;
	vmem_t *qc_vmem;
	char qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool))
#endif /* defined(QCACHE) */

/* vmem arena */
struct vmem {
	SIMPLELOCK_DECL(vm_lock);
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;
	struct vmem_seglist vm_seglist;
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;
	size_t vm_nbusytag;
	struct vmem_hashlist *vm_hashlist;
	size_t vm_quantum_mask;
	int vm_quantum_shift;
	const char *vm_name;

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif /* defined(QCACHE) */
};

#define	VMEM_LOCK(vm)	simple_lock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)	simple_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm)	simple_lock_init(&vm->vm_lock);
#define	VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&vm->vm_lock))
#define	VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&vm->vm_lock))

/* boundary tag */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;
};

#define	BT_TYPE_SPAN		1
#define	BT_TYPE_SPAN_STATIC	2
#define	BT_TYPE_FREE		3
#define	BT_TYPE_BUSY		4
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;

/* ---- misc */

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))

static int
calc_order(vmem_size_t size)
{
	vmem_size_t target;
	int i;

	KASSERT(size != 0);

	i = 0;
	target = size >> 1;
	while (ORDER2SIZE(i) <= target) {
		i++;
	}

	KASSERT(ORDER2SIZE(i) <= size);
	KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));

	return i;
}
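
/*
 * Illustrative values (added for clarity; not part of the original file):
 * calc_order() effectively computes floor(log2(size)), e.g.
 *
 *	calc_order(1)  == 0
 *	calc_order(7)  == 2
 *	calc_order(8)  == 3
 *	calc_order(15) == 3
 *
 * i.e. the largest order whose size does not exceed the argument.
 */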

#if defined(_KERNEL)
static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
#endif /* defined(_KERNEL) */

static void *
xmalloc(size_t sz, vm_flag_t flags)
{

#if defined(_KERNEL)
	return malloc(sz, M_VMEM,
	    M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
#else /* defined(_KERNEL) */
	return malloc(sz);
#endif /* defined(_KERNEL) */
}

static void
xfree(void *p)
{

#if defined(_KERNEL)
	return free(p, M_VMEM);
#else /* defined(_KERNEL) */
	return free(p);
#endif /* defined(_KERNEL) */
}

/* ---- boundary tag */

#if defined(_KERNEL)
static struct pool_cache bt_poolcache;
static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
#endif /* defined(_KERNEL) */

static bt_t *
bt_alloc(vmem_t *vm __unused, vm_flag_t flags)
{
	bt_t *bt;

#if defined(_KERNEL)
	/* XXX bootstrap */
	bt = pool_cache_get(&bt_poolcache,
	    (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
#else /* defined(_KERNEL) */
	bt = malloc(sizeof *bt);
#endif /* defined(_KERNEL) */

	return bt;
}

static void
bt_free(vmem_t *vm __unused, bt_t *bt)
{

#if defined(_KERNEL)
	/* XXX bootstrap */
	pool_cache_put(&bt_poolcache, bt);
#else /* defined(_KERNEL) */
	free(bt);
#endif /* defined(_KERNEL) */
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}
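
/*
 * Illustrative note (added; not in the original source): with the freelist
 * bucketing above, an instant-fit lookup rounds the order up whenever the
 * request is not an exact power of two, so any boundary tag found on the
 * chosen list (or a higher one) is guaranteed to be large enough.  For
 * example, a request of 6 quanta maps to freelist[2] ([4, 7]) for best-fit
 * scanning, but to freelist[3] ([8, 15]) for instant-fit, where the first
 * tag on the list can be taken without inspecting its size.
 */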

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm __unused, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}
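
/*
 * Example (added for illustration; not in the original file):
 * qc_poolpage_size() picks the smallest power of two strictly larger than
 * three times qcache_max, so each pool page can hold at least a few of the
 * largest cached objects.  E.g. with qcache_max == 1024, 3 * 1024 == 3072
 * and the function returns 4096.
 */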

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT);
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}

static void
qc_init(vmem_t *vm, size_t qcache_max)
{
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	for (i = 1; i <= qcache_idx_max; i++) {
		qcache_t *qc = &vm->vm_qcache[i - 1];
		size_t size = i << vm->vm_quantum_shift;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
		    0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
		pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
	}
}

static boolean_t
qc_reap(vmem_t *vm)
{
	int i;
	int qcache_idx_max;
	boolean_t didsomething = FALSE;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 1; i <= qcache_idx_max; i++) {
		qcache_t *qc = &vm->vm_qcache[i - 1];

		if (pool_reclaim(&qc->qc_pool) != 0) {
			didsomething = TRUE;
		}
	}

	return didsomething;
}
#endif /* defined(QCACHE) */

#if defined(_KERNEL)
static int
vmem_init(void)
{

	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
	return 0;
}
#endif /* defined(_KERNEL) */

static vmem_addr_t
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return VMEM_ADDR_NULL;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return VMEM_ADDR_NULL;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	VMEM_UNLOCK(vm);

	return addr;
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;

	VMEM_ASSERT_UNLOCKED(vm);

	if (vm->vm_allocfn == NULL) {
		return EINVAL;
	}

	addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
	if (addr == VMEM_ADDR_NULL) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
		(*vm->vm_freefn)(vm->vm_source, addr, size);
		return ENOMEM;
	}

	return 0;
}

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);
	VMEM_ASSERT_UNLOCKED(vm);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	xfree(oldhashlist);

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 */

static vmem_addr_t
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr - 1) {
		end = maxaddr - 1;
	}
	if (start >= end) {
		return VMEM_ADDR_NULL;
	}
	start = -(-(start - phase) & -align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (((start ^ (start + size - 1)) & -nocross) != 0) {
		KASSERT(align < nocross);
		start = -(-(start - phase) & -nocross) + phase;
	}
	if (start < end && end - start >= size) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(((start ^ (start + size - 1)) & -nocross) == 0);
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));
		return start;
	}
	return VMEM_ADDR_NULL;
}
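
/*
 * Worked example of the alignment arithmetic above (added; not part of the
 * original source).  For a power-of-two "align", the expression
 *
 *	start = -(-(start - phase) & -align) + phase;
 *
 * rounds "start" up to the smallest address >= start that is congruent to
 * "phase" modulo "align".  With start == 35, align == 16 and phase == 4:
 * 35 - 4 == 31, rounded up to a multiple of 16 gives 32, and adding the
 * phase back yields 36, which satisfies 36 >= 35 and 36 % 16 == 4.
 */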

/* ---- vmem API */

/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum,
    vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
    void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
{
	vmem_t *vm;
	int i;
#if defined(_KERNEL)
	static ONCE_DECL(control);
#endif /* defined(_KERNEL) */

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

#if defined(_KERNEL)
	if (RUN_ONCE(&control, vmem_init)) {
		return NULL;
	}
#endif /* defined(_KERNEL) */
	vm = xmalloc(sizeof(*vm), flags);
	if (vm == NULL) {
		return NULL;
	}

	VMEM_LOCK_INIT(vm);
	vm->vm_name = name;
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = calc_order(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_allocfn = allocfn;
	vm->vm_freefn = freefn;
	vm->vm_source = source;
	vm->vm_nbusytag = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	vm->vm_hashlist = NULL;
	if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
		vmem_destroy(vm);
		return NULL;
	}

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) == 0) {
			vmem_destroy(vm);
			return NULL;
		}
	}

	return vm;
}
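
/*
 * Usage sketch (added for illustration; the arena name, base, size and
 * quantum below are made up, not taken from this file).  Note that the
 * quantum must be a power of two:
 *
 *	vmem_t *arena;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP);
 *	if (arena == NULL)
 *		panic("vmem_create failed");
 *
 * A sub-arena can instead pass allocfn/freefn and a "source" arena, in
 * which case spans are imported on demand via vmem_import() above.
 */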

void
vmem_destroy(vmem_t *vm)
{

	VMEM_ASSERT_UNLOCKED(vm);

	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		xfree(vm->vm_hashlist);
	}
	xfree(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
{
	const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = size >> vm->vm_quantum_shift;
		qcache_t *qc = &vm->vm_qcache[qidx - 1];

		return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
		    vmf_to_prf(flags));
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
}

vmem_addr_t
vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
    vm_flag_t flags)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(maxaddr == 0 || minaddr < maxaddr);
	KASSERT(((phase ^ (phase + size - 1)) & -nocross) == 0);

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return VMEM_ADDR_NULL;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return VMEM_ADDR_NULL;
	}

retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	if (strat == VM_INSTANTFIT) {
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				start = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr);
				if (start != VMEM_ADDR_NULL) {
					goto gotit;
				}
			}
		}
	} else { /* VM_BESTFIT */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					start = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr);
					if (start != VMEM_ADDR_NULL) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
	    nocross != 0 || minaddr != 0 || maxaddr != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		return VMEM_ADDR_NULL;
	}
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */
	return VMEM_ADDR_NULL;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
		btnew2 = NULL;
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	return btnew->bt_start;
}
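
/*
 * Illustrative call (added; the parameter values are made up and assume the
 * arena quantum divides them): a caller that needs a 64KB range aligned to
 * 64KB, not crossing a 1MB boundary and lying below 16MB, could use
 *
 *	addr = vmem_xalloc(vm, 64 * 1024, 64 * 1024, 0, 1024 * 1024,
 *	    0, 16 * 1024 * 1024, VM_INSTANTFIT | VM_NOSLEEP);
 *
 * and should release it with vmem_xfree(), since vmem_xalloc() bypasses
 * the quantum caches.
 */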

/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = &vm->vm_qcache[qidx - 1];

		return pool_cache_put(&qc->qc_cache, (void *)addr);
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size __unused)
{
	bt_t *bt;
	bt_t *t;

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) == t->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt_free(vm, t);
	}
	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) == bt->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_free(vm, t);
	}

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_free(vm, bt);
		bt_remseg(vm, t);
		bt_free(vm, t);
		VMEM_UNLOCK(vm);
		(*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_UNLOCK(vm);
	}
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_reap: reap unused resources.
 *
 * => return TRUE if we successfully reaped something.
 */

boolean_t
vmem_reap(vmem_t *vm)
{
	boolean_t didsomething = FALSE;

	VMEM_ASSERT_UNLOCKED(vm);

#if defined(QCACHE)
	didsomething = qc_reap(vm);
#endif /* defined(QCACHE) */
	return didsomething;
}

/* ---- debug */

#if defined(VMEM_DEBUG)

#if !defined(_KERNEL)
#include <stdio.h>
#endif /* !defined(_KERNEL) */

void bt_dump(const bt_t *);

void
bt_dump(const bt_t *bt)
{

	printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type);
}

void
vmem_dump(const vmem_t *vm)
{
	const bt_t *bt;
	int i;

	printf("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		printf("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt);
			if (bt->bt_size) {
			}
		}
	}
}

#if !defined(_KERNEL)

#include <stdlib.h>

int
main()
{
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		boolean_t x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
	    NULL, NULL, NULL, 0, VM_NOSLEEP);
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm);

	p = vmem_add(vm, 100, 200, VM_SLEEP);
	p = vmem_add(vm, 2000, 1, VM_SLEEP);
	p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
	p = vmem_add(vm, 10000, 10000, VM_SLEEP);
	p = vmem_add(vm, 500, 1000, VM_SLEEP);
	vmem_dump(vm);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			boolean_t x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = TRUE;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (((phase ^ (phase + sz)) & -nocross) != 0) {
					nocross = 0;
				}
				minaddr = rand() % 50000;
				maxaddr = rand() % 70000;
				if (minaddr > maxaddr) {
					minaddr = 0;
					maxaddr = 0;
				}
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				p = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP);
			} else {
				x = FALSE;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				p = vmem_alloc(vm, sz, strat|VM_SLEEP);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm);
			if (p == VMEM_ADDR_NULL) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* !defined(_KERNEL) */
#endif /* defined(VMEM_DEBUG) */