/*	$NetBSD: subr_vmem.c,v 1.12 2006/10/17 08:54:03 yamt Exp $	*/
2
3 /*-
4 * Copyright (c)2006 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * reference:
31 * - Magazines and Vmem: Extending the Slab Allocator
32 * to Many CPUs and Arbitrary Resources
33 * http://www.usenix.org/event/usenix01/bonwick.html
34 *
35 * TODO:
36 * - implement vmem_xalloc/vmem_xfree
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.12 2006/10/17 08:54:03 yamt Exp $");
41
/*
 * VMEM_DEBUG compiles in vmem_dump() and, outside the kernel, the
 * stand-alone test driver at the end of this file.
 */
#define VMEM_DEBUG
#if defined(_KERNEL)
/* The quantum cache is built on struct pool, so it is kernel-only. */
#define QCACHE
#endif /* defined(_KERNEL) */
46
47 #include <sys/param.h>
48 #include <sys/hash.h>
49 #include <sys/queue.h>
50
51 #if defined(_KERNEL)
52 #include <sys/systm.h>
53 #include <sys/lock.h>
54 #include <sys/malloc.h>
55 #include <sys/once.h>
56 #include <sys/pool.h>
57 #include <sys/proc.h>
58 #include <sys/vmem.h>
59 #else /* defined(_KERNEL) */
60 #include "../sys/vmem.h"
61 #endif /* defined(_KERNEL) */
62
/*
 * Kernel/userland compatibility shims.
 *
 * In the kernel the arena carries a real simplelock.  In the userland
 * (test) build the lock member and every locking/assertion primitive
 * expand to nothing, and KASSERT maps onto assert(3).
 */
#if defined(_KERNEL)
#define SIMPLELOCK_DECL(name) struct simplelock name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define KASSERT(a) assert(a)
#define SIMPLELOCK_DECL(name) /* nothing */
#define LOCK_ASSERT(a) /* nothing */
#define simple_lock_init(a) /* nothing */
#define simple_lock(a) /* nothing */
#define simple_unlock(a) /* nothing */
#define ASSERT_SLEEPABLE(lk, msg) /* nothing */
#endif /* defined(_KERNEL) */
78
struct vmem;
struct vmem_btag;

#if defined(VMEM_DEBUG)
void vmem_dump(const vmem_t *);
#endif /* defined(VMEM_DEBUG) */

/* Number of bits in vmem_size_t; this is the number of free lists per arena. */
#define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT)
/* Bucket count handed to vmem_rehash() when an arena is created. */
#define VMEM_HASHSIZE_INIT 4096 /* XXX */

/* Mask selecting the allocation strategy bits out of a vm_flag_t. */
#define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT)

/* List head types: segment list, per-order free lists, busy-tag hash chains. */
CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
94
#if defined(QCACHE)
/* Size of the per-arena qcache array; qc_init() clamps qcache_max to this. */
#define VMEM_QCACHE_IDX_MAX 32

/* Size of the buffer holding a quantum cache's pool name. */
#define QC_NAME_MAX 16

/*
 * quantum cache: one pool + pool cache serving a single allocation size.
 *
 * qc_pool must stay the first member: QC_POOL_TO_QCACHE() converts a
 * struct pool pointer back to its qcache by a plain cast.
 */
struct qcache {
	struct pool qc_pool;
	struct pool_cache qc_cache;
	vmem_t *qc_vmem;		/* arena backing this cache */
	char qc_name[QC_NAME_MAX];	/* "<arena name>-<size>", see qc_init() */
};
typedef struct qcache qcache_t;
#define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool))
#endif /* defined(QCACHE) */
109
110 /* vmem arena */
struct vmem {
	SIMPLELOCK_DECL(vm_lock);	/* taken around list/hash updates */
	/* import hook: called with vm_source by vmem_import() */
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	/* release hook: called with vm_source to give a span back */
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;		/* argument passed to the two hooks */
	struct vmem_seglist vm_seglist;	/* every boundary tag of the arena */
	/* free tags, bucketed by calc_order(size in quanta) */
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;		/* number of buckets in vm_hashlist */
	size_t vm_nbusytag;		/* number of tags currently hashed */
	struct vmem_hashlist *vm_hashlist; /* busy tags, keyed by bt_start */
	size_t vm_quantum_mask;		/* quantum - 1 */
	int vm_quantum_shift;		/* calc_order(quantum) */
	const char *vm_name;		/* caller's string; not copied */

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;		/* largest size served from vm_qcache */
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX]; /* [n - 1] serves n quanta */
#endif /* defined(QCACHE) */
};
133
/*
 * Arena locking helpers.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed, and none of the macros ends in a semicolon, so each behaves as
 * a single expression statement when written "VMEM_xxx(vm);" -- including
 * as the sole body of an unbraced if/else.
 */
#define VMEM_LOCK(vm)		simple_lock(&(vm)->vm_lock)
#define VMEM_UNLOCK(vm)		simple_unlock(&(vm)->vm_lock)
#define VMEM_LOCK_INIT(vm)	simple_lock_init(&(vm)->vm_lock)
#define VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&(vm)->vm_lock))
#define VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&(vm)->vm_lock))
141
142 /* boundary tag */
/*
 * A boundary tag describes one contiguous range [bt_start, bt_start + bt_size).
 * Every tag is linked on the arena's segment list; in addition a free tag
 * sits on one of the free lists and a busy tag on one hash chain, which
 * is why the two link fields can share storage.
 */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define bt_hashlist bt_u.u_hashlist
#define bt_freelist bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;			/* one of BT_TYPE_* below */
};

/* span imported through vm_allocfn (see vmem_import) */
#define BT_TYPE_SPAN 1
/* span added directly by vmem_add */
#define BT_TYPE_SPAN_STATIC 2
#define BT_TYPE_FREE 3
#define BT_TYPE_BUSY 4
/* true for both span types; relies on them being the two smallest values */
#define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

/* first address past the range described by the tag */
#define BT_END(bt) ((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;
165
166 /* ---- misc */
167
/* 2^order as a vmem_size_t; order must be less than VMEM_MAXORDER. */
#define ORDER2SIZE(order) ((vmem_size_t)1 << (order))
169
170 static int
171 calc_order(vmem_size_t size)
172 {
173 vmem_size_t target;
174 int i;
175
176 KASSERT(size != 0);
177
178 i = 0;
179 target = size >> 1;
180 while (ORDER2SIZE(i) <= target) {
181 i++;
182 }
183
184 KASSERT(ORDER2SIZE(i) <= size);
185 KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
186
187 return i;
188 }
189
#if defined(_KERNEL)
/* malloc type used by xmalloc()/xfree() below. */
static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
#endif /* defined(_KERNEL) */
193
194 static void *
195 xmalloc(size_t sz, vm_flag_t flags)
196 {
197
198 #if defined(_KERNEL)
199 return malloc(sz, M_VMEM,
200 M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
201 #else /* defined(_KERNEL) */
202 return malloc(sz);
203 #endif /* defined(_KERNEL) */
204 }
205
/*
 * xfree: release memory obtained from xmalloc().
 *
 * The underlying free routine returns nothing, so it is called as a
 * statement; "return <void expression>;" is not valid ISO C in a
 * function returning void.
 */
static void
xfree(void *p)
{

#if defined(_KERNEL)
	free(p, M_VMEM);
#else /* defined(_KERNEL) */
	free(p);
#endif /* defined(_KERNEL) */
}
216
217 /* ---- boundary tag */
218
#if defined(_KERNEL)
/*
 * Kernel boundary tags are taken from bt_poolcache, which vmem_init()
 * attaches to bt_pool.
 */
static struct pool_cache bt_poolcache;
static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
#endif /* defined(_KERNEL) */
223
224 static bt_t *
225 bt_alloc(vmem_t *vm __unused, vm_flag_t flags)
226 {
227 bt_t *bt;
228
229 #if defined(_KERNEL)
230 /* XXX bootstrap */
231 bt = pool_cache_get(&bt_poolcache,
232 (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
233 #else /* defined(_KERNEL) */
234 bt = malloc(sizeof *bt);
235 #endif /* defined(_KERNEL) */
236
237 return bt;
238 }
239
240 static void
241 bt_free(vmem_t *vm __unused, bt_t *bt)
242 {
243
244 #if defined(_KERNEL)
245 /* XXX bootstrap */
246 pool_cache_put(&bt_poolcache, bt);
247 #else /* defined(_KERNEL) */
248 free(bt);
249 #endif /* defined(_KERNEL) */
250 }
251
252 /*
253 * freelist[0] ... [1, 1]
254 * freelist[1] ... [2, 3]
255 * freelist[2] ... [4, 7]
256 * freelist[3] ... [8, 15]
257 * :
258 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
259 * :
260 */
261
262 static struct vmem_freelist *
263 bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
264 {
265 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
266 int idx;
267
268 KASSERT((size & vm->vm_quantum_mask) == 0);
269 KASSERT(size != 0);
270
271 idx = calc_order(qsize);
272 KASSERT(idx >= 0);
273 KASSERT(idx < VMEM_MAXORDER);
274
275 return &vm->vm_freelist[idx];
276 }
277
278 static struct vmem_freelist *
279 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
280 {
281 const vmem_size_t qsize = size >> vm->vm_quantum_shift;
282 int idx;
283
284 KASSERT((size & vm->vm_quantum_mask) == 0);
285 KASSERT(size != 0);
286
287 idx = calc_order(qsize);
288 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
289 idx++;
290 /* check too large request? */
291 }
292 KASSERT(idx >= 0);
293 KASSERT(idx < VMEM_MAXORDER);
294
295 return &vm->vm_freelist[idx];
296 }
297
298 /* ---- boundary tag hash */
299
300 static struct vmem_hashlist *
301 bt_hashhead(vmem_t *vm, vmem_addr_t addr)
302 {
303 struct vmem_hashlist *list;
304 unsigned int hash;
305
306 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
307 list = &vm->vm_hashlist[hash % vm->vm_hashsize];
308
309 return list;
310 }
311
312 static bt_t *
313 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
314 {
315 struct vmem_hashlist *list;
316 bt_t *bt;
317
318 list = bt_hashhead(vm, addr);
319 LIST_FOREACH(bt, list, bt_hashlist) {
320 if (bt->bt_start == addr) {
321 break;
322 }
323 }
324
325 return bt;
326 }
327
328 static void
329 bt_rembusy(vmem_t *vm, bt_t *bt)
330 {
331
332 KASSERT(vm->vm_nbusytag > 0);
333 vm->vm_nbusytag--;
334 LIST_REMOVE(bt, bt_hashlist);
335 }
336
337 static void
338 bt_insbusy(vmem_t *vm, bt_t *bt)
339 {
340 struct vmem_hashlist *list;
341
342 KASSERT(bt->bt_type == BT_TYPE_BUSY);
343
344 list = bt_hashhead(vm, bt->bt_start);
345 LIST_INSERT_HEAD(list, bt, bt_hashlist);
346 vm->vm_nbusytag++;
347 }
348
349 /* ---- boundary tag list */
350
351 static void
352 bt_remseg(vmem_t *vm, bt_t *bt)
353 {
354
355 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
356 }
357
358 static void
359 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
360 {
361
362 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
363 }
364
365 static void
366 bt_insseg_tail(vmem_t *vm, bt_t *bt)
367 {
368
369 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
370 }
371
372 static void
373 bt_remfree(vmem_t *vm __unused, bt_t *bt)
374 {
375
376 KASSERT(bt->bt_type == BT_TYPE_FREE);
377
378 LIST_REMOVE(bt, bt_freelist);
379 }
380
381 static void
382 bt_insfree(vmem_t *vm, bt_t *bt)
383 {
384 struct vmem_freelist *list;
385
386 list = bt_freehead_tofree(vm, bt->bt_size);
387 LIST_INSERT_HEAD(list, bt, bt_freelist);
388 }
389
390 /* ---- vmem internal functions */
391
392 #if defined(QCACHE)
393 static inline vm_flag_t
394 prf_to_vmf(int prflags)
395 {
396 vm_flag_t vmflags;
397
398 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
399 if ((prflags & PR_WAITOK) != 0) {
400 vmflags = VM_SLEEP;
401 } else {
402 vmflags = VM_NOSLEEP;
403 }
404 return vmflags;
405 }
406
407 static inline int
408 vmf_to_prf(vm_flag_t vmflags)
409 {
410 int prflags;
411
412 if ((vmflags & VM_SLEEP) != 0) {
413 prflags = PR_WAITOK;
414 } else {
415 prflags = PR_NOWAIT;
416 }
417 return prflags;
418 }
419
/*
 * qc_poolpage_size: choose the pool page size for the quantum caches:
 * the smallest power of two strictly greater than 3 * qcache_max.
 */
static size_t
qc_poolpage_size(size_t qcache_max)
{
	int order = 0;

	while (ORDER2SIZE(order) <= qcache_max * 3) {
		order++;
	}

	return ORDER2SIZE(order);
}
430
431 static void *
432 qc_poolpage_alloc(struct pool *pool, int prflags)
433 {
434 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
435 vmem_t *vm = qc->qc_vmem;
436
437 return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
438 prf_to_vmf(prflags) | VM_INSTANTFIT);
439 }
440
441 static void
442 qc_poolpage_free(struct pool *pool, void *addr)
443 {
444 qcache_t *qc = QC_POOL_TO_QCACHE(pool);
445 vmem_t *vm = qc->qc_vmem;
446
447 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
448 }
449
450 static void
451 qc_init(vmem_t *vm, size_t qcache_max)
452 {
453 struct pool_allocator *pa;
454 int qcache_idx_max;
455 int i;
456
457 KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
458 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
459 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
460 }
461 vm->vm_qcache_max = qcache_max;
462 pa = &vm->vm_qcache_allocator;
463 memset(pa, 0, sizeof(*pa));
464 pa->pa_alloc = qc_poolpage_alloc;
465 pa->pa_free = qc_poolpage_free;
466 pa->pa_pagesz = qc_poolpage_size(qcache_max);
467
468 qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
469 for (i = 1; i <= qcache_idx_max; i++) {
470 qcache_t *qc = &vm->vm_qcache[i - 1];
471 size_t size = i << vm->vm_quantum_shift;
472
473 qc->qc_vmem = vm;
474 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
475 vm->vm_name, size);
476 pool_init(&qc->qc_pool, size, 0, 0,
477 PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
478 pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
479 }
480 }
481
482 static boolean_t
483 qc_reap(vmem_t *vm)
484 {
485 int i;
486 int qcache_idx_max;
487 boolean_t didsomething = FALSE;
488
489 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
490 for (i = 1; i <= qcache_idx_max; i++) {
491 qcache_t *qc = &vm->vm_qcache[i - 1];
492
493 if (pool_reclaim(&qc->qc_pool) != 0) {
494 didsomething = TRUE;
495 }
496 }
497
498 return didsomething;
499 }
500 #endif /* defined(QCACHE) */
501
502 #if defined(_KERNEL)
503 static int
504 vmem_init(void)
505 {
506
507 pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
508 return 0;
509 }
510 #endif /* defined(_KERNEL) */
511
512 static vmem_addr_t
513 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
514 int spanbttype)
515 {
516 bt_t *btspan;
517 bt_t *btfree;
518
519 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
520 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
521 VMEM_ASSERT_UNLOCKED(vm);
522
523 btspan = bt_alloc(vm, flags);
524 if (btspan == NULL) {
525 return VMEM_ADDR_NULL;
526 }
527 btfree = bt_alloc(vm, flags);
528 if (btfree == NULL) {
529 bt_free(vm, btspan);
530 return VMEM_ADDR_NULL;
531 }
532
533 btspan->bt_type = spanbttype;
534 btspan->bt_start = addr;
535 btspan->bt_size = size;
536
537 btfree->bt_type = BT_TYPE_FREE;
538 btfree->bt_start = addr;
539 btfree->bt_size = size;
540
541 VMEM_LOCK(vm);
542 bt_insseg_tail(vm, btspan);
543 bt_insseg(vm, btfree, btspan);
544 bt_insfree(vm, btfree);
545 VMEM_UNLOCK(vm);
546
547 return addr;
548 }
549
550 static int
551 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
552 {
553 vmem_addr_t addr;
554
555 VMEM_ASSERT_UNLOCKED(vm);
556
557 if (vm->vm_allocfn == NULL) {
558 return EINVAL;
559 }
560
561 addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
562 if (addr == VMEM_ADDR_NULL) {
563 return ENOMEM;
564 }
565
566 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
567 (*vm->vm_freefn)(vm->vm_source, addr, size);
568 return ENOMEM;
569 }
570
571 return 0;
572 }
573
574 static int
575 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
576 {
577 bt_t *bt;
578 int i;
579 struct vmem_hashlist *newhashlist;
580 struct vmem_hashlist *oldhashlist;
581 size_t oldhashsize;
582
583 KASSERT(newhashsize > 0);
584 VMEM_ASSERT_UNLOCKED(vm);
585
586 newhashlist =
587 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
588 if (newhashlist == NULL) {
589 return ENOMEM;
590 }
591 for (i = 0; i < newhashsize; i++) {
592 LIST_INIT(&newhashlist[i]);
593 }
594
595 VMEM_LOCK(vm);
596 oldhashlist = vm->vm_hashlist;
597 oldhashsize = vm->vm_hashsize;
598 vm->vm_hashlist = newhashlist;
599 vm->vm_hashsize = newhashsize;
600 if (oldhashlist == NULL) {
601 VMEM_UNLOCK(vm);
602 return 0;
603 }
604 for (i = 0; i < oldhashsize; i++) {
605 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
606 bt_rembusy(vm, bt); /* XXX */
607 bt_insbusy(vm, bt);
608 }
609 }
610 VMEM_UNLOCK(vm);
611
612 xfree(oldhashlist);
613
614 return 0;
615 }
616
617 /*
618 * vmem_fit: check if a bt can satisfy the given restrictions.
619 */
620
/*
 * Given a candidate tag, compute the lowest start address inside it that
 * satisfies the alignment/phase, nocross and address-window restrictions
 * for an allocation of `size`.
 *
 * => returns that address, or VMEM_ADDR_NULL if the tag cannot hold the
 *    allocation under the restrictions.
 * => bt->bt_size must be at least size.
 * => align is expected to be a non-zero power of two (vmem_xalloc
 *    asserts the power-of-two property and substitutes the quantum
 *    for 0).
 */
static vmem_addr_t
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	/* clamp the tag's range to the caller's address window */
	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	/*
	 * With maxaddr == 0, maxaddr - 1 wraps to the largest address
	 * (given the unsigned assumption above), so no clamping happens.
	 */
	if (end > maxaddr - 1) {
		end = maxaddr - 1;
	}
	if (start >= end) {
		return VMEM_ADDR_NULL;
	}
	/* round (start - phase) up to a multiple of align, then re-add phase */
	start = -(-(start - phase) & -align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	/*
	 * First and last byte differ in a bit at or above nocross: the
	 * range would straddle a nocross boundary, so move up to the next
	 * boundary (plus phase).  With nocross == 0 the mask is 0 and the
	 * test never fires.
	 */
	if (((start ^ (start + size - 1)) & -nocross) != 0) {
		KASSERT(align < nocross);
		start = -(-(start - phase) & -nocross) + phase;
	}
	if (start < end && end - start >= size) {
		/* re-check every restriction on the chosen address */
		KASSERT((start & (align - 1)) == phase);
		KASSERT(((start ^ (start + size - 1)) & -nocross) == 0);
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));
		return start;
	}
	return VMEM_ADDR_NULL;
}
665
666 /* ---- vmem API */
667
668 /*
669 * vmem_create: create an arena.
670 *
671 * => must not be called from interrupt context.
672 */
673
674 vmem_t *
675 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
676 vmem_size_t quantum,
677 vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
678 void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
679 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
680 {
681 vmem_t *vm;
682 int i;
683 #if defined(_KERNEL)
684 static ONCE_DECL(control);
685 #endif /* defined(_KERNEL) */
686
687 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
688 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
689
690 #if defined(_KERNEL)
691 if (RUN_ONCE(&control, vmem_init)) {
692 return NULL;
693 }
694 #endif /* defined(_KERNEL) */
695 vm = xmalloc(sizeof(*vm), flags);
696 if (vm == NULL) {
697 return NULL;
698 }
699
700 VMEM_LOCK_INIT(vm);
701 vm->vm_name = name;
702 vm->vm_quantum_mask = quantum - 1;
703 vm->vm_quantum_shift = calc_order(quantum);
704 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
705 vm->vm_allocfn = allocfn;
706 vm->vm_freefn = freefn;
707 vm->vm_source = source;
708 vm->vm_nbusytag = 0;
709 #if defined(QCACHE)
710 qc_init(vm, qcache_max);
711 #endif /* defined(QCACHE) */
712
713 CIRCLEQ_INIT(&vm->vm_seglist);
714 for (i = 0; i < VMEM_MAXORDER; i++) {
715 LIST_INIT(&vm->vm_freelist[i]);
716 }
717 vm->vm_hashlist = NULL;
718 if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
719 vmem_destroy(vm);
720 return NULL;
721 }
722
723 if (size != 0) {
724 if (vmem_add(vm, base, size, flags) == 0) {
725 vmem_destroy(vm);
726 return NULL;
727 }
728 }
729
730 return vm;
731 }
732
733 void
734 vmem_destroy(vmem_t *vm)
735 {
736
737 VMEM_ASSERT_UNLOCKED(vm);
738
739 if (vm->vm_hashlist != NULL) {
740 int i;
741
742 for (i = 0; i < vm->vm_hashsize; i++) {
743 bt_t *bt;
744
745 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
746 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
747 bt_free(vm, bt);
748 }
749 }
750 xfree(vm->vm_hashlist);
751 }
752 xfree(vm);
753 }
754
755 vmem_size_t
756 vmem_roundup_size(vmem_t *vm, vmem_size_t size)
757 {
758
759 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
760 }
761
762 /*
763 * vmem_alloc:
764 *
765 * => caller must ensure appropriate spl,
766 * if the arena can be accessed from interrupt context.
767 */
768
769 vmem_addr_t
770 vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
771 {
772 const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
773 const vm_flag_t strat __unused = flags & VM_FITMASK;
774
775 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
776 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
777 VMEM_ASSERT_UNLOCKED(vm);
778
779 KASSERT(size0 > 0);
780 KASSERT(size > 0);
781 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
782 if ((flags & VM_SLEEP) != 0) {
783 ASSERT_SLEEPABLE(NULL, "vmem_alloc");
784 }
785
786 #if defined(QCACHE)
787 if (size <= vm->vm_qcache_max) {
788 int qidx = size >> vm->vm_quantum_shift;
789 qcache_t *qc = &vm->vm_qcache[qidx - 1];
790
791 return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
792 vmf_to_prf(flags));
793 }
794 #endif /* defined(QCACHE) */
795
796 return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
797 }
798
799 vmem_addr_t
800 vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
801 vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
802 vm_flag_t flags)
803 {
804 struct vmem_freelist *list;
805 struct vmem_freelist *first;
806 struct vmem_freelist *end;
807 bt_t *bt;
808 bt_t *btnew;
809 bt_t *btnew2;
810 const vmem_size_t size = vmem_roundup_size(vm, size0);
811 vm_flag_t strat = flags & VM_FITMASK;
812 vmem_addr_t start;
813
814 KASSERT(size0 > 0);
815 KASSERT(size > 0);
816 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
817 if ((flags & VM_SLEEP) != 0) {
818 ASSERT_SLEEPABLE(NULL, "vmem_alloc");
819 }
820 KASSERT((align & vm->vm_quantum_mask) == 0);
821 KASSERT((align & (align - 1)) == 0);
822 KASSERT((phase & vm->vm_quantum_mask) == 0);
823 KASSERT((nocross & vm->vm_quantum_mask) == 0);
824 KASSERT((nocross & (nocross - 1)) == 0);
825 KASSERT((align == 0 && phase == 0) || phase < align);
826 KASSERT(nocross == 0 || nocross >= size);
827 KASSERT(maxaddr == 0 || minaddr < maxaddr);
828 KASSERT(((phase ^ (phase + size - 1)) & -nocross) == 0);
829
830 if (align == 0) {
831 align = vm->vm_quantum_mask + 1;
832 }
833 btnew = bt_alloc(vm, flags);
834 if (btnew == NULL) {
835 return VMEM_ADDR_NULL;
836 }
837 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
838 if (btnew2 == NULL) {
839 bt_free(vm, btnew);
840 return VMEM_ADDR_NULL;
841 }
842
843 retry_strat:
844 first = bt_freehead_toalloc(vm, size, strat);
845 end = &vm->vm_freelist[VMEM_MAXORDER];
846 retry:
847 bt = NULL;
848 VMEM_LOCK(vm);
849 if (strat == VM_INSTANTFIT) {
850 for (list = first; list < end; list++) {
851 bt = LIST_FIRST(list);
852 if (bt != NULL) {
853 start = vmem_fit(bt, size, align, phase,
854 nocross, minaddr, maxaddr);
855 if (start != VMEM_ADDR_NULL) {
856 goto gotit;
857 }
858 }
859 }
860 } else { /* VM_BESTFIT */
861 for (list = first; list < end; list++) {
862 LIST_FOREACH(bt, list, bt_freelist) {
863 if (bt->bt_size >= size) {
864 start = vmem_fit(bt, size, align, phase,
865 nocross, minaddr, maxaddr);
866 if (start != VMEM_ADDR_NULL) {
867 goto gotit;
868 }
869 }
870 }
871 }
872 }
873 VMEM_UNLOCK(vm);
874 #if 1
875 if (strat == VM_INSTANTFIT) {
876 strat = VM_BESTFIT;
877 goto retry_strat;
878 }
879 #endif
880 if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
881 nocross != 0 || minaddr != 0 || maxaddr != 0) {
882
883 /*
884 * XXX should try to import a region large enough to
885 * satisfy restrictions?
886 */
887
888 return VMEM_ADDR_NULL;
889 }
890 if (vmem_import(vm, size, flags) == 0) {
891 goto retry;
892 }
893 /* XXX */
894 return VMEM_ADDR_NULL;
895
896 gotit:
897 KASSERT(bt->bt_type == BT_TYPE_FREE);
898 KASSERT(bt->bt_size >= size);
899 bt_remfree(vm, bt);
900 if (bt->bt_start != start) {
901 btnew2->bt_type = BT_TYPE_FREE;
902 btnew2->bt_start = bt->bt_start;
903 btnew2->bt_size = start - bt->bt_start;
904 bt->bt_start = start;
905 bt->bt_size -= btnew2->bt_size;
906 bt_insfree(vm, btnew2);
907 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
908 btnew2 = NULL;
909 }
910 KASSERT(bt->bt_start == start);
911 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
912 /* split */
913 btnew->bt_type = BT_TYPE_BUSY;
914 btnew->bt_start = bt->bt_start;
915 btnew->bt_size = size;
916 bt->bt_start = bt->bt_start + size;
917 bt->bt_size -= size;
918 bt_insfree(vm, bt);
919 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
920 bt_insbusy(vm, btnew);
921 VMEM_UNLOCK(vm);
922 } else {
923 bt->bt_type = BT_TYPE_BUSY;
924 bt_insbusy(vm, bt);
925 VMEM_UNLOCK(vm);
926 bt_free(vm, btnew);
927 btnew = bt;
928 }
929 if (btnew2 != NULL) {
930 bt_free(vm, btnew2);
931 }
932 KASSERT(btnew->bt_size >= size);
933 btnew->bt_type = BT_TYPE_BUSY;
934
935 return btnew->bt_start;
936 }
937
938 /*
939 * vmem_free:
940 *
941 * => caller must ensure appropriate spl,
942 * if the arena can be accessed from interrupt context.
943 */
944
945 void
946 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
947 {
948
949 VMEM_ASSERT_UNLOCKED(vm);
950 KASSERT(addr != VMEM_ADDR_NULL);
951 KASSERT(size > 0);
952
953 #if defined(QCACHE)
954 if (size <= vm->vm_qcache_max) {
955 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
956 qcache_t *qc = &vm->vm_qcache[qidx - 1];
957
958 return pool_cache_put(&qc->qc_cache, (void *)addr);
959 }
960 #endif /* defined(QCACHE) */
961
962 vmem_xfree(vm, addr, size);
963 }
964
965 void
966 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size __unused)
967 {
968 bt_t *bt;
969 bt_t *t;
970
971 VMEM_ASSERT_UNLOCKED(vm);
972 KASSERT(addr != VMEM_ADDR_NULL);
973 KASSERT(size > 0);
974
975 VMEM_LOCK(vm);
976
977 bt = bt_lookupbusy(vm, addr);
978 KASSERT(bt != NULL);
979 KASSERT(bt->bt_start == addr);
980 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
981 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
982 KASSERT(bt->bt_type == BT_TYPE_BUSY);
983 bt_rembusy(vm, bt);
984 bt->bt_type = BT_TYPE_FREE;
985
986 /* coalesce */
987 t = CIRCLEQ_NEXT(bt, bt_seglist);
988 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
989 KASSERT(BT_END(bt) == t->bt_start);
990 bt_remfree(vm, t);
991 bt_remseg(vm, t);
992 bt->bt_size += t->bt_size;
993 bt_free(vm, t);
994 }
995 t = CIRCLEQ_PREV(bt, bt_seglist);
996 if (t != NULL && t->bt_type == BT_TYPE_FREE) {
997 KASSERT(BT_END(t) == bt->bt_start);
998 bt_remfree(vm, t);
999 bt_remseg(vm, t);
1000 bt->bt_size += t->bt_size;
1001 bt->bt_start = t->bt_start;
1002 bt_free(vm, t);
1003 }
1004
1005 t = CIRCLEQ_PREV(bt, bt_seglist);
1006 KASSERT(t != NULL);
1007 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1008 if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1009 t->bt_size == bt->bt_size) {
1010 vmem_addr_t spanaddr;
1011 vmem_size_t spansize;
1012
1013 KASSERT(t->bt_start == bt->bt_start);
1014 spanaddr = bt->bt_start;
1015 spansize = bt->bt_size;
1016 bt_remseg(vm, bt);
1017 bt_free(vm, bt);
1018 bt_remseg(vm, t);
1019 bt_free(vm, t);
1020 VMEM_UNLOCK(vm);
1021 (*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
1022 } else {
1023 bt_insfree(vm, bt);
1024 VMEM_UNLOCK(vm);
1025 }
1026 }
1027
1028 /*
1029 * vmem_add:
1030 *
1031 * => caller must ensure appropriate spl,
1032 * if the arena can be accessed from interrupt context.
1033 */
1034
1035 vmem_addr_t
1036 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
1037 {
1038
1039 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
1040 }
1041
1042 /*
1043 * vmem_reap: reap unused resources.
1044 *
1045 * => return TRUE if we successfully reaped something.
1046 */
1047
1048 boolean_t
1049 vmem_reap(vmem_t *vm)
1050 {
1051 boolean_t didsomething = FALSE;
1052
1053 VMEM_ASSERT_UNLOCKED(vm);
1054
1055 #if defined(QCACHE)
1056 didsomething = qc_reap(vm);
1057 #endif /* defined(QCACHE) */
1058 return didsomething;
1059 }
1060
1061 /* ---- debug */
1062
1063 #if defined(VMEM_DEBUG)
1064
1065 #if !defined(_KERNEL)
1066 #include <stdio.h>
1067 #endif /* !defined(_KERNEL) */
1068
1069 void bt_dump(const bt_t *);
1070
1071 void
1072 bt_dump(const bt_t *bt)
1073 {
1074
1075 printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
1076 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
1077 bt->bt_type);
1078 }
1079
1080 void
1081 vmem_dump(const vmem_t *vm)
1082 {
1083 const bt_t *bt;
1084 int i;
1085
1086 printf("vmem %p '%s'\n", vm, vm->vm_name);
1087 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1088 bt_dump(bt);
1089 }
1090
1091 for (i = 0; i < VMEM_MAXORDER; i++) {
1092 const struct vmem_freelist *fl = &vm->vm_freelist[i];
1093
1094 if (LIST_EMPTY(fl)) {
1095 continue;
1096 }
1097
1098 printf("freelist[%d]\n", i);
1099 LIST_FOREACH(bt, fl, bt_freelist) {
1100 bt_dump(bt);
1101 if (bt->bt_size) {
1102 }
1103 }
1104 }
1105 }
1106
1107 #if !defined(_KERNEL)
1108
1109 #include <stdlib.h>
1110
1111 int
1112 main()
1113 {
1114 vmem_t *vm;
1115 vmem_addr_t p;
1116 struct reg {
1117 vmem_addr_t p;
1118 vmem_size_t sz;
1119 boolean_t x;
1120 } *reg = NULL;
1121 int nreg = 0;
1122 int nalloc = 0;
1123 int nfree = 0;
1124 vmem_size_t total = 0;
1125 #if 1
1126 vm_flag_t strat = VM_INSTANTFIT;
1127 #else
1128 vm_flag_t strat = VM_BESTFIT;
1129 #endif
1130
1131 vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
1132 NULL, NULL, NULL, 0, VM_NOSLEEP);
1133 if (vm == NULL) {
1134 printf("vmem_create\n");
1135 exit(EXIT_FAILURE);
1136 }
1137 vmem_dump(vm);
1138
1139 p = vmem_add(vm, 100, 200, VM_SLEEP);
1140 p = vmem_add(vm, 2000, 1, VM_SLEEP);
1141 p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
1142 p = vmem_add(vm, 10000, 10000, VM_SLEEP);
1143 p = vmem_add(vm, 500, 1000, VM_SLEEP);
1144 vmem_dump(vm);
1145 for (;;) {
1146 struct reg *r;
1147 int t = rand() % 100;
1148
1149 if (t > 45) {
1150 /* alloc */
1151 vmem_size_t sz = rand() % 500 + 1;
1152 boolean_t x;
1153 vmem_size_t align, phase, nocross;
1154 vmem_addr_t minaddr, maxaddr;
1155
1156 if (t > 70) {
1157 x = TRUE;
1158 /* XXX */
1159 align = 1 << (rand() % 15);
1160 phase = rand() % 65536;
1161 nocross = 1 << (rand() % 15);
1162 if (align <= phase) {
1163 phase = 0;
1164 }
1165 if (((phase ^ (phase + sz)) & -nocross) != 0) {
1166 nocross = 0;
1167 }
1168 minaddr = rand() % 50000;
1169 maxaddr = rand() % 70000;
1170 if (minaddr > maxaddr) {
1171 minaddr = 0;
1172 maxaddr = 0;
1173 }
1174 printf("=== xalloc %" PRIu64
1175 " align=%" PRIu64 ", phase=%" PRIu64
1176 ", nocross=%" PRIu64 ", min=%" PRIu64
1177 ", max=%" PRIu64 "\n",
1178 (uint64_t)sz,
1179 (uint64_t)align,
1180 (uint64_t)phase,
1181 (uint64_t)nocross,
1182 (uint64_t)minaddr,
1183 (uint64_t)maxaddr);
1184 p = vmem_xalloc(vm, sz, align, phase, nocross,
1185 minaddr, maxaddr, strat|VM_SLEEP);
1186 } else {
1187 x = FALSE;
1188 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
1189 p = vmem_alloc(vm, sz, strat|VM_SLEEP);
1190 }
1191 printf("-> %" PRIu64 "\n", (uint64_t)p);
1192 vmem_dump(vm);
1193 if (p == VMEM_ADDR_NULL) {
1194 if (x) {
1195 continue;
1196 }
1197 break;
1198 }
1199 nreg++;
1200 reg = realloc(reg, sizeof(*reg) * nreg);
1201 r = ®[nreg - 1];
1202 r->p = p;
1203 r->sz = sz;
1204 r->x = x;
1205 total += sz;
1206 nalloc++;
1207 } else if (nreg != 0) {
1208 /* free */
1209 r = ®[rand() % nreg];
1210 printf("=== free %" PRIu64 ", %" PRIu64 "\n",
1211 (uint64_t)r->p, (uint64_t)r->sz);
1212 if (r->x) {
1213 vmem_xfree(vm, r->p, r->sz);
1214 } else {
1215 vmem_free(vm, r->p, r->sz);
1216 }
1217 total -= r->sz;
1218 vmem_dump(vm);
1219 *r = reg[nreg - 1];
1220 nreg--;
1221 nfree++;
1222 }
1223 printf("total=%" PRIu64 "\n", (uint64_t)total);
1224 }
1225 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
1226 (uint64_t)total, nalloc, nfree);
1227 exit(EXIT_SUCCESS);
1228 }
1229 #endif /* !defined(_KERNEL) */
1230 #endif /* defined(VMEM_DEBUG) */
1231