/*	$NetBSD: subr_extent.c,v 1.73 2012/01/27 18:53:09 para Exp $	*/

/*-
 * Copyright (c) 1996, 1998, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_extent.c,v 1.73 2012/01/27 18:53:09 para Exp $");

#ifdef _KERNEL
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/extent.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <uvm/uvm_extern.h>

#define	KMEM_IS_RUNNING		(kmem_map != NULL)
#elif defined(_EXTENT_TESTING)
/*
 * user-land definitions, so it can fit into a testing harness.
 */
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/extent.h>

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Use multi-line #defines to avoid screwing up the kernel tags file;
 * without this, ctags produces a tags file where panic() shows up
 * in subr_extent.c rather than subr_prf.c.
 */
#define	\
kmem_alloc(s, flags)		malloc(s)
#define	\
kmem_free(p, s)			free(p)
#define	\
cv_wait_sig(cv, lock)		(EWOULDBLOCK)
#define	\
pool_get(pool, flags)		kmem_alloc((pool)->pr_size,0)
#define	\
pool_put(pool, rp)		kmem_free(rp,0)
#define	\
panic(a)			printf(a)
#define	mutex_init(a, b, c)
#define	mutex_destroy(a)
#define	mutex_enter(l)
#define	mutex_exit(l)
#define	cv_wait(cv, lock)
#define	cv_broadcast(cv)
#define	cv_init(a, b)
#define	cv_destroy(a)
#define	KMEM_IS_RUNNING			(1)
#define	IPL_VM				(0)
#define	MUTEX_DEFAULT			(0)
#endif

static struct pool expool;

/*
 * Macro to align to an arbitrary power-of-two boundary.
 */
#define EXTENT_ALIGN(_start, _align, _skew)		\
	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
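
/*
 * Worked examples (added for exposition; the numbers are illustrative,
 * not from the original source).  With no skew, EXTENT_ALIGN(0x1234,
 * 0x100, 0) rounds up to 0x1300, and an already-aligned value such as
 * EXTENT_ALIGN(0x1200, 0x100, 0) is unchanged.  With a skew, alignment
 * is taken relative to the skew offset: EXTENT_ALIGN(0x1234, 0x100,
 * 0x10) == 0x1310, i.e. the result is congruent to 0x10 modulo 0x100.
 */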

/*
 * Create the extent_region pool.
 */
void
extent_init(void)
{

#if defined(_KERNEL)
	pool_init(&expool, sizeof(struct extent_region), 0, 0, 0,
	    "extent", NULL, IPL_VM);
#else
	expool.pr_size = sizeof(struct extent_region);
#endif
}

/*
 * Allocate an extent region descriptor.  EXTENT MUST NOT BE LOCKED.
 * We will handle any locking we may need.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;
	int exflags, error;

	/*
	 * If the kernel memory allocator is not yet running, we can't
	 * use it (obviously).
	 */
	if (KMEM_IS_RUNNING == 0)
		flags &= ~EX_MALLOCOK;

	/*
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	mutex_enter(&ex->ex_lock);
	exflags = ex->ex_flags;
	mutex_exit(&ex->ex_lock);

	if (exflags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		mutex_enter(&ex->ex_lock);
		for (;;) {
			if ((rp = LIST_FIRST(&fex->fex_freelist)) != NULL) {
				/*
				 * Don't muck with flags after pulling it off
				 * the freelist; it may have been dynamically
				 * allocated, and kindly given to us.  We
				 * need to remember that information.
				 */
				LIST_REMOVE(rp, er_link);
				mutex_exit(&ex->ex_lock);
				return (rp);
			}
			if (flags & EX_MALLOCOK) {
				mutex_exit(&ex->ex_lock);
				goto alloc;
			}
			if ((flags & EX_WAITOK) == 0) {
				mutex_exit(&ex->ex_lock);
				return (NULL);
			}
			ex->ex_flags |= EXF_FLWANTED;
			if ((flags & EX_CATCH) != 0)
				error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
			else {
				cv_wait(&ex->ex_cv, &ex->ex_lock);
				error = 0;
			}
			if (error != 0) {
				mutex_exit(&ex->ex_lock);
				return (NULL);
			}
		}
	}

 alloc:
	rp = pool_get(&expool, (flags & EX_WAITOK) ? PR_WAITOK : 0);

	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

/*
 * Free an extent region descriptor.  EXTENT _MUST_ BE LOCKED!
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else
				pool_put(&expool, rp);
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

 wake_em_up:
		ex->ex_flags &= ~EXF_FLWANTED;
		cv_broadcast(&ex->ex_cv);
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&expool, rp);
}

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(const char *name, u_long start, u_long end,
    void *storage, size_t storagesize, int flags)
{
	struct extent *ex;
	char *cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifndef _KERNEL
	extent_init();
#endif

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("extent_create: name == NULL");
	if (end < start) {
		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
		    name, start, end);
		panic("extent_create: end < start");
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("extent_create: fixed extent, bad storagesize 0x%lx",
		    (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("extent_create: storage provided for non-fixed");
#endif

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)kmem_alloc(sizeof(struct extent),
		    (flags & EX_WAITOK) ? KM_SLEEP : KM_NOSLEEP);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	mutex_init(&ex->ex_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&ex->ex_cv, "extent");
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;
	return (ex);
}
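
#ifdef _EXTENT_EXAMPLE
/*
 * Illustrative sketch added for exposition; it is not part of the
 * original file and _EXTENT_EXAMPLE is a hypothetical guard that is
 * never defined.  It shows the intended use of extent_create() with
 * caller-supplied static storage, which makes the extent "fixed":
 * region descriptors then come from the storage block instead of the
 * kernel allocator.  EXTENT_FIXED_STORAGE_SIZE() and the EX_* flags
 * are assumed to come from <sys/extent.h>; the names below are made
 * up for the example.
 */
static long example_iospace_storage[
    EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)];
static struct extent *example_iospace;

static void
example_create_fixed(void)
{

	/* Manage I/O port numbers 0x0 - 0xffff without dynamic allocation. */
	example_iospace = extent_create("exio", 0x0, 0xffff,
	    (void *)example_iospace_storage, sizeof(example_iospace_storage),
	    EX_NOWAIT | EX_NOCOALESCE);
}
#endif /* _EXTENT_EXAMPLE */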

/*
 * Destroy an extent map.
 * Since we're freeing the data, there can't be any references
 * so we don't need any locking.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_destroy: NULL extent");
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

	cv_destroy(&ex->ex_cv);
	mutex_destroy(&ex->ex_lock);

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		kmem_free(ex, sizeof(*ex));
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    int flags, struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    (LIST_FIRST(&ex->ex_regions) != NULL) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if ((LIST_NEXT(after, er_link) != NULL) &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
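
/*
 * Worked example of the coalescing above (illustrative; the numbers
 * are not from the original source).  Suppose the region list holds
 * [0x100-0x1ff] and [0x300-0x3ff], and we insert start=0x200,
 * size=0x100 with "after" pointing at [0x100-0x1ff]: we first append
 * to "after" (now 0x100-0x2ff), then notice we exactly abut the next
 * region and merge with it as well, leaving a single region
 * [0x100-0x3ff] and freeing both the passed-in descriptor "rp" and
 * the descriptor for [0x300-0x3ff].  With EXF_NOCOALESCE set, the
 * same insert would instead leave three separate regions.
 */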

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_alloc_region: NULL extent");
	if (size < 1) {
		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
		    ex->ex_name, size);
		panic("extent_alloc_region: bad size");
	}
	if (end < start) {
		printf(
		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_alloc_region: overflow");
	}
#endif
#ifdef LOCKDEBUG
	if (flags & EX_WAITSPACE) {
		ASSERT_SLEEPABLE();
	}
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 *
	 * We don't lock to check the range, because those values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
		    ex->ex_name, ex->ex_start, ex->ex_end);
		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
		    start, end);
		panic("extent_alloc_region: region lies outside extent");
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.  Don't lock before
	 * here!  This could block.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		    "extent_alloc_region: can't allocate region descriptor\n");
#endif
		return (ENOMEM);
	}

	mutex_enter(&ex->ex_lock);
 alloc_start:

	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				if ((flags & EX_CATCH) != 0)
					error = cv_wait_sig(&ex->ex_cv,
					    &ex->ex_lock);
				else {
					cv_wait(&ex->ex_cv, &ex->ex_lock);
					error = 0;
				}
				if (error == 0)
					goto alloc_start;
				mutex_exit(&ex->ex_lock);
			} else {
				mutex_exit(&ex->ex_lock);
				error = EAGAIN;
			}
			extent_free_region_descriptor(ex, myrp);
			return error;
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
	mutex_exit(&ex->ex_lock);
	return (0);
}
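
#ifdef _EXTENT_EXAMPLE
/*
 * Illustrative sketch added for exposition (not part of the original
 * file; _EXTENT_EXAMPLE is a hypothetical guard).  Reserving a
 * specific range, such as a fixed device window, is simply
 * extent_alloc_region() with the exact start and size; EAGAIN means
 * the range is already (partly) allocated and EX_WAITSPACE was not
 * given.  The function name and the port range are made up.
 */
static int
example_reserve_range(struct extent *ex)
{

	/* Reserve ports 0x3f8 - 0x3ff (8 ports) if they are currently free. */
	return extent_alloc_region(ex, 0x3f8, 8, EX_NOWAIT);
}
#endif /* _EXTENT_EXAMPLE */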

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
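
/*
 * Worked example (added for exposition; the numbers are illustrative,
 * not from the original source).  On a 32-bit u_long,
 * LE_OV(0xfffffff0, 0x20, 0xffffffff) is false: the sum wraps to 0x10,
 * which fails the ">= (x)" clause.  Without that clause the wrapped
 * sum would incorrectly pass the "<= (z)" comparison.
 */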

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_alloc_subregion1(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary,
    int flags, u_long *result)
{
	struct extent_region *rp, *myrp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/*
	 * Check arguments.
	 *
	 * We don't lock to check these, because these values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if (ex == NULL)
		panic("extent_alloc_subregion: NULL extent");
	if (result == NULL)
		panic("extent_alloc_subregion: NULL result pointer");
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    ex->ex_name, ex->ex_start, ex->ex_end);
		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
		    substart, subend);
		panic("extent_alloc_subregion: bad subregion");
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
		    ex->ex_name, size);
		panic("extent_alloc_subregion: bad size");
	}
	if (alignment == 0)
		panic("extent_alloc_subregion: bad alignment");
	if (boundary && (boundary < size)) {
		printf(
		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
		panic("extent_alloc_subregion: bad boundary");
	}
#endif
#ifdef LOCKDEBUG
	if (flags & EX_WAITSPACE) {
		ASSERT_SLEEPABLE();
	}
#endif

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.  Don't lock before
	 * here!  This could block.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf(
		 "extent_alloc_subregion: can't allocate region descriptor\n");
#endif
		return (ENOMEM);
	}

 alloc_start:
	mutex_enter(&ex->ex_lock);

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = EXTENT_ALIGN(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf(
      "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    ex->ex_name, ex->ex_start, ex->ex_end, alignment);
		mutex_exit(&ex->ex_lock);
		panic("extent_alloc_subregion: overflow after alignment");
#else
		extent_free_region_descriptor(ex, myrp);
		mutex_exit(&ex->ex_lock);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and see
		 * if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = EXTENT_ALIGN(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

 skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = EXTENT_ALIGN(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 * There is no chunk large enough to hold the request.
	 *
	 * If EX_FAST was not specified, there is not an
	 * exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		if ((flags & EX_CATCH) != 0) {
			error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
		} else {
			cv_wait(&ex->ex_cv, &ex->ex_lock);
			error = 0;
		}
		if (error == 0)
			goto alloc_start;
		mutex_exit(&ex->ex_lock);
	} else {
		mutex_exit(&ex->ex_lock);
		error = EAGAIN;
	}

	extent_free_region_descriptor(ex, myrp);
	return error;

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
	mutex_exit(&ex->ex_lock);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long start, u_long end, u_long size,
    u_long alignment, u_long boundary, int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, start, end, size, alignment,
	    0, boundary, flags, result));
}

int
extent_alloc(struct extent *ex, u_long size, u_long alignment, u_long boundary,
    int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
	    size, alignment, 0, boundary,
	    flags, result));
}

int
extent_alloc1(struct extent *ex, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, u_long *result)
{

	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
	    size, alignment, skew, boundary,
	    flags, result));
}
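
#ifdef _EXTENT_EXAMPLE
/*
 * Illustrative sketch added for exposition (not part of the original
 * file; _EXTENT_EXAMPLE is a hypothetical guard).  extent_alloc()
 * picks the location: here we ask for a 0x1000-byte chunk aligned to
 * 0x1000 that does not cross a 0x10000 boundary, taking the smallest
 * fitting gap; adding EX_FAST would accept the first fit instead.
 * The result is returned through "where" and later released with
 * extent_free().  The function name and sizes are made up.
 */
static int
example_alloc_anywhere(struct extent *ex, u_long *where)
{
	int error;

	error = extent_alloc(ex, 0x1000, 0x1000, 0x10000, EX_NOWAIT, where);
	if (error)
		return error;

	/* ... use the range [*where, *where + 0xfff] ... */

	return extent_free(ex, *where, 0x1000, EX_NOWAIT);
}
#endif /* _EXTENT_EXAMPLE */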

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int coalesce;

#ifdef DIAGNOSTIC
	/*
	 * Check arguments.
	 *
	 * We don't lock to check these, because these values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if (ex == NULL)
		panic("extent_free: NULL extent");
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: extent `%s', region not within extent",
		    ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: overflow");
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	mutex_enter(&ex->ex_lock);
	coalesce = (ex->ex_flags & EXF_NOCOALESCE) == 0;
	mutex_exit(&ex->ex_lock);

	if (coalesce) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	mutex_enter(&ex->ex_lock);

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
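	/*
	 * Worked example of case 4 (illustrative; the numbers are not
	 * from the original source): freeing start=0x400, size=0x200
	 * out of an allocated region [0x000-0xfff] trims the existing
	 * descriptor to [0x000-0x3ff] and uses the pre-allocated "nrp"
	 * for the remainder [0x600-0xfff], leaving the freed hole
	 * 0x400-0x5ff in between.
	 */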
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (!coalesce)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
	mutex_exit(&ex->ex_lock);
	extent_print(ex);
	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
	panic("extent_free: region not found");

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	cv_broadcast(&ex->ex_cv);
	mutex_exit(&ex->ex_lock);
	return (0);
}

void
extent_print(struct extent *ex)
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("extent_print: NULL extent");

	mutex_enter(&ex->ex_lock);

	printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);

	mutex_exit(&ex->ex_lock);
}