1 /*	$NetBSD: uvm_map.c,v 1.157 2004/02/07 08:02:21 yamt Exp $	*/
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
42 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 /*
70 * uvm_map.c: uvm map operations
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.157 2004/02/07 08:02:21 yamt Exp $");
75
76 #include "opt_ddb.h"
77 #include "opt_uvmhist.h"
78 #include "opt_sysv.h"
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/mman.h>
83 #include <sys/proc.h>
84 #include <sys/malloc.h>
85 #include <sys/pool.h>
86 #include <sys/kernel.h>
87 #include <sys/mount.h>
88 #include <sys/vnode.h>
89
90 #ifdef SYSVSHM
91 #include <sys/shm.h>
92 #endif
93
94 #define UVM_MAP
95 #include <uvm/uvm.h>
96 #undef RB_AUGMENT
97 #define RB_AUGMENT(x) uvm_rb_augment(x)
98
99 #ifdef DDB
100 #include <uvm/uvm_ddb.h>
101 #endif
102
103 struct uvm_cnt map_ubackmerge, map_uforwmerge;
104 struct uvm_cnt map_ubimerge, map_unomerge;
105 struct uvm_cnt map_kbackmerge, map_kforwmerge;
106 struct uvm_cnt map_kbimerge, map_knomerge;
107 struct uvm_cnt uvm_map_call, uvm_mlk_call, uvm_mlk_hint;
108 const char vmmapbsy[] = "vmmapbsy";
109
110 /*
111 * pool for vmspace structures.
112 */
113
114 struct pool uvm_vmspace_pool;
115
116 /*
117 * pool for dynamically-allocated map entries.
118 */
119
120 struct pool uvm_map_entry_pool;
121
122 MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures");
123 MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap");
124
125 #ifdef PMAP_GROWKERNEL
126 /*
127 * This global represents the end of the kernel virtual address
128 * space. If we want to exceed this, we must grow the kernel
129 * virtual address space dynamically.
130 *
131 * Note, this variable is locked by kernel_map's lock.
132 */
133 vaddr_t uvm_maxkaddr;
134 #endif
135
136 /*
137 * macros
138 */
139
140 /*
141 * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used
142 * for the vm_map.
143 *
144 * we exclude pager_map because it needs pager_map_wanted handling
145 * when doing map/unmap.
146 */
147 extern struct vm_map *pager_map; /* XXX */
148 #define VM_MAP_USE_KMAPENT(map) \
149 (vm_map_pmap(map) == pmap_kernel() && (map) != pager_map)
150
151 /*
152 * uvm_map_entry_link: insert entry into a map
153 *
154 * => map must be locked
155 */
156 #define uvm_map_entry_link(map, after_where, entry) do { \
157 KASSERT(entry->start < entry->end); \
158 (map)->nentries++; \
159 (entry)->prev = (after_where); \
160 (entry)->next = (after_where)->next; \
161 (entry)->prev->next = (entry); \
162 (entry)->next->prev = (entry); \
163 uvm_rb_insert((map), (entry)); \
164 } while (/*CONSTCOND*/ 0)
165
166 /*
167 * uvm_map_entry_unlink: remove entry from a map
168 *
169 * => map must be locked
170 */
171 #define uvm_map_entry_unlink(map, entry) do { \
172 (map)->nentries--; \
173 (entry)->next->prev = (entry)->prev; \
174 (entry)->prev->next = (entry)->next; \
175 uvm_rb_remove((map), (entry)); \
176 } while (/*CONSTCOND*/ 0)
177
178 /*
179 * SAVE_HINT: saves the specified entry as the hint for future lookups.
180 *
181 * => map need not be locked (protected by hint_lock).
182 */
183 #define SAVE_HINT(map,check,value) do { \
184 simple_lock(&(map)->hint_lock); \
185 if ((map)->hint == (check)) \
186 (map)->hint = (value); \
187 simple_unlock(&(map)->hint_lock); \
188 } while (/*CONSTCOND*/ 0)
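/*
 * note that the hint is only replaced if it still equals "check": this
 * lets a caller such as uvm_unmap_remove() evict a hint that points at an
 * entry it is about to free, without clobbering a newer hint installed by
 * another lookup in the meantime.  both the comparison and the update are
 * done under hint_lock.
 */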
189
190 /*
191 * VM_MAP_RANGE_CHECK: check and correct range
192 *
193 * => map must at least be read locked
194 */
195
196 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
197 if (start < vm_map_min(map)) \
198 start = vm_map_min(map); \
199 if (end > vm_map_max(map)) \
200 end = vm_map_max(map); \
201 if (start > end) \
202 start = end; \
203 } while (/*CONSTCOND*/ 0)
204
205 /*
206 * local prototypes
207 */
208
209 static struct vm_map_entry *
210 uvm_mapent_alloc(struct vm_map *, int);
211 static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
212 static void uvm_mapent_free(struct vm_map_entry *);
213 static struct vm_map_entry *
214 uvm_kmapent_alloc(struct vm_map *, int);
215 static void uvm_kmapent_free(struct vm_map_entry *);
216 static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
217 static void uvm_map_reference_amap(struct vm_map_entry *, int);
218 static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
219 struct vm_map_entry *);
220 static void uvm_map_unreference_amap(struct vm_map_entry *, int);
221
222 int _uvm_tree_sanity(struct vm_map *, const char *);
223 static vsize_t uvm_rb_subtree_space(const struct vm_map_entry *);
224
225 static __inline int
226 uvm_compare(const struct vm_map_entry *a, const struct vm_map_entry *b)
227 {
228
229 if (a->start < b->start)
230 return (-1);
231 else if (a->start > b->start)
232 return (1);
233
234 return (0);
235 }
236
237 static __inline void
238 uvm_rb_augment(struct vm_map_entry *entry)
239 {
240
241 entry->space = uvm_rb_subtree_space(entry);
242 }
243
244 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
245
246 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
247
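/*
 * tree augmentation: each entry caches "ownspace", the size of the free
 * gap between its own end and the start of the next entry, and "space",
 * the largest such gap anywhere in the subtree rooted at the entry.
 * uvm_map_findspace() uses "space" to prune subtrees that cannot possibly
 * contain a gap of the requested length.
 */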
248 static __inline vsize_t
249 uvm_rb_space(const struct vm_map *map, const struct vm_map_entry *entry)
250 {
251 /* XXX map is not used */
252
253 KASSERT(entry->next != NULL);
254 return entry->next->start - entry->end;
255 }
256
257 static vsize_t
258 uvm_rb_subtree_space(const struct vm_map_entry *entry)
259 {
260 vaddr_t space, tmp;
261
262 space = entry->ownspace;
263 if (RB_LEFT(entry, rb_entry)) {
264 tmp = RB_LEFT(entry, rb_entry)->space;
265 if (tmp > space)
266 space = tmp;
267 }
268
269 if (RB_RIGHT(entry, rb_entry)) {
270 tmp = RB_RIGHT(entry, rb_entry)->space;
271 if (tmp > space)
272 space = tmp;
273 }
274
275 return (space);
276 }
277
278 static __inline void
279 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
280 {
281 /* We need to traverse to the very top */
282 do {
283 entry->ownspace = uvm_rb_space(map, entry);
284 entry->space = uvm_rb_subtree_space(entry);
285 } while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
286 }
287
288 static __inline void
289 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
290 {
291 vaddr_t space = uvm_rb_space(map, entry);
292 struct vm_map_entry *tmp;
293
294 entry->ownspace = entry->space = space;
295 tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
296 #ifdef DIAGNOSTIC
297 if (tmp != NULL)
298 panic("uvm_rb_insert: duplicate entry?");
299 #endif
300 uvm_rb_fixup(map, entry);
301 if (entry->prev != &map->header)
302 uvm_rb_fixup(map, entry->prev);
303 }
304
305 static __inline void
306 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
307 {
308 struct vm_map_entry *parent;
309
310 parent = RB_PARENT(entry, rb_entry);
311 RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
312 if (entry->prev != &map->header)
313 uvm_rb_fixup(map, entry->prev);
314 if (parent)
315 uvm_rb_fixup(map, parent);
316 }
317
318 #ifdef DEBUG
319 #define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y)
320 #else
321 #define uvm_tree_sanity(x,y)
322 #endif
323
324 int
325 _uvm_tree_sanity(struct vm_map *map, const char *name)
326 {
327 struct vm_map_entry *tmp, *trtmp;
328 int n = 0, i = 1;
329
330 RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
331 if (tmp->ownspace != uvm_rb_space(map, tmp)) {
332 printf("%s: %d/%d ownspace %lx != %lx %s\n",
333 name, n + 1, map->nentries,
334 (ulong)tmp->ownspace, (ulong)uvm_rb_space(map, tmp),
335 tmp->next == &map->header ? "(last)" : "");
336 goto error;
337 }
338 }
339 trtmp = NULL;
340 RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
341 if (tmp->space != uvm_rb_subtree_space(tmp)) {
342 printf("%s: space %lx != %lx\n",
343 name, (ulong)tmp->space,
344 (ulong)uvm_rb_subtree_space(tmp));
345 goto error;
346 }
347 if (trtmp != NULL && trtmp->start >= tmp->start) {
348 printf("%s: corrupt: 0x%lx >= 0x%lx\n",
349 name, trtmp->start, tmp->start);
350 goto error;
351 }
352 n++;
353
354 trtmp = tmp;
355 }
356
357 if (n != map->nentries) {
358 printf("%s: nentries: %d vs %d\n",
359 name, n, map->nentries);
360 goto error;
361 }
362
363 for (tmp = map->header.next; tmp && tmp != &map->header;
364 tmp = tmp->next, i++) {
365 trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
366 if (trtmp != tmp) {
367 printf("%s: lookup: %d: %p - %p: %p\n",
368 name, i, tmp, trtmp,
369 RB_PARENT(tmp, rb_entry));
370 goto error;
371 }
372 }
373
374 return (0);
375 error:
376 #ifdef DDB
377 /* handy breakpoint location for error case */
378 __asm(".globl treesanity_label\ntreesanity_label:");
379 #endif
380 return (-1);
381 }
382
383 /*
384 * local inlines
385 */
386
387 /*
388 * uvm_mapent_alloc: allocate a map entry
389 */
390
391 static __inline struct vm_map_entry *
392 uvm_mapent_alloc(struct vm_map *map, int flags)
393 {
394 struct vm_map_entry *me;
395 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
396 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
397
398 if (VM_MAP_USE_KMAPENT(map)) {
399 me = uvm_kmapent_alloc(map, flags);
400 } else {
401 me = pool_get(&uvm_map_entry_pool, pflags);
402 if (__predict_false(me == NULL))
403 return NULL;
404 me->flags = 0;
405 }
406
407 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me,
408 ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
409 return (me);
410 }
411
412 /*
413 * uvm_mapent_free: free map entry
414 */
415
416 static __inline void
417 uvm_mapent_free(struct vm_map_entry *me)
418 {
419 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
420
421 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]",
422 me, me->flags, 0, 0);
423 if (me->flags & UVM_MAP_KERNEL) {
424 uvm_kmapent_free(me);
425 } else {
426 pool_put(&uvm_map_entry_pool, me);
427 }
428 }
429
430 /*
431 * uvm_mapent_copy: copy a map entry, preserving flags
432 */
433
434 static __inline void
435 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
436 {
437
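	/*
	 * copy everything up to (but not including) the
	 * uvm_map_entry_stop_copy marker, so that fields after the marker
	 * (notably "flags") keep their values in dst.
	 */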
438 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
439 ((char *)src));
440 }
441
442 /*
443 * uvm_map_entry_unwire: unwire a map entry
444 *
445 * => map should be locked by caller
446 */
447
448 static __inline void
449 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
450 {
451
452 entry->wired_count = 0;
453 uvm_fault_unwire_locked(map, entry->start, entry->end);
454 }
455
456
457 /*
458 * wrapper for calling amap_ref()
459 */
460 static __inline void
461 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
462 {
463
464 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
465 (entry->end - entry->start) >> PAGE_SHIFT, flags);
466 }
467
468
469 /*
470 * wrapper for calling amap_unref()
471 */
472 static __inline void
473 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
474 {
475
476 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
477 (entry->end - entry->start) >> PAGE_SHIFT, flags);
478 }
479
480
481 /*
482 * uvm_map_init: init mapping system at boot time. note that we allocate
483 * and init the static pool of struct vm_map_entry *'s for the kernel here.
484 */
485
486 void
487 uvm_map_init(void)
488 {
489 #if defined(UVMHIST)
490 static struct uvm_history_ent maphistbuf[100];
491 static struct uvm_history_ent pdhistbuf[100];
492 #endif
493
494 /*
495 * first, init logging system.
496 */
497
498 UVMHIST_FUNC("uvm_map_init");
499 UVMHIST_INIT_STATIC(maphist, maphistbuf);
500 UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
501 UVMHIST_CALLED(maphist);
502 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
503 UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0,
504 "# uvm_map() successful calls", 0);
505
506 UVMCNT_INIT(map_ubackmerge, UVMCNT_CNT, 0,
507 "# uvm_map() back umerges", 0);
508 UVMCNT_INIT(map_uforwmerge, UVMCNT_CNT, 0,
509 "# uvm_map() forward umerges", 0);
510 UVMCNT_INIT(map_ubimerge, UVMCNT_CNT, 0,
511 "# uvm_map() dual umerge", 0);
512 UVMCNT_INIT(map_unomerge, UVMCNT_CNT, 0,
513 "# uvm_map() no umerge", 0);
514
515 UVMCNT_INIT(map_kbackmerge, UVMCNT_CNT, 0,
516 "# uvm_map() back kmerges", 0);
517 UVMCNT_INIT(map_kforwmerge, UVMCNT_CNT, 0,
518 "# uvm_map() forward kmerges", 0);
519 UVMCNT_INIT(map_kbimerge, UVMCNT_CNT, 0,
520 "# uvm_map() dual kmerge", 0);
521 UVMCNT_INIT(map_knomerge, UVMCNT_CNT, 0,
522 "# uvm_map() no kmerge", 0);
523
524 UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0);
525 UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0);
526
527 /*
528 * initialize the global lock for kernel map entry.
529 *
530 * XXX is it worth it to have per-map locks instead?
531 */
532
533 simple_lock_init(&uvm.kentry_lock);
534
535 /*
536 * initialize the map-related pools.
537 */
538 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
539 0, 0, 0, "vmsppl", &pool_allocator_nointr);
540 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
541 0, 0, 0, "vmmpepl", &pool_allocator_nointr);
542 }
543
544 /*
545 * clippers
546 */
547
548 /*
549 * uvm_map_clip_start: ensure that the entry begins at or after
550 * the starting address; if it doesn't, we split the entry.
551 *
552 * => caller should use UVM_MAP_CLIP_START macro rather than calling
553 * this directly
554 * => map must be locked by caller
555 */
556
557 void
558 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
559 vaddr_t start)
560 {
561 struct vm_map_entry *new_entry;
562 vaddr_t new_adj;
563
564 /* uvm_map_simplify_entry(map, entry); */ /* XXX */
565
566 uvm_tree_sanity(map, "clip_start entry");
567
568 /*
569 * Split off the front portion. note that we must insert the new
570 * entry BEFORE this one, so that this entry has the specified
571 * starting address.
572 */
573
574 new_entry = uvm_mapent_alloc(map, 0);
575 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
576
577 new_entry->end = start;
578 new_adj = start - new_entry->start;
579 if (entry->object.uvm_obj)
580 entry->offset += new_adj; /* shift start over */
581
582 /* Does not change order for the RB tree */
583 entry->start = start;
584
585 if (new_entry->aref.ar_amap) {
586 amap_splitref(&new_entry->aref, &entry->aref, new_adj);
587 }
588
589 uvm_map_entry_link(map, entry->prev, new_entry);
590
591 if (UVM_ET_ISSUBMAP(entry)) {
592 /* ... unlikely to happen, but play it safe */
593 uvm_map_reference(new_entry->object.sub_map);
594 } else {
595 if (UVM_ET_ISOBJ(entry) &&
596 entry->object.uvm_obj->pgops &&
597 entry->object.uvm_obj->pgops->pgo_reference)
598 entry->object.uvm_obj->pgops->pgo_reference(
599 entry->object.uvm_obj);
600 }
601
602 uvm_tree_sanity(map, "clip_start leave");
603 }
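/*
 * purely illustrative sketch (the addresses are made up): clipping an
 * entry that covers [0x1000, 0x5000) at 0x3000 leaves "entry" describing
 * [0x3000, 0x5000) and links a new entry for [0x1000, 0x3000) in front of
 * it, e.g. via the wrapper macro:
 *
 *	UVM_MAP_CLIP_START(map, entry, (vaddr_t)0x3000);
 */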
604
605 /*
606 * uvm_map_clip_end: ensure that the entry ends at or before
607 * the ending address; if it doesn't, we split the entry.
608 *
609 * => caller should use UVM_MAP_CLIP_END macro rather than calling
610 * this directly
611 * => map must be locked by caller
612 */
613
614 void
615 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
616 {
617 struct vm_map_entry * new_entry;
618 vaddr_t new_adj; /* #bytes we move start forward */
619
620 uvm_tree_sanity(map, "clip_end entry");
621 /*
622 * Create a new entry and insert it
623 * AFTER the specified entry
624 */
625
626 new_entry = uvm_mapent_alloc(map, 0);
627 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
628
629 new_entry->start = entry->end = end;
630 new_adj = end - entry->start;
631 if (new_entry->object.uvm_obj)
632 new_entry->offset += new_adj;
633
634 if (entry->aref.ar_amap)
635 amap_splitref(&entry->aref, &new_entry->aref, new_adj);
636
637 uvm_rb_fixup(map, entry);
638
639 uvm_map_entry_link(map, entry, new_entry);
640
641 if (UVM_ET_ISSUBMAP(entry)) {
642 /* ... unlikely to happen, but play it safe */
643 uvm_map_reference(new_entry->object.sub_map);
644 } else {
645 if (UVM_ET_ISOBJ(entry) &&
646 entry->object.uvm_obj->pgops &&
647 entry->object.uvm_obj->pgops->pgo_reference)
648 entry->object.uvm_obj->pgops->pgo_reference(
649 entry->object.uvm_obj);
650 }
651
652 uvm_tree_sanity(map, "clip_end leave");
653 }
654
655
656 /*
657 * M A P - m a i n e n t r y p o i n t
658 */
659 /*
660 * uvm_map: establish a valid mapping in a map
661 *
662 * => assume startp is page aligned.
663 * => assume size is a multiple of PAGE_SIZE.
664 * => assume sys_mmap provides enough of a "hint" to have us skip
665 * over text/data/bss area.
666 * => map must be unlocked (we will lock it)
667 * => <uobj,uoffset> value meanings (4 cases):
668 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
669 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
670 * [3] <uobj,uoffset> == normal mapping
671 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
672 *
673 * case [4] is for kernel mappings where we don't know the offset until
674 * we've found a virtual address. note that kernel object offsets are
675 * always relative to vm_map_min(kernel_map).
676 *
677 * => if `align' is non-zero, we try to align the virtual address to
678 * the specified alignment. this is only a hint; if we can't
679 * do it, the address will be unaligned. this is provided as
680 * a mechanism for large pages.
681 *
682 * => XXXCDC: need way to map in external amap?
683 */
684
685 int
686 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
687 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
688 {
689 struct uvm_map_args args;
690 struct vm_map_entry *new_entry;
691 int error;
692
693 /*
694 * for pager_map, allocate the new entry first to avoid sleeping
695 * for memory while we have the map locked.
696 *
697 * because we allocate entries for in-kernel maps
698 * a bit differently (cf. uvm_kmapent_alloc/free), we need to
699 * allocate them before locking the map.
700 */
701
702 new_entry = NULL;
703 if (VM_MAP_USE_KMAPENT(map) || map == pager_map) {
704 flags |= UVM_FLAG_NOMERGE;
705 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
706 if (__predict_false(new_entry == NULL))
707 return ENOMEM;
708 }
709
710 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
711 flags, &args);
712 if (!error) {
713 error = uvm_map_enter(map, &args, &new_entry);
714 *startp = args.uma_start;
715 }
716
717 if (new_entry)
718 uvm_mapent_free(new_entry);
719
720 return error;
721 }
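/*
 * illustrative sketch only (not a real in-tree caller): a typical kernel
 * mapping request looks roughly like
 *
 *	vsize_t size = PAGE_SIZE;
 *	vaddr_t va = vm_map_min(kernel_map);
 *	int error;
 *
 *	error = uvm_map(kernel_map, &va, round_page(size), NULL,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, 0));
 *
 * on success *startp ("va" above) holds the chosen page-aligned address.
 * cf. uvm_map_reserve() below for an actual caller built on uvm_map().
 */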
722
723 int
724 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
725 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
726 struct uvm_map_args *args)
727 {
728 struct vm_map_entry *prev_entry;
729 vm_prot_t prot = UVM_PROTECTION(flags);
730 vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
731
732 UVMHIST_FUNC("uvm_map_prepare");
733 UVMHIST_CALLED(maphist);
734
735 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
736 map, start, size, flags);
737 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0);
738
739 /*
740 * detect a popular device driver bug.
741 */
742
743 KASSERT(doing_shutdown || curlwp != NULL ||
744 (map->flags & VM_MAP_INTRSAFE));
745
746 /*
747 * zero-sized mapping doesn't make any sense.
748 */
749 KASSERT(size > 0);
750
751 uvm_tree_sanity(map, "map entry");
752
753 /*
754 * check sanity of protection code
755 */
756
757 if ((prot & maxprot) != prot) {
758 UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x",
759 prot, maxprot,0,0);
760 return EACCES;
761 }
762
763 /*
764 * figure out where to put new VM range
765 */
766
767 if (vm_map_lock_try(map) == FALSE) {
768 if (flags & UVM_FLAG_TRYLOCK) {
769 return EAGAIN;
770 }
771 vm_map_lock(map); /* could sleep here */
772 }
773 if ((prev_entry = uvm_map_findspace(map, start, size, &start,
774 uobj, uoffset, align, flags)) == NULL) {
775 UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0);
776 vm_map_unlock(map);
777 return ENOMEM;
778 }
779
780 #ifdef PMAP_GROWKERNEL
781 /*
782 * If the kernel pmap can't map the requested space,
783 * then allocate more resources for it.
784 */
785 if (map == kernel_map && uvm_maxkaddr < (start + size))
786 uvm_maxkaddr = pmap_growkernel(start + size);
787 #endif
788
789 UVMCNT_INCR(uvm_map_call);
790
791 /*
792 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
793 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in
794 * either case we want to zero it before storing it in the map entry
795 * (because it looks strange and confusing when debugging...)
796 *
797 * if uobj is not null
798 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
799 * and we do not need to change uoffset.
800 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
801 * now (based on the starting address of the map). this case is
802 * for kernel object mappings where we don't know the offset until
803 * the virtual address is found (with uvm_map_findspace). the
804 * offset is the distance we are from the start of the map.
805 */
806
807 if (uobj == NULL) {
808 uoffset = 0;
809 } else {
810 if (uoffset == UVM_UNKNOWN_OFFSET) {
811 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
812 uoffset = start - vm_map_min(kernel_map);
813 }
814 }
815
816 args->uma_flags = flags;
817 args->uma_prev = prev_entry;
818 args->uma_start = start;
819 args->uma_size = size;
820 args->uma_uobj = uobj;
821 args->uma_uoffset = uoffset;
822
823 return 0;
824 }
825
826 int
827 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
828 struct vm_map_entry **new_entryp)
829 {
830 struct vm_map_entry *prev_entry = args->uma_prev;
831 struct vm_map_entry *new_entry = *new_entryp;
832
833 const uvm_flag_t flags = args->uma_flags;
834 const vm_prot_t prot = UVM_PROTECTION(flags);
835 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
836 const vm_inherit_t inherit = UVM_INHERIT(flags);
837 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
838 AMAP_EXTEND_NOWAIT : 0;
839 const int advice = UVM_ADVICE(flags);
840
841 vaddr_t start = args->uma_start;
842 vsize_t size = args->uma_size;
843 struct uvm_object *uobj = args->uma_uobj;
844 voff_t uoffset = args->uma_uoffset;
845
846 const int kmap = (vm_map_pmap(map) == pmap_kernel());
847 int merged = 0;
848 int error;
849
850 UVMHIST_FUNC("uvm_map_enter");
851 UVMHIST_CALLED(maphist);
852
853 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
854 map, start, size, flags);
855 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0);
856
857
858 /*
859 * try and insert in map by extending previous entry, if possible.
860 * XXX: we don't try and pull back the next entry. might be useful
861 * for a stack, but we are currently allocating our stack in advance.
862 */
863
864 if (flags & UVM_FLAG_NOMERGE)
865 goto nomerge;
866
867 if (prev_entry->end == start &&
868 prev_entry != &map->header &&
869 prev_entry->object.uvm_obj == uobj) {
870
871 if (uobj && prev_entry->offset +
872 (prev_entry->end - prev_entry->start) != uoffset)
873 goto forwardmerge;
874
875 if (UVM_ET_ISSUBMAP(prev_entry))
876 goto forwardmerge;
877
878 if (prev_entry->protection != prot ||
879 prev_entry->max_protection != maxprot)
880 goto forwardmerge;
881
882 if (prev_entry->inheritance != inherit ||
883 prev_entry->advice != advice)
884 goto forwardmerge;
885
886 /* wiring status must match (new area is unwired) */
887 if (VM_MAPENT_ISWIRED(prev_entry))
888 goto forwardmerge;
889
890 /*
891 * can't extend a shared amap. note: no need to lock amap to
892 * look at refs since we don't care about its exact value.
893 * if it is one (i.e. we have the only reference) it will stay there
894 */
895
896 if (prev_entry->aref.ar_amap &&
897 amap_refs(prev_entry->aref.ar_amap) != 1) {
898 goto forwardmerge;
899 }
900
901 if (prev_entry->aref.ar_amap) {
902 error = amap_extend(prev_entry, size,
903 amapwaitflag | AMAP_EXTEND_FORWARDS);
904 if (error) {
905 vm_map_unlock(map);
906 return error;
907 }
908 }
909
910 if (kmap)
911 UVMCNT_INCR(map_kbackmerge);
912 else
913 UVMCNT_INCR(map_ubackmerge);
914 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0);
915
916 /*
917 * drop our reference to uobj since we are extending a reference
918 * that we already have (the ref count can not drop to zero).
919 */
920
921 if (uobj && uobj->pgops->pgo_detach)
922 uobj->pgops->pgo_detach(uobj);
923
924 prev_entry->end += size;
925 uvm_rb_fixup(map, prev_entry);
926
927 uvm_tree_sanity(map, "map backmerged");
928
929 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
930 merged++;
931 }
932
933 forwardmerge:
934 if (prev_entry->next->start == (start + size) &&
935 prev_entry->next != &map->header &&
936 prev_entry->next->object.uvm_obj == uobj) {
937
938 if (uobj && prev_entry->next->offset != uoffset + size)
939 goto nomerge;
940
941 if (UVM_ET_ISSUBMAP(prev_entry->next))
942 goto nomerge;
943
944 if (prev_entry->next->protection != prot ||
945 prev_entry->next->max_protection != maxprot)
946 goto nomerge;
947
948 if (prev_entry->next->inheritance != inherit ||
949 prev_entry->next->advice != advice)
950 goto nomerge;
951
952 /* wiring status must match (new area is unwired) */
953 if (VM_MAPENT_ISWIRED(prev_entry->next))
954 goto nomerge;
955
956 /*
957 * can't extend a shared amap. note: no need to lock amap to
958 * look at refs since we don't care about its exact value.
959 * if it is one (i.e. we have the only reference) it will stay there.
960 *
961 * note that we also can't merge two amaps, so if we
962 * merged with the previous entry which has an amap,
963 * and the next entry also has an amap, we give up.
964 *
965 * Interesting cases:
966 * amap, new, amap -> give up second merge (single fwd extend)
967 * amap, new, none -> double forward extend (extend again here)
968 * none, new, amap -> double backward extend (done here)
969 * uobj, new, amap -> single backward extend (done here)
970 *
971 * XXX should we attempt to deal with someone refilling
972 * the deallocated region between two entries that are
973 * backed by the same amap (ie, arefs is 2, "prev" and
974 * "next" refer to it, and adding this allocation will
975 * close the hole, thus restoring arefs to 1 and
976 * deallocating the "next" vm_map_entry)? -- @@@
977 */
978
979 if (prev_entry->next->aref.ar_amap &&
980 (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
981 (merged && prev_entry->aref.ar_amap))) {
982 goto nomerge;
983 }
984
985 if (merged) {
986 /*
987 * Try to extend the amap of the previous entry to
988 * cover the next entry as well. If it doesn't work
989 * just skip on, don't actually give up, since we've
990 * already completed the back merge.
991 */
992 if (prev_entry->aref.ar_amap) {
993 if (amap_extend(prev_entry,
994 prev_entry->next->end -
995 prev_entry->next->start,
996 amapwaitflag | AMAP_EXTEND_FORWARDS))
997 goto nomerge;
998 }
999
1000 /*
1001 * Try to extend the amap of the *next* entry
1002 * back to cover the new allocation *and* the
1003 * previous entry as well (the previous merge
1004 * didn't have an amap already otherwise we
1005 * wouldn't be checking here for an amap). If
1006 * it doesn't work just skip on, again, don't
1007 * actually give up, since we've already
1008 * completed the back merge.
1009 */
1010 else if (prev_entry->next->aref.ar_amap) {
1011 if (amap_extend(prev_entry->next,
1012 prev_entry->end -
1013 prev_entry->start,
1014 amapwaitflag | AMAP_EXTEND_BACKWARDS))
1015 goto nomerge;
1016 }
1017 } else {
1018 /*
1019 * Pull the next entry's amap backwards to cover this
1020 * new allocation.
1021 */
1022 if (prev_entry->next->aref.ar_amap) {
1023 error = amap_extend(prev_entry->next, size,
1024 amapwaitflag | AMAP_EXTEND_BACKWARDS);
1025 if (error) {
1026 vm_map_unlock(map);
1027 return error;
1028 }
1029 }
1030 }
1031
1032 if (merged) {
1033 if (kmap) {
1034 UVMCNT_DECR(map_kbackmerge);
1035 UVMCNT_INCR(map_kbimerge);
1036 } else {
1037 UVMCNT_DECR(map_ubackmerge);
1038 UVMCNT_INCR(map_ubimerge);
1039 }
1040 } else {
1041 if (kmap)
1042 UVMCNT_INCR(map_kforwmerge);
1043 else
1044 UVMCNT_INCR(map_uforwmerge);
1045 }
1046 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0);
1047
1048 /*
1049 * drop our reference to uobj since we are extending a reference
1050 * that we already have (the ref count can not drop to zero).
1051 * (if merged, we've already detached)
1052 */
1053 if (uobj && uobj->pgops->pgo_detach && !merged)
1054 uobj->pgops->pgo_detach(uobj);
1055
1056 if (merged) {
1057 struct vm_map_entry *dead = prev_entry->next;
1058 prev_entry->end = dead->end;
1059 uvm_map_entry_unlink(map, dead);
1060 if (dead->aref.ar_amap != NULL) {
1061 prev_entry->aref = dead->aref;
1062 dead->aref.ar_amap = NULL;
1063 }
1064 uvm_mapent_free(dead);
1065 } else {
1066 prev_entry->next->start -= size;
1067 if (prev_entry != &map->header)
1068 uvm_rb_fixup(map, prev_entry);
1069 if (uobj)
1070 prev_entry->next->offset = uoffset;
1071 }
1072
1073 uvm_tree_sanity(map, "map forwardmerged");
1074
1075 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
1076 merged++;
1077 }
1078
1079 nomerge:
1080 if (!merged) {
1081 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0);
1082 if (kmap)
1083 UVMCNT_INCR(map_knomerge);
1084 else
1085 UVMCNT_INCR(map_unomerge);
1086
1087 /*
1088 * allocate new entry and link it in.
1089 */
1090
1091 if (new_entry == NULL) {
1092 new_entry = uvm_mapent_alloc(map,
1093 (flags & UVM_FLAG_NOWAIT));
1094 if (__predict_false(new_entry == NULL)) {
1095 vm_map_unlock(map);
1096 return ENOMEM;
1097 }
1098 }
1099 new_entry->start = start;
1100 new_entry->end = new_entry->start + size;
1101 new_entry->object.uvm_obj = uobj;
1102 new_entry->offset = uoffset;
1103
1104 if (uobj)
1105 new_entry->etype = UVM_ET_OBJ;
1106 else
1107 new_entry->etype = 0;
1108
1109 if (flags & UVM_FLAG_COPYONW) {
1110 new_entry->etype |= UVM_ET_COPYONWRITE;
1111 if ((flags & UVM_FLAG_OVERLAY) == 0)
1112 new_entry->etype |= UVM_ET_NEEDSCOPY;
1113 }
1114
1115 new_entry->protection = prot;
1116 new_entry->max_protection = maxprot;
1117 new_entry->inheritance = inherit;
1118 new_entry->wired_count = 0;
1119 new_entry->advice = advice;
1120 if (flags & UVM_FLAG_OVERLAY) {
1121
1122 /*
1123 * to_add: for BSS we overallocate a little since we
1124 * are likely to extend
1125 */
1126
1127 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
1128 UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
1129 struct vm_amap *amap = amap_alloc(size, to_add,
1130 (flags & UVM_FLAG_NOWAIT) ? M_NOWAIT : M_WAITOK);
1131 if (__predict_false(amap == NULL)) {
1132 vm_map_unlock(map);
1133 if (*new_entryp == NULL)
1134 uvm_mapent_free(new_entry);
1135 return ENOMEM;
1136 }
1137 new_entry->aref.ar_pageoff = 0;
1138 new_entry->aref.ar_amap = amap;
1139 } else {
1140 new_entry->aref.ar_pageoff = 0;
1141 new_entry->aref.ar_amap = NULL;
1142 }
1143 uvm_map_entry_link(map, prev_entry, new_entry);
1144
1145 /*
1146 * Update the free space hint
1147 */
1148
1149 if ((map->first_free == prev_entry) &&
1150 (prev_entry->end >= new_entry->start))
1151 map->first_free = new_entry;
1152
1153 /*
1154 * note that the entry was consumed.
1155 */
1156 *new_entryp = NULL;
1157 }
1158
1159 map->size += size;
1160
1161 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1162 vm_map_unlock(map);
1163 return 0;
1164 }
1165
1166 /*
1167 * uvm_map_lookup_entry: find map entry at or before an address
1168 *
1169 * => map must at least be read-locked by caller
1170 * => entry is returned in "entry"
1171 * => return value is true if address is in the returned entry
1172 */
1173
1174 boolean_t
1175 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1176 struct vm_map_entry **entry /* OUT */)
1177 {
1178 struct vm_map_entry *cur;
1179 boolean_t use_tree = FALSE;
1180 UVMHIST_FUNC("uvm_map_lookup_entry");
1181 UVMHIST_CALLED(maphist);
1182
1183 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)",
1184 map, address, entry, 0);
1185
1186 /*
1187 * start looking either from the head of the
1188 * list, or from the hint.
1189 */
1190
1191 simple_lock(&map->hint_lock);
1192 cur = map->hint;
1193 simple_unlock(&map->hint_lock);
1194
1195 if (cur == &map->header)
1196 cur = cur->next;
1197
1198 UVMCNT_INCR(uvm_mlk_call);
1199 if (address >= cur->start) {
1200
1201 /*
1202 * go from hint to end of list.
1203 *
1204 * but first, make a quick check to see if
1205 * we are already looking at the entry we
1206 * want (which is usually the case).
1207 * note also that we don't need to save the hint
1208 * here... it is the same hint (unless we are
1209 * at the header, in which case the hint didn't
1210 * buy us anything anyway).
1211 */
1212
1213 if (cur != &map->header && cur->end > address) {
1214 UVMCNT_INCR(uvm_mlk_hint);
1215 *entry = cur;
1216 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)",
1217 cur, 0, 0, 0);
1218 return (TRUE);
1219 }
1220
1221 if (map->nentries > 30)
1222 use_tree = TRUE;
1223 } else {
1224
1225 /*
1226 * invalid hint. use tree.
1227 */
1228 use_tree = TRUE;
1229 }
1230
1231 uvm_tree_sanity(map, __func__);
1232
1233 if (use_tree) {
1234 struct vm_map_entry *prev = &map->header;
1235 cur = RB_ROOT(&map->rbhead);
1236
1237 /*
1238 * Simple lookup in the tree. Happens when the hint is
1239 * invalid, or nentries reach a threshold.
1240 */
1241 while (cur) {
1242 if (address >= cur->start) {
1243 if (address < cur->end) {
1244 *entry = cur;
1245 goto got;
1246 }
1247 prev = cur;
1248 cur = RB_RIGHT(cur, rb_entry);
1249 } else
1250 cur = RB_LEFT(cur, rb_entry);
1251 }
1252 *entry = prev;
1253 goto failed;
1254 }
1255
1256 /*
1257 * search linearly
1258 */
1259
1260 while (cur != &map->header) {
1261 if (cur->end > address) {
1262 if (address >= cur->start) {
1263 /*
1264 * save this lookup for future
1265 * hints, and return
1266 */
1267
1268 *entry = cur;
1269 got:
1270 SAVE_HINT(map, map->hint, *entry);
1271 UVMHIST_LOG(maphist,"<- search got it (0x%x)",
1272 cur, 0, 0, 0);
1273 KDASSERT((*entry)->start <= address);
1274 KDASSERT(address < (*entry)->end);
1275 return (TRUE);
1276 }
1277 break;
1278 }
1279 cur = cur->next;
1280 }
1281 *entry = cur->prev;
1282 failed:
1283 SAVE_HINT(map, map->hint, *entry);
1284 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1285 KDASSERT((*entry) == &map->header || (*entry)->end <= address);
1286 KDASSERT((*entry)->next == &map->header ||
1287 address < (*entry)->next->start);
1288 return (FALSE);
1289 }
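/*
 * illustrative sketch only: for an address "va" of interest, a caller
 * holding (at least) a read lock typically uses the function like
 *
 *	struct vm_map_entry *entry;
 *
 *	vm_map_lock_read(map);
 *	if (uvm_map_lookup_entry(map, va, &entry)) {
 *		... va lies inside [entry->start, entry->end) ...
 *	} else {
 *		... "entry" is the entry just before the gap holding va,
 *		    or &map->header if va is below the first entry ...
 *	}
 *	vm_map_unlock_read(map);
 */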
1290
1291 /*
1292 * See if the range between start and start + length fits in the gap
1293 * between entry->end and entry->next->start. Returns 1 if it fits, 0 if
1294 * it doesn't fit, and -1 if the address wraps around.
1295 */
1296 static __inline int
1297 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
1298 vsize_t align, int topdown, struct vm_map_entry *entry)
1299 {
1300 vaddr_t end;
1301
1302 #ifdef PMAP_PREFER
1303 /*
1304 * push start address forward as needed to avoid VAC alias problems.
1305 * we only do this if a valid offset is specified.
1306 */
1307
1308 if (uoffset != UVM_UNKNOWN_OFFSET)
1309 PMAP_PREFER(uoffset, start);
1310 #endif
1311 if (align != 0) {
1312 if ((*start & (align - 1)) != 0) {
1313 if (topdown)
1314 *start &= ~(align - 1);
1315 else
1316 *start = roundup(*start, align);
1317 }
1318 /*
1319 * XXX Should we PMAP_PREFER() here again?
1320 */
1321 }
1322
1323 /*
1324 * Find the end of the proposed new region. Be sure we didn't
1325 * wrap around the address; if so, we lose. Otherwise, if the
1326 * proposed new region fits before the next entry, we win.
1327 */
1328
1329 end = *start + length;
1330 if (end < *start)
1331 return (-1);
1332
1333 if (entry->next->start >= end && *start >= entry->end)
1334 return (1);
1335
1336 return (0);
1337 }
1338
1339 /*
1340 * uvm_map_findspace: find "length" sized space in "map".
1341 *
1342 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
1343 * set (in which case we insist on using "hint").
1344 * => "result" is VA returned
1345 * => uobj/uoffset are to be used to handle VAC alignment, if required
1346 * => if `align' is non-zero, we attempt to align to that value.
1347 * => caller must at least have read-locked map
1348 * => returns NULL on failure, or pointer to prev. map entry if success
1349 * => note this is a cross between the old vm_map_findspace and vm_map_find
1350 */
1351
1352 struct vm_map_entry *
1353 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1354 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
1355 vsize_t align, int flags)
1356 {
1357 struct vm_map_entry *entry;
1358 struct vm_map_entry *child, *prev, *tmp;
1359 vaddr_t orig_hint;
1360 const int topdown = map->flags & VM_MAP_TOPDOWN;
1361 UVMHIST_FUNC("uvm_map_findspace");
1362 UVMHIST_CALLED(maphist);
1363
1364 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)",
1365 map, hint, length, flags);
1366 KASSERT((align & (align - 1)) == 0);
1367 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1368
1369 uvm_tree_sanity(map, "map_findspace entry");
1370
1371 /*
1372 * remember the original hint. if we are aligning, then we
1373 * may have to try again with no alignment constraint if
1374 * we fail the first time.
1375 */
1376
1377 orig_hint = hint;
1378 if (hint < map->min_offset) { /* check ranges ... */
1379 if (flags & UVM_FLAG_FIXED) {
1380 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
1381 return (NULL);
1382 }
1383 hint = map->min_offset;
1384 }
1385 if (hint > map->max_offset) {
1386 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]",
1387 hint, map->min_offset, map->max_offset, 0);
1388 return (NULL);
1389 }
1390
1391 /*
1392 * Look for the first possible address; if there's already
1393 * something at this address, we have to start after it.
1394 */
1395
1396 /*
1397 * @@@: there are four, no, eight cases to consider.
1398 *
1399 * 0: found, fixed, bottom up -> fail
1400 * 1: found, fixed, top down -> fail
1401 * 2: found, not fixed, bottom up -> start after entry->end,
1402 * loop up
1403 * 3: found, not fixed, top down -> start before entry->start,
1404 * loop down
1405 * 4: not found, fixed, bottom up -> check entry->next->start, fail
1406 * 5: not found, fixed, top down -> check entry->next->start, fail
1407 * 6: not found, not fixed, bottom up -> check entry->next->start,
1408 * loop up
1409 * 7: not found, not fixed, top down -> check entry->next->start,
1410 * loop down
1411 *
1412 * as you can see, it reduces to roughly five cases, and that
1413 * adding top down mapping only adds one unique case (without
1414 * it, there would be four cases).
1415 */
1416
1417 if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) {
1418 entry = map->first_free;
1419 } else {
1420 if (uvm_map_lookup_entry(map, hint, &entry)) {
1421 /* "hint" address already in use ... */
1422 if (flags & UVM_FLAG_FIXED) {
1423 UVMHIST_LOG(maphist, "<- fixed & VA in use",
1424 0, 0, 0, 0);
1425 return (NULL);
1426 }
1427 if (topdown)
1428 /* Start from lower gap. */
1429 entry = entry->prev;
1430 } else if (flags & UVM_FLAG_FIXED) {
1431 if (entry->next->start >= hint + length &&
1432 hint + length > hint)
1433 goto found;
1434
1435 /* "hint" address is gap but too small */
1436 UVMHIST_LOG(maphist, "<- fixed mapping failed",
1437 0, 0, 0, 0);
1438 return (NULL); /* only one shot at it ... */
1439 } else {
1440 /*
1441 * See if given hint fits in this gap.
1442 */
1443 switch (uvm_map_space_avail(&hint, length,
1444 uoffset, align, topdown, entry)) {
1445 case 1:
1446 goto found;
1447 case -1:
1448 goto wraparound;
1449 }
1450
1451 if (topdown) {
1452 /*
1453 * There is still a chance to fit
1454 * if hint > entry->end.
1455 */
1456 } else {
1457 /* Start from higher gap. */
1458 entry = entry->next;
1459 if (entry == &map->header)
1460 goto notfound;
1461 goto nextgap;
1462 }
1463 }
1464 }
1465
1466 /*
1467 * Note that the UVM_FLAG_FIXED case has already been handled.
1468 */
1469 KDASSERT((flags & UVM_FLAG_FIXED) == 0);
1470
1471 /* Try to find the space in the red-black tree */
1472
1473 /* Check slot before any entry */
1474 hint = topdown ? entry->next->start - length : entry->end;
1475 switch (uvm_map_space_avail(&hint, length, uoffset, align,
1476 topdown, entry)) {
1477 case 1:
1478 goto found;
1479 case -1:
1480 goto wraparound;
1481 }
1482
1483 nextgap:
1484 KDASSERT((flags & UVM_FLAG_FIXED) == 0);
1485 /* If there is not enough space in the whole tree, we fail */
1486 tmp = RB_ROOT(&map->rbhead);
1487 if (tmp == NULL || tmp->space < length)
1488 goto notfound;
1489
1490 prev = NULL; /* previous candidate */
1491
1492 /* Find an entry close to hint that has enough space */
1493 for (; tmp;) {
1494 KASSERT(tmp->next->start == tmp->end + tmp->ownspace);
1495 if (topdown) {
1496 if (tmp->next->start < hint + length &&
1497 (prev == NULL || tmp->end > prev->end)) {
1498 if (tmp->ownspace >= length)
1499 prev = tmp;
1500 else if ((child = RB_LEFT(tmp, rb_entry))
1501 != NULL && child->space >= length)
1502 prev = tmp;
1503 }
1504 } else {
1505 if (tmp->end >= hint &&
1506 (prev == NULL || tmp->end < prev->end)) {
1507 if (tmp->ownspace >= length)
1508 prev = tmp;
1509 else if ((child = RB_RIGHT(tmp, rb_entry))
1510 != NULL && child->space >= length)
1511 prev = tmp;
1512 }
1513 }
1514 if (tmp->next->start < hint + length)
1515 child = RB_RIGHT(tmp, rb_entry);
1516 else if (tmp->end > hint)
1517 child = RB_LEFT(tmp, rb_entry);
1518 else {
1519 if (tmp->ownspace >= length)
1520 break;
1521 if (topdown)
1522 child = RB_LEFT(tmp, rb_entry);
1523 else
1524 child = RB_RIGHT(tmp, rb_entry);
1525 }
1526 if (child == NULL || child->space < length)
1527 break;
1528 tmp = child;
1529 }
1530
1531 if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
1532 /*
1533 * Check if the entry that we found satisfies the
1534 * space requirement
1535 */
1536 if (topdown) {
1537 if (hint > tmp->next->start - length)
1538 hint = tmp->next->start - length;
1539 } else {
1540 if (hint < tmp->end)
1541 hint = tmp->end;
1542 }
1543 switch (uvm_map_space_avail(&hint, length, uoffset, align,
1544 topdown, tmp)) {
1545 case 1:
1546 entry = tmp;
1547 goto found;
1548 case -1:
1549 goto wraparound;
1550 }
1551 if (tmp->ownspace >= length)
1552 goto listsearch;
1553 }
1554 if (prev == NULL)
1555 goto notfound;
1556
1557 if (topdown) {
1558 KASSERT(orig_hint >= prev->next->start - length ||
1559 prev->next->start - length > prev->next->start);
1560 hint = prev->next->start - length;
1561 } else {
1562 KASSERT(orig_hint <= prev->end);
1563 hint = prev->end;
1564 }
1565 switch (uvm_map_space_avail(&hint, length, uoffset, align,
1566 topdown, prev)) {
1567 case 1:
1568 entry = prev;
1569 goto found;
1570 case -1:
1571 goto wraparound;
1572 }
1573 if (prev->ownspace >= length)
1574 goto listsearch;
1575
1576 if (topdown)
1577 tmp = RB_LEFT(prev, rb_entry);
1578 else
1579 tmp = RB_RIGHT(prev, rb_entry);
1580 for (;;) {
1581 KASSERT(tmp && tmp->space >= length);
1582 if (topdown)
1583 child = RB_RIGHT(tmp, rb_entry);
1584 else
1585 child = RB_LEFT(tmp, rb_entry);
1586 if (child && child->space >= length) {
1587 tmp = child;
1588 continue;
1589 }
1590 if (tmp->ownspace >= length)
1591 break;
1592 if (topdown)
1593 tmp = RB_LEFT(tmp, rb_entry);
1594 else
1595 tmp = RB_RIGHT(tmp, rb_entry);
1596 }
1597
1598 if (topdown) {
1599 KASSERT(orig_hint >= tmp->next->start - length ||
1600 tmp->next->start - length > tmp->next->start);
1601 hint = tmp->next->start - length;
1602 } else {
1603 KASSERT(orig_hint <= tmp->end);
1604 hint = tmp->end;
1605 }
1606 switch (uvm_map_space_avail(&hint, length, uoffset, align,
1607 topdown, tmp)) {
1608 case 1:
1609 entry = tmp;
1610 goto found;
1611 case -1:
1612 goto wraparound;
1613 }
1614
1615 /*
1616 * The tree fails to find an entry because of offset or alignment
1617 * restrictions. Search the list instead.
1618 */
1619 listsearch:
1620 /*
1621 * Look through the rest of the map, trying to fit a new region in
1622 * the gap between existing regions, or after the very last region.
1623 * note: entry->end = base VA of current gap,
1624 * entry->next->start = VA of end of current gap
1625 */
1626
1627 for (;;) {
1628 /* Update hint for current gap. */
1629 hint = topdown ? entry->next->start - length : entry->end;
1630
1631 /* See if it fits. */
1632 switch (uvm_map_space_avail(&hint, length, uoffset, align,
1633 topdown, entry)) {
1634 case 1:
1635 goto found;
1636 case -1:
1637 goto wraparound;
1638 }
1639
1640 /* Advance to next/previous gap */
1641 if (topdown) {
1642 if (entry == &map->header) {
1643 UVMHIST_LOG(maphist, "<- failed (off start)",
1644 0,0,0,0);
1645 goto notfound;
1646 }
1647 entry = entry->prev;
1648 } else {
1649 entry = entry->next;
1650 if (entry == &map->header) {
1651 UVMHIST_LOG(maphist, "<- failed (off end)",
1652 0,0,0,0);
1653 goto notfound;
1654 }
1655 }
1656 }
1657
1658 found:
1659 SAVE_HINT(map, map->hint, entry);
1660 *result = hint;
1661 UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0);
1662 KASSERT( topdown || hint >= orig_hint);
1663 KASSERT(!topdown || hint <= orig_hint);
1664 KASSERT(entry->end <= hint);
1665 KASSERT(hint + length <= entry->next->start);
1666 return (entry);
1667
1668 wraparound:
1669 UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
1670
1671 notfound:
1672 if (align != 0) {
1673 UVMHIST_LOG(maphist, "calling recursively, no align",
1674 0,0,0,0);
1675 return (uvm_map_findspace(map, orig_hint,
1676 length, result, uobj, uoffset, 0, flags));
1677 }
1678 return (NULL);
1679 }
1680
1681 /*
1682 * U N M A P - m a i n h e l p e r f u n c t i o n s
1683 */
1684
1685 /*
1686 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
1687 *
1688 * => caller must check alignment and size
1689 * => map must be locked by caller
1690 * => we return a list of map entries that we've removed from the map
1691 * in "entry_list"
1692 */
1693
1694 void
1695 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1696 struct vm_map_entry **entry_list /* OUT */)
1697 {
1698 struct vm_map_entry *entry, *first_entry, *next;
1699 vaddr_t len;
1700 UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);
1701
1702 UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
1703 map, start, end, 0);
1704 VM_MAP_RANGE_CHECK(map, start, end);
1705
1706 uvm_tree_sanity(map, "unmap_remove entry");
1707
1708 /*
1709 * find first entry
1710 */
1711
1712 if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
1713 /* clip and go... */
1714 entry = first_entry;
1715 UVM_MAP_CLIP_START(map, entry, start);
1716 /* critical! prevents stale hint */
1717 SAVE_HINT(map, entry, entry->prev);
1718 } else {
1719 entry = first_entry->next;
1720 }
1721
1722 /*
1723 * Save the free space hint
1724 */
1725
1726 if (map->first_free->start >= start)
1727 map->first_free = entry->prev;
1728
1729 /*
1730 * note: we now re-use first_entry for a different task. we remove
1731 * a number of map entries from the map and save them in a linked
1732 * list headed by "first_entry". once we remove them from the map
1733 * the caller should unlock the map and drop the references to the
1734 * backing objects [c.f. uvm_unmap_detach]. the object is to
1735 * separate unmapping from reference dropping. why?
1736 * [1] the map has to be locked for unmapping
1737 * [2] the map need not be locked for reference dropping
1738 * [3] dropping references may trigger pager I/O, and if we hit
1739 * a pager that does synchronous I/O we may have to wait for it.
1740 * [4] we would like all waiting for I/O to occur with maps unlocked
1741 * so that we don't block other threads.
1742 */
1743
1744 first_entry = NULL;
1745 *entry_list = NULL;
1746
1747 /*
1748 * break up the area into map entry sized regions and unmap. note
1749 * that all mappings have to be removed before we can even consider
1750 * dropping references to amaps or VM objects (otherwise we could end
1751 * up with a mapping to a page on the free list which would be very bad)
1752 */
1753
1754 while ((entry != &map->header) && (entry->start < end)) {
1755 KASSERT((entry->flags & UVM_MAP_FIRST) == 0);
1756
1757 UVM_MAP_CLIP_END(map, entry, end);
1758 next = entry->next;
1759 len = entry->end - entry->start;
1760
1761 /*
1762 * unwire before removing addresses from the pmap; otherwise
1763 * unwiring will put the entries back into the pmap (XXX).
1764 */
1765
1766 if (VM_MAPENT_ISWIRED(entry)) {
1767 uvm_map_entry_unwire(map, entry);
1768 }
1769 if ((map->flags & VM_MAP_PAGEABLE) == 0) {
1770
1771 /*
1772 * if the map is non-pageable, any pages mapped there
1773 * must be wired and entered with pmap_kenter_pa(),
1774 * and we should free any such pages immediately.
1775 * this is mostly used for kmem_map and mb_map.
1776 */
1777
1778 if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
1779 uvm_km_pgremove_intrsafe(entry->start,
1780 entry->end);
1781 pmap_kremove(entry->start, len);
1782 }
1783 } else if (UVM_ET_ISOBJ(entry) &&
1784 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1785 KASSERT(vm_map_pmap(map) == pmap_kernel());
1786
1787 /*
1788 * note: kernel object mappings are currently used in
1789 * two ways:
1790 * [1] "normal" mappings of pages in the kernel object
1791 * [2] uvm_km_valloc'd allocations in which we
1792 * pmap_enter in some non-kernel-object page
1793 * (e.g. vmapbuf).
1794 *
1795 * for case [1], we need to remove the mapping from
1796 * the pmap and then remove the page from the kernel
1797 * object (because, once pages in a kernel object are
1798 * unmapped they are no longer needed, unlike, say,
1799 * a vnode where you might want the data to persist
1800 * until flushed out of a queue).
1801 *
1802 * for case [2], we need to remove the mapping from
1803 * the pmap. there shouldn't be any pages at the
1804 * specified offset in the kernel object [but it
1805 * doesn't hurt to call uvm_km_pgremove just to be
1806 * safe?]
1807 *
1808 * uvm_km_pgremove currently does the following:
1809 * for pages in the kernel object in range:
1810 * - drops the swap slot
1811 * - uvm_pagefree the page
1812 */
1813
1814 /*
1815 * remove mappings from pmap and drop the pages
1816 * from the object. offsets are always relative
1817 * to vm_map_min(kernel_map).
1818 */
1819
1820 pmap_remove(pmap_kernel(), entry->start,
1821 entry->start + len);
1822 uvm_km_pgremove(entry->object.uvm_obj,
1823 entry->start - vm_map_min(kernel_map),
1824 entry->end - vm_map_min(kernel_map));
1825
1826 /*
1827 * null out kernel_object reference, we've just
1828 * dropped it
1829 */
1830
1831 entry->etype &= ~UVM_ET_OBJ;
1832 entry->object.uvm_obj = NULL;
1833 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
1834
1835 /*
1836 * remove mappings the standard way.
1837 */
1838
1839 pmap_remove(map->pmap, entry->start, entry->end);
1840 }
1841
1842 /*
1843 * remove entry from map and put it on our list of entries
1844 * that we've nuked. then go to next entry.
1845 */
1846
1847 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0);
1848
1849 /* critical! prevents stale hint */
1850 SAVE_HINT(map, entry, entry->prev);
1851
1852 uvm_map_entry_unlink(map, entry);
1853 KASSERT(map->size >= len);
1854 map->size -= len;
1855 entry->prev = NULL;
1856 entry->next = first_entry;
1857 first_entry = entry;
1858 entry = next;
1859 }
1860 if ((map->flags & VM_MAP_DYING) == 0) {
1861 pmap_update(vm_map_pmap(map));
1862 }
1863
1864 uvm_tree_sanity(map, "unmap_remove leave");
1865
1866 /*
1867 * now we've cleaned up the map and are ready for the caller to drop
1868 * references to the mapped objects.
1869 */
1870
1871 *entry_list = first_entry;
1872 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1873 }
1874
1875 /*
1876 * uvm_unmap_detach: drop references in a chain of map entries
1877 *
1878 * => we will free the map entries as we traverse the list.
1879 */
1880
1881 void
1882 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
1883 {
1884 struct vm_map_entry *next_entry;
1885 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
1886
1887 while (first_entry) {
1888 KASSERT(!VM_MAPENT_ISWIRED(first_entry));
1889 UVMHIST_LOG(maphist,
1890 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d",
1891 first_entry, first_entry->aref.ar_amap,
1892 first_entry->object.uvm_obj,
1893 UVM_ET_ISSUBMAP(first_entry));
1894
1895 /*
1896 * drop reference to amap, if we've got one
1897 */
1898
1899 if (first_entry->aref.ar_amap)
1900 uvm_map_unreference_amap(first_entry, flags);
1901
1902 /*
1903 * drop reference to our backing object, if we've got one
1904 */
1905
1906 KASSERT(!UVM_ET_ISSUBMAP(first_entry));
1907 if (UVM_ET_ISOBJ(first_entry) &&
1908 first_entry->object.uvm_obj->pgops->pgo_detach) {
1909 (*first_entry->object.uvm_obj->pgops->pgo_detach)
1910 (first_entry->object.uvm_obj);
1911 }
1912 next_entry = first_entry->next;
1913 uvm_mapent_free(first_entry);
1914 first_entry = next_entry;
1915 }
1916 UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
1917 }
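/*
 * illustrative sketch only: for a given map and [start, end) range, the
 * remove/detach split documented above is normally driven like this, so
 * that reference dropping (and any pager I/O it triggers) happens with
 * the map unlocked:
 *
 *	struct vm_map_entry *dead_entries;
 *
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead_entries);
 *	vm_map_unlock(map);
 *	if (dead_entries != NULL)
 *		uvm_unmap_detach(dead_entries, 0);
 */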
1918
1919 /*
1920 * E X T R A C T I O N F U N C T I O N S
1921 */
1922
1923 /*
1924 * uvm_map_reserve: reserve space in a vm_map for future use.
1925 *
1926 * => we reserve space in a map by putting a dummy map entry in the
1927 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
1928 * => map should be unlocked (we will write lock it)
1929 * => we return true if we were able to reserve space
1930 * => XXXCDC: should be inline?
1931 */
1932
1933 int
1934 uvm_map_reserve(struct vm_map *map, vsize_t size,
1935 vaddr_t offset /* hint for pmap_prefer */,
1936 vsize_t align /* alignment hint */,
1937 vaddr_t *raddr /* IN:hint, OUT: reserved VA */)
1938 {
1939 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
1940
1941 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)",
1942 map,size,offset,raddr);
1943
1944 size = round_page(size);
1945 if (*raddr < vm_map_min(map))
1946 *raddr = vm_map_min(map); /* hint */
1947
1948 /*
1949 * reserve some virtual space.
1950 */
1951
1952 if (uvm_map(map, raddr, size, NULL, offset, 0,
1953 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
1954 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
1955 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
1956 return (FALSE);
1957 }
1958
1959 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0);
1960 return (TRUE);
1961 }
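/*
 * illustrative usage sketch (not compiled): a caller seeds the hint with
 * vm_map_min() and checks the boolean result, much as uvm_map_extract()
 * does in its step 1 below.  "example_reserve" is a hypothetical helper.
 */
#if 0	/* example sketch only */
static int
example_reserve(struct vm_map *map, vsize_t len, vaddr_t *vap)
{
	vaddr_t va;

	va = vm_map_min(map);		/* starting hint */
	if (uvm_map_reserve(map, len, 0, 0, &va) == FALSE)
		return ENOMEM;
	*vap = va;	/* a blank entry now covers the reserved range */
	return 0;
}
#endif	/* example sketch only */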
1962
1963 /*
1964 * uvm_map_replace: replace a reserved (blank) area of memory with
1965 * real mappings.
1966 *
1967 * => caller must WRITE-LOCK the map
1968 * => we return TRUE if replacement was a success
1969 * we expect the newents chain to have nnewents entries on it and
1970 * we expect newents->prev to point to the last entry on the list
1971 * => note newents is allowed to be NULL
1972 */
1973
1974 int
1975 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
1976 struct vm_map_entry *newents, int nnewents)
1977 {
1978 struct vm_map_entry *oldent, *last;
1979
1980 uvm_tree_sanity(map, "map_replace entry");
1981
1982 /*
1983 * first find the blank map entry at the specified address
1984 */
1985
1986 if (!uvm_map_lookup_entry(map, start, &oldent)) {
1987 return (FALSE);
1988 }
1989
1990 /*
1991 * check to make sure we have a proper blank entry
1992 */
1993
1994 if (oldent->start != start || oldent->end != end ||
1995 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
1996 return (FALSE);
1997 }
1998
1999 #ifdef DIAGNOSTIC
2000
2001 /*
2002 * sanity check the newents chain
2003 */
2004
2005 {
2006 struct vm_map_entry *tmpent = newents;
2007 int nent = 0;
2008 vaddr_t cur = start;
2009
2010 while (tmpent) {
2011 nent++;
2012 if (tmpent->start < cur)
2013 panic("uvm_map_replace1");
2014 if (tmpent->start > tmpent->end || tmpent->end > end) {
2015 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
2016 tmpent->start, tmpent->end, end);
2017 panic("uvm_map_replace2");
2018 }
2019 cur = tmpent->end;
2020 if (tmpent->next) {
2021 if (tmpent->next->prev != tmpent)
2022 panic("uvm_map_replace3");
2023 } else {
2024 if (newents->prev != tmpent)
2025 panic("uvm_map_replace4");
2026 }
2027 tmpent = tmpent->next;
2028 }
2029 if (nent != nnewents)
2030 panic("uvm_map_replace5");
2031 }
2032 #endif
2033
2034 /*
2035 * map entry is a valid blank! replace it. (this does all the
2036 * work of map entry link/unlink...).
2037 */
2038
2039 if (newents) {
2040 last = newents->prev;
2041
2042 /* critical: flush stale hints out of map */
2043 SAVE_HINT(map, map->hint, newents);
2044 if (map->first_free == oldent)
2045 map->first_free = last;
2046
2047 last->next = oldent->next;
2048 last->next->prev = last;
2049
2050 /* Fix RB tree */
2051 uvm_rb_remove(map, oldent);
2052
2053 newents->prev = oldent->prev;
2054 newents->prev->next = newents;
2055 map->nentries = map->nentries + (nnewents - 1);
2056
2057 /* Fixup the RB tree */
2058 {
2059 int i;
2060 struct vm_map_entry *tmp;
2061
2062 tmp = newents;
2063 for (i = 0; i < nnewents && tmp; i++) {
2064 uvm_rb_insert(map, tmp);
2065 tmp = tmp->next;
2066 }
2067 }
2068 } else {
2069
2070 /* critical: flush stale hints out of map */
2071 SAVE_HINT(map, map->hint, oldent->prev);
2072 if (map->first_free == oldent)
2073 map->first_free = oldent->prev;
2074
2075 /* NULL list of new entries: just remove the old one */
2076 uvm_map_entry_unlink(map, oldent);
2077 }
2078
2079 uvm_tree_sanity(map, "map_replace leave");
2080
2081 /*
2082 * now we can free the old blank entry, unlock the map and return.
2083 */
2084
2085 uvm_mapent_free(oldent);
2086 return (TRUE);
2087 }
2088
2089 /*
2090 * uvm_map_extract: extract a mapping from a map and put it somewhere
2091 * (maybe removing the old mapping)
2092 *
2093 * => maps should be unlocked (we will write lock them)
2094 * => returns 0 on success, error code otherwise
2095 * => start must be page aligned
2096 * => len must be page sized
2097 * => flags:
2098 * UVM_EXTRACT_REMOVE: remove mappings from srcmap
2099 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
2100 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
2101 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
2102 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
2103 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
2104 * be used from within the kernel in a kernel level map <<<
2105 */
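/*
 * illustrative usage sketch (not compiled): a temporary extraction of a
 * page-aligned user range into kernel_map, using quick references and
 * prot fixed up to maxprot; the result must later be torn down via the
 * QREF path.  "example_extract" is a hypothetical helper and "uva"/"len"
 * are assumed to be page aligned.
 */
#if 0	/* example sketch only */
static int
example_extract(struct vm_map *srcmap, vaddr_t uva, vsize_t len,
    vaddr_t *kvap)
{

	return uvm_map_extract(srcmap, uva, len, kernel_map, kvap,
	    UVM_EXTRACT_QREF | UVM_EXTRACT_FIXPROT);
}
#endif	/* example sketch only */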
2106
2107 int
2108 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
2109 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
2110 {
2111 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
2112 oldstart;
2113 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
2114 *deadentry, *oldentry;
2115 vsize_t elen;
2116 int nchain, error, copy_ok;
2117 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
2118
2119 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start,
2120 len,0);
2121 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0);
2122
2123 uvm_tree_sanity(srcmap, "map_extract src enter");
2124 uvm_tree_sanity(dstmap, "map_extract dst enter");
2125
2126 /*
2127 * step 0: sanity check: start must be on a page boundary, length
2128 * must be page sized. can't ask for CONTIG/QREF if you asked for
2129 * REMOVE.
2130 */
2131
2132 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
2133 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
2134 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
2135
2136 /*
2137 * step 1: reserve space in the target map for the extracted area
2138 */
2139
2140 dstaddr = vm_map_min(dstmap);
2141 if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
2142 return (ENOMEM);
2143 *dstaddrp = dstaddr; /* pass address back to caller */
2144 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0);
2145
2146 /*
2147 * step 2: setup for the extraction process loop by init'ing the
2148 * map entry chain, locking src map, and looking up the first useful
2149 * entry in the map.
2150 */
2151
2152 end = start + len;
2153 newend = dstaddr + len;
2154 chain = endchain = NULL;
2155 nchain = 0;
2156 vm_map_lock(srcmap);
2157
2158 if (uvm_map_lookup_entry(srcmap, start, &entry)) {
2159
2160 /* "start" is within an entry */
2161 if (flags & UVM_EXTRACT_QREF) {
2162
2163 /*
2164 * for quick references we don't clip the entry, so
2165 * the entry may map space "before" the starting
2166 * virtual address... this is the "fudge" factor
2167 * (which can be non-zero only the first time
2168 * through the "while" loop in step 3).
2169 */
2170
2171 fudge = start - entry->start;
2172 } else {
2173
2174 /*
2175 * normal reference: we clip the map to fit (thus
2176 * fudge is zero)
2177 */
2178
2179 UVM_MAP_CLIP_START(srcmap, entry, start);
2180 SAVE_HINT(srcmap, srcmap->hint, entry->prev);
2181 fudge = 0;
2182 }
2183 } else {
2184
2185 /* "start" is not within an entry ... skip to next entry */
2186 if (flags & UVM_EXTRACT_CONTIG) {
2187 error = EINVAL;
2188 goto bad; /* definite hole here ... */
2189 }
2190
2191 entry = entry->next;
2192 fudge = 0;
2193 }
2194
2195 /* save values from srcmap for step 6 */
2196 orig_entry = entry;
2197 orig_fudge = fudge;
2198
2199 /*
2200 * step 3: now start looping through the map entries, extracting
2201 * as we go.
2202 */
2203
2204 while (entry->start < end && entry != &srcmap->header) {
2205
2206 /* if we are not doing a quick reference, clip it */
2207 if ((flags & UVM_EXTRACT_QREF) == 0)
2208 UVM_MAP_CLIP_END(srcmap, entry, end);
2209
2210 /* clear needs_copy (allow chunking) */
2211 if (UVM_ET_ISNEEDSCOPY(entry)) {
2212 if (fudge)
2213 oldstart = entry->start;
2214 else
2215 oldstart = 0; /* XXX: gcc */
2216 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
2217 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */
2218 error = ENOMEM;
2219 goto bad;
2220 }
2221
2222 /* amap_copy could clip (during chunk)! update fudge */
2223 if (fudge) {
2224 fudge = fudge - (entry->start - oldstart);
2225 orig_fudge = fudge;
2226 }
2227 }
2228
2229 /* calculate the offset of this from "start" */
2230 oldoffset = (entry->start + fudge) - start;
2231
2232 /* allocate a new map entry */
2233 newentry = uvm_mapent_alloc(dstmap, 0);
2234 if (newentry == NULL) {
2235 error = ENOMEM;
2236 goto bad;
2237 }
2238
2239 /* set up new map entry */
2240 newentry->next = NULL;
2241 newentry->prev = endchain;
2242 newentry->start = dstaddr + oldoffset;
2243 newentry->end =
2244 newentry->start + (entry->end - (entry->start + fudge));
2245 if (newentry->end > newend || newentry->end < newentry->start)
2246 newentry->end = newend;
2247 newentry->object.uvm_obj = entry->object.uvm_obj;
2248 if (newentry->object.uvm_obj) {
2249 if (newentry->object.uvm_obj->pgops->pgo_reference)
2250 newentry->object.uvm_obj->pgops->
2251 pgo_reference(newentry->object.uvm_obj);
2252 newentry->offset = entry->offset + fudge;
2253 } else {
2254 newentry->offset = 0;
2255 }
2256 newentry->etype = entry->etype;
2257 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
2258 entry->max_protection : entry->protection;
2259 newentry->max_protection = entry->max_protection;
2260 newentry->inheritance = entry->inheritance;
2261 newentry->wired_count = 0;
2262 newentry->aref.ar_amap = entry->aref.ar_amap;
2263 if (newentry->aref.ar_amap) {
2264 newentry->aref.ar_pageoff =
2265 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
2266 uvm_map_reference_amap(newentry, AMAP_SHARED |
2267 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
2268 } else {
2269 newentry->aref.ar_pageoff = 0;
2270 }
2271 newentry->advice = entry->advice;
2272
2273 /* now link it on the chain */
2274 nchain++;
2275 if (endchain == NULL) {
2276 chain = endchain = newentry;
2277 } else {
2278 endchain->next = newentry;
2279 endchain = newentry;
2280 }
2281
2282 /* end of 'while' loop! */
2283 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
2284 (entry->next == &srcmap->header ||
2285 entry->next->start != entry->end)) {
2286 error = EINVAL;
2287 goto bad;
2288 }
2289 entry = entry->next;
2290 fudge = 0;
2291 }
2292
2293 /*
2294 * step 4: close off chain (in format expected by uvm_map_replace)
2295 */
2296
2297 if (chain)
2298 chain->prev = endchain;
2299
2300 /*
2301 * step 5: attempt to lock the dest map so we can pmap_copy.
2302 * note usage of copy_ok:
2303 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
2304 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
2305 */
2306
2307 if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
2308 copy_ok = 1;
2309 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2310 nchain)) {
2311 if (srcmap != dstmap)
2312 vm_map_unlock(dstmap);
2313 error = EIO;
2314 goto bad;
2315 }
2316 } else {
2317 copy_ok = 0;
2318 /* replace deferred until step 7 */
2319 }
2320
2321 /*
2322 * step 6: traverse the srcmap a second time to do the following:
2323 * - if we got a lock on the dstmap do pmap_copy
2324 * - if UVM_EXTRACT_REMOVE remove the entries
2325 * we make use of orig_entry and orig_fudge (saved in step 2)
2326 */
2327
2328 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2329
2330 /* purge possible stale hints from srcmap */
2331 if (flags & UVM_EXTRACT_REMOVE) {
2332 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2333 if (srcmap->first_free->start >= start)
2334 srcmap->first_free = orig_entry->prev;
2335 }
2336
2337 entry = orig_entry;
2338 fudge = orig_fudge;
2339 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */
2340
2341 while (entry->start < end && entry != &srcmap->header) {
2342 if (copy_ok) {
2343 oldoffset = (entry->start + fudge) - start;
2344 elen = MIN(end, entry->end) -
2345 (entry->start + fudge);
2346 pmap_copy(dstmap->pmap, srcmap->pmap,
2347 dstaddr + oldoffset, elen,
2348 entry->start + fudge);
2349 }
2350
2351 /* we advance "entry" in the following if statement */
2352 if (flags & UVM_EXTRACT_REMOVE) {
2353 pmap_remove(srcmap->pmap, entry->start,
2354 entry->end);
2355 oldentry = entry; /* save entry */
2356 entry = entry->next; /* advance */
2357 uvm_map_entry_unlink(srcmap, oldentry);
2358 /* add to dead list */
2359 oldentry->next = deadentry;
2360 deadentry = oldentry;
2361 } else {
2362 entry = entry->next; /* advance */
2363 }
2364
2365 /* end of 'while' loop */
2366 fudge = 0;
2367 }
2368 pmap_update(srcmap->pmap);
2369
2370 /*
2371 * unlock dstmap. we will dispose of deadentry in
2372 * step 7 if needed
2373 */
2374
2375 if (copy_ok && srcmap != dstmap)
2376 vm_map_unlock(dstmap);
2377
2378 } else {
2379 deadentry = NULL;
2380 }
2381
2382 /*
2383 * step 7: we are done with the source map, unlock. if copy_ok
2384 * is 0 then we have not replaced the dummy mapping in dstmap yet
2385 * and we need to do so now.
2386 */
2387
2388 vm_map_unlock(srcmap);
2389 if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
2390 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */
2391
2392 /* now do the replacement if we didn't do it in step 5 */
2393 if (copy_ok == 0) {
2394 vm_map_lock(dstmap);
2395 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2396 nchain);
2397 vm_map_unlock(dstmap);
2398
2399 if (error == FALSE) {
2400 error = EIO;
2401 goto bad2;
2402 }
2403 }
2404
2405 uvm_tree_sanity(srcmap, "map_extract src leave");
2406 uvm_tree_sanity(dstmap, "map_extract dst leave");
2407
2408 return (0);
2409
2410 /*
2411 * bad: failure recovery
2412 */
2413 bad:
2414 vm_map_unlock(srcmap);
2415 bad2: /* src already unlocked */
2416 if (chain)
2417 uvm_unmap_detach(chain,
2418 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
2419
2420 uvm_tree_sanity(srcmap, "map_extract src err leave");
2421 uvm_tree_sanity(dstmap, "map_extract dst err leave");
2422
2423 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */
2424 return (error);
2425 }
2426
2427 /* end of extraction functions */
2428
2429 /*
2430 * uvm_map_submap: punch down part of a map into a submap
2431 *
2432 * => only the kernel_map is allowed to be submapped
2433 * => the purpose of submapping is to break up the locking granularity
2434 * of a larger map
2435 * => the range specified must have been mapped previously with a uvm_map()
2436 * call [with uobj==NULL] to create a blank map entry in the main map.
2437 * [And it had better still be blank!]
2438 * => maps which contain submaps should never be copied or forked.
2439 * => to remove a submap, use uvm_unmap() on the main map
2440 * and then uvm_map_deallocate() the submap.
2441 * => main map must be unlocked.
2442 * => submap must have been init'd and have a zero reference count.
2443 * [need not be locked as we don't actually reference it]
2444 */
2445
2446 int
2447 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
2448 struct vm_map *submap)
2449 {
2450 struct vm_map_entry *entry;
2451 int error;
2452
2453 vm_map_lock(map);
2454 VM_MAP_RANGE_CHECK(map, start, end);
2455
2456 if (uvm_map_lookup_entry(map, start, &entry)) {
2457 UVM_MAP_CLIP_START(map, entry, start);
2458 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */
2459 } else {
2460 entry = NULL;
2461 }
2462
2463 if (entry != NULL &&
2464 entry->start == start && entry->end == end &&
2465 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
2466 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
2467 entry->etype |= UVM_ET_SUBMAP;
2468 entry->object.sub_map = submap;
2469 entry->offset = 0;
2470 uvm_map_reference(submap);
2471 error = 0;
2472 } else {
2473 error = EINVAL;
2474 }
2475 vm_map_unlock(map);
2476 return error;
2477 }
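/*
 * illustrative usage sketch (not compiled): a submap is installed by
 * first creating a blank entry (uobj == NULL) in the kernel map and then
 * punching the submap into it, roughly what uvm_km_suballoc() does.
 * "example_install_submap" is a hypothetical helper with abbreviated
 * error handling.
 */
#if 0	/* example sketch only */
static int
example_install_submap(vaddr_t *startp, vsize_t size, struct vm_map *submap)
{
	int error;

	error = uvm_map(kernel_map, startp, size, NULL, UVM_UNKNOWN_OFFSET,
	    0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE));
	if (error)
		return error;
	return uvm_map_submap(kernel_map, *startp, *startp + size, submap);
}
#endif	/* example sketch only */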
2478
2479
2480 /*
2481 * uvm_map_protect: change map protection
2482 *
2483 * => set_max means set max_protection.
2484 * => map must be unlocked.
2485 */
2486
2487 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
2488 ~VM_PROT_WRITE : VM_PROT_ALL)
2489
2490 int
2491 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
2492 vm_prot_t new_prot, boolean_t set_max)
2493 {
2494 struct vm_map_entry *current, *entry;
2495 int error = 0;
2496 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
2497 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)",
2498 map, start, end, new_prot);
2499
2500 vm_map_lock(map);
2501 VM_MAP_RANGE_CHECK(map, start, end);
2502 if (uvm_map_lookup_entry(map, start, &entry)) {
2503 UVM_MAP_CLIP_START(map, entry, start);
2504 } else {
2505 entry = entry->next;
2506 }
2507
2508 /*
2509 * make a first pass to check for protection violations.
2510 */
2511
2512 current = entry;
2513 while ((current != &map->header) && (current->start < end)) {
2514 if (UVM_ET_ISSUBMAP(current)) {
2515 error = EINVAL;
2516 goto out;
2517 }
2518 if ((new_prot & current->max_protection) != new_prot) {
2519 error = EACCES;
2520 goto out;
2521 }
2522 /*
2523 * Don't allow VM_PROT_EXECUTE to be set on entries that
2524 * point to vnodes that are associated with a NOEXEC file
2525 * system.
2526 */
2527 if (UVM_ET_ISOBJ(current) &&
2528 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
2529 struct vnode *vp =
2530 (struct vnode *) current->object.uvm_obj;
2531
2532 if ((new_prot & VM_PROT_EXECUTE) != 0 &&
2533 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
2534 error = EACCES;
2535 goto out;
2536 }
2537 }
2538 current = current->next;
2539 }
2540
2541 /* go back and fix up protections (no need to clip this time). */
2542
2543 current = entry;
2544 while ((current != &map->header) && (current->start < end)) {
2545 vm_prot_t old_prot;
2546
2547 UVM_MAP_CLIP_END(map, current, end);
2548 old_prot = current->protection;
2549 if (set_max)
2550 current->protection =
2551 (current->max_protection = new_prot) & old_prot;
2552 else
2553 current->protection = new_prot;
2554
2555 /*
2556 * update physical map if necessary. worry about copy-on-write
2557 * here -- CHECK THIS XXX
2558 */
2559
2560 if (current->protection != old_prot) {
2561 /* update pmap! */
2562 pmap_protect(map->pmap, current->start, current->end,
2563 current->protection & MASK(entry));
2564
2565 /*
2566 * If this entry points at a vnode, and the
2567 * protection includes VM_PROT_EXECUTE, mark
2568 * the vnode as VEXECMAP.
2569 */
2570 if (UVM_ET_ISOBJ(current)) {
2571 struct uvm_object *uobj =
2572 current->object.uvm_obj;
2573
2574 if (UVM_OBJ_IS_VNODE(uobj) &&
2575 (current->protection & VM_PROT_EXECUTE))
2576 vn_markexec((struct vnode *) uobj);
2577 }
2578 }
2579
2580 /*
2581 * If the map is configured to lock any future mappings,
2582 * wire this entry now if the old protection was VM_PROT_NONE
2583 * and the new protection is not VM_PROT_NONE.
2584 */
2585
2586 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
2587 VM_MAPENT_ISWIRED(entry) == 0 &&
2588 old_prot == VM_PROT_NONE &&
2589 new_prot != VM_PROT_NONE) {
2590 if (uvm_map_pageable(map, entry->start,
2591 entry->end, FALSE,
2592 UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
2593
2594 /*
2595 * If locking the entry fails, remember the
2596 * error if it's the first one. Note we
2597 * still continue setting the protection in
2598 * the map, but will return the error
2599 * condition regardless.
2600 *
2601 * XXX Ignore what the actual error is,
2602 * XXX just call it a resource shortage
2603 * XXX so that it doesn't get confused
2604 * XXX what uvm_map_protect() itself would
2605 * XXX normally return.
2606 */
2607
2608 error = ENOMEM;
2609 }
2610 }
2611 current = current->next;
2612 }
2613 pmap_update(map->pmap);
2614
2615 out:
2616 vm_map_unlock(map);
2617 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0);
2618 return error;
2619 }
2620
2621 #undef MASK
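/*
 * illustrative usage sketch (not compiled): an mprotect(2)-style caller
 * hands in an unlocked map and a page-aligned range, with set_max FALSE
 * for an ordinary protection change.  "example_protect" is a
 * hypothetical helper.
 */
#if 0	/* example sketch only */
static int
example_protect(struct proc *p, vaddr_t va, vsize_t len, vm_prot_t prot)
{

	return uvm_map_protect(&p->p_vmspace->vm_map, trunc_page(va),
	    round_page(va + len), prot, FALSE);
}
#endif	/* example sketch only */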
2622
2623 /*
2624 * uvm_map_inherit: set inheritance code for range of addrs in map.
2625 *
2626 * => map must be unlocked
2627 * => note that the inherit code is used during a "fork". see fork
2628 * code for details.
2629 */
2630
2631 int
2632 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
2633 vm_inherit_t new_inheritance)
2634 {
2635 struct vm_map_entry *entry, *temp_entry;
2636 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
2637 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)",
2638 map, start, end, new_inheritance);
2639
2640 switch (new_inheritance) {
2641 case MAP_INHERIT_NONE:
2642 case MAP_INHERIT_COPY:
2643 case MAP_INHERIT_SHARE:
2644 break;
2645 default:
2646 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2647 return EINVAL;
2648 }
2649
2650 vm_map_lock(map);
2651 VM_MAP_RANGE_CHECK(map, start, end);
2652 if (uvm_map_lookup_entry(map, start, &temp_entry)) {
2653 entry = temp_entry;
2654 UVM_MAP_CLIP_START(map, entry, start);
2655 } else {
2656 entry = temp_entry->next;
2657 }
2658 while ((entry != &map->header) && (entry->start < end)) {
2659 UVM_MAP_CLIP_END(map, entry, end);
2660 entry->inheritance = new_inheritance;
2661 entry = entry->next;
2662 }
2663 vm_map_unlock(map);
2664 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
2665 return 0;
2666 }
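/*
 * illustrative usage sketch (not compiled): this is roughly the backend
 * of minherit(2), e.g. marking a range to be shared with children
 * created by a later fork.  "example_inherit_share" is a hypothetical
 * helper.
 */
#if 0	/* example sketch only */
static int
example_inherit_share(struct proc *p, vaddr_t va, vsize_t len)
{

	return uvm_map_inherit(&p->p_vmspace->vm_map, trunc_page(va),
	    round_page(va + len), MAP_INHERIT_SHARE);
}
#endif	/* example sketch only */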
2667
2668 /*
2669 * uvm_map_advice: set advice code for range of addrs in map.
2670 *
2671 * => map must be unlocked
2672 */
2673
2674 int
2675 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
2676 {
2677 struct vm_map_entry *entry, *temp_entry;
2678 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
2679 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)",
2680 map, start, end, new_advice);
2681
2682 vm_map_lock(map);
2683 VM_MAP_RANGE_CHECK(map, start, end);
2684 if (uvm_map_lookup_entry(map, start, &temp_entry)) {
2685 entry = temp_entry;
2686 UVM_MAP_CLIP_START(map, entry, start);
2687 } else {
2688 entry = temp_entry->next;
2689 }
2690
2691 /*
2692 * XXXJRT: disallow holes?
2693 */
2694
2695 while ((entry != &map->header) && (entry->start < end)) {
2696 UVM_MAP_CLIP_END(map, entry, end);
2697
2698 switch (new_advice) {
2699 case MADV_NORMAL:
2700 case MADV_RANDOM:
2701 case MADV_SEQUENTIAL:
2702 /* nothing special here */
2703 break;
2704
2705 default:
2706 vm_map_unlock(map);
2707 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2708 return EINVAL;
2709 }
2710 entry->advice = new_advice;
2711 entry = entry->next;
2712 }
2713
2714 vm_map_unlock(map);
2715 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
2716 return 0;
2717 }
2718
2719 /*
2720 * uvm_map_pageable: sets the pageability of a range in a map.
2721 *
2722 * => wires map entries. should not be used for transient page locking.
2723 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
2724 * regions specified as not pageable require lock-down (wired) memory
2725 * and page tables.
2726 * => map must never be read-locked
2727 * => if islocked is TRUE, map is already write-locked
2728 * => we always unlock the map, since we must downgrade to a read-lock
2729 * to call uvm_fault_wire()
2730 * => XXXCDC: check this and try and clean it up.
2731 */
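/*
 * illustrative usage sketch (not compiled): wiring a page-aligned user
 * range mlock(2)-style; passing new_pageable == TRUE instead unwires it
 * again.  "example_wire" is a hypothetical helper and assumes the map is
 * not already locked (lockflags == 0).
 */
#if 0	/* example sketch only */
static int
example_wire(struct vm_map *map, vaddr_t va, vsize_t len)
{

	return uvm_map_pageable(map, trunc_page(va), round_page(va + len),
	    FALSE, 0);
}
#endif	/* example sketch only */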
2732
2733 int
2734 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2735 boolean_t new_pageable, int lockflags)
2736 {
2737 struct vm_map_entry *entry, *start_entry, *failed_entry;
2738 int rv;
2739 #ifdef DIAGNOSTIC
2740 u_int timestamp_save;
2741 #endif
2742 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
2743 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
2744 map, start, end, new_pageable);
2745 KASSERT(map->flags & VM_MAP_PAGEABLE);
2746
2747 if ((lockflags & UVM_LK_ENTER) == 0)
2748 vm_map_lock(map);
2749 VM_MAP_RANGE_CHECK(map, start, end);
2750
2751 /*
2752 * only one pageability change may take place at one time, since
2753 * uvm_fault_wire assumes it will be called only once for each
2754 * wiring/unwiring. therefore, we have to make sure we're actually
2755 * changing the pageability for the entire region. we do so before
2756 * making any changes.
2757 */
2758
2759 if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
2760 if ((lockflags & UVM_LK_EXIT) == 0)
2761 vm_map_unlock(map);
2762
2763 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0);
2764 return EFAULT;
2765 }
2766 entry = start_entry;
2767
2768 /*
2769 * handle wiring and unwiring separately.
2770 */
2771
2772 if (new_pageable) { /* unwire */
2773 UVM_MAP_CLIP_START(map, entry, start);
2774
2775 /*
2776 * unwiring. first ensure that the range to be unwired is
2777 * really wired down and that there are no holes.
2778 */
2779
2780 while ((entry != &map->header) && (entry->start < end)) {
2781 if (entry->wired_count == 0 ||
2782 (entry->end < end &&
2783 (entry->next == &map->header ||
2784 entry->next->start > entry->end))) {
2785 if ((lockflags & UVM_LK_EXIT) == 0)
2786 vm_map_unlock(map);
2787 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0);
2788 return EINVAL;
2789 }
2790 entry = entry->next;
2791 }
2792
2793 /*
2794 * POSIX 1003.1b - a single munlock call unlocks a region,
2795 * regardless of the number of mlock calls made on that
2796 * region.
2797 */
2798
2799 entry = start_entry;
2800 while ((entry != &map->header) && (entry->start < end)) {
2801 UVM_MAP_CLIP_END(map, entry, end);
2802 if (VM_MAPENT_ISWIRED(entry))
2803 uvm_map_entry_unwire(map, entry);
2804 entry = entry->next;
2805 }
2806 if ((lockflags & UVM_LK_EXIT) == 0)
2807 vm_map_unlock(map);
2808 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
2809 return 0;
2810 }
2811
2812 /*
2813 * wire case: in two passes [XXXCDC: ugly block of code here]
2814 *
2815 * 1: holding the write lock, we create any anonymous maps that need
2816 * to be created. then we clip each map entry to the region to
2817 * be wired and increment its wiring count.
2818 *
2819 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2820 * in the pages for any newly wired area (wired_count == 1).
2821 *
2822 * downgrading to a read lock for uvm_fault_wire avoids a possible
2823 * deadlock with another thread that may have faulted on one of
2824 * the pages to be wired (it would mark the page busy, blocking
2825 * us, then in turn block on the map lock that we hold). because
2826 * of problems in the recursive lock package, we cannot upgrade
2827 * to a write lock in vm_map_lookup. thus, any actions that
2828 * require the write lock must be done beforehand. because we
2829 * keep the read lock on the map, the copy-on-write status of the
2830 * entries we modify here cannot change.
2831 */
2832
2833 while ((entry != &map->header) && (entry->start < end)) {
2834 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2835
2836 /*
2837 * perform actions of vm_map_lookup that need the
2838 * write lock on the map: create an anonymous map
2839 * for a copy-on-write region, or an anonymous map
2840 * for a zero-fill region. (XXXCDC: submap case
2841 * ok?)
2842 */
2843
2844 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
2845 if (UVM_ET_ISNEEDSCOPY(entry) &&
2846 ((entry->max_protection & VM_PROT_WRITE) ||
2847 (entry->object.uvm_obj == NULL))) {
2848 amap_copy(map, entry, M_WAITOK, TRUE,
2849 start, end);
2850 /* XXXCDC: wait OK? */
2851 }
2852 }
2853 }
2854 UVM_MAP_CLIP_START(map, entry, start);
2855 UVM_MAP_CLIP_END(map, entry, end);
2856 entry->wired_count++;
2857
2858 /*
2859 * Check for holes
2860 */
2861
2862 if (entry->protection == VM_PROT_NONE ||
2863 (entry->end < end &&
2864 (entry->next == &map->header ||
2865 entry->next->start > entry->end))) {
2866
2867 /*
2868 * found one. amap creation actions do not need to
2869 * be undone, but the wired counts need to be restored.
2870 */
2871
2872 while (entry != &map->header && entry->end > start) {
2873 entry->wired_count--;
2874 entry = entry->prev;
2875 }
2876 if ((lockflags & UVM_LK_EXIT) == 0)
2877 vm_map_unlock(map);
2878 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
2879 return EINVAL;
2880 }
2881 entry = entry->next;
2882 }
2883
2884 /*
2885 * Pass 2.
2886 */
2887
2888 #ifdef DIAGNOSTIC
2889 timestamp_save = map->timestamp;
2890 #endif
2891 vm_map_busy(map);
2892 vm_map_downgrade(map);
2893
2894 rv = 0;
2895 entry = start_entry;
2896 while (entry != &map->header && entry->start < end) {
2897 if (entry->wired_count == 1) {
2898 rv = uvm_fault_wire(map, entry->start, entry->end,
2899 VM_FAULT_WIREMAX, entry->max_protection);
2900 if (rv) {
2901
2902 /*
2903 * wiring failed. break out of the loop.
2904 * we'll clean up the map below, once we
2905 * have a write lock again.
2906 */
2907
2908 break;
2909 }
2910 }
2911 entry = entry->next;
2912 }
2913
2914 if (rv) { /* failed? */
2915
2916 /*
2917 * Get back to an exclusive (write) lock.
2918 */
2919
2920 vm_map_upgrade(map);
2921 vm_map_unbusy(map);
2922
2923 #ifdef DIAGNOSTIC
2924 if (timestamp_save != map->timestamp)
2925 panic("uvm_map_pageable: stale map");
2926 #endif
2927
2928 /*
2929 * first drop the wiring count on all the entries
2930 * which haven't actually been wired yet.
2931 */
2932
2933 failed_entry = entry;
2934 while (entry != &map->header && entry->start < end) {
2935 entry->wired_count--;
2936 entry = entry->next;
2937 }
2938
2939 /*
2940 * now, unwire all the entries that were successfully
2941 * wired above.
2942 */
2943
2944 entry = start_entry;
2945 while (entry != failed_entry) {
2946 entry->wired_count--;
2947 if (VM_MAPENT_ISWIRED(entry) == 0)
2948 uvm_map_entry_unwire(map, entry);
2949 entry = entry->next;
2950 }
2951 if ((lockflags & UVM_LK_EXIT) == 0)
2952 vm_map_unlock(map);
2953 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
2954 return (rv);
2955 }
2956
2957 /* We are holding a read lock here. */
2958 if ((lockflags & UVM_LK_EXIT) == 0) {
2959 vm_map_unbusy(map);
2960 vm_map_unlock_read(map);
2961 } else {
2962
2963 /*
2964 * Get back to an exclusive (write) lock.
2965 */
2966
2967 vm_map_upgrade(map);
2968 vm_map_unbusy(map);
2969 }
2970
2971 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
2972 return 0;
2973 }
2974
2975 /*
2976 * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2977 * all mapped regions.
2978 *
2979 * => map must not be locked.
2980 * => if no flags are specified, all regions are unwired.
2981 * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
2982 */
2983
2984 int
2985 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2986 {
2987 struct vm_map_entry *entry, *failed_entry;
2988 vsize_t size;
2989 int rv;
2990 #ifdef DIAGNOSTIC
2991 u_int timestamp_save;
2992 #endif
2993 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
2994 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0);
2995
2996 KASSERT(map->flags & VM_MAP_PAGEABLE);
2997
2998 vm_map_lock(map);
2999
3000 /*
3001 * handle wiring and unwiring separately.
3002 */
3003
3004 if (flags == 0) { /* unwire */
3005
3006 /*
3007 * POSIX 1003.1b -- munlockall unlocks all regions,
3008 * regardless of how many times mlockall has been called.
3009 */
3010
3011 for (entry = map->header.next; entry != &map->header;
3012 entry = entry->next) {
3013 if (VM_MAPENT_ISWIRED(entry))
3014 uvm_map_entry_unwire(map, entry);
3015 }
3016 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3017 vm_map_unlock(map);
3018 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
3019 return 0;
3020 }
3021
3022 if (flags & MCL_FUTURE) {
3023
3024 /*
3025 * must wire all future mappings; remember this.
3026 */
3027
3028 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
3029 }
3030
3031 if ((flags & MCL_CURRENT) == 0) {
3032
3033 /*
3034 * no more work to do!
3035 */
3036
3037 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
3038 vm_map_unlock(map);
3039 return 0;
3040 }
3041
3042 /*
3043 * wire case: in three passes [XXXCDC: ugly block of code here]
3044 *
3045 * 1: holding the write lock, count all pages mapped by non-wired
3046 * entries. if this would cause us to go over our limit, we fail.
3047 *
3048 * 2: still holding the write lock, we create any anonymous maps that
3049 * need to be created. then we increment each entry's wiring count.
3050 *
3051 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
3052 * in the pages for any newly wired area (wired_count == 1).
3053 *
3054 * downgrading to a read lock for uvm_fault_wire avoids a possible
3055 * deadlock with another thread that may have faulted on one of
3056 * the pages to be wired (it would mark the page busy, blocking
3057 * us, then in turn block on the map lock that we hold). because
3058 * of problems in the recursive lock package, we cannot upgrade
3059 * to a write lock in vm_map_lookup. thus, any actions that
3060 * require the write lock must be done beforehand. because we
3061 * keep the read lock on the map, the copy-on-write status of the
3062 * entries we modify here cannot change.
3063 */
3064
3065 for (size = 0, entry = map->header.next; entry != &map->header;
3066 entry = entry->next) {
3067 if (entry->protection != VM_PROT_NONE &&
3068 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3069 size += entry->end - entry->start;
3070 }
3071 }
3072
3073 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
3074 vm_map_unlock(map);
3075 return ENOMEM;
3076 }
3077
3078 /* XXX non-pmap_wired_count case must be handled by caller */
3079 #ifdef pmap_wired_count
3080 if (limit != 0 &&
3081 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
3082 vm_map_unlock(map);
3083 return ENOMEM;
3084 }
3085 #endif
3086
3087 /*
3088 * Pass 2.
3089 */
3090
3091 for (entry = map->header.next; entry != &map->header;
3092 entry = entry->next) {
3093 if (entry->protection == VM_PROT_NONE)
3094 continue;
3095 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3096
3097 /*
3098 * perform actions of vm_map_lookup that need the
3099 * write lock on the map: create an anonymous map
3100 * for a copy-on-write region, or an anonymous map
3101 * for a zero-fill region. (XXXCDC: submap case
3102 * ok?)
3103 */
3104
3105 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
3106 if (UVM_ET_ISNEEDSCOPY(entry) &&
3107 ((entry->max_protection & VM_PROT_WRITE) ||
3108 (entry->object.uvm_obj == NULL))) {
3109 amap_copy(map, entry, M_WAITOK, TRUE,
3110 entry->start, entry->end);
3111 /* XXXCDC: wait OK? */
3112 }
3113 }
3114 }
3115 entry->wired_count++;
3116 }
3117
3118 /*
3119 * Pass 3.
3120 */
3121
3122 #ifdef DIAGNOSTIC
3123 timestamp_save = map->timestamp;
3124 #endif
3125 vm_map_busy(map);
3126 vm_map_downgrade(map);
3127
3128 rv = 0;
3129 for (entry = map->header.next; entry != &map->header;
3130 entry = entry->next) {
3131 if (entry->wired_count == 1) {
3132 rv = uvm_fault_wire(map, entry->start, entry->end,
3133 VM_FAULT_WIREMAX, entry->max_protection);
3134 if (rv) {
3135
3136 /*
3137 * wiring failed. break out of the loop.
3138 * we'll clean up the map below, once we
3139 * have a write lock again.
3140 */
3141
3142 break;
3143 }
3144 }
3145 }
3146
3147 if (rv) {
3148
3149 /*
3150 * Get back to an exclusive (write) lock.
3151 */
3152
3153 vm_map_upgrade(map);
3154 vm_map_unbusy(map);
3155
3156 #ifdef DIAGNOSTIC
3157 if (timestamp_save != map->timestamp)
3158 panic("uvm_map_pageable_all: stale map");
3159 #endif
3160
3161 /*
3162 * first drop the wiring count on all the entries
3163 * which haven't actually been wired yet.
3164 *
3165 * Skip VM_PROT_NONE entries like we did above.
3166 */
3167
3168 failed_entry = entry;
3169 for (/* nothing */; entry != &map->header;
3170 entry = entry->next) {
3171 if (entry->protection == VM_PROT_NONE)
3172 continue;
3173 entry->wired_count--;
3174 }
3175
3176 /*
3177 * now, unwire all the entries that were successfully
3178 * wired above.
3179 *
3180 * Skip VM_PROT_NONE entries like we did above.
3181 */
3182
3183 for (entry = map->header.next; entry != failed_entry;
3184 entry = entry->next) {
3185 if (entry->protection == VM_PROT_NONE)
3186 continue;
3187 entry->wired_count--;
3188 if (VM_MAPENT_ISWIRED(entry))
3189 uvm_map_entry_unwire(map, entry);
3190 }
3191 vm_map_unlock(map);
3192 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0);
3193 return (rv);
3194 }
3195
3196 /* We are holding a read lock here. */
3197 vm_map_unbusy(map);
3198 vm_map_unlock_read(map);
3199
3200 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
3201 return 0;
3202 }
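/*
 * illustrative usage sketch (not compiled): an mlockall(2)-style caller
 * wires everything currently mapped and asks for future mappings to be
 * wired too; "lim" would normally be the caller's locked-memory resource
 * limit.  "example_lockall" is a hypothetical helper.
 */
#if 0	/* example sketch only */
static int
example_lockall(struct proc *p, vsize_t lim)
{

	return uvm_map_pageable_all(&p->p_vmspace->vm_map,
	    MCL_CURRENT | MCL_FUTURE, lim);
}
#endif	/* example sketch only */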
3203
3204 /*
3205 * uvm_map_clean: clean out a map range
3206 *
3207 * => valid flags:
3208 * if (flags & PGO_CLEANIT): dirty pages are cleaned first
3209 * if (flags & PGO_SYNCIO): dirty pages are written synchronously
3210 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
3211 * if (flags & PGO_FREE): any cached pages are freed after clean
3212 * => returns an error if any part of the specified range isn't mapped
3213 * => never a need to flush amap layer since the anonymous memory has
3214 * no permanent home, but may deactivate pages there
3215 * => called from sys_msync() and sys_madvise()
3216 * => caller must not write-lock map (read OK).
3217 * => we may sleep while cleaning if SYNCIO [with map read-locked]
3218 */
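/*
 * illustrative usage sketch (not compiled): an msync(2)-style caller asks
 * for a synchronous clean of a page-aligned range; adding PGO_FREE or
 * PGO_DEACTIVATE controls what happens to the pages after the clean.
 * "example_sync" is a hypothetical helper.
 */
#if 0	/* example sketch only */
static int
example_sync(struct proc *p, vaddr_t va, vsize_t len)
{

	return uvm_map_clean(&p->p_vmspace->vm_map, trunc_page(va),
	    round_page(va + len), PGO_CLEANIT | PGO_SYNCIO);
}
#endif	/* example sketch only */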
3219
3220 int
3221 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
3222 {
3223 struct vm_map_entry *current, *entry;
3224 struct uvm_object *uobj;
3225 struct vm_amap *amap;
3226 struct vm_anon *anon;
3227 struct vm_page *pg;
3228 vaddr_t offset;
3229 vsize_t size;
3230 int error, refs;
3231 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
3232
3233 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)",
3234 map, start, end, flags);
3235 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
3236 (PGO_FREE|PGO_DEACTIVATE));
3237
3238 vm_map_lock_read(map);
3239 VM_MAP_RANGE_CHECK(map, start, end);
3240 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
3241 vm_map_unlock_read(map);
3242 return EFAULT;
3243 }
3244
3245 /*
3246 * Make a first pass to check for holes.
3247 */
3248
3249 for (current = entry; current->start < end; current = current->next) {
3250 if (UVM_ET_ISSUBMAP(current)) {
3251 vm_map_unlock_read(map);
3252 return EINVAL;
3253 }
3254 if (end <= current->end) {
3255 break;
3256 }
3257 if (current->end != current->next->start) {
3258 vm_map_unlock_read(map);
3259 return EFAULT;
3260 }
3261 }
3262
3263 error = 0;
3264 for (current = entry; start < end; current = current->next) {
3265 amap = current->aref.ar_amap; /* top layer */
3266 uobj = current->object.uvm_obj; /* bottom layer */
3267 KASSERT(start >= current->start);
3268
3269 /*
3270 * No amap cleaning necessary if:
3271 *
3272 * (1) There's no amap.
3273 *
3274 * (2) We're not deactivating or freeing pages.
3275 */
3276
3277 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3278 goto flush_object;
3279
3280 amap_lock(amap);
3281 offset = start - current->start;
3282 size = MIN(end, current->end) - start;
3283 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3284 anon = amap_lookup(&current->aref, offset);
3285 if (anon == NULL)
3286 continue;
3287
3288 simple_lock(&anon->an_lock);
3289 pg = anon->u.an_page;
3290 if (pg == NULL) {
3291 simple_unlock(&anon->an_lock);
3292 continue;
3293 }
3294
3295 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
3296
3297 /*
3298 * In these first 3 cases, we just deactivate the page.
3299 */
3300
3301 case PGO_CLEANIT|PGO_FREE:
3302 case PGO_CLEANIT|PGO_DEACTIVATE:
3303 case PGO_DEACTIVATE:
3304 deactivate_it:
3305 /*
3306 * skip the page if it's loaned or wired,
3307 * since it shouldn't be on a paging queue
3308 * at all in these cases.
3309 */
3310
3311 uvm_lock_pageq();
3312 if (pg->loan_count != 0 ||
3313 pg->wire_count != 0) {
3314 uvm_unlock_pageq();
3315 simple_unlock(&anon->an_lock);
3316 continue;
3317 }
3318 KASSERT(pg->uanon == anon);
3319 pmap_clear_reference(pg);
3320 uvm_pagedeactivate(pg);
3321 uvm_unlock_pageq();
3322 simple_unlock(&anon->an_lock);
3323 continue;
3324
3325 case PGO_FREE:
3326
3327 /*
3328 * If there are multiple references to
3329 * the amap, just deactivate the page.
3330 */
3331
3332 if (amap_refs(amap) > 1)
3333 goto deactivate_it;
3334
3335 /* skip the page if it's wired */
3336 if (pg->wire_count != 0) {
3337 simple_unlock(&anon->an_lock);
3338 continue;
3339 }
3340 amap_unadd(&current->aref, offset);
3341 refs = --anon->an_ref;
3342 simple_unlock(&anon->an_lock);
3343 if (refs == 0)
3344 uvm_anfree(anon);
3345 continue;
3346 }
3347 }
3348 amap_unlock(amap);
3349
3350 flush_object:
3351 /*
3352 * flush pages if we've got a valid backing object.
3353 * note that we must always clean object pages before
3354 * freeing them since otherwise we could reveal stale
3355 * data from files.
3356 */
3357
3358 offset = current->offset + (start - current->start);
3359 size = MIN(end, current->end) - start;
3360 if (uobj != NULL) {
3361 simple_lock(&uobj->vmobjlock);
3362 if (uobj->pgops->pgo_put != NULL)
3363 error = (uobj->pgops->pgo_put)(uobj, offset,
3364 offset + size, flags | PGO_CLEANIT);
3365 else
3366 error = 0;
3367 }
3368 start += size;
3369 }
3370 vm_map_unlock_read(map);
3371 return (error);
3372 }
3373
3374
3375 /*
3376 * uvm_map_checkprot: check protection in map
3377 *
3378 * => must allow specified protection in a fully allocated region.
3379 * => map must be read or write locked by caller.
3380 */
3381
3382 boolean_t
3383 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3384 vm_prot_t protection)
3385 {
3386 struct vm_map_entry *entry;
3387 struct vm_map_entry *tmp_entry;
3388
3389 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3390 return (FALSE);
3391 }
3392 entry = tmp_entry;
3393 while (start < end) {
3394 if (entry == &map->header) {
3395 return (FALSE);
3396 }
3397
3398 /*
3399 * no holes allowed
3400 */
3401
3402 if (start < entry->start) {
3403 return (FALSE);
3404 }
3405
3406 /*
3407 * check protection associated with entry
3408 */
3409
3410 if ((entry->protection & protection) != protection) {
3411 return (FALSE);
3412 }
3413 start = entry->end;
3414 entry = entry->next;
3415 }
3416 return (TRUE);
3417 }
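/*
 * illustrative usage sketch (not compiled): since the caller must hold
 * the map lock, a typical check brackets the call with a read lock.
 * "example_range_is_writable" is a hypothetical helper.
 */
#if 0	/* example sketch only */
static boolean_t
example_range_is_writable(struct vm_map *map, vaddr_t va, vsize_t len)
{
	boolean_t rv;

	vm_map_lock_read(map);
	rv = uvm_map_checkprot(map, trunc_page(va), round_page(va + len),
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_map_unlock_read(map);
	return rv;
}
#endif	/* example sketch only */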
3418
3419 /*
3420 * uvmspace_alloc: allocate a vmspace structure.
3421 *
3422 * - structure includes vm_map and pmap
3423 * - XXX: no locking on this structure
3424 * - refcnt set to 1, rest must be init'd by caller
3425 */
3426 struct vmspace *
3427 uvmspace_alloc(vaddr_t min, vaddr_t max)
3428 {
3429 struct vmspace *vm;
3430 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
3431
3432 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK);
3433 uvmspace_init(vm, NULL, min, max);
3434 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0);
3435 return (vm);
3436 }
3437
3438 /*
3439 * uvmspace_init: initialize a vmspace structure.
3440 *
3441 * - XXX: no locking on this structure
3442 * - refcnt set to 1, rest must be init'd by caller
3443 */
3444 void
3445 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max)
3446 {
3447 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
3448
3449 memset(vm, 0, sizeof(*vm));
3450 uvm_map_setup(&vm->vm_map, min, max, VM_MAP_PAGEABLE
3451 #ifdef __USING_TOPDOWN_VM
3452 | VM_MAP_TOPDOWN
3453 #endif
3454 );
3455 if (pmap)
3456 pmap_reference(pmap);
3457 else
3458 pmap = pmap_create();
3459 vm->vm_map.pmap = pmap;
3460 vm->vm_refcnt = 1;
3461 UVMHIST_LOG(maphist,"<- done",0,0,0,0);
3462 }
3463
3464 /*
3465 * uvmspace_share: share a vmspace between two processes
3466 *
3467 * - XXX: no locking on vmspace
3468 * - used for vfork, threads(?)
3469 */
3470
3471 void
3472 uvmspace_share(struct proc *p1, struct proc *p2)
3473 {
3474
3475 p2->p_vmspace = p1->p_vmspace;
3476 p1->p_vmspace->vm_refcnt++;
3477 }
3478
3479 /*
3480 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
3481 *
3482 * - XXX: no locking on vmspace
3483 */
3484
3485 void
3486 uvmspace_unshare(struct lwp *l)
3487 {
3488 struct proc *p = l->l_proc;
3489 struct vmspace *nvm, *ovm = p->p_vmspace;
3490
3491 if (ovm->vm_refcnt == 1)
3492 /* nothing to do: vmspace isn't shared in the first place */
3493 return;
3494
3495 /* make a new vmspace, still holding old one */
3496 nvm = uvmspace_fork(ovm);
3497
3498 pmap_deactivate(l); /* unbind old vmspace */
3499 p->p_vmspace = nvm;
3500 pmap_activate(l); /* switch to new vmspace */
3501
3502 uvmspace_free(ovm); /* drop reference to old vmspace */
3503 }
3504
3505 /*
3506 * uvmspace_exec: the process wants to exec a new program
3507 *
3508 * - XXX: no locking on vmspace
3509 */
3510
3511 void
3512 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end)
3513 {
3514 struct proc *p = l->l_proc;
3515 struct vmspace *nvm, *ovm = p->p_vmspace;
3516 struct vm_map *map = &ovm->vm_map;
3517
3518 #ifdef __sparc__
3519 /* XXX cgd 960926: the sparc #ifdef should be a MD hook */
3520 kill_user_windows(l); /* before stack addresses go away */
3521 #endif
3522
3523 /*
3524 * see if more than one process is using this vmspace...
3525 */
3526
3527 if (ovm->vm_refcnt == 1) {
3528
3529 /*
3530 * if p is the only process using its vmspace then we can safely
3531 * recycle that vmspace for the program that is being exec'd.
3532 */
3533
3534 #ifdef SYSVSHM
3535 /*
3536 * SYSV SHM semantics require us to kill all segments on an exec
3537 */
3538
3539 if (ovm->vm_shm)
3540 shmexit(ovm);
3541 #endif
3542
3543 /*
3544 * POSIX 1003.1b -- "lock future mappings" is revoked
3545 * when a process execs another program image.
3546 */
3547
3548 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3549
3550 /*
3551 * now unmap the old program
3552 */
3553
3554 pmap_remove_all(map->pmap);
3555 uvm_unmap(map, map->min_offset, map->max_offset);
3556 KASSERT(map->header.prev == &map->header);
3557 KASSERT(map->nentries == 0);
3558
3559 /*
3560 * resize the map
3561 */
3562
3563 map->min_offset = start;
3564 map->max_offset = end;
3565 } else {
3566
3567 /*
3568 * p's vmspace is being shared, so we can't reuse it for p since
3569 * it is still being used for others. allocate a new vmspace
3570 * for p
3571 */
3572
3573 nvm = uvmspace_alloc(start, end);
3574
3575 /*
3576 * install new vmspace and drop our ref to the old one.
3577 */
3578
3579 pmap_deactivate(l);
3580 p->p_vmspace = nvm;
3581 pmap_activate(l);
3582
3583 uvmspace_free(ovm);
3584 }
3585 }
3586
3587 /*
3588 * uvmspace_free: free a vmspace data structure
3589 *
3590 * - XXX: no locking on vmspace
3591 */
3592
3593 void
3594 uvmspace_free(struct vmspace *vm)
3595 {
3596 struct vm_map_entry *dead_entries;
3597 struct vm_map *map;
3598 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
3599
3600 UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
3601 if (--vm->vm_refcnt > 0) {
3602 return;
3603 }
3604
3605 /*
3606 * at this point, there should be no other references to the map.
3607 * delete all of the mappings, then destroy the pmap.
3608 */
3609
3610 map = &vm->vm_map;
3611 map->flags |= VM_MAP_DYING;
3612 pmap_remove_all(map->pmap);
3613 #ifdef SYSVSHM
3614 /* Get rid of any SYSV shared memory segments. */
3615 if (vm->vm_shm != NULL)
3616 shmexit(vm);
3617 #endif
3618 if (map->nentries) {
3619 uvm_unmap_remove(map, map->min_offset, map->max_offset,
3620 &dead_entries);
3621 if (dead_entries != NULL)
3622 uvm_unmap_detach(dead_entries, 0);
3623 }
3624 KASSERT(map->nentries == 0);
3625 KASSERT(map->size == 0);
3626 pmap_destroy(map->pmap);
3627 pool_put(&uvm_vmspace_pool, vm);
3628 }
3629
3630 /*
3631 * F O R K - m a i n e n t r y p o i n t
3632 */
3633 /*
3634 * uvmspace_fork: fork a process' main map
3635 *
3636 * => create a new vmspace for child process from parent.
3637 * => parent's map must not be locked.
3638 */
3639
3640 struct vmspace *
3641 uvmspace_fork(struct vmspace *vm1)
3642 {
3643 struct vmspace *vm2;
3644 struct vm_map *old_map = &vm1->vm_map;
3645 struct vm_map *new_map;
3646 struct vm_map_entry *old_entry;
3647 struct vm_map_entry *new_entry;
3648 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
3649
3650 vm_map_lock(old_map);
3651
3652 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset);
3653 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3654 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3655 new_map = &vm2->vm_map; /* XXX */
3656
3657 old_entry = old_map->header.next;
3658
3659 /*
3660 * go entry-by-entry
3661 */
3662
3663 while (old_entry != &old_map->header) {
3664
3665 /*
3666 * first, some sanity checks on the old entry
3667 */
3668
3669 KASSERT(!UVM_ET_ISSUBMAP(old_entry));
3670 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
3671 !UVM_ET_ISNEEDSCOPY(old_entry));
3672
3673 switch (old_entry->inheritance) {
3674 case MAP_INHERIT_NONE:
3675
3676 /*
3677 * drop the mapping
3678 */
3679
3680 break;
3681
3682 case MAP_INHERIT_SHARE:
3683
3684 /*
3685 * share the mapping: this means we want the old and
3686 * new entries to share amaps and backing objects.
3687 */
3688 /*
3689 * if the old_entry needs a new amap (due to prev fork)
3690 * then we need to allocate it now so that we have
3691 * something we own to share with the new_entry. [in
3692 * other words, we need to clear needs_copy]
3693 */
3694
3695 if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3696 /* get our own amap, clears needs_copy */
3697 amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3698 0, 0);
3699 /* XXXCDC: WAITOK??? */
3700 }
3701
3702 new_entry = uvm_mapent_alloc(new_map, 0);
3703 /* old_entry -> new_entry */
3704 uvm_mapent_copy(old_entry, new_entry);
3705
3706 /* new pmap has nothing wired in it */
3707 new_entry->wired_count = 0;
3708
3709 /*
3710 * gain reference to object backing the map (can't
3711 * be a submap, already checked this case).
3712 */
3713
3714 if (new_entry->aref.ar_amap)
3715 uvm_map_reference_amap(new_entry, AMAP_SHARED);
3716
3717 if (new_entry->object.uvm_obj &&
3718 new_entry->object.uvm_obj->pgops->pgo_reference)
3719 new_entry->object.uvm_obj->
3720 pgops->pgo_reference(
3721 new_entry->object.uvm_obj);
3722
3723 /* insert entry at end of new_map's entry list */
3724 uvm_map_entry_link(new_map, new_map->header.prev,
3725 new_entry);
3726
3727 break;
3728
3729 case MAP_INHERIT_COPY:
3730
3731 /*
3732 * copy-on-write the mapping (using mmap's
3733 * MAP_PRIVATE semantics)
3734 *
3735 * allocate new_entry, adjust reference counts.
3736 * (note that new references are read-only).
3737 */
3738
3739 new_entry = uvm_mapent_alloc(new_map, 0);
3740 /* old_entry -> new_entry */
3741 uvm_mapent_copy(old_entry, new_entry);
3742
3743 if (new_entry->aref.ar_amap)
3744 uvm_map_reference_amap(new_entry, 0);
3745
3746 if (new_entry->object.uvm_obj &&
3747 new_entry->object.uvm_obj->pgops->pgo_reference)
3748 new_entry->object.uvm_obj->pgops->pgo_reference
3749 (new_entry->object.uvm_obj);
3750
3751 /* new pmap has nothing wired in it */
3752 new_entry->wired_count = 0;
3753
3754 new_entry->etype |=
3755 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3756 uvm_map_entry_link(new_map, new_map->header.prev,
3757 new_entry);
3758
3759 /*
3760 * the new entry will need an amap. it will either
3761 * need to be copied from the old entry or created
3762 * from scratch (if the old entry does not have an
3763 * amap). can we defer this process until later
3764 * (by setting "needs_copy") or do we need to copy
3765 * the amap now?
3766 *
3767 * we must copy the amap now if any of the following
3768 * conditions hold:
3769 * 1. the old entry has an amap and that amap is
3770 * being shared. this means that the old (parent)
3771 * process is sharing the amap with another
3772 * process. if we do not clear needs_copy here
3773 * we will end up in a situation where both the
3774 * parent and child process are referring to the
3775 * same amap with "needs_copy" set. if the
3776 * parent write-faults, the fault routine will
3777 * clear "needs_copy" in the parent by allocating
3778 * a new amap. this is wrong because the
3779 * parent is supposed to be sharing the old amap
3780 * and the new amap will break that.
3781 *
3782 * 2. if the old entry has an amap and a non-zero
3783 * wire count then we are going to have to call
3784 * amap_cow_now to avoid page faults in the
3785 * parent process. since amap_cow_now requires
3786 * "needs_copy" to be clear we might as well
3787 * clear it here as well.
3788 *
3789 */
3790
3791 if (old_entry->aref.ar_amap != NULL) {
3792 if ((amap_flags(old_entry->aref.ar_amap) &
3793 AMAP_SHARED) != 0 ||
3794 VM_MAPENT_ISWIRED(old_entry)) {
3795
3796 amap_copy(new_map, new_entry, M_WAITOK,
3797 FALSE, 0, 0);
3798 /* XXXCDC: M_WAITOK ... ok? */
3799 }
3800 }
3801
3802 /*
3803 * if the parent's entry is wired down, then the
3804 * parent process does not want page faults on
3805 * access to that memory. this means that we
3806 * cannot do copy-on-write because we can't write
3807 * protect the old entry. in this case we
3808 * resolve all copy-on-write faults now, using
3809 * amap_cow_now. note that we have already
3810 * allocated any needed amap (above).
3811 */
3812
3813 if (VM_MAPENT_ISWIRED(old_entry)) {
3814
3815 /*
3816 * resolve all copy-on-write faults now
3817 * (note that there is nothing to do if
3818 * the old mapping does not have an amap).
3819 */
3820 if (old_entry->aref.ar_amap)
3821 amap_cow_now(new_map, new_entry);
3822
3823 } else {
3824
3825 /*
3826 * set up mappings to trigger copy-on-write faults.
3827 * we must write-protect the parent if it has
3828 * an amap and it is not already "needs_copy"...
3829 * if it is already "needs_copy" then the parent
3830 * has already been write-protected by a previous
3831 * fork operation.
3832 */
3833
3834 if (old_entry->aref.ar_amap &&
3835 !UVM_ET_ISNEEDSCOPY(old_entry)) {
3836 if (old_entry->max_protection & VM_PROT_WRITE) {
3837 pmap_protect(old_map->pmap,
3838 old_entry->start,
3839 old_entry->end,
3840 old_entry->protection &
3841 ~VM_PROT_WRITE);
3842 pmap_update(old_map->pmap);
3843 }
3844 old_entry->etype |= UVM_ET_NEEDSCOPY;
3845 }
3846 }
3847 break;
3848 } /* end of switch statement */
3849 old_entry = old_entry->next;
3850 }
3851
3852 new_map->size = old_map->size;
3853 vm_map_unlock(old_map);
3854
3855 #ifdef SYSVSHM
3856 if (vm1->vm_shm)
3857 shmfork(vm1, vm2);
3858 #endif
3859
3860 #ifdef PMAP_FORK
3861 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
3862 #endif
3863
3864 UVMHIST_LOG(maphist,"<- done",0,0,0,0);
3865 return (vm2);
3866 }
3867
3868
3869 /*
3870 * in-kernel map entry allocation.
3871 */
3872
3873 int ukh_alloc, ukh_free;
3874 int uke_alloc, uke_free;
3875
3876 struct uvm_kmapent_hdr {
3877 LIST_ENTRY(uvm_kmapent_hdr) ukh_listq;
3878 int ukh_nused;
3879 struct vm_map_entry *ukh_freelist;
3880 struct vm_map *ukh_map;
3881 struct vm_map_entry ukh_entries[0];
3882 };
3883
3884 #define UVM_KMAPENT_CHUNK \
3885 ((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr)) \
3886 / sizeof(struct vm_map_entry))
3887
3888 #define UVM_KHDR_FIND(entry) \
3889 ((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK))
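/*
 * illustrative sketch (not compiled): each chunk occupies exactly one
 * page, so UVM_KMAPENT_CHUNK is just the number of entries that fit after
 * the header, and UVM_KHDR_FIND() recovers the header by masking an entry
 * pointer back to the start of its page.  "example_khdr_find" is a
 * hypothetical helper.
 */
#if 0	/* example sketch only */
static struct uvm_kmapent_hdr *
example_khdr_find(struct vm_map_entry *entry)
{
	struct uvm_kmapent_hdr *ukh;

	ukh = UVM_KHDR_FIND(entry);
	KASSERT((vaddr_t)entry - (vaddr_t)ukh < PAGE_SIZE);
	return ukh;
}
#endif	/* example sketch only */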
3890
3891 static __inline struct vm_map_entry *uvm_kmapent_get(struct uvm_kmapent_hdr *);
3892 static __inline void uvm_kmapent_put(struct uvm_kmapent_hdr *,
3893 struct vm_map_entry *);
3894
3895 static __inline struct vm_map_entry *
3896 uvm_kmapent_get(struct uvm_kmapent_hdr *ukh)
3897 {
3898 struct vm_map_entry *entry;
3899
3900 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
3901 KASSERT(ukh->ukh_nused >= 0);
3902
3903 entry = ukh->ukh_freelist;
3904 if (entry) {
3905 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
3906 == UVM_MAP_KERNEL);
3907 ukh->ukh_freelist = entry->next;
3908 ukh->ukh_nused++;
3909 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
3910 } else {
3911 KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK);
3912 }
3913
3914 return entry;
3915 }
3916
3917 static __inline void
3918 uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry)
3919 {
3920
3921 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
3922 == UVM_MAP_KERNEL);
3923 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
3924 KASSERT(ukh->ukh_nused > 0);
3925 KASSERT(ukh->ukh_freelist != NULL ||
3926 ukh->ukh_nused == UVM_KMAPENT_CHUNK);
3927 KASSERT(ukh->ukh_freelist == NULL ||
3928 ukh->ukh_nused < UVM_KMAPENT_CHUNK);
3929
3930 ukh->ukh_nused--;
3931 entry->next = ukh->ukh_freelist;
3932 ukh->ukh_freelist = entry;
3933 }
3934
3935 /*
3936 * uvm_kmapent_alloc: allocate a map entry for in-kernel map
3937 */
3938
3939 static struct vm_map_entry *
3940 uvm_kmapent_alloc(struct vm_map *map, int flags)
3941 {
3942 struct vm_page *pg;
3943 struct uvm_map_args args;
3944 struct uvm_kmapent_hdr *ukh;
3945 struct vm_map_entry *entry;
3946 uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
3947 UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE);
3948 vaddr_t va;
3949 int error;
3950 int i;
3951 int s;
3952
3953 KDASSERT(UVM_KMAPENT_CHUNK > 2);
3954 KDASSERT(kernel_map != NULL);
3955 KASSERT(vm_map_pmap(map) == pmap_kernel());
3956
3957 uke_alloc++;
3958 entry = NULL;
3959 again:
3960 /*
3961 * try to grab an entry from freelist.
3962 */
3963 s = splvm();
3964 simple_lock(&uvm.kentry_lock);
3965 ukh = LIST_FIRST(&map->kentry_free);
3966 if (ukh) {
3967 entry = uvm_kmapent_get(ukh);
3968 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK)
3969 LIST_REMOVE(ukh, ukh_listq);
3970 }
3971 simple_unlock(&uvm.kentry_lock);
3972 splx(s);
3973
3974 if (entry)
3975 return entry;
3976
3977 /*
3978 * there's no free entry for this vm_map.
3979 * now we need to allocate some vm_map_entry structures.
3980 *
3981 * if kmem_map is already up, allocate an entry from it
3982 * so that we won't try to vm_map_lock recursively.
3983 * XXX assuming usage pattern of kmem_map.
3984 */
3985
3986 if (__predict_true(kmem_map != NULL) && map != kmem_map)
3987 return uvm_kmapent_alloc(kmem_map, flags);
3988
3989 /*
3990 	 * for simplicity, always allocate a one-page chunk of entries at a time.
3991 */
3992
3993 pg = uvm_pagealloc(NULL, 0, NULL, 0);
3994 if (__predict_false(pg == NULL)) {
3995 if (flags & UVM_FLAG_NOWAIT)
3996 return NULL;
3997 uvm_wait("kme_alloc");
3998 goto again;
3999 }
4000
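	/*
	 * map the new page into this map.  uvm_map_prepare() finds a free
	 * virtual address range and records it in "args"; uvm_map_enter()
	 * below then installs the mapping using a map entry supplied by
	 * the caller, which here is the chunk's own first slot, so no
	 * further entry allocation is needed.
	 */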
4001 error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, 0, 0, mapflags, &args);
4002 if (error) {
4003 uvm_pagefree(pg);
4004 return NULL;
4005 }
4006
4007 va = args.uma_start;
4008
4009 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
4010 pmap_update(vm_map_pmap(map));
4011
4012 ukh = (void *)va;
4013
4014 /*
4015 	 * use the first entry for ukh itself.
4016 */
4017
4018 entry = &ukh->ukh_entries[0];
4019 entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT;
4020 error = uvm_map_enter(map, &args, &entry);
4021 KASSERT(error == 0);
4022
4023 ukh->ukh_nused = UVM_KMAPENT_CHUNK;
4024 ukh->ukh_map = map;
4025 ukh->ukh_freelist = NULL;
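	/*
	 * put entries 2..UVM_KMAPENT_CHUNK-1 on the chunk's freelist.
	 * entry 0 was used above to map the chunk itself and entry 1 is
	 * handed to our caller below, so ukh_nused ends up at 2.
	 */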
4026 for (i = UVM_KMAPENT_CHUNK - 1; i >= 2; i--) {
4027 struct vm_map_entry *entry = &ukh->ukh_entries[i];
4028
4029 entry->flags = UVM_MAP_KERNEL;
4030 uvm_kmapent_put(ukh, entry);
4031 }
4032 KASSERT(ukh->ukh_nused == 2);
4033
4034 s = splvm();
4035 simple_lock(&uvm.kentry_lock);
4036 LIST_INSERT_HEAD(&map->kentry_free, ukh, ukh_listq);
4037 simple_unlock(&uvm.kentry_lock);
4038 splx(s);
4039
4040 /*
4041 * return the second entry.
4042 */
4043
4044 entry = &ukh->ukh_entries[1];
4045 entry->flags = UVM_MAP_KERNEL;
4046 ukh_alloc++;
4047 return entry;
4048 }
4049
4050 /*
4051  * uvm_kmapent_free: free a map entry for an in-kernel map
4052 */
4053
4054 static void
4055 uvm_kmapent_free(struct vm_map_entry *entry)
4056 {
4057 struct uvm_kmapent_hdr *ukh;
4058 struct vm_page *pg;
4059 struct vm_map *map;
4060 struct pmap *pmap;
4061 vaddr_t va;
4062 paddr_t pa;
4063 struct vm_map_entry *deadentry;
4064 int s;
4065
4066 uke_free++;
4067 ukh = UVM_KHDR_FIND(entry);
4068 map = ukh->ukh_map;
4069
4070 s = splvm();
4071 simple_lock(&uvm.kentry_lock);
4072 uvm_kmapent_put(ukh, entry);
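	/*
	 * if the chunk still has entries in use besides its own header
	 * entry, leave it alone; but if it was completely full until now,
	 * it has to go back on the map's freelist first.
	 */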
4073 if (ukh->ukh_nused > 1) {
4074 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1)
4075 LIST_INSERT_HEAD(&map->kentry_free, ukh, ukh_listq);
4076 simple_unlock(&uvm.kentry_lock);
4077 splx(s);
4078 return;
4079 }
4080
4081 /*
4082 	 * now this ukh has no entries in use other than its own and can
4083 	 * be freed.  however, keep the last empty ukh on the map's
4084 	 * freelist to avoid allocate/free ping-pong.
4085 */
4086
4087 if (LIST_FIRST(&map->kentry_free) == ukh &&
4088 LIST_NEXT(ukh, ukh_listq) == NULL) {
4089 simple_unlock(&uvm.kentry_lock);
4090 splx(s);
4091 return;
4092 }
4093 LIST_REMOVE(ukh, ukh_listq);
4094 simple_unlock(&uvm.kentry_lock);
4095 splx(s);
4096
4097 KASSERT(ukh->ukh_nused == 1);
4098
4099 /*
4100 	 * remove the map entry for ukh itself.
4101 */
4102
4103 va = (vaddr_t)ukh;
4104 KASSERT((va & PAGE_MASK) == 0);
4105 uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry);
4106 KASSERT(deadentry->flags & UVM_MAP_KERNEL);
4107 KASSERT(deadentry->flags & UVM_MAP_KMAPENT);
4108 KASSERT(deadentry->next == NULL);
4109 KASSERT(deadentry == &ukh->ukh_entries[0]);
4110
4111 /*
4112 * unmap the page from pmap and free it.
4113 */
4114
4115 pmap = vm_map_pmap(map);
4116 KASSERT(pmap == pmap_kernel());
4117 if (!pmap_extract(pmap, va, &pa))
4118 panic("%s: no mapping", __func__);
4119 pmap_kremove(va, PAGE_SIZE);
4120 pg = PHYS_TO_VM_PAGE(pa);
4121 uvm_pagefree(pg);
4122 ukh_free++;
4123 }
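
/*
 * note: uvm_kmapent_alloc() and uvm_kmapent_free() are only meant for
 * maps whose pmap is pmap_kernel() (see the KASSERTs above); the
 * generic map entry allocation code elsewhere in this file is expected
 * to dispatch to them for such maps.
 */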
4124
4125 #if defined(DDB)
4126
4127 /*
4128 * DDB hooks
4129 */
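
/*
 * these are not called during normal uvm operation; they back the
 * kernel debugger's "show map", "show object" and "show page" commands
 * (the exact ddb command names may vary).
 */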
4130
4131 /*
4132 * uvm_map_printit: actually prints the map
4133 */
4134
4135 void
4136 uvm_map_printit(struct vm_map *map, boolean_t full,
4137 void (*pr)(const char *, ...))
4138 {
4139 struct vm_map_entry *entry;
4140
4141 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset, map->max_offset);
4142 (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
4143 map->nentries, map->size, map->ref_count, map->timestamp,
4144 map->flags);
4145 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
4146 pmap_resident_count(map->pmap));
4147 if (!full)
4148 return;
4149 for (entry = map->header.next; entry != &map->header;
4150 entry = entry->next) {
4151 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
4152 entry, entry->start, entry->end, entry->object.uvm_obj,
4153 (long long)entry->offset, entry->aref.ar_amap,
4154 entry->aref.ar_pageoff);
4155 (*pr)(
4156 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
4157 "wc=%d, adv=%d\n",
4158 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
4159 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
4160 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
4161 entry->protection, entry->max_protection,
4162 entry->inheritance, entry->wired_count, entry->advice);
4163 }
4164 }
4165
4166 /*
4167 * uvm_object_printit: actually prints the object
4168 */
4169
4170 void
4171 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
4172 void (*pr)(const char *, ...))
4173 {
4174 struct vm_page *pg;
4175 int cnt = 0;
4176
4177 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ",
4178 uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages);
4179 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
4180 (*pr)("refs=<SYSTEM>\n");
4181 else
4182 (*pr)("refs=%d\n", uobj->uo_refs);
4183
4184 if (!full) {
4185 return;
4186 }
4187 (*pr)(" PAGES <pg,offset>:\n ");
4188 TAILQ_FOREACH(pg, &uobj->memq, listq) {
4189 cnt++;
4190 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
4191 if ((cnt % 3) == 0) {
4192 (*pr)("\n ");
4193 }
4194 }
4195 if ((cnt % 3) != 0) {
4196 (*pr)("\n");
4197 }
4198 }
4199
4200 /*
4201 * uvm_page_printit: actually print the page
4202 */
4203
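/*
 * bit name tables in the format understood by bitmask_snprintf(9):
 * the first character gives the output radix (\20 == 16, i.e. hex) and
 * each following "\N NAME" pair names bit N, counting from 1.
 */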
4204 static const char page_flagbits[] =
4205 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5PAGEOUT\6RELEASED\7FAKE\10RDONLY"
4206 "\11ZERO\15PAGER1";
4207 static const char page_pqflagbits[] =
4208 "\20\1FREE\2INACTIVE\3ACTIVE\5ANON\6AOBJ";
4209
4210 void
4211 uvm_page_printit(struct vm_page *pg, boolean_t full,
4212 void (*pr)(const char *, ...))
4213 {
4214 struct vm_page *tpg;
4215 struct uvm_object *uobj;
4216 struct pglist *pgl;
4217 char pgbuf[128];
4218 char pqbuf[128];
4219
4220 (*pr)("PAGE %p:\n", pg);
4221 bitmask_snprintf(pg->flags, page_flagbits, pgbuf, sizeof(pgbuf));
4222 bitmask_snprintf(pg->pqflags, page_pqflagbits, pqbuf, sizeof(pqbuf));
4223 (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
4224 pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
4225 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
4226 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
4227 #if defined(UVM_PAGE_TRKOWN)
4228 if (pg->flags & PG_BUSY)
4229 (*pr)(" owning process = %d, tag=%s\n",
4230 pg->owner, pg->owner_tag);
4231 else
4232 (*pr)(" page not busy, no owner\n");
4233 #else
4234 (*pr)(" [page ownership tracking disabled]\n");
4235 #endif
4236
4237 if (!full)
4238 return;
4239
4240 /* cross-verify object/anon */
4241 if ((pg->pqflags & PQ_FREE) == 0) {
4242 if (pg->pqflags & PQ_ANON) {
4243 if (pg->uanon == NULL || pg->uanon->u.an_page != pg)
4244 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
4245 (pg->uanon) ? pg->uanon->u.an_page : NULL);
4246 else
4247 (*pr)(" anon backpointer is OK\n");
4248 } else {
4249 uobj = pg->uobject;
4250 if (uobj) {
4251 (*pr)(" checking object list\n");
4252 TAILQ_FOREACH(tpg, &uobj->memq, listq) {
4253 if (tpg == pg) {
4254 break;
4255 }
4256 }
4257 if (tpg)
4258 (*pr)(" page found on object list\n");
4259 else
4260 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
4261 }
4262 }
4263 }
4264
4265 /* cross-verify page queue */
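	/*
	 * a free page lives on one of the free queues, selected by its
	 * freelist, its bucket color and whether it is known to be
	 * zeroed; otherwise it should be on the active or inactive queue
	 * (or on no queue at all).
	 */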
4266 if (pg->pqflags & PQ_FREE) {
4267 int fl = uvm_page_lookup_freelist(pg);
4268 int color = VM_PGCOLOR_BUCKET(pg);
4269 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
4270 ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
4271 } else if (pg->pqflags & PQ_INACTIVE) {
4272 pgl = &uvm.page_inactive;
4273 } else if (pg->pqflags & PQ_ACTIVE) {
4274 pgl = &uvm.page_active;
4275 } else {
4276 pgl = NULL;
4277 }
4278
4279 if (pgl) {
4280 (*pr)(" checking pageq list\n");
4281 TAILQ_FOREACH(tpg, pgl, pageq) {
4282 if (tpg == pg) {
4283 break;
4284 }
4285 }
4286 if (tpg)
4287 (*pr)(" page found on pageq list\n");
4288 else
4289 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
4290 }
4291 }
4292 #endif
4293