1 1.427 riastrad /* $NetBSD: uvm_map.c,v 1.427 2025/04/27 17:40:55 riastradh Exp $ */ 2 1.1 mrg 3 1.98 chs /* 4 1.1 mrg * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 1.98 chs * Copyright (c) 1991, 1993, The Regents of the University of California. 6 1.1 mrg * 7 1.1 mrg * All rights reserved. 8 1.1 mrg * 9 1.1 mrg * This code is derived from software contributed to Berkeley by 10 1.1 mrg * The Mach Operating System project at Carnegie-Mellon University. 11 1.1 mrg * 12 1.1 mrg * Redistribution and use in source and binary forms, with or without 13 1.1 mrg * modification, are permitted provided that the following conditions 14 1.1 mrg * are met: 15 1.1 mrg * 1. Redistributions of source code must retain the above copyright 16 1.1 mrg * notice, this list of conditions and the following disclaimer. 17 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright 18 1.1 mrg * notice, this list of conditions and the following disclaimer in the 19 1.1 mrg * documentation and/or other materials provided with the distribution. 20 1.295 chuck * 3. Neither the name of the University nor the names of its contributors 21 1.1 mrg * may be used to endorse or promote products derived from this software 22 1.1 mrg * without specific prior written permission. 23 1.1 mrg * 24 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 1.1 mrg * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 1.1 mrg * SUCH DAMAGE. 35 1.1 mrg * 36 1.1 mrg * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 1.3 mrg * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 38 1.1 mrg * 39 1.1 mrg * 40 1.1 mrg * Copyright (c) 1987, 1990 Carnegie-Mellon University. 41 1.1 mrg * All rights reserved. 42 1.98 chs * 43 1.1 mrg * Permission to use, copy, modify and distribute this software and 44 1.1 mrg * its documentation is hereby granted, provided that both the copyright 45 1.1 mrg * notice and this permission notice appear in all copies of the 46 1.1 mrg * software, derivative works or modified versions, and any portions 47 1.1 mrg * thereof, and that both notices appear in supporting documentation. 48 1.98 chs * 49 1.98 chs * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 50 1.98 chs * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 51 1.1 mrg * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
52 1.98 chs * 53 1.1 mrg * Carnegie Mellon requests users of this software to return to 54 1.1 mrg * 55 1.1 mrg * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 56 1.1 mrg * School of Computer Science 57 1.1 mrg * Carnegie Mellon University 58 1.1 mrg * Pittsburgh PA 15213-3890 59 1.1 mrg * 60 1.1 mrg * any improvements or extensions that they make and grant Carnegie the 61 1.1 mrg * rights to redistribute these changes. 62 1.1 mrg */ 63 1.1 mrg 64 1.114 lukem /* 65 1.114 lukem * uvm_map.c: uvm map operations 66 1.114 lukem */ 67 1.114 lukem 68 1.114 lukem #include <sys/cdefs.h> 69 1.427 riastrad __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.427 2025/04/27 17:40:55 riastradh Exp $"); 70 1.114 lukem 71 1.21 jonathan #include "opt_ddb.h" 72 1.344 joerg #include "opt_pax.h" 73 1.6 mrg #include "opt_uvmhist.h" 74 1.169 petrov #include "opt_uvm.h" 75 1.31 tron #include "opt_sysv.h" 76 1.1 mrg 77 1.1 mrg #include <sys/param.h> 78 1.1 mrg #include <sys/systm.h> 79 1.1 mrg #include <sys/mman.h> 80 1.1 mrg #include <sys/proc.h> 81 1.25 thorpej #include <sys/pool.h> 82 1.104 chs #include <sys/kernel.h> 83 1.112 thorpej #include <sys/mount.h> 84 1.344 joerg #include <sys/pax.h> 85 1.109 thorpej #include <sys/vnode.h> 86 1.335 christos #include <sys/filedesc.h> 87 1.244 yamt #include <sys/lockdebug.h> 88 1.248 ad #include <sys/atomic.h> 89 1.335 christos #include <sys/sysctl.h> 90 1.288 drochner #ifndef __USER_VA0_IS_SAFE 91 1.288 drochner #include <sys/kauth.h> 92 1.290 drochner #include "opt_user_va0_disable_default.h" 93 1.288 drochner #endif 94 1.1 mrg 95 1.1 mrg #include <sys/shm.h> 96 1.1 mrg 97 1.1 mrg #include <uvm/uvm.h> 98 1.271 yamt #include <uvm/uvm_readahead.h> 99 1.21 jonathan 100 1.270 pooka #if defined(DDB) || defined(DEBUGPRINT) 101 1.21 jonathan #include <uvm/uvm_ddb.h> 102 1.21 jonathan #endif 103 1.21 jonathan 104 1.318 matt #ifdef UVMHIST 105 1.342 mrg #ifndef UVMHIST_MAPHIST_SIZE 106 1.342 mrg #define UVMHIST_MAPHIST_SIZE 100 107 1.342 mrg #endif 108 1.342 mrg static struct kern_history_ent maphistbuf[UVMHIST_MAPHIST_SIZE]; 109 1.328 matt UVMHIST_DEFINE(maphist) = UVMHIST_INITIALIZER(maphist, maphistbuf); 110 1.318 matt #endif 111 1.318 matt 112 1.258 ad #if !defined(UVMMAP_COUNTERS) 113 1.207 yamt 114 1.207 yamt #define UVMMAP_EVCNT_DEFINE(name) /* nothing */ 115 1.207 yamt #define UVMMAP_EVCNT_INCR(ev) /* nothing */ 116 1.207 yamt #define UVMMAP_EVCNT_DECR(ev) /* nothing */ 117 1.207 yamt 118 1.207 yamt #else /* defined(UVMMAP_NOCOUNTERS) */ 119 1.207 yamt 120 1.228 yamt #include <sys/evcnt.h> 121 1.207 yamt #define UVMMAP_EVCNT_DEFINE(name) \ 122 1.207 yamt struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 123 1.207 yamt "uvmmap", #name); \ 124 1.207 yamt EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name); 125 1.207 yamt #define UVMMAP_EVCNT_INCR(ev) uvmmap_evcnt_##ev.ev_count++ 126 1.207 yamt #define UVMMAP_EVCNT_DECR(ev) uvmmap_evcnt_##ev.ev_count-- 127 1.207 yamt 128 1.207 yamt #endif /* defined(UVMMAP_NOCOUNTERS) */ 129 1.207 yamt 130 1.207 yamt UVMMAP_EVCNT_DEFINE(ubackmerge) 131 1.207 yamt UVMMAP_EVCNT_DEFINE(uforwmerge) 132 1.207 yamt UVMMAP_EVCNT_DEFINE(ubimerge) 133 1.207 yamt UVMMAP_EVCNT_DEFINE(unomerge) 134 1.207 yamt UVMMAP_EVCNT_DEFINE(kbackmerge) 135 1.207 yamt UVMMAP_EVCNT_DEFINE(kforwmerge) 136 1.207 yamt UVMMAP_EVCNT_DEFINE(kbimerge) 137 1.207 yamt UVMMAP_EVCNT_DEFINE(knomerge) 138 1.207 yamt UVMMAP_EVCNT_DEFINE(map_call) 139 1.207 yamt UVMMAP_EVCNT_DEFINE(mlk_call) 140 1.207 yamt UVMMAP_EVCNT_DEFINE(mlk_hint) 141 
1.263 matt UVMMAP_EVCNT_DEFINE(mlk_tree) 142 1.263 matt UVMMAP_EVCNT_DEFINE(mlk_treeloop) 143 1.169 petrov 144 1.87 enami const char vmmapbsy[] = "vmmapbsy"; 145 1.1 mrg 146 1.1 mrg /* 147 1.248 ad * cache for dynamically-allocated map entries. 148 1.26 thorpej */ 149 1.26 thorpej 150 1.248 ad static struct pool_cache uvm_map_entry_cache; 151 1.130 thorpej 152 1.40 thorpej #ifdef PMAP_GROWKERNEL 153 1.40 thorpej /* 154 1.40 thorpej * This global represents the end of the kernel virtual address 155 1.40 thorpej * space. If we want to exceed this, we must grow the kernel 156 1.40 thorpej * virtual address space dynamically. 157 1.40 thorpej * 158 1.40 thorpej * Note, this variable is locked by kernel_map's lock. 159 1.40 thorpej */ 160 1.40 thorpej vaddr_t uvm_maxkaddr; 161 1.40 thorpej #endif 162 1.40 thorpej 163 1.288 drochner #ifndef __USER_VA0_IS_SAFE 164 1.290 drochner #ifndef __USER_VA0_DISABLE_DEFAULT 165 1.290 drochner #define __USER_VA0_DISABLE_DEFAULT 1 166 1.288 drochner #endif 167 1.290 drochner #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */ 168 1.290 drochner #undef __USER_VA0_DISABLE_DEFAULT 169 1.290 drochner #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT 170 1.288 drochner #endif 171 1.341 maxv int user_va0_disable = __USER_VA0_DISABLE_DEFAULT; 172 1.288 drochner #endif 173 1.288 drochner 174 1.25 thorpej /* 175 1.1 mrg * macros 176 1.1 mrg */ 177 1.1 mrg 178 1.1 mrg /* 179 1.365 rin * uvm_map_align_va: round down or up virtual address 180 1.365 rin */ 181 1.365 rin static __inline void 182 1.365 rin uvm_map_align_va(vaddr_t *vap, vsize_t align, int topdown) 183 1.365 rin { 184 1.365 rin 185 1.365 rin KASSERT(powerof2(align)); 186 1.365 rin 187 1.365 rin if (align != 0 && (*vap & (align - 1)) != 0) { 188 1.365 rin if (topdown) 189 1.365 rin *vap = rounddown2(*vap, align); 190 1.365 rin else 191 1.365 rin *vap = roundup2(*vap, align); 192 1.365 rin } 193 1.365 rin } 194 1.365 rin 195 1.365 rin /* 196 1.194 yamt * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging 197 1.194 yamt */ 198 1.311 para extern struct vm_map *pager_map; 199 1.194 yamt 200 1.194 yamt #define UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \ 201 1.194 yamt prot, maxprot, inh, adv, wire) \ 202 1.194 yamt ((ent)->etype == (type) && \ 203 1.311 para (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \ 204 1.194 yamt (ent)->object.uvm_obj == (uobj) && \ 205 1.194 yamt (ent)->protection == (prot) && \ 206 1.194 yamt (ent)->max_protection == (maxprot) && \ 207 1.194 yamt (ent)->inheritance == (inh) && \ 208 1.194 yamt (ent)->advice == (adv) && \ 209 1.194 yamt (ent)->wired_count == (wire)) 210 1.194 yamt 211 1.194 yamt /* 212 1.1 mrg * uvm_map_entry_link: insert entry into a map 213 1.1 mrg * 214 1.1 mrg * => map must be locked 215 1.1 mrg */ 216 1.10 mrg #define uvm_map_entry_link(map, after_where, entry) do { \ 217 1.218 yamt uvm_mapent_check(entry); \ 218 1.10 mrg (map)->nentries++; \ 219 1.10 mrg (entry)->prev = (after_where); \ 220 1.10 mrg (entry)->next = (after_where)->next; \ 221 1.10 mrg (entry)->prev->next = (entry); \ 222 1.10 mrg (entry)->next->prev = (entry); \ 223 1.144 yamt uvm_rb_insert((map), (entry)); \ 224 1.124 perry } while (/*CONSTCOND*/ 0) 225 1.10 mrg 226 1.1 mrg /* 227 1.1 mrg * uvm_map_entry_unlink: remove entry from a map 228 1.1 mrg * 229 1.1 mrg * => map must be locked 230 1.1 mrg */ 231 1.10 mrg #define uvm_map_entry_unlink(map, entry) do { \ 232 1.221 yamt KASSERT((entry) != (map)->first_free); \ 233 1.221 yamt 
KASSERT((entry) != (map)->hint); \ 234 1.218 yamt uvm_mapent_check(entry); \ 235 1.10 mrg (map)->nentries--; \ 236 1.10 mrg (entry)->next->prev = (entry)->prev; \ 237 1.10 mrg (entry)->prev->next = (entry)->next; \ 238 1.144 yamt uvm_rb_remove((map), (entry)); \ 239 1.124 perry } while (/*CONSTCOND*/ 0) 240 1.1 mrg 241 1.1 mrg /* 242 1.1 mrg * SAVE_HINT: saves the specified entry as the hint for future lookups. 243 1.1 mrg * 244 1.248 ad * => map need not be locked. 245 1.1 mrg */ 246 1.248 ad #define SAVE_HINT(map, check, value) do { \ 247 1.258 ad if ((map)->hint == (check)) \ 248 1.258 ad (map)->hint = (value); \ 249 1.124 perry } while (/*CONSTCOND*/ 0) 250 1.1 mrg 251 1.1 mrg /* 252 1.221 yamt * clear_hints: ensure that hints don't point to the entry. 253 1.221 yamt * 254 1.221 yamt * => map must be write-locked. 255 1.221 yamt */ 256 1.221 yamt static void 257 1.221 yamt clear_hints(struct vm_map *map, struct vm_map_entry *ent) 258 1.221 yamt { 259 1.221 yamt 260 1.221 yamt SAVE_HINT(map, ent, ent->prev); 261 1.221 yamt if (map->first_free == ent) { 262 1.221 yamt map->first_free = ent->prev; 263 1.221 yamt } 264 1.221 yamt } 265 1.221 yamt 266 1.221 yamt /* 267 1.1 mrg * VM_MAP_RANGE_CHECK: check and correct range 268 1.1 mrg * 269 1.1 mrg * => map must at least be read locked 270 1.1 mrg */ 271 1.1 mrg 272 1.10 mrg #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 273 1.139 enami if (start < vm_map_min(map)) \ 274 1.139 enami start = vm_map_min(map); \ 275 1.139 enami if (end > vm_map_max(map)) \ 276 1.139 enami end = vm_map_max(map); \ 277 1.139 enami if (start > end) \ 278 1.139 enami start = end; \ 279 1.124 perry } while (/*CONSTCOND*/ 0) 280 1.1 mrg 281 1.1 mrg /* 282 1.1 mrg * local prototypes 283 1.1 mrg */ 284 1.1 mrg 285 1.138 enami static struct vm_map_entry * 286 1.138 enami uvm_mapent_alloc(struct vm_map *, int); 287 1.138 enami static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 288 1.138 enami static void uvm_mapent_free(struct vm_map_entry *); 289 1.218 yamt #if defined(DEBUG) 290 1.384 maxv static void _uvm_mapent_check(const struct vm_map_entry *, int); 291 1.384 maxv #define uvm_mapent_check(map) _uvm_mapent_check(map, __LINE__) 292 1.218 yamt #else /* defined(DEBUG) */ 293 1.218 yamt #define uvm_mapent_check(e) /* nothing */ 294 1.218 yamt #endif /* defined(DEBUG) */ 295 1.219 yamt 296 1.138 enami static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 297 1.138 enami static void uvm_map_reference_amap(struct vm_map_entry *, int); 298 1.140 enami static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int, 299 1.304 matt int, struct vm_map_entry *); 300 1.138 enami static void uvm_map_unreference_amap(struct vm_map_entry *, int); 301 1.1 mrg 302 1.222 yamt int _uvm_map_sanity(struct vm_map *); 303 1.222 yamt int _uvm_tree_sanity(struct vm_map *); 304 1.263 matt static vsize_t uvm_rb_maxgap(const struct vm_map_entry *); 305 1.144 yamt 306 1.417 riastrad /* 307 1.417 riastrad * Tree iteration. We violate the rbtree(9) abstraction for various 308 1.417 riastrad * things here. 
Entries are ascending left to right, so, provided the 309 1.417 riastrad * child entry in question exists: 310 1.417 riastrad * 311 1.417 riastrad * LEFT_ENTRY(entry)->end <= entry->start 312 1.417 riastrad * entry->end <= RIGHT_ENTRY(entry)->start 313 1.417 riastrad */ 314 1.417 riastrad __CTASSERT(offsetof(struct vm_map_entry, rb_node) == 0); 315 1.417 riastrad #define ROOT_ENTRY(map) \ 316 1.417 riastrad ((struct vm_map_entry *)(map)->rb_tree.rbt_root) 317 1.417 riastrad #define LEFT_ENTRY(entry) \ 318 1.417 riastrad ((struct vm_map_entry *)(entry)->rb_node.rb_left) 319 1.417 riastrad #define RIGHT_ENTRY(entry) \ 320 1.417 riastrad ((struct vm_map_entry *)(entry)->rb_node.rb_right) 321 1.417 riastrad #define PARENT_ENTRY(map, entry) \ 322 1.417 riastrad (ROOT_ENTRY(map) == (entry) \ 323 1.293 rmind ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node)) 324 1.263 matt 325 1.336 pgoyette /* 326 1.336 pgoyette * These get filled in if/when SYSVSHM shared memory code is loaded 327 1.336 pgoyette * 328 1.336 pgoyette * We do this with function pointers rather than #ifdef SYSVSHM so the 329 1.336 pgoyette * SYSVSHM code can be loaded and unloaded 330 1.336 pgoyette */ 331 1.336 pgoyette void (*uvm_shmexit)(struct vmspace *) = NULL; 332 1.336 pgoyette void (*uvm_shmfork)(struct vmspace *, struct vmspace *) = NULL; 333 1.336 pgoyette 334 1.263 matt static int 335 1.293 rmind uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey) 336 1.144 yamt { 337 1.293 rmind const struct vm_map_entry *eparent = nparent; 338 1.293 rmind const struct vm_map_entry *ekey = nkey; 339 1.144 yamt 340 1.263 matt KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end); 341 1.263 matt KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end); 342 1.164 junyoung 343 1.293 rmind if (eparent->start < ekey->start) 344 1.263 matt return -1; 345 1.293 rmind if (eparent->end >= ekey->start) 346 1.263 matt return 1; 347 1.263 matt return 0; 348 1.144 yamt } 349 1.144 yamt 350 1.263 matt static int 351 1.293 rmind uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey) 352 1.144 yamt { 353 1.293 rmind const struct vm_map_entry *eparent = nparent; 354 1.263 matt const vaddr_t va = *(const vaddr_t *) vkey; 355 1.144 yamt 356 1.293 rmind if (eparent->start < va) 357 1.263 matt return -1; 358 1.293 rmind if (eparent->end >= va) 359 1.263 matt return 1; 360 1.263 matt return 0; 361 1.144 yamt } 362 1.144 yamt 363 1.293 rmind static const rb_tree_ops_t uvm_map_tree_ops = { 364 1.263 matt .rbto_compare_nodes = uvm_map_compare_nodes, 365 1.263 matt .rbto_compare_key = uvm_map_compare_key, 366 1.293 rmind .rbto_node_offset = offsetof(struct vm_map_entry, rb_node), 367 1.293 rmind .rbto_context = NULL 368 1.263 matt }; 369 1.144 yamt 370 1.293 rmind /* 371 1.293 rmind * uvm_rb_gap: return the gap size between our entry and next entry. 372 1.293 rmind */ 373 1.206 perry static inline vsize_t 374 1.263 matt uvm_rb_gap(const struct vm_map_entry *entry) 375 1.144 yamt { 376 1.293 rmind 377 1.144 yamt KASSERT(entry->next != NULL); 378 1.144 yamt return entry->next->start - entry->end; 379 1.144 yamt } 380 1.144 yamt 381 1.144 yamt static vsize_t 382 1.263 matt uvm_rb_maxgap(const struct vm_map_entry *entry) 383 1.144 yamt { 384 1.263 matt struct vm_map_entry *child; 385 1.263 matt vsize_t maxgap = entry->gap; 386 1.144 yamt 387 1.263 matt /* 388 1.263 matt * We need maxgap to be the largest gap of us or any of our 389 1.263 matt * descendents.
Since each of our children's maxgap is the 390 1.263 matt * cached value of their largest gap of themselves or their 391 1.263 matt * descendents, we can just use that value and avoid recursing 392 1.263 matt * down the tree to calculate it. 393 1.263 matt */ 394 1.263 matt if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 395 1.263 matt maxgap = child->maxgap; 396 1.144 yamt 397 1.263 matt if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 398 1.263 matt maxgap = child->maxgap; 399 1.144 yamt 400 1.263 matt return maxgap; 401 1.144 yamt } 402 1.144 yamt 403 1.263 matt static void 404 1.144 yamt uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 405 1.144 yamt { 406 1.263 matt struct vm_map_entry *parent; 407 1.263 matt 408 1.263 matt KASSERT(entry->gap == uvm_rb_gap(entry)); 409 1.263 matt entry->maxgap = uvm_rb_maxgap(entry); 410 1.263 matt 411 1.263 matt while ((parent = PARENT_ENTRY(map, entry)) != NULL) { 412 1.263 matt struct vm_map_entry *brother; 413 1.263 matt vsize_t maxgap = parent->gap; 414 1.293 rmind unsigned int which; 415 1.263 matt 416 1.263 matt KDASSERT(parent->gap == uvm_rb_gap(parent)); 417 1.263 matt if (maxgap < entry->maxgap) 418 1.263 matt maxgap = entry->maxgap; 419 1.263 matt /* 420 1.293 rmind * Since we work towards the root, we know entry's maxgap 421 1.293 rmind * value is OK, but its brothers may now be out-of-date due 422 1.293 rmind * to rebalancing. So refresh it. 423 1.263 matt */ 424 1.293 rmind which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER; 425 1.293 rmind brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which]; 426 1.263 matt if (brother != NULL) { 427 1.263 matt KDASSERT(brother->gap == uvm_rb_gap(brother)); 428 1.263 matt brother->maxgap = uvm_rb_maxgap(brother); 429 1.263 matt if (maxgap < brother->maxgap) 430 1.263 matt maxgap = brother->maxgap; 431 1.263 matt } 432 1.263 matt 433 1.263 matt parent->maxgap = maxgap; 434 1.263 matt entry = parent; 435 1.263 matt } 436 1.144 yamt } 437 1.144 yamt 438 1.203 thorpej static void 439 1.144 yamt uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 440 1.144 yamt { 441 1.326 martin struct vm_map_entry *ret __diagused; 442 1.293 rmind 443 1.263 matt entry->gap = entry->maxgap = uvm_rb_gap(entry); 444 1.263 matt if (entry->prev != &map->header) 445 1.263 matt entry->prev->gap = uvm_rb_gap(entry->prev); 446 1.144 yamt 447 1.293 rmind ret = rb_tree_insert_node(&map->rb_tree, entry); 448 1.293 rmind KASSERTMSG(ret == entry, 449 1.305 jym "uvm_rb_insert: map %p: duplicate entry %p", map, ret); 450 1.263 matt 451 1.263 matt /* 452 1.263 matt * If the previous entry is not our immediate left child, then it's an 453 1.263 matt * ancestor and will be fixed up on the way to the root. We don't 454 1.263 matt * have to check entry->prev against &map->header since &map->header 455 1.263 matt * will never be in the tree. 456 1.263 matt */ 457 1.263 matt uvm_rb_fixup(map, 458 1.263 matt LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 459 1.144 yamt } 460 1.144 yamt 461 1.203 thorpej static void 462 1.144 yamt uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 463 1.144 yamt { 464 1.263 matt struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 465 1.144 yamt 466 1.263 matt /* 467 1.263 matt * If we are removing an interior node, then an adjacent node will 468 1.263 matt * be used to replace its position in the tree. 
Therefore we will 469 1.263 matt * need to fixup the tree starting at the parent of the replacement 470 1.263 matt * node. So record their parents for later use. 471 1.263 matt */ 472 1.144 yamt if (entry->prev != &map->header) 473 1.263 matt prev_parent = PARENT_ENTRY(map, entry->prev); 474 1.263 matt if (entry->next != &map->header) 475 1.263 matt next_parent = PARENT_ENTRY(map, entry->next); 476 1.263 matt 477 1.293 rmind rb_tree_remove_node(&map->rb_tree, entry); 478 1.263 matt 479 1.263 matt /* 480 1.263 matt * If the previous node has a new parent, fixup the tree starting 481 1.263 matt * at the previous node's old parent. 482 1.263 matt */ 483 1.263 matt if (entry->prev != &map->header) { 484 1.263 matt /* 485 1.263 matt * Update the previous entry's gap due to our absence. 486 1.263 matt */ 487 1.263 matt entry->prev->gap = uvm_rb_gap(entry->prev); 488 1.144 yamt uvm_rb_fixup(map, entry->prev); 489 1.263 matt if (prev_parent != NULL 490 1.263 matt && prev_parent != entry 491 1.263 matt && prev_parent != PARENT_ENTRY(map, entry->prev)) 492 1.263 matt uvm_rb_fixup(map, prev_parent); 493 1.263 matt } 494 1.263 matt 495 1.263 matt /* 496 1.263 matt * If the next node has a new parent, fixup the tree starting 497 1.263 matt * at the next node's old parent. 498 1.263 matt */ 499 1.263 matt if (entry->next != &map->header) { 500 1.263 matt uvm_rb_fixup(map, entry->next); 501 1.263 matt if (next_parent != NULL 502 1.263 matt && next_parent != entry 503 1.263 matt && next_parent != PARENT_ENTRY(map, entry->next)) 504 1.263 matt uvm_rb_fixup(map, next_parent); 505 1.263 matt } 506 1.144 yamt } 507 1.144 yamt 508 1.222 yamt #if defined(DEBUG) 509 1.222 yamt int uvm_debug_check_map = 0; 510 1.159 yamt int uvm_debug_check_rbtree = 0; 511 1.222 yamt #define uvm_map_check(map, name) \ 512 1.222 yamt _uvm_map_check((map), (name), __FILE__, __LINE__) 513 1.222 yamt static void 514 1.222 yamt _uvm_map_check(struct vm_map *map, const char *name, 515 1.222 yamt const char *file, int line) 516 1.222 yamt { 517 1.222 yamt 518 1.222 yamt if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 519 1.222 yamt (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 520 1.222 yamt panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 521 1.222 yamt name, map, file, line); 522 1.222 yamt } 523 1.222 yamt } 524 1.222 yamt #else /* defined(DEBUG) */ 525 1.222 yamt #define uvm_map_check(map, name) /* nothing */ 526 1.222 yamt #endif /* defined(DEBUG) */ 527 1.222 yamt 528 1.222 yamt #if defined(DEBUG) || defined(DDB) 529 1.222 yamt int 530 1.222 yamt _uvm_map_sanity(struct vm_map *map) 531 1.222 yamt { 532 1.234 thorpej bool first_free_found = false; 533 1.234 thorpej bool hint_found = false; 534 1.222 yamt const struct vm_map_entry *e; 535 1.272 yamt struct vm_map_entry *hint = map->hint; 536 1.222 yamt 537 1.340 msaitoh e = &map->header; 538 1.222 yamt for (;;) { 539 1.222 yamt if (map->first_free == e) { 540 1.234 thorpej first_free_found = true; 541 1.222 yamt } else if (!first_free_found && e->next->start > e->end) { 542 1.222 yamt printf("first_free %p should be %p\n", 543 1.222 yamt map->first_free, e); 544 1.222 yamt return -1; 545 1.222 yamt } 546 1.272 yamt if (hint == e) { 547 1.234 thorpej hint_found = true; 548 1.222 yamt } 549 1.222 yamt 550 1.222 yamt e = e->next; 551 1.222 yamt if (e == &map->header) { 552 1.222 yamt break; 553 1.222 yamt } 554 1.222 yamt } 555 1.222 yamt if (!first_free_found) { 556 1.222 yamt printf("stale first_free\n"); 557 1.222 yamt return -1; 558 1.222 yamt } 559 1.222 yamt if 
(!hint_found) { 560 1.222 yamt printf("stale hint\n"); 561 1.222 yamt return -1; 562 1.222 yamt } 563 1.222 yamt return 0; 564 1.222 yamt } 565 1.144 yamt 566 1.144 yamt int 567 1.222 yamt _uvm_tree_sanity(struct vm_map *map) 568 1.144 yamt { 569 1.144 yamt struct vm_map_entry *tmp, *trtmp; 570 1.144 yamt int n = 0, i = 1; 571 1.144 yamt 572 1.263 matt for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 573 1.263 matt if (tmp->gap != uvm_rb_gap(tmp)) { 574 1.334 matt printf("%d/%d gap %#lx != %#lx %s\n", 575 1.222 yamt n + 1, map->nentries, 576 1.263 matt (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 577 1.144 yamt tmp->next == &map->header ? "(last)" : ""); 578 1.144 yamt goto error; 579 1.144 yamt } 580 1.263 matt /* 581 1.263 matt * If any entries are out of order, tmp->gap will be unsigned 582 1.263 matt * and will likely exceed the size of the map. 583 1.263 matt */ 584 1.273 yamt if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) { 585 1.273 yamt printf("too large gap %zu\n", (size_t)tmp->gap); 586 1.273 yamt goto error; 587 1.273 yamt } 588 1.263 matt n++; 589 1.263 matt } 590 1.263 matt 591 1.263 matt if (n != map->nentries) { 592 1.263 matt printf("nentries: %d vs %d\n", n, map->nentries); 593 1.263 matt goto error; 594 1.144 yamt } 595 1.263 matt 596 1.144 yamt trtmp = NULL; 597 1.263 matt for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 598 1.263 matt if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 599 1.334 matt printf("maxgap %#lx != %#lx\n", 600 1.263 matt (ulong)tmp->maxgap, 601 1.263 matt (ulong)uvm_rb_maxgap(tmp)); 602 1.144 yamt goto error; 603 1.144 yamt } 604 1.144 yamt if (trtmp != NULL && trtmp->start >= tmp->start) { 605 1.285 matt printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n", 606 1.222 yamt trtmp->start, tmp->start); 607 1.144 yamt goto error; 608 1.144 yamt } 609 1.144 yamt 610 1.144 yamt trtmp = tmp; 611 1.144 yamt } 612 1.144 yamt 613 1.263 matt for (tmp = map->header.next; tmp != &map->header; 614 1.144 yamt tmp = tmp->next, i++) { 615 1.293 rmind trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT); 616 1.263 matt if (trtmp == NULL) 617 1.263 matt trtmp = &map->header; 618 1.263 matt if (tmp->prev != trtmp) { 619 1.263 matt printf("lookup: %d: %p->prev=%p: %p\n", 620 1.263 matt i, tmp, tmp->prev, trtmp); 621 1.263 matt goto error; 622 1.263 matt } 623 1.293 rmind trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT); 624 1.263 matt if (trtmp == NULL) 625 1.263 matt trtmp = &map->header; 626 1.263 matt if (tmp->next != trtmp) { 627 1.263 matt printf("lookup: %d: %p->next=%p: %p\n", 628 1.263 matt i, tmp, tmp->next, trtmp); 629 1.263 matt goto error; 630 1.263 matt } 631 1.293 rmind trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start); 632 1.144 yamt if (trtmp != tmp) { 633 1.222 yamt printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 634 1.263 matt PARENT_ENTRY(map, tmp)); 635 1.144 yamt goto error; 636 1.144 yamt } 637 1.144 yamt } 638 1.144 yamt 639 1.144 yamt return (0); 640 1.144 yamt error: 641 1.144 yamt return (-1); 642 1.144 yamt } 643 1.222 yamt #endif /* defined(DEBUG) || defined(DDB) */ 644 1.144 yamt 645 1.1 mrg /* 646 1.238 ad * vm_map_lock: acquire an exclusive (write) lock on a map. 647 1.238 ad * 648 1.238 ad * => The locking protocol provides for guaranteed upgrade from shared -> 649 1.238 ad * exclusive by whichever thread currently has the map marked busy. 650 1.238 ad * See "LOCKING PROTOCOL NOTES" in uvm_map.h. 
This is horrible; among 651 1.238 ad * other problems, it defeats any fairness guarantees provided by RW 652 1.238 ad * locks. 653 1.238 ad */ 654 1.238 ad 655 1.238 ad void 656 1.238 ad vm_map_lock(struct vm_map *map) 657 1.238 ad { 658 1.238 ad 659 1.238 ad for (;;) { 660 1.238 ad rw_enter(&map->lock, RW_WRITER); 661 1.314 rmind if (map->busy == NULL || map->busy == curlwp) { 662 1.249 yamt break; 663 1.314 rmind } 664 1.238 ad mutex_enter(&map->misc_lock); 665 1.238 ad rw_exit(&map->lock); 666 1.314 rmind if (map->busy != NULL) { 667 1.248 ad cv_wait(&map->cv, &map->misc_lock); 668 1.314 rmind } 669 1.238 ad mutex_exit(&map->misc_lock); 670 1.238 ad } 671 1.238 ad map->timestamp++; 672 1.238 ad } 673 1.238 ad 674 1.238 ad /* 675 1.238 ad * vm_map_lock_try: try to lock a map, failing if it is already locked. 676 1.238 ad */ 677 1.238 ad 678 1.238 ad bool 679 1.238 ad vm_map_lock_try(struct vm_map *map) 680 1.238 ad { 681 1.238 ad 682 1.314 rmind if (!rw_tryenter(&map->lock, RW_WRITER)) { 683 1.238 ad return false; 684 1.314 rmind } 685 1.238 ad if (map->busy != NULL) { 686 1.238 ad rw_exit(&map->lock); 687 1.238 ad return false; 688 1.238 ad } 689 1.238 ad map->timestamp++; 690 1.238 ad return true; 691 1.238 ad } 692 1.238 ad 693 1.238 ad /* 694 1.238 ad * vm_map_unlock: release an exclusive lock on a map. 695 1.238 ad */ 696 1.238 ad 697 1.238 ad void 698 1.238 ad vm_map_unlock(struct vm_map *map) 699 1.238 ad { 700 1.238 ad 701 1.314 rmind KASSERT(rw_write_held(&map->lock)); 702 1.314 rmind KASSERT(map->busy == NULL || map->busy == curlwp); 703 1.314 rmind rw_exit(&map->lock); 704 1.238 ad } 705 1.238 ad 706 1.238 ad /* 707 1.238 ad * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 708 1.238 ad * want an exclusive lock. 709 1.238 ad */ 710 1.238 ad 711 1.238 ad void 712 1.238 ad vm_map_unbusy(struct vm_map *map) 713 1.238 ad { 714 1.238 ad 715 1.238 ad KASSERT(map->busy == curlwp); 716 1.238 ad 717 1.238 ad /* 718 1.238 ad * Safe to clear 'busy' and 'waiters' with only a read lock held: 719 1.238 ad * 720 1.238 ad * o they can only be set with a write lock held 721 1.238 ad * o writers are blocked out with a read or write hold 722 1.238 ad * o at any time, only one thread owns the set of values 723 1.238 ad */ 724 1.248 ad mutex_enter(&map->misc_lock); 725 1.238 ad map->busy = NULL; 726 1.238 ad cv_broadcast(&map->cv); 727 1.238 ad mutex_exit(&map->misc_lock); 728 1.238 ad } 729 1.238 ad 730 1.238 ad /* 731 1.248 ad * vm_map_lock_read: acquire a shared (read) lock on a map. 732 1.248 ad */ 733 1.248 ad 734 1.248 ad void 735 1.248 ad vm_map_lock_read(struct vm_map *map) 736 1.248 ad { 737 1.248 ad 738 1.248 ad rw_enter(&map->lock, RW_READER); 739 1.248 ad } 740 1.248 ad 741 1.248 ad /* 742 1.248 ad * vm_map_unlock_read: release a shared lock on a map. 743 1.248 ad */ 744 1.314 rmind 745 1.248 ad void 746 1.248 ad vm_map_unlock_read(struct vm_map *map) 747 1.248 ad { 748 1.248 ad 749 1.248 ad rw_exit(&map->lock); 750 1.248 ad } 751 1.248 ad 752 1.248 ad /* 753 1.248 ad * vm_map_busy: mark a map as busy. 754 1.248 ad * 755 1.248 ad * => the caller must hold the map write locked 756 1.248 ad */ 757 1.248 ad 758 1.248 ad void 759 1.248 ad vm_map_busy(struct vm_map *map) 760 1.248 ad { 761 1.248 ad 762 1.248 ad KASSERT(rw_write_held(&map->lock)); 763 1.248 ad KASSERT(map->busy == NULL); 764 1.248 ad 765 1.248 ad map->busy = curlwp; 766 1.248 ad } 767 1.248 ad 768 1.248 ad /* 769 1.248 ad * vm_map_locked_p: return true if the map is write locked. 
770 1.269 yamt * 771 1.269 yamt * => only for debug purposes like KASSERTs. 772 1.269 yamt * => should not be used to verify that a map is not locked. 773 1.248 ad */ 774 1.248 ad 775 1.248 ad bool 776 1.248 ad vm_map_locked_p(struct vm_map *map) 777 1.248 ad { 778 1.248 ad 779 1.314 rmind return rw_write_held(&map->lock); 780 1.248 ad } 781 1.248 ad 782 1.248 ad /* 783 1.1 mrg * uvm_mapent_alloc: allocate a map entry 784 1.1 mrg */ 785 1.1 mrg 786 1.203 thorpej static struct vm_map_entry * 787 1.138 enami uvm_mapent_alloc(struct vm_map *map, int flags) 788 1.10 mrg { 789 1.99 chs struct vm_map_entry *me; 790 1.127 thorpej int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 791 1.385 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 792 1.1 mrg 793 1.311 para me = pool_cache_get(&uvm_map_entry_cache, pflags); 794 1.314 rmind if (__predict_false(me == NULL)) { 795 1.311 para return NULL; 796 1.314 rmind } 797 1.311 para me->flags = 0; 798 1.311 para 799 1.353 pgoyette UVMHIST_LOG(maphist, "<- new entry=%#jx [kentry=%jd]", (uintptr_t)me, 800 1.314 rmind (map == kernel_map), 0, 0); 801 1.314 rmind return me; 802 1.1 mrg } 803 1.1 mrg 804 1.1 mrg /* 805 1.1 mrg * uvm_mapent_free: free map entry 806 1.1 mrg */ 807 1.1 mrg 808 1.203 thorpej static void 809 1.138 enami uvm_mapent_free(struct vm_map_entry *me) 810 1.1 mrg { 811 1.385 skrll UVMHIST_FUNC(__func__); 812 1.386 skrll UVMHIST_CALLARGS(maphist,"<- freeing map entry=%#jx [flags=%#jx]", 813 1.353 pgoyette (uintptr_t)me, me->flags, 0, 0); 814 1.311 para pool_cache_put(&uvm_map_entry_cache, me); 815 1.1 mrg } 816 1.1 mrg 817 1.1 mrg /* 818 1.1 mrg * uvm_mapent_copy: copy a map entry, preserving flags 819 1.1 mrg */ 820 1.1 mrg 821 1.206 perry static inline void 822 1.138 enami uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 823 1.10 mrg { 824 1.139 enami 825 1.373 ad memcpy(dst, src, sizeof(*dst)); 826 1.373 ad dst->flags = 0; 827 1.1 mrg } 828 1.1 mrg 829 1.218 yamt #if defined(DEBUG) 830 1.218 yamt static void 831 1.384 maxv _uvm_mapent_check(const struct vm_map_entry *entry, int line) 832 1.218 yamt { 833 1.218 yamt 834 1.218 yamt if (entry->start >= entry->end) { 835 1.218 yamt goto bad; 836 1.218 yamt } 837 1.218 yamt if (UVM_ET_ISOBJ(entry)) { 838 1.218 yamt if (entry->object.uvm_obj == NULL) { 839 1.218 yamt goto bad; 840 1.218 yamt } 841 1.218 yamt } else if (UVM_ET_ISSUBMAP(entry)) { 842 1.218 yamt if (entry->object.sub_map == NULL) { 843 1.218 yamt goto bad; 844 1.218 yamt } 845 1.218 yamt } else { 846 1.218 yamt if (entry->object.uvm_obj != NULL || 847 1.218 yamt entry->object.sub_map != NULL) { 848 1.218 yamt goto bad; 849 1.218 yamt } 850 1.218 yamt } 851 1.218 yamt if (!UVM_ET_ISOBJ(entry)) { 852 1.218 yamt if (entry->offset != 0) { 853 1.218 yamt goto bad; 854 1.218 yamt } 855 1.218 yamt } 856 1.218 yamt 857 1.218 yamt return; 858 1.218 yamt 859 1.218 yamt bad: 860 1.384 maxv panic("%s: bad entry %p, line %d", __func__, entry, line); 861 1.218 yamt } 862 1.218 yamt #endif /* defined(DEBUG) */ 863 1.218 yamt 864 1.1 mrg /* 865 1.1 mrg * uvm_map_entry_unwire: unwire a map entry 866 1.1 mrg * 867 1.1 mrg * => map should be locked by caller 868 1.1 mrg */ 869 1.1 mrg 870 1.206 perry static inline void 871 1.138 enami uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) 872 1.10 mrg { 873 1.139 enami 874 1.10 mrg entry->wired_count = 0; 875 1.57 thorpej uvm_fault_unwire_locked(map, entry->start, entry->end); 876 1.1 mrg } 877 1.1 mrg 878 1.85 chs 879 1.85 chs /* 880 1.85 chs * 
wrapper for calling amap_ref() 881 1.85 chs */ 882 1.206 perry static inline void 883 1.138 enami uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 884 1.85 chs { 885 1.139 enami 886 1.99 chs amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 887 1.139 enami (entry->end - entry->start) >> PAGE_SHIFT, flags); 888 1.85 chs } 889 1.85 chs 890 1.85 chs 891 1.85 chs /* 892 1.98 chs * wrapper for calling amap_unref() 893 1.85 chs */ 894 1.206 perry static inline void 895 1.138 enami uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 896 1.85 chs { 897 1.139 enami 898 1.99 chs amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 899 1.139 enami (entry->end - entry->start) >> PAGE_SHIFT, flags); 900 1.85 chs } 901 1.85 chs 902 1.85 chs 903 1.1 mrg /* 904 1.248 ad * uvm_map_init: init mapping system at boot time. 905 1.1 mrg */ 906 1.1 mrg 907 1.10 mrg void 908 1.138 enami uvm_map_init(void) 909 1.1 mrg { 910 1.10 mrg /* 911 1.10 mrg * first, init logging system. 912 1.10 mrg */ 913 1.1 mrg 914 1.385 skrll UVMHIST_FUNC(__func__); 915 1.328 matt UVMHIST_LINK_STATIC(maphist); 916 1.387 mrg UVMHIST_LINK_STATIC(pdhist); 917 1.10 mrg UVMHIST_CALLED(maphist); 918 1.10 mrg UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 919 1.10 mrg 920 1.10 mrg /* 921 1.174 yamt * initialize the global lock for kernel map entry. 922 1.10 mrg */ 923 1.10 mrg 924 1.238 ad mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 925 1.311 para } 926 1.248 ad 927 1.311 para /* 928 1.311 para * uvm_map_init_caches: init mapping system caches. 929 1.311 para */ 930 1.311 para void 931 1.311 para uvm_map_init_caches(void) 932 1.312 rmind { 933 1.248 ad /* 934 1.248 ad * initialize caches. 935 1.248 ad */ 936 1.248 ad 937 1.248 ad pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 938 1.377 ad coherency_unit, 0, PR_LARGECACHE, "vmmpepl", NULL, IPL_NONE, NULL, 939 1.377 ad NULL, NULL); 940 1.1 mrg } 941 1.1 mrg 942 1.1 mrg /* 943 1.1 mrg * clippers 944 1.1 mrg */ 945 1.1 mrg 946 1.1 mrg /* 947 1.218 yamt * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 948 1.218 yamt */ 949 1.218 yamt 950 1.218 yamt static void 951 1.218 yamt uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 952 1.218 yamt vaddr_t splitat) 953 1.218 yamt { 954 1.218 yamt vaddr_t adj; 955 1.218 yamt 956 1.218 yamt KASSERT(entry1->start < splitat); 957 1.218 yamt KASSERT(splitat < entry1->end); 958 1.218 yamt 959 1.218 yamt adj = splitat - entry1->start; 960 1.218 yamt entry1->end = entry2->start = splitat; 961 1.218 yamt 962 1.218 yamt if (entry1->aref.ar_amap) { 963 1.218 yamt amap_splitref(&entry1->aref, &entry2->aref, adj); 964 1.218 yamt } 965 1.218 yamt if (UVM_ET_ISSUBMAP(entry1)) { 966 1.218 yamt /* ... unlikely to happen, but play it safe */ 967 1.218 yamt uvm_map_reference(entry1->object.sub_map); 968 1.218 yamt } else if (UVM_ET_ISOBJ(entry1)) { 969 1.218 yamt KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */ 970 1.218 yamt entry2->offset += adj; 971 1.218 yamt if (entry1->object.uvm_obj->pgops && 972 1.218 yamt entry1->object.uvm_obj->pgops->pgo_reference) 973 1.218 yamt entry1->object.uvm_obj->pgops->pgo_reference( 974 1.218 yamt entry1->object.uvm_obj); 975 1.218 yamt } 976 1.218 yamt } 977 1.218 yamt 978 1.218 yamt /* 979 1.1 mrg * uvm_map_clip_start: ensure that the entry begins at or after 980 1.1 mrg * the starting address, if it doesn't we split the entry. 
981 1.98 chs * 982 1.1 mrg * => caller should use UVM_MAP_CLIP_START macro rather than calling 983 1.1 mrg * this directly 984 1.1 mrg * => map must be locked by caller 985 1.1 mrg */ 986 1.1 mrg 987 1.99 chs void 988 1.138 enami uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 989 1.311 para vaddr_t start) 990 1.1 mrg { 991 1.99 chs struct vm_map_entry *new_entry; 992 1.1 mrg 993 1.1 mrg /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 994 1.1 mrg 995 1.222 yamt uvm_map_check(map, "clip_start entry"); 996 1.218 yamt uvm_mapent_check(entry); 997 1.144 yamt 998 1.10 mrg /* 999 1.10 mrg * Split off the front portion. note that we must insert the new 1000 1.10 mrg * entry BEFORE this one, so that this entry has the specified 1001 1.1 mrg * starting address. 1002 1.10 mrg */ 1003 1.311 para new_entry = uvm_mapent_alloc(map, 0); 1004 1.1 mrg uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 1005 1.218 yamt uvm_mapent_splitadj(new_entry, entry, start); 1006 1.10 mrg uvm_map_entry_link(map, entry->prev, new_entry); 1007 1.85 chs 1008 1.222 yamt uvm_map_check(map, "clip_start leave"); 1009 1.1 mrg } 1010 1.1 mrg 1011 1.1 mrg /* 1012 1.1 mrg * uvm_map_clip_end: ensure that the entry ends at or before 1013 1.1 mrg * the ending address, if it doesn't we split the entry 1014 1.98 chs * 1015 1.1 mrg * => caller should use UVM_MAP_CLIP_END macro rather than calling 1016 1.1 mrg * this directly 1017 1.1 mrg * => map must be locked by caller 1018 1.1 mrg */ 1019 1.1 mrg 1020 1.10 mrg void 1021 1.311 para uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) 1022 1.1 mrg { 1023 1.218 yamt struct vm_map_entry *new_entry; 1024 1.1 mrg 1025 1.222 yamt uvm_map_check(map, "clip_end entry"); 1026 1.218 yamt uvm_mapent_check(entry); 1027 1.174 yamt 1028 1.1 mrg /* 1029 1.1 mrg * Create a new entry and insert it 1030 1.1 mrg * AFTER the specified entry 1031 1.1 mrg */ 1032 1.311 para new_entry = uvm_mapent_alloc(map, 0); 1033 1.1 mrg uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 1034 1.218 yamt uvm_mapent_splitadj(entry, new_entry, end); 1035 1.1 mrg uvm_map_entry_link(map, entry, new_entry); 1036 1.1 mrg 1037 1.222 yamt uvm_map_check(map, "clip_end leave"); 1038 1.1 mrg } 1039 1.1 mrg 1040 1.1 mrg /* 1041 1.1 mrg * M A P - m a i n e n t r y p o i n t 1042 1.1 mrg */ 1043 1.1 mrg /* 1044 1.1 mrg * uvm_map: establish a valid mapping in a map 1045 1.1 mrg * 1046 1.1 mrg * => assume startp is page aligned. 1047 1.1 mrg * => assume size is a multiple of PAGE_SIZE. 1048 1.1 mrg * => assume sys_mmap provides enough of a "hint" to have us skip 1049 1.1 mrg * over text/data/bss area. 1050 1.1 mrg * => map must be unlocked (we will lock it) 1051 1.1 mrg * => <uobj,uoffset> value meanings (4 cases): 1052 1.139 enami * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1053 1.1 mrg * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1054 1.1 mrg * [3] <uobj,uoffset> == normal mapping 1055 1.1 mrg * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1056 1.98 chs * 1057 1.1 mrg * case [4] is for kernel mappings where we don't know the offset until 1058 1.8 chuck * we've found a virtual address. note that kernel object offsets are 1059 1.8 chuck * always relative to vm_map_min(kernel_map). 1060 1.81 thorpej * 1061 1.165 yamt * => if `align' is non-zero, we align the virtual address to the specified 1062 1.165 yamt * alignment. 1063 1.165 yamt * this is provided as a mechanism for large pages.
1064 1.81 thorpej * 1065 1.1 mrg * => XXXCDC: need way to map in external amap? 1066 1.1 mrg */ 1067 1.1 mrg 1068 1.10 mrg int 1069 1.138 enami uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size, 1070 1.138 enami struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 1071 1.10 mrg { 1072 1.174 yamt struct uvm_map_args args; 1073 1.174 yamt struct vm_map_entry *new_entry; 1074 1.174 yamt int error; 1075 1.174 yamt 1076 1.187 yamt KASSERT((size & PAGE_MASK) == 0); 1077 1.365 rin KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1078 1.174 yamt 1079 1.174 yamt /* 1080 1.174 yamt * for pager_map, allocate the new entry first to avoid sleeping 1081 1.174 yamt * for memory while we have the map locked. 1082 1.174 yamt */ 1083 1.174 yamt 1084 1.174 yamt new_entry = NULL; 1085 1.311 para if (map == pager_map) { 1086 1.174 yamt new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1087 1.174 yamt if (__predict_false(new_entry == NULL)) 1088 1.174 yamt return ENOMEM; 1089 1.174 yamt } 1090 1.174 yamt if (map == pager_map) 1091 1.174 yamt flags |= UVM_FLAG_NOMERGE; 1092 1.174 yamt 1093 1.174 yamt error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1094 1.174 yamt flags, &args); 1095 1.174 yamt if (!error) { 1096 1.174 yamt error = uvm_map_enter(map, &args, new_entry); 1097 1.174 yamt *startp = args.uma_start; 1098 1.189 yamt } else if (new_entry) { 1099 1.189 yamt uvm_mapent_free(new_entry); 1100 1.174 yamt } 1101 1.174 yamt 1102 1.187 yamt #if defined(DEBUG) 1103 1.333 christos if (!error && VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) { 1104 1.264 ad uvm_km_check_empty(map, *startp, *startp + size); 1105 1.187 yamt } 1106 1.187 yamt #endif /* defined(DEBUG) */ 1107 1.187 yamt 1108 1.174 yamt return error; 1109 1.174 yamt } 1110 1.174 yamt 1111 1.307 yamt /* 1112 1.307 yamt * uvm_map_prepare: 1113 1.307 yamt * 1114 1.307 yamt * called with map unlocked. 1115 1.307 yamt * on success, returns the map locked. 1116 1.307 yamt */ 1117 1.307 yamt 1118 1.174 yamt int 1119 1.174 yamt uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1120 1.174 yamt struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1121 1.174 yamt struct uvm_map_args *args) 1122 1.174 yamt { 1123 1.174 yamt struct vm_map_entry *prev_entry; 1124 1.174 yamt vm_prot_t prot = UVM_PROTECTION(flags); 1125 1.174 yamt vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1126 1.174 yamt 1127 1.385 skrll UVMHIST_FUNC(__func__); 1128 1.391 skrll UVMHIST_CALLARGS(maphist, "(map=%#jx, start=%#jx, size=%jx, flags=%#jx)", 1129 1.353 pgoyette (uintptr_t)map, start, size, flags); 1130 1.353 pgoyette UVMHIST_LOG(maphist, " uobj/offset %#jx/%jd", (uintptr_t)uobj, 1131 1.353 pgoyette uoffset,0,0); 1132 1.107 chs 1133 1.107 chs /* 1134 1.107 chs * detect a popular device driver bug. 1135 1.107 chs */ 1136 1.107 chs 1137 1.314 rmind KASSERT(doing_shutdown || curlwp != NULL); 1138 1.1 mrg 1139 1.10 mrg /* 1140 1.144 yamt * zero-sized mapping doesn't make any sense. 1141 1.144 yamt */ 1142 1.144 yamt KASSERT(size > 0); 1143 1.144 yamt 1144 1.180 yamt KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1145 1.180 yamt 1146 1.222 yamt uvm_map_check(map, "map entry"); 1147 1.144 yamt 1148 1.144 yamt /* 1149 1.106 chs * check sanity of protection code 1150 1.10 mrg */ 1151 1.1 mrg 1152 1.10 mrg if ((prot & maxprot) != prot) { 1153 1.353 pgoyette UVMHIST_LOG(maphist, "<- prot. 
failure: prot=%#jx, max=%#jx", 1154 1.10 mrg prot, maxprot,0,0); 1155 1.94 chs return EACCES; 1156 1.10 mrg } 1157 1.1 mrg 1158 1.10 mrg /* 1159 1.106 chs * figure out where to put new VM range 1160 1.10 mrg */ 1161 1.180 yamt retry: 1162 1.234 thorpej if (vm_map_lock_try(map) == false) { 1163 1.314 rmind if ((flags & UVM_FLAG_TRYLOCK) != 0) { 1164 1.94 chs return EAGAIN; 1165 1.106 chs } 1166 1.10 mrg vm_map_lock(map); /* could sleep here */ 1167 1.10 mrg } 1168 1.349 chs if (flags & UVM_FLAG_UNMAP) { 1169 1.349 chs KASSERT(flags & UVM_FLAG_FIXED); 1170 1.347 chs KASSERT((flags & UVM_FLAG_NOWAIT) == 0); 1171 1.347 chs 1172 1.347 chs /* 1173 1.347 chs * Set prev_entry to what it will need to be after any existing 1174 1.347 chs * entries are removed later in uvm_map_enter(). 1175 1.347 chs */ 1176 1.347 chs 1177 1.347 chs if (uvm_map_lookup_entry(map, start, &prev_entry)) { 1178 1.347 chs if (start == prev_entry->start) 1179 1.347 chs prev_entry = prev_entry->prev; 1180 1.347 chs else 1181 1.347 chs UVM_MAP_CLIP_END(map, prev_entry, start); 1182 1.347 chs SAVE_HINT(map, map->hint, prev_entry); 1183 1.347 chs } 1184 1.347 chs } else { 1185 1.347 chs prev_entry = uvm_map_findspace(map, start, size, &start, 1186 1.347 chs uobj, uoffset, align, flags); 1187 1.347 chs } 1188 1.226 yamt if (prev_entry == NULL) { 1189 1.180 yamt unsigned int timestamp; 1190 1.180 yamt 1191 1.180 yamt timestamp = map->timestamp; 1192 1.353 pgoyette UVMHIST_LOG(maphist,"waiting va timestamp=%#jx", 1193 1.180 yamt timestamp,0,0,0); 1194 1.180 yamt map->flags |= VM_MAP_WANTVA; 1195 1.10 mrg vm_map_unlock(map); 1196 1.180 yamt 1197 1.180 yamt /* 1198 1.226 yamt * try to reclaim kva and wait until someone does unmap. 1199 1.238 ad * fragile locking here, so we awaken every second to 1200 1.238 ad * recheck the condition. 1201 1.180 yamt */ 1202 1.180 yamt 1203 1.238 ad mutex_enter(&map->misc_lock); 1204 1.180 yamt while ((map->flags & VM_MAP_WANTVA) != 0 && 1205 1.180 yamt map->timestamp == timestamp) { 1206 1.226 yamt if ((flags & UVM_FLAG_WAITVA) == 0) { 1207 1.238 ad mutex_exit(&map->misc_lock); 1208 1.226 yamt UVMHIST_LOG(maphist, 1209 1.226 yamt "<- uvm_map_findspace failed!", 0,0,0,0); 1210 1.226 yamt return ENOMEM; 1211 1.226 yamt } else { 1212 1.238 ad cv_timedwait(&map->cv, &map->misc_lock, hz); 1213 1.226 yamt } 1214 1.180 yamt } 1215 1.238 ad mutex_exit(&map->misc_lock); 1216 1.180 yamt goto retry; 1217 1.10 mrg } 1218 1.1 mrg 1219 1.40 thorpej #ifdef PMAP_GROWKERNEL 1220 1.152 simonb /* 1221 1.152 simonb * If the kernel pmap can't map the requested space, 1222 1.152 simonb * then allocate more resources for it. 1223 1.152 simonb */ 1224 1.229 yamt if (map == kernel_map && uvm_maxkaddr < (start + size)) 1225 1.229 yamt uvm_maxkaddr = pmap_growkernel(start + size); 1226 1.10 mrg #endif 1227 1.10 mrg 1228 1.207 yamt UVMMAP_EVCNT_INCR(map_call); 1229 1.10 mrg 1230 1.10 mrg /* 1231 1.10 mrg * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1232 1.98 chs * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1233 1.98 chs * either case we want to zero it before storing it in the map entry 1234 1.10 mrg * (because it looks strange and confusing when debugging...) 1235 1.98 chs * 1236 1.98 chs * if uobj is not null 1237 1.10 mrg * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1238 1.10 mrg * and we do not need to change uoffset. 
1239 1.10 mrg * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1240 1.10 mrg * now (based on the starting address of the map). this case is 1241 1.10 mrg * for kernel object mappings where we don't know the offset until 1242 1.10 mrg * the virtual address is found (with uvm_map_findspace). the 1243 1.10 mrg * offset is the distance we are from the start of the map. 1244 1.10 mrg */ 1245 1.10 mrg 1246 1.10 mrg if (uobj == NULL) { 1247 1.10 mrg uoffset = 0; 1248 1.10 mrg } else { 1249 1.10 mrg if (uoffset == UVM_UNKNOWN_OFFSET) { 1250 1.85 chs KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1251 1.174 yamt uoffset = start - vm_map_min(kernel_map); 1252 1.10 mrg } 1253 1.10 mrg } 1254 1.10 mrg 1255 1.174 yamt args->uma_flags = flags; 1256 1.174 yamt args->uma_prev = prev_entry; 1257 1.174 yamt args->uma_start = start; 1258 1.174 yamt args->uma_size = size; 1259 1.174 yamt args->uma_uobj = uobj; 1260 1.174 yamt args->uma_uoffset = uoffset; 1261 1.174 yamt 1262 1.276 matt UVMHIST_LOG(maphist, "<- done!", 0,0,0,0); 1263 1.174 yamt return 0; 1264 1.174 yamt } 1265 1.174 yamt 1266 1.307 yamt /* 1267 1.307 yamt * uvm_map_enter: 1268 1.307 yamt * 1269 1.307 yamt * called with map locked. 1270 1.307 yamt * unlock the map before returning. 1271 1.307 yamt */ 1272 1.307 yamt 1273 1.174 yamt int 1274 1.174 yamt uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1275 1.174 yamt struct vm_map_entry *new_entry) 1276 1.174 yamt { 1277 1.174 yamt struct vm_map_entry *prev_entry = args->uma_prev; 1278 1.347 chs struct vm_map_entry *dead = NULL, *dead_entries = NULL; 1279 1.174 yamt 1280 1.174 yamt const uvm_flag_t flags = args->uma_flags; 1281 1.174 yamt const vm_prot_t prot = UVM_PROTECTION(flags); 1282 1.174 yamt const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1283 1.174 yamt const vm_inherit_t inherit = UVM_INHERIT(flags); 1284 1.174 yamt const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1285 1.174 yamt AMAP_EXTEND_NOWAIT : 0; 1286 1.174 yamt const int advice = UVM_ADVICE(flags); 1287 1.174 yamt 1288 1.174 yamt vaddr_t start = args->uma_start; 1289 1.174 yamt vsize_t size = args->uma_size; 1290 1.174 yamt struct uvm_object *uobj = args->uma_uobj; 1291 1.174 yamt voff_t uoffset = args->uma_uoffset; 1292 1.174 yamt 1293 1.174 yamt const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1294 1.174 yamt int merged = 0; 1295 1.174 yamt int error; 1296 1.176 yamt int newetype; 1297 1.174 yamt 1298 1.385 skrll UVMHIST_FUNC(__func__); 1299 1.385 skrll UVMHIST_CALLARGS(maphist, "(map=%#jx, start=%#jx, size=%ju, flags=%#jx)", 1300 1.353 pgoyette (uintptr_t)map, start, size, flags); 1301 1.353 pgoyette UVMHIST_LOG(maphist, " uobj/offset %#jx/%jd", (uintptr_t)uobj, 1302 1.353 pgoyette uoffset,0,0); 1303 1.174 yamt 1304 1.221 yamt KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1305 1.307 yamt KASSERT(vm_map_locked_p(map)); 1306 1.349 chs KASSERT((flags & (UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP)) != 1307 1.349 chs (UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP)); 1308 1.221 yamt 1309 1.176 yamt if (uobj) 1310 1.176 yamt newetype = UVM_ET_OBJ; 1311 1.176 yamt else 1312 1.176 yamt newetype = 0; 1313 1.176 yamt 1314 1.176 yamt if (flags & UVM_FLAG_COPYONW) { 1315 1.176 yamt newetype |= UVM_ET_COPYONWRITE; 1316 1.176 yamt if ((flags & UVM_FLAG_OVERLAY) == 0) 1317 1.176 yamt newetype |= UVM_ET_NEEDSCOPY; 1318 1.176 yamt } 1319 1.176 yamt 1320 1.10 mrg /* 1321 1.349 chs * For mappings with unmap, remove any old entries now. 
Adding the new 1322 1.347 chs * entry cannot fail because that can only happen if UVM_FLAG_NOWAIT 1323 1.349 chs * is set, and we do not support nowait and unmap together. 1324 1.347 chs */ 1325 1.347 chs 1326 1.349 chs if (flags & UVM_FLAG_UNMAP) { 1327 1.349 chs KASSERT(flags & UVM_FLAG_FIXED); 1328 1.347 chs uvm_unmap_remove(map, start, start + size, &dead_entries, 0); 1329 1.347 chs #ifdef DEBUG 1330 1.350 christos struct vm_map_entry *tmp_entry __diagused; 1331 1.350 christos bool rv __diagused; 1332 1.347 chs 1333 1.347 chs rv = uvm_map_lookup_entry(map, start, &tmp_entry); 1334 1.347 chs KASSERT(!rv); 1335 1.347 chs KASSERTMSG(prev_entry == tmp_entry, 1336 1.347 chs "args %p prev_entry %p tmp_entry %p", 1337 1.347 chs args, prev_entry, tmp_entry); 1338 1.347 chs #endif 1339 1.347 chs SAVE_HINT(map, map->hint, prev_entry); 1340 1.347 chs } 1341 1.347 chs 1342 1.347 chs /* 1343 1.106 chs * try and insert in map by extending previous entry, if possible. 1344 1.10 mrg * XXX: we don't try and pull back the next entry. might be useful 1345 1.10 mrg * for a stack, but we are currently allocating our stack in advance. 1346 1.10 mrg */ 1347 1.10 mrg 1348 1.121 atatat if (flags & UVM_FLAG_NOMERGE) 1349 1.121 atatat goto nomerge; 1350 1.121 atatat 1351 1.194 yamt if (prev_entry->end == start && 1352 1.121 atatat prev_entry != &map->header && 1353 1.312 rmind UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0, 1354 1.194 yamt prot, maxprot, inherit, advice, 0)) { 1355 1.161 matt 1356 1.10 mrg if (uobj && prev_entry->offset + 1357 1.10 mrg (prev_entry->end - prev_entry->start) != uoffset) 1358 1.121 atatat goto forwardmerge; 1359 1.10 mrg 1360 1.10 mrg /* 1361 1.98 chs * can't extend a shared amap. note: no need to lock amap to 1362 1.34 chuck * look at refs since we don't care about its exact value. 1363 1.10 mrg * if it is one (i.e. we have only reference) it will stay there 1364 1.10 mrg */ 1365 1.85 chs 1366 1.10 mrg if (prev_entry->aref.ar_amap && 1367 1.34 chuck amap_refs(prev_entry->aref.ar_amap) != 1) { 1368 1.121 atatat goto forwardmerge; 1369 1.10 mrg } 1370 1.85 chs 1371 1.119 chs if (prev_entry->aref.ar_amap) { 1372 1.139 enami error = amap_extend(prev_entry, size, 1373 1.126 bouyer amapwaitflag | AMAP_EXTEND_FORWARDS); 1374 1.174 yamt if (error) 1375 1.191 yamt goto nomerge; 1376 1.119 chs } 1377 1.10 mrg 1378 1.258 ad if (kmap) { 1379 1.207 yamt UVMMAP_EVCNT_INCR(kbackmerge); 1380 1.258 ad } else { 1381 1.207 yamt UVMMAP_EVCNT_INCR(ubackmerge); 1382 1.258 ad } 1383 1.10 mrg UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1384 1.10 mrg 1385 1.10 mrg /* 1386 1.10 mrg * drop our reference to uobj since we are extending a reference 1387 1.10 mrg * that we already have (the ref count can not drop to zero). 1388 1.10 mrg */ 1389 1.119 chs 1390 1.10 mrg if (uobj && uobj->pgops->pgo_detach) 1391 1.10 mrg uobj->pgops->pgo_detach(uobj); 1392 1.10 mrg 1393 1.263 matt /* 1394 1.263 matt * Now that we've merged the entries, note that we've grown 1395 1.263 matt * and our gap has shrunk. Then fix the tree. 
1396 1.263 matt */ 1397 1.10 mrg prev_entry->end += size; 1398 1.263 matt prev_entry->gap -= size; 1399 1.145 yamt uvm_rb_fixup(map, prev_entry); 1400 1.145 yamt 1401 1.222 yamt uvm_map_check(map, "map backmerged"); 1402 1.10 mrg 1403 1.10 mrg UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1404 1.121 atatat merged++; 1405 1.106 chs } 1406 1.10 mrg 1407 1.121 atatat forwardmerge: 1408 1.194 yamt if (prev_entry->next->start == (start + size) && 1409 1.121 atatat prev_entry->next != &map->header && 1410 1.312 rmind UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0, 1411 1.194 yamt prot, maxprot, inherit, advice, 0)) { 1412 1.161 matt 1413 1.121 atatat if (uobj && prev_entry->next->offset != uoffset + size) 1414 1.121 atatat goto nomerge; 1415 1.121 atatat 1416 1.121 atatat /* 1417 1.121 atatat * can't extend a shared amap. note: no need to lock amap to 1418 1.121 atatat * look at refs since we don't care about its exact value. 1419 1.122 atatat * if it is one (i.e. we have only reference) it will stay there. 1420 1.122 atatat * 1421 1.122 atatat * note that we also can't merge two amaps, so if we 1422 1.122 atatat * merged with the previous entry which has an amap, 1423 1.122 atatat * and the next entry also has an amap, we give up. 1424 1.122 atatat * 1425 1.125 atatat * Interesting cases: 1426 1.125 atatat * amap, new, amap -> give up second merge (single fwd extend) 1427 1.125 atatat * amap, new, none -> double forward extend (extend again here) 1428 1.125 atatat * none, new, amap -> double backward extend (done here) 1429 1.125 atatat * uobj, new, amap -> single backward extend (done here) 1430 1.125 atatat * 1431 1.122 atatat * XXX should we attempt to deal with someone refilling 1432 1.122 atatat * the deallocated region between two entries that are 1433 1.122 atatat * backed by the same amap (ie, arefs is 2, "prev" and 1434 1.122 atatat * "next" refer to it, and adding this allocation will 1435 1.122 atatat * close the hole, thus restoring arefs to 1 and 1436 1.122 atatat * deallocating the "next" vm_map_entry)? -- @@@ 1437 1.121 atatat */ 1438 1.121 atatat 1439 1.121 atatat if (prev_entry->next->aref.ar_amap && 1440 1.122 atatat (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1441 1.122 atatat (merged && prev_entry->aref.ar_amap))) { 1442 1.121 atatat goto nomerge; 1443 1.121 atatat } 1444 1.121 atatat 1445 1.122 atatat if (merged) { 1446 1.123 atatat /* 1447 1.123 atatat * Try to extend the amap of the previous entry to 1448 1.123 atatat * cover the next entry as well. If it doesn't work 1449 1.123 atatat * just skip on, don't actually give up, since we've 1450 1.123 atatat * already completed the back merge. 1451 1.123 atatat */ 1452 1.125 atatat if (prev_entry->aref.ar_amap) { 1453 1.125 atatat if (amap_extend(prev_entry, 1454 1.125 atatat prev_entry->next->end - 1455 1.125 atatat prev_entry->next->start, 1456 1.126 bouyer amapwaitflag | AMAP_EXTEND_FORWARDS)) 1457 1.142 enami goto nomerge; 1458 1.125 atatat } 1459 1.125 atatat 1460 1.125 atatat /* 1461 1.125 atatat * Try to extend the amap of the *next* entry 1462 1.125 atatat * back to cover the new allocation *and* the 1463 1.125 atatat * previous entry as well (the previous merge 1464 1.125 atatat * didn't have an amap already otherwise we 1465 1.125 atatat * wouldn't be checking here for an amap). If 1466 1.125 atatat * it doesn't work just skip on, again, don't 1467 1.125 atatat * actually give up, since we've already 1468 1.125 atatat * completed the back merge. 
1469 1.125 atatat */ 1470 1.125 atatat else if (prev_entry->next->aref.ar_amap) { 1471 1.125 atatat if (amap_extend(prev_entry->next, 1472 1.125 atatat prev_entry->end - 1473 1.141 atatat prev_entry->start, 1474 1.126 bouyer amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1475 1.142 enami goto nomerge; 1476 1.125 atatat } 1477 1.125 atatat } else { 1478 1.125 atatat /* 1479 1.125 atatat * Pull the next entry's amap backwards to cover this 1480 1.125 atatat * new allocation. 1481 1.125 atatat */ 1482 1.125 atatat if (prev_entry->next->aref.ar_amap) { 1483 1.125 atatat error = amap_extend(prev_entry->next, size, 1484 1.126 bouyer amapwaitflag | AMAP_EXTEND_BACKWARDS); 1485 1.174 yamt if (error) 1486 1.191 yamt goto nomerge; 1487 1.125 atatat } 1488 1.122 atatat } 1489 1.122 atatat 1490 1.121 atatat if (merged) { 1491 1.121 atatat if (kmap) { 1492 1.207 yamt UVMMAP_EVCNT_DECR(kbackmerge); 1493 1.207 yamt UVMMAP_EVCNT_INCR(kbimerge); 1494 1.121 atatat } else { 1495 1.207 yamt UVMMAP_EVCNT_DECR(ubackmerge); 1496 1.207 yamt UVMMAP_EVCNT_INCR(ubimerge); 1497 1.121 atatat } 1498 1.122 atatat } else { 1499 1.258 ad if (kmap) { 1500 1.207 yamt UVMMAP_EVCNT_INCR(kforwmerge); 1501 1.258 ad } else { 1502 1.207 yamt UVMMAP_EVCNT_INCR(uforwmerge); 1503 1.258 ad } 1504 1.121 atatat } 1505 1.121 atatat UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1506 1.10 mrg 1507 1.121 atatat /* 1508 1.121 atatat * drop our reference to uobj since we are extending a reference 1509 1.121 atatat * that we already have (the ref count can not drop to zero). 1510 1.121 atatat */ 1511 1.319 chs if (uobj && uobj->pgops->pgo_detach) 1512 1.121 atatat uobj->pgops->pgo_detach(uobj); 1513 1.1 mrg 1514 1.121 atatat if (merged) { 1515 1.174 yamt dead = prev_entry->next; 1516 1.121 atatat prev_entry->end = dead->end; 1517 1.121 atatat uvm_map_entry_unlink(map, dead); 1518 1.125 atatat if (dead->aref.ar_amap != NULL) { 1519 1.125 atatat prev_entry->aref = dead->aref; 1520 1.125 atatat dead->aref.ar_amap = NULL; 1521 1.125 atatat } 1522 1.121 atatat } else { 1523 1.121 atatat prev_entry->next->start -= size; 1524 1.263 matt if (prev_entry != &map->header) { 1525 1.263 matt prev_entry->gap -= size; 1526 1.263 matt KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1527 1.145 yamt uvm_rb_fixup(map, prev_entry); 1528 1.263 matt } 1529 1.121 atatat if (uobj) 1530 1.121 atatat prev_entry->next->offset = uoffset; 1531 1.121 atatat } 1532 1.145 yamt 1533 1.222 yamt uvm_map_check(map, "map forwardmerged"); 1534 1.1 mrg 1535 1.121 atatat UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1536 1.121 atatat merged++; 1537 1.106 chs } 1538 1.121 atatat 1539 1.121 atatat nomerge: 1540 1.121 atatat if (!merged) { 1541 1.121 atatat UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1542 1.258 ad if (kmap) { 1543 1.207 yamt UVMMAP_EVCNT_INCR(knomerge); 1544 1.258 ad } else { 1545 1.207 yamt UVMMAP_EVCNT_INCR(unomerge); 1546 1.258 ad } 1547 1.106 chs 1548 1.10 mrg /* 1549 1.121 atatat * allocate new entry and link it in. 
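 *
 * (In outline: a fresh vm_map_entry is initialized from the caller's
 * start/size/uobj/uoffset and the protection, inheritance and advice
 * arguments, optionally given an amap up front for UVM_FLAG_OVERLAY,
 * and then spliced in after prev_entry, so the resulting list order
 * is prev_entry -> new_entry -> old prev_entry->next.)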
1550 1.10 mrg */ 1551 1.106 chs 1552 1.121 atatat if (new_entry == NULL) { 1553 1.126 bouyer new_entry = uvm_mapent_alloc(map, 1554 1.127 thorpej (flags & UVM_FLAG_NOWAIT)); 1555 1.126 bouyer if (__predict_false(new_entry == NULL)) { 1556 1.174 yamt error = ENOMEM; 1557 1.174 yamt goto done; 1558 1.126 bouyer } 1559 1.121 atatat } 1560 1.174 yamt new_entry->start = start; 1561 1.121 atatat new_entry->end = new_entry->start + size; 1562 1.121 atatat new_entry->object.uvm_obj = uobj; 1563 1.121 atatat new_entry->offset = uoffset; 1564 1.121 atatat 1565 1.176 yamt new_entry->etype = newetype; 1566 1.121 atatat 1567 1.161 matt if (flags & UVM_FLAG_NOMERGE) { 1568 1.161 matt new_entry->flags |= UVM_MAP_NOMERGE; 1569 1.161 matt } 1570 1.121 atatat 1571 1.121 atatat new_entry->protection = prot; 1572 1.121 atatat new_entry->max_protection = maxprot; 1573 1.121 atatat new_entry->inheritance = inherit; 1574 1.121 atatat new_entry->wired_count = 0; 1575 1.121 atatat new_entry->advice = advice; 1576 1.121 atatat if (flags & UVM_FLAG_OVERLAY) { 1577 1.121 atatat 1578 1.121 atatat /* 1579 1.121 atatat * to_add: for BSS we overallocate a little since we 1580 1.121 atatat * are likely to extend 1581 1.121 atatat */ 1582 1.121 atatat 1583 1.121 atatat vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 1584 1.121 atatat UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1585 1.126 bouyer struct vm_amap *amap = amap_alloc(size, to_add, 1586 1.227 yamt (flags & UVM_FLAG_NOWAIT)); 1587 1.126 bouyer if (__predict_false(amap == NULL)) { 1588 1.174 yamt error = ENOMEM; 1589 1.174 yamt goto done; 1590 1.126 bouyer } 1591 1.121 atatat new_entry->aref.ar_pageoff = 0; 1592 1.121 atatat new_entry->aref.ar_amap = amap; 1593 1.121 atatat } else { 1594 1.121 atatat new_entry->aref.ar_pageoff = 0; 1595 1.121 atatat new_entry->aref.ar_amap = NULL; 1596 1.121 atatat } 1597 1.121 atatat uvm_map_entry_link(map, prev_entry, new_entry); 1598 1.1 mrg 1599 1.121 atatat /* 1600 1.121 atatat * Update the free space hint 1601 1.121 atatat */ 1602 1.10 mrg 1603 1.121 atatat if ((map->first_free == prev_entry) && 1604 1.121 atatat (prev_entry->end >= new_entry->start)) 1605 1.121 atatat map->first_free = new_entry; 1606 1.174 yamt 1607 1.174 yamt new_entry = NULL; 1608 1.121 atatat } 1609 1.10 mrg 1610 1.146 yamt map->size += size; 1611 1.146 yamt 1612 1.10 mrg UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1613 1.174 yamt 1614 1.174 yamt error = 0; 1615 1.347 chs 1616 1.174 yamt done: 1617 1.311 para vm_map_unlock(map); 1618 1.311 para 1619 1.311 para if (new_entry) { 1620 1.311 para uvm_mapent_free(new_entry); 1621 1.174 yamt } 1622 1.174 yamt if (dead) { 1623 1.174 yamt KDASSERT(merged); 1624 1.311 para uvm_mapent_free(dead); 1625 1.248 ad } 1626 1.347 chs if (dead_entries) 1627 1.347 chs uvm_unmap_detach(dead_entries, 0); 1628 1.311 para 1629 1.174 yamt return error; 1630 1.1 mrg } 1631 1.1 mrg 1632 1.1 mrg /* 1633 1.247 yamt * uvm_map_lookup_entry_bytree: lookup an entry in tree 1634 1.417 riastrad * 1635 1.417 riastrad * => map must at least be read-locked by caller. 1636 1.417 riastrad * 1637 1.417 riastrad * => If address lies in an entry, set *entry to it and return true; 1638 1.417 riastrad * then (*entry)->start <= address < (*entry)->end. 1639 1.417 riastrad 1640 1.417 riastrad * => If address is below all entries in map, return false and set 1641 1.417 riastrad * *entry to &map->header. 
1642 1.417 riastrad * 1643 1.417 riastrad * => Otherwise, return false and set *entry to the highest entry below 1644 1.417 riastrad * address, so (*entry)->end <= address, and if (*entry)->next is 1645 1.417 riastrad * not &map->header, address < (*entry)->next->start. 1646 1.247 yamt */ 1647 1.247 yamt 1648 1.263 matt static inline bool 1649 1.247 yamt uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1650 1.247 yamt struct vm_map_entry **entry /* OUT */) 1651 1.247 yamt { 1652 1.247 yamt struct vm_map_entry *prev = &map->header; 1653 1.263 matt struct vm_map_entry *cur = ROOT_ENTRY(map); 1654 1.247 yamt 1655 1.417 riastrad KASSERT(rw_lock_held(&map->lock)); 1656 1.417 riastrad 1657 1.247 yamt while (cur) { 1658 1.417 riastrad KASSERT(prev == &map->header || prev->end <= address); 1659 1.418 riastrad KASSERT(prev == &map->header || prev->end <= cur->start); 1660 1.263 matt UVMMAP_EVCNT_INCR(mlk_treeloop); 1661 1.247 yamt if (address >= cur->start) { 1662 1.247 yamt if (address < cur->end) { 1663 1.247 yamt *entry = cur; 1664 1.247 yamt return true; 1665 1.247 yamt } 1666 1.247 yamt prev = cur; 1667 1.417 riastrad KASSERT(prev->end <= address); 1668 1.263 matt cur = RIGHT_ENTRY(cur); 1669 1.417 riastrad KASSERT(cur == NULL || prev->end <= cur->start); 1670 1.247 yamt } else 1671 1.263 matt cur = LEFT_ENTRY(cur); 1672 1.247 yamt } 1673 1.417 riastrad KASSERT(prev == &map->header || prev->end <= address); 1674 1.417 riastrad KASSERT(prev->next == &map->header || address < prev->next->start); 1675 1.247 yamt *entry = prev; 1676 1.247 yamt return false; 1677 1.247 yamt } 1678 1.247 yamt 1679 1.247 yamt /* 1680 1.1 mrg * uvm_map_lookup_entry: find map entry at or before an address 1681 1.1 mrg * 1682 1.417 riastrad * => map must at least be read-locked by caller. 1683 1.417 riastrad * 1684 1.417 riastrad * => If address lies in an entry, set *entry to it and return true; 1685 1.417 riastrad * then (*entry)->start <= address < (*entry)->end. 1686 1.417 riastrad 1687 1.417 riastrad * => If address is below all entries in map, return false and set 1688 1.417 riastrad * *entry to &map->header. 1689 1.417 riastrad * 1690 1.417 riastrad * => Otherwise, return false and set *entry to the highest entry below 1691 1.417 riastrad * address, so (*entry)->end <= address, and if (*entry)->next is 1692 1.417 riastrad * not &map->header, address < (*entry)->next->start. 1693 1.1 mrg */ 1694 1.1 mrg 1695 1.233 thorpej bool 1696 1.138 enami uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1697 1.138 enami struct vm_map_entry **entry /* OUT */) 1698 1.1 mrg { 1699 1.99 chs struct vm_map_entry *cur; 1700 1.385 skrll UVMHIST_FUNC(__func__); 1701 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,addr=%#jx,ent=%#jx)", 1702 1.353 pgoyette (uintptr_t)map, address, (uintptr_t)entry, 0); 1703 1.1 mrg 1704 1.421 riastrad KASSERT(rw_lock_held(&map->lock)); 1705 1.417 riastrad 1706 1.1 mrg /* 1707 1.373 ad * make a quick check to see if we are already looking at 1708 1.373 ad * the entry we want (which is usually the case). note also 1709 1.373 ad * that we don't need to save the hint here... it is the 1710 1.373 ad * same hint (unless we are at the header, in which case the 1711 1.373 ad * hint didn't buy us anything anyway). 
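 *
 * For illustration, a typical caller of this function (holding the
 * map at least read-locked, as required) looks roughly like:
 *
 *      struct vm_map_entry *entry;
 *
 *      if (uvm_map_lookup_entry(map, va, &entry)) {
 *              ... va is mapped: entry->start <= va < entry->end ...
 *      } else {
 *              ... va is unmapped; entry is the preceding entry,
 *                  or &map->header if va is below every mapping ...
 *      }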
1712 1.1 mrg */ 1713 1.1 mrg 1714 1.1 mrg cur = map->hint; 1715 1.207 yamt UVMMAP_EVCNT_INCR(mlk_call); 1716 1.373 ad if (cur != &map->header && 1717 1.373 ad address >= cur->start && cur->end > address) { 1718 1.373 ad UVMMAP_EVCNT_INCR(mlk_hint); 1719 1.373 ad *entry = cur; 1720 1.373 ad UVMHIST_LOG(maphist,"<- got it via hint (%#jx)", 1721 1.373 ad (uintptr_t)cur, 0, 0, 0); 1722 1.373 ad uvm_mapent_check(*entry); 1723 1.373 ad return (true); 1724 1.144 yamt } 1725 1.222 yamt uvm_map_check(map, __func__); 1726 1.144 yamt 1727 1.1 mrg /* 1728 1.373 ad * lookup in the tree. 1729 1.1 mrg */ 1730 1.1 mrg 1731 1.373 ad UVMMAP_EVCNT_INCR(mlk_tree); 1732 1.373 ad if (__predict_true(uvm_map_lookup_entry_bytree(map, address, entry))) { 1733 1.373 ad SAVE_HINT(map, map->hint, *entry); 1734 1.373 ad UVMHIST_LOG(maphist,"<- search got it (%#jx)", 1735 1.373 ad (uintptr_t)cur, 0, 0, 0); 1736 1.373 ad KDASSERT((*entry)->start <= address); 1737 1.373 ad KDASSERT(address < (*entry)->end); 1738 1.373 ad uvm_mapent_check(*entry); 1739 1.373 ad return (true); 1740 1.373 ad } 1741 1.1 mrg 1742 1.374 ad SAVE_HINT(map, map->hint, *entry); 1743 1.1 mrg UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); 1744 1.147 yamt KDASSERT((*entry) == &map->header || (*entry)->end <= address); 1745 1.144 yamt KDASSERT((*entry)->next == &map->header || 1746 1.144 yamt address < (*entry)->next->start); 1747 1.234 thorpej return (false); 1748 1.1 mrg } 1749 1.1 mrg 1750 1.1 mrg /* 1751 1.140 enami * See if the range between start and start + length fits in the gap 1752 1.140 enami * entry->next->start and entry->end. Returns 1 if fits, 0 if doesn't 1753 1.140 enami * fit, and -1 address wraps around. 1754 1.140 enami */ 1755 1.203 thorpej static int 1756 1.232 yamt uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset, 1757 1.304 matt vsize_t align, int flags, int topdown, struct vm_map_entry *entry) 1758 1.140 enami { 1759 1.415 riastrad vaddr_t orig_start = *start; 1760 1.140 enami vaddr_t end; 1761 1.140 enami 1762 1.415 riastrad #define INVARIANTS() \ 1763 1.415 riastrad KASSERTMSG((topdown \ 1764 1.415 riastrad ? *start <= orig_start \ 1765 1.415 riastrad : *start >= orig_start), \ 1766 1.415 riastrad "[%s] *start=%"PRIxVADDR" orig_start=%"PRIxVADDR \ 1767 1.415 riastrad " length=%"PRIxVSIZE" uoffset=%#llx align=%"PRIxVSIZE \ 1768 1.415 riastrad " flags=%x entry@%p=[%"PRIxVADDR",%"PRIxVADDR")" \ 1769 1.415 riastrad " ncolors=%d colormask=%x", \ 1770 1.415 riastrad topdown ? "topdown" : "bottomup", *start, orig_start, \ 1771 1.415 riastrad length, (unsigned long long)uoffset, align, \ 1772 1.415 riastrad flags, entry, entry->start, entry->end, \ 1773 1.415 riastrad uvmexp.ncolors, uvmexp.colormask) 1774 1.415 riastrad 1775 1.415 riastrad INVARIANTS(); 1776 1.415 riastrad 1777 1.140 enami #ifdef PMAP_PREFER 1778 1.140 enami /* 1779 1.140 enami * push start address forward as needed to avoid VAC alias problems. 1780 1.140 enami * we only do this if a valid offset is specified. 
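 *
 * (Illustrative example, with an invented alias size: on a machine
 * whose virtually indexed cache aliases on 64KB boundaries,
 * PMAP_PREFER() would nudge *start so that it agrees with uoffset
 * modulo 64KB, so that all mappings of a given object offset fall
 * in the same cache color and cannot alias one another.)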
1781 1.140 enami */ 1782 1.140 enami 1783 1.415 riastrad if (uoffset != UVM_UNKNOWN_OFFSET) { 1784 1.182 atatat PMAP_PREFER(uoffset, start, length, topdown); 1785 1.415 riastrad INVARIANTS(); 1786 1.415 riastrad } 1787 1.140 enami #endif 1788 1.304 matt if ((flags & UVM_FLAG_COLORMATCH) != 0) { 1789 1.304 matt KASSERT(align < uvmexp.ncolors); 1790 1.304 matt if (uvmexp.ncolors > 1) { 1791 1.304 matt const u_int colormask = uvmexp.colormask; 1792 1.304 matt const u_int colorsize = colormask + 1; 1793 1.304 matt vaddr_t hint = atop(*start); 1794 1.304 matt const u_int color = hint & colormask; 1795 1.304 matt if (color != align) { 1796 1.304 matt hint -= color; /* adjust to color boundary */ 1797 1.304 matt KASSERT((hint & colormask) == 0); 1798 1.304 matt if (topdown) { 1799 1.304 matt if (align > color) 1800 1.304 matt hint -= colorsize; 1801 1.304 matt } else { 1802 1.304 matt if (align < color) 1803 1.304 matt hint += colorsize; 1804 1.304 matt } 1805 1.304 matt *start = ptoa(hint + align); /* adjust to color */ 1806 1.415 riastrad INVARIANTS(); 1807 1.304 matt } 1808 1.304 matt } 1809 1.365 rin } else { 1810 1.365 rin KASSERT(powerof2(align)); 1811 1.365 rin uvm_map_align_va(start, align, topdown); 1812 1.415 riastrad INVARIANTS(); 1813 1.140 enami /* 1814 1.140 enami * XXX Should we PMAP_PREFER() here again? 1815 1.182 atatat * eh...i think we're okay 1816 1.140 enami */ 1817 1.140 enami } 1818 1.140 enami 1819 1.140 enami /* 1820 1.140 enami * Find the end of the proposed new region. Be sure we didn't 1821 1.140 enami * wrap around the address; if so, we lose. Otherwise, if the 1822 1.140 enami * proposed new region fits before the next entry, we win. 1823 1.419 riastrad * 1824 1.419 riastrad * XXX Should this use vm_map_max(map) as the max? 1825 1.140 enami */ 1826 1.140 enami 1827 1.419 riastrad if (length > __type_max(vaddr_t) - *start) 1828 1.419 riastrad return (-1); 1829 1.140 enami end = *start + length; 1830 1.140 enami 1831 1.140 enami if (entry->next->start >= end && *start >= entry->end) 1832 1.140 enami return (1); 1833 1.140 enami 1834 1.140 enami return (0); 1835 1.415 riastrad 1836 1.415 riastrad #undef INVARIANTS 1837 1.140 enami } 1838 1.140 enami 1839 1.400 riastrad static void 1840 1.400 riastrad uvm_findspace_invariants(struct vm_map *map, vaddr_t orig_hint, vaddr_t length, 1841 1.400 riastrad struct uvm_object *uobj, voff_t uoffset, vsize_t align, int flags, 1842 1.401 rin vaddr_t hint, struct vm_map_entry *entry, int line) 1843 1.400 riastrad { 1844 1.400 riastrad const int topdown = map->flags & VM_MAP_TOPDOWN; 1845 1.413 uwe const int hint_location_ok = 1846 1.413 uwe topdown ? hint <= orig_hint 1847 1.413 uwe : hint >= orig_hint; 1848 1.413 uwe 1849 1.426 riastrad KASSERTMSG(hint_location_ok, 1850 1.413 uwe "%s map=%p hint=%#" PRIxVADDR " %s orig_hint=%#" PRIxVADDR 1851 1.413 uwe " length=%#" PRIxVSIZE " uobj=%p uoffset=%#llx align=%" PRIxVSIZE 1852 1.420 riastrad " flags=%#x entry@%p=[%" PRIxVADDR ",%" PRIxVADDR ")" 1853 1.422 riastrad " entry->next@%p=[%" PRIxVADDR ",%" PRIxVADDR ")" 1854 1.420 riastrad " (uvm_map_findspace line %d)", 1855 1.413 uwe topdown ? "topdown" : "bottomup", 1856 1.413 uwe map, hint, topdown ? ">" : "<", orig_hint, 1857 1.400 riastrad length, uobj, (unsigned long long)uoffset, align, 1858 1.420 riastrad flags, entry, entry ? entry->start : 0, entry ? entry->end : 0, 1859 1.424 rin entry ? entry->next : NULL, 1860 1.422 riastrad entry && entry->next ? entry->next->start : 0, 1861 1.422 riastrad entry && entry->next ? 
entry->next->end : 0, 1862 1.420 riastrad line); 1863 1.400 riastrad } 1864 1.400 riastrad 1865 1.140 enami /* 1866 1.1 mrg * uvm_map_findspace: find "length" sized space in "map". 1867 1.1 mrg * 1868 1.167 junyoung * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is 1869 1.167 junyoung * set in "flags" (in which case we insist on using "hint"). 1870 1.1 mrg * => "result" is VA returned 1871 1.1 mrg * => uobj/uoffset are to be used to handle VAC alignment, if required 1872 1.167 junyoung * => if "align" is non-zero, we attempt to align to that value. 1873 1.1 mrg * => caller must at least have read-locked map 1874 1.1 mrg * => returns NULL on failure, or pointer to prev. map entry if success 1875 1.1 mrg * => note this is a cross between the old vm_map_findspace and vm_map_find 1876 1.1 mrg */ 1877 1.1 mrg 1878 1.99 chs struct vm_map_entry * 1879 1.138 enami uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1880 1.232 yamt vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1881 1.138 enami vsize_t align, int flags) 1882 1.1 mrg { 1883 1.400 riastrad #define INVARIANTS() \ 1884 1.400 riastrad uvm_findspace_invariants(map, orig_hint, length, uobj, uoffset, align,\ 1885 1.401 rin flags, hint, entry, __LINE__) 1886 1.402 macallan struct vm_map_entry *entry = NULL; 1887 1.144 yamt struct vm_map_entry *child, *prev, *tmp; 1888 1.326 martin vaddr_t orig_hint __diagused; 1889 1.131 atatat const int topdown = map->flags & VM_MAP_TOPDOWN; 1890 1.400 riastrad int avail; 1891 1.385 skrll UVMHIST_FUNC(__func__); 1892 1.391 skrll UVMHIST_CALLARGS(maphist, "(map=%#jx, hint=%#jx, len=%ju, flags=%#jx...", 1893 1.385 skrll (uintptr_t)map, hint, length, flags); 1894 1.391 skrll UVMHIST_LOG(maphist, " uobj=%#jx, uoffset=%#jx, align=%#jx)", 1895 1.391 skrll (uintptr_t)uobj, uoffset, align, 0); 1896 1.1 mrg 1897 1.365 rin KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || powerof2(align)); 1898 1.304 matt KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors); 1899 1.85 chs KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1900 1.81 thorpej 1901 1.222 yamt uvm_map_check(map, "map_findspace entry"); 1902 1.144 yamt 1903 1.81 thorpej /* 1904 1.395 riastrad * Clamp the hint to the VM map's min/max address, and remember the 1905 1.395 riastrad * original hint as clamped to the min/max address. 1906 1.395 riastrad * If we are aligning, then we 1907 1.395 riastrad * may have to try again with no alignment constraint if we 1908 1.395 riastrad * fail the first time. 1909 1.395 riastrad * 1910 1.395 riastrad * We use the original hint to verify later that the search has 1911 1.395 riastrad * been monotonic -- that is, nonincreasing or nondecreasing, 1912 1.395 riastrad * according to topdown or !topdown respectively. But the 1913 1.395 riastrad * clamping is not monotonic. 1914 1.81 thorpej */ 1915 1.184 chs if (hint < vm_map_min(map)) { /* check ranges ... 
*/ 1916 1.81 thorpej if (flags & UVM_FLAG_FIXED) { 1917 1.1 mrg UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1918 1.139 enami return (NULL); 1919 1.1 mrg } 1920 1.184 chs hint = vm_map_min(map); 1921 1.1 mrg } 1922 1.184 chs if (hint > vm_map_max(map)) { 1923 1.353 pgoyette UVMHIST_LOG(maphist,"<- VA %#jx > range [%#jx->%#jx]", 1924 1.184 chs hint, vm_map_min(map), vm_map_max(map), 0); 1925 1.139 enami return (NULL); 1926 1.1 mrg } 1927 1.395 riastrad orig_hint = hint; 1928 1.400 riastrad INVARIANTS(); 1929 1.1 mrg 1930 1.391 skrll UVMHIST_LOG(maphist,"<- VA %#jx vs range [%#jx->%#jx]", 1931 1.391 skrll hint, vm_map_min(map), vm_map_max(map), 0); 1932 1.391 skrll 1933 1.1 mrg /* 1934 1.365 rin * hint may not be aligned properly; we need round up or down it 1935 1.365 rin * before proceeding further. 1936 1.365 rin */ 1937 1.400 riastrad if ((flags & UVM_FLAG_COLORMATCH) == 0) { 1938 1.366 rin uvm_map_align_va(&hint, align, topdown); 1939 1.400 riastrad INVARIANTS(); 1940 1.400 riastrad } 1941 1.365 rin 1942 1.391 skrll UVMHIST_LOG(maphist,"<- VA %#jx vs range [%#jx->%#jx]", 1943 1.391 skrll hint, vm_map_min(map), vm_map_max(map), 0); 1944 1.365 rin /* 1945 1.1 mrg * Look for the first possible address; if there's already 1946 1.1 mrg * something at this address, we have to start after it. 1947 1.1 mrg */ 1948 1.1 mrg 1949 1.131 atatat /* 1950 1.131 atatat * @@@: there are four, no, eight cases to consider. 1951 1.131 atatat * 1952 1.131 atatat * 0: found, fixed, bottom up -> fail 1953 1.131 atatat * 1: found, fixed, top down -> fail 1954 1.140 enami * 2: found, not fixed, bottom up -> start after entry->end, 1955 1.140 enami * loop up 1956 1.140 enami * 3: found, not fixed, top down -> start before entry->start, 1957 1.140 enami * loop down 1958 1.140 enami * 4: not found, fixed, bottom up -> check entry->next->start, fail 1959 1.140 enami * 5: not found, fixed, top down -> check entry->next->start, fail 1960 1.140 enami * 6: not found, not fixed, bottom up -> check entry->next->start, 1961 1.140 enami * loop up 1962 1.140 enami * 7: not found, not fixed, top down -> check entry->next->start, 1963 1.140 enami * loop down 1964 1.131 atatat * 1965 1.131 atatat * as you can see, it reduces to roughly five cases, and that 1966 1.131 atatat * adding top down mapping only adds one unique case (without 1967 1.131 atatat * it, there would be four cases). 1968 1.131 atatat */ 1969 1.131 atatat 1970 1.396 riastrad if ((flags & UVM_FLAG_FIXED) == 0 && 1971 1.396 riastrad hint == (topdown ? vm_map_max(map) : vm_map_min(map))) { 1972 1.396 riastrad /* 1973 1.396 riastrad * The uvm_map_findspace algorithm is monotonic -- for 1974 1.396 riastrad * topdown VM it starts with a high hint and returns a 1975 1.396 riastrad * lower free address; for !topdown VM it starts with a 1976 1.396 riastrad * low hint and returns a higher free address. As an 1977 1.396 riastrad * optimization, start with the first (highest for 1978 1.396 riastrad * topdown, lowest for !topdown) free address. 1979 1.396 riastrad * 1980 1.396 riastrad * XXX This `optimization' probably doesn't actually do 1981 1.396 riastrad * much in practice unless userland explicitly passes 1982 1.396 riastrad * the VM map's minimum or maximum address, which 1983 1.396 riastrad * varies from machine to machine (VM_MAX/MIN_ADDRESS, 1984 1.396 riastrad * e.g. 0x7fbfdfeff000 on amd64 but 0xfffffffff000 on 1985 1.396 riastrad * aarch64) and may vary according to other factors 1986 1.396 riastrad * like sysctl vm.user_va0_disable. 
In particular, if 1987 1.396 riastrad * the user specifies 0 as a hint to mmap, then mmap 1988 1.396 riastrad * will choose a default address which is usually _not_ 1989 1.396 riastrad * VM_MAX/MIN_ADDRESS but something else instead like 1990 1.396 riastrad * VM_MAX_ADDRESS - stack size - guard page overhead, 1991 1.396 riastrad * in which case this branch is never hit. 1992 1.396 riastrad * 1993 1.396 riastrad * In fact, this branch appears to have been broken for 1994 1.396 riastrad * two decades between when topdown was introduced in 1995 1.396 riastrad * ~2003 and when it was adapted to handle the topdown 1996 1.396 riastrad * case without violating the monotonicity assertion in 1997 1.396 riastrad * 2022. Maybe Someone^TM should either ditch the 1998 1.396 riastrad * optimization or find a better way to do it. 1999 1.396 riastrad */ 2000 1.140 enami entry = map->first_free; 2001 1.417 riastrad } else if (uvm_map_lookup_entry(map, hint, &entry)) { 2002 1.417 riastrad KASSERT(entry->start <= hint); 2003 1.417 riastrad KASSERT(hint < entry->end); 2004 1.417 riastrad /* "hint" address already in use ... */ 2005 1.417 riastrad if (flags & UVM_FLAG_FIXED) { 2006 1.417 riastrad UVMHIST_LOG(maphist, "<- fixed & VA in use", 2007 1.417 riastrad 0, 0, 0, 0); 2008 1.417 riastrad return (NULL); 2009 1.417 riastrad } 2010 1.417 riastrad if (topdown) 2011 1.417 riastrad /* Start from lower gap. */ 2012 1.417 riastrad entry = entry->prev; 2013 1.1 mrg } else { 2014 1.417 riastrad KASSERT(entry == &map->header || entry->end <= hint); 2015 1.417 riastrad KASSERT(entry->next == &map->header || 2016 1.417 riastrad hint < entry->next->start); 2017 1.417 riastrad if (flags & UVM_FLAG_FIXED) { 2018 1.419 riastrad if (entry->next->start >= hint && 2019 1.419 riastrad length <= entry->next->start - hint) 2020 1.140 enami goto found; 2021 1.140 enami 2022 1.140 enami /* "hint" address is gap but too small */ 2023 1.140 enami UVMHIST_LOG(maphist, "<- fixed mapping failed", 2024 1.140 enami 0, 0, 0, 0); 2025 1.140 enami return (NULL); /* only one shot at it ... */ 2026 1.140 enami } else { 2027 1.140 enami /* 2028 1.140 enami * See if given hint fits in this gap. 2029 1.140 enami */ 2030 1.400 riastrad avail = uvm_map_space_avail(&hint, length, 2031 1.400 riastrad uoffset, align, flags, topdown, entry); 2032 1.400 riastrad INVARIANTS(); 2033 1.400 riastrad switch (avail) { 2034 1.140 enami case 1: 2035 1.140 enami goto found; 2036 1.140 enami case -1: 2037 1.140 enami goto wraparound; 2038 1.140 enami } 2039 1.140 enami 2040 1.148 yamt if (topdown) { 2041 1.140 enami /* 2042 1.140 enami * Still there is a chance to fit 2043 1.140 enami * if hint > entry->end. 2044 1.140 enami */ 2045 1.148 yamt } else { 2046 1.168 junyoung /* Start from higher gap. */ 2047 1.148 yamt entry = entry->next; 2048 1.148 yamt if (entry == &map->header) 2049 1.148 yamt goto notfound; 2050 1.140 enami goto nextgap; 2051 1.148 yamt } 2052 1.1 mrg } 2053 1.1 mrg } 2054 1.1 mrg 2055 1.1 mrg /* 2056 1.144 yamt * Note that all UVM_FLAGS_FIXED case is already handled. 
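 *
 * The tree search below relies on the augmented red-black tree:
 * each entry caches "gap", the free space between it and its
 * successor, and "maxgap", the largest gap anywhere in its subtree,
 * i.e. roughly
 *
 *      entry->gap == entry->next->start - entry->end
 *      entry->maxgap == MAX(entry->gap, maxgap of each child)
 *
 * so any subtree whose maxgap is smaller than "length" can be
 * pruned without walking it.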
2057 1.144 yamt */ 2058 1.144 yamt KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2059 1.144 yamt 2060 1.144 yamt /* Try to find the space in the red-black tree */ 2061 1.144 yamt 2062 1.144 yamt /* Check slot before any entry */ 2063 1.403 riastrad if (topdown) { 2064 1.403 riastrad KASSERTMSG(entry->next->start >= vm_map_min(map), 2065 1.403 riastrad "map=%p entry=%p entry->next=%p" 2066 1.403 riastrad " entry->next->start=0x%"PRIxVADDR" min=0x%"PRIxVADDR, 2067 1.403 riastrad map, entry, entry->next, 2068 1.403 riastrad entry->next->start, vm_map_min(map)); 2069 1.403 riastrad if (length > entry->next->start - vm_map_min(map)) 2070 1.403 riastrad hint = vm_map_min(map); /* XXX goto wraparound? */ 2071 1.403 riastrad else 2072 1.423 riastrad hint = MIN(orig_hint, entry->next->start - length); 2073 1.403 riastrad KASSERT(hint >= vm_map_min(map)); 2074 1.403 riastrad } else { 2075 1.403 riastrad hint = entry->end; 2076 1.403 riastrad } 2077 1.400 riastrad INVARIANTS(); 2078 1.400 riastrad avail = uvm_map_space_avail(&hint, length, uoffset, align, flags, 2079 1.400 riastrad topdown, entry); 2080 1.400 riastrad INVARIANTS(); 2081 1.400 riastrad switch (avail) { 2082 1.144 yamt case 1: 2083 1.144 yamt goto found; 2084 1.144 yamt case -1: 2085 1.144 yamt goto wraparound; 2086 1.144 yamt } 2087 1.144 yamt 2088 1.144 yamt nextgap: 2089 1.148 yamt KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2090 1.144 yamt /* If there is not enough space in the whole tree, we fail */ 2091 1.263 matt tmp = ROOT_ENTRY(map); 2092 1.263 matt if (tmp == NULL || tmp->maxgap < length) 2093 1.144 yamt goto notfound; 2094 1.144 yamt 2095 1.144 yamt prev = NULL; /* previous candidate */ 2096 1.144 yamt 2097 1.144 yamt /* Find an entry close to hint that has enough space */ 2098 1.144 yamt for (; tmp;) { 2099 1.263 matt KASSERT(tmp->next->start == tmp->end + tmp->gap); 2100 1.144 yamt if (topdown) { 2101 1.144 yamt if (tmp->next->start < hint + length && 2102 1.144 yamt (prev == NULL || tmp->end > prev->end)) { 2103 1.263 matt if (tmp->gap >= length) 2104 1.144 yamt prev = tmp; 2105 1.263 matt else if ((child = LEFT_ENTRY(tmp)) != NULL 2106 1.263 matt && child->maxgap >= length) 2107 1.144 yamt prev = tmp; 2108 1.144 yamt } 2109 1.144 yamt } else { 2110 1.144 yamt if (tmp->end >= hint && 2111 1.144 yamt (prev == NULL || tmp->end < prev->end)) { 2112 1.263 matt if (tmp->gap >= length) 2113 1.144 yamt prev = tmp; 2114 1.263 matt else if ((child = RIGHT_ENTRY(tmp)) != NULL 2115 1.263 matt && child->maxgap >= length) 2116 1.144 yamt prev = tmp; 2117 1.144 yamt } 2118 1.144 yamt } 2119 1.144 yamt if (tmp->next->start < hint + length) 2120 1.263 matt child = RIGHT_ENTRY(tmp); 2121 1.144 yamt else if (tmp->end > hint) 2122 1.263 matt child = LEFT_ENTRY(tmp); 2123 1.144 yamt else { 2124 1.263 matt if (tmp->gap >= length) 2125 1.144 yamt break; 2126 1.144 yamt if (topdown) 2127 1.263 matt child = LEFT_ENTRY(tmp); 2128 1.144 yamt else 2129 1.263 matt child = RIGHT_ENTRY(tmp); 2130 1.144 yamt } 2131 1.263 matt if (child == NULL || child->maxgap < length) 2132 1.144 yamt break; 2133 1.144 yamt tmp = child; 2134 1.144 yamt } 2135 1.144 yamt 2136 1.148 yamt if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { 2137 1.164 junyoung /* 2138 1.144 yamt * Check if the entry that we found satifies the 2139 1.144 yamt * space requirement 2140 1.144 yamt */ 2141 1.148 yamt if (topdown) { 2142 1.149 yamt if (hint > tmp->next->start - length) 2143 1.149 yamt hint = tmp->next->start - length; 2144 1.148 yamt } else { 2145 1.149 yamt if (hint < 
tmp->end) 2146 1.149 yamt hint = tmp->end; 2147 1.148 yamt } 2148 1.400 riastrad INVARIANTS(); 2149 1.400 riastrad avail = uvm_map_space_avail(&hint, length, uoffset, align, 2150 1.400 riastrad flags, topdown, tmp); 2151 1.400 riastrad INVARIANTS(); 2152 1.400 riastrad switch (avail) { 2153 1.148 yamt case 1: 2154 1.144 yamt entry = tmp; 2155 1.144 yamt goto found; 2156 1.148 yamt case -1: 2157 1.148 yamt goto wraparound; 2158 1.144 yamt } 2159 1.263 matt if (tmp->gap >= length) 2160 1.144 yamt goto listsearch; 2161 1.144 yamt } 2162 1.144 yamt if (prev == NULL) 2163 1.144 yamt goto notfound; 2164 1.144 yamt 2165 1.148 yamt if (topdown) { 2166 1.150 yamt KASSERT(orig_hint >= prev->next->start - length || 2167 1.148 yamt prev->next->start - length > prev->next->start); 2168 1.148 yamt hint = prev->next->start - length; 2169 1.148 yamt } else { 2170 1.150 yamt KASSERT(orig_hint <= prev->end); 2171 1.148 yamt hint = prev->end; 2172 1.148 yamt } 2173 1.400 riastrad INVARIANTS(); 2174 1.400 riastrad avail = uvm_map_space_avail(&hint, length, uoffset, align, 2175 1.400 riastrad flags, topdown, prev); 2176 1.400 riastrad INVARIANTS(); 2177 1.400 riastrad switch (avail) { 2178 1.148 yamt case 1: 2179 1.144 yamt entry = prev; 2180 1.144 yamt goto found; 2181 1.148 yamt case -1: 2182 1.148 yamt goto wraparound; 2183 1.144 yamt } 2184 1.263 matt if (prev->gap >= length) 2185 1.144 yamt goto listsearch; 2186 1.164 junyoung 2187 1.144 yamt if (topdown) 2188 1.263 matt tmp = LEFT_ENTRY(prev); 2189 1.144 yamt else 2190 1.263 matt tmp = RIGHT_ENTRY(prev); 2191 1.144 yamt for (;;) { 2192 1.404 riastrad KASSERT(tmp); 2193 1.404 riastrad KASSERTMSG(tmp->maxgap >= length, 2194 1.404 riastrad "tmp->maxgap=0x%"PRIxVSIZE" length=0x%"PRIxVSIZE, 2195 1.404 riastrad tmp->maxgap, length); 2196 1.144 yamt if (topdown) 2197 1.263 matt child = RIGHT_ENTRY(tmp); 2198 1.144 yamt else 2199 1.263 matt child = LEFT_ENTRY(tmp); 2200 1.263 matt if (child && child->maxgap >= length) { 2201 1.144 yamt tmp = child; 2202 1.144 yamt continue; 2203 1.144 yamt } 2204 1.263 matt if (tmp->gap >= length) 2205 1.144 yamt break; 2206 1.144 yamt if (topdown) 2207 1.263 matt tmp = LEFT_ENTRY(tmp); 2208 1.144 yamt else 2209 1.263 matt tmp = RIGHT_ENTRY(tmp); 2210 1.144 yamt } 2211 1.164 junyoung 2212 1.148 yamt if (topdown) { 2213 1.150 yamt KASSERT(orig_hint >= tmp->next->start - length || 2214 1.148 yamt tmp->next->start - length > tmp->next->start); 2215 1.148 yamt hint = tmp->next->start - length; 2216 1.148 yamt } else { 2217 1.150 yamt KASSERT(orig_hint <= tmp->end); 2218 1.148 yamt hint = tmp->end; 2219 1.148 yamt } 2220 1.400 riastrad INVARIANTS(); 2221 1.400 riastrad avail = uvm_map_space_avail(&hint, length, uoffset, align, 2222 1.400 riastrad flags, topdown, tmp); 2223 1.400 riastrad INVARIANTS(); 2224 1.400 riastrad switch (avail) { 2225 1.144 yamt case 1: 2226 1.144 yamt entry = tmp; 2227 1.144 yamt goto found; 2228 1.148 yamt case -1: 2229 1.148 yamt goto wraparound; 2230 1.144 yamt } 2231 1.144 yamt 2232 1.164 junyoung /* 2233 1.144 yamt * The tree fails to find an entry because of offset or alignment 2234 1.144 yamt * restrictions. Search the list instead. 2235 1.144 yamt */ 2236 1.144 yamt listsearch: 2237 1.144 yamt /* 2238 1.1 mrg * Look through the rest of the map, trying to fit a new region in 2239 1.1 mrg * the gap between existing regions, or after the very last region. 
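 * This is the simple linked-list fallback: walk gap by gap in the
 * direction dictated by topdown, testing each candidate with
 * uvm_map_space_avail(), until something fits or we fall off the
 * end of the map.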
2240 1.140 enami * note: entry->end = base VA of current gap, 2241 1.140 enami * entry->next->start = VA of end of current gap 2242 1.1 mrg */ 2243 1.99 chs 2244 1.400 riastrad INVARIANTS(); 2245 1.140 enami for (;;) { 2246 1.140 enami /* Update hint for current gap. */ 2247 1.425 riastrad hint = topdown ? MIN(orig_hint, entry->next->start - length) 2248 1.425 riastrad : entry->end; 2249 1.400 riastrad INVARIANTS(); 2250 1.140 enami 2251 1.140 enami /* See if it fits. */ 2252 1.400 riastrad avail = uvm_map_space_avail(&hint, length, uoffset, align, 2253 1.400 riastrad flags, topdown, entry); 2254 1.400 riastrad INVARIANTS(); 2255 1.400 riastrad switch (avail) { 2256 1.140 enami case 1: 2257 1.140 enami goto found; 2258 1.140 enami case -1: 2259 1.140 enami goto wraparound; 2260 1.140 enami } 2261 1.140 enami 2262 1.140 enami /* Advance to next/previous gap */ 2263 1.140 enami if (topdown) { 2264 1.140 enami if (entry == &map->header) { 2265 1.140 enami UVMHIST_LOG(maphist, "<- failed (off start)", 2266 1.140 enami 0,0,0,0); 2267 1.140 enami goto notfound; 2268 1.134 matt } 2269 1.140 enami entry = entry->prev; 2270 1.140 enami } else { 2271 1.140 enami entry = entry->next; 2272 1.140 enami if (entry == &map->header) { 2273 1.140 enami UVMHIST_LOG(maphist, "<- failed (off end)", 2274 1.81 thorpej 0,0,0,0); 2275 1.140 enami goto notfound; 2276 1.81 thorpej } 2277 1.1 mrg } 2278 1.1 mrg } 2279 1.140 enami 2280 1.140 enami found: 2281 1.82 thorpej SAVE_HINT(map, map->hint, entry); 2282 1.1 mrg *result = hint; 2283 1.353 pgoyette UVMHIST_LOG(maphist,"<- got it! (result=%#jx)", hint, 0,0,0); 2284 1.400 riastrad INVARIANTS(); 2285 1.144 yamt KASSERT(entry->end <= hint); 2286 1.419 riastrad KASSERT(hint <= entry->next->start); 2287 1.419 riastrad KASSERT(length <= entry->next->start - hint); 2288 1.1 mrg return (entry); 2289 1.140 enami 2290 1.140 enami wraparound: 2291 1.140 enami UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0); 2292 1.140 enami 2293 1.165 yamt return (NULL); 2294 1.165 yamt 2295 1.140 enami notfound: 2296 1.165 yamt UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0); 2297 1.165 yamt 2298 1.140 enami return (NULL); 2299 1.400 riastrad #undef INVARIANTS 2300 1.1 mrg } 2301 1.1 mrg 2302 1.1 mrg /* 2303 1.1 mrg * U N M A P - m a i n h e l p e r f u n c t i o n s 2304 1.1 mrg */ 2305 1.1 mrg 2306 1.1 mrg /* 2307 1.1 mrg * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 2308 1.1 mrg * 2309 1.98 chs * => caller must check alignment and size 2310 1.1 mrg * => map must be locked by caller 2311 1.1 mrg * => we return a list of map entries that we've remove from the map 2312 1.1 mrg * in "entry_list" 2313 1.1 mrg */ 2314 1.1 mrg 2315 1.94 chs void 2316 1.138 enami uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2317 1.311 para struct vm_map_entry **entry_list /* OUT */, int flags) 2318 1.10 mrg { 2319 1.99 chs struct vm_map_entry *entry, *first_entry, *next; 2320 1.24 eeh vaddr_t len; 2321 1.385 skrll UVMHIST_FUNC(__func__); 2322 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx, start=%#jx, end=%#jx)", 2323 1.353 pgoyette (uintptr_t)map, start, end, 0); 2324 1.10 mrg VM_MAP_RANGE_CHECK(map, start, end); 2325 1.10 mrg 2326 1.421 riastrad KASSERT(vm_map_locked_p(map)); 2327 1.421 riastrad 2328 1.222 yamt uvm_map_check(map, "unmap_remove entry"); 2329 1.144 yamt 2330 1.10 mrg /* 2331 1.10 mrg * find first entry 2332 1.10 mrg */ 2333 1.99 chs 2334 1.234 thorpej if (uvm_map_lookup_entry(map, start, &first_entry) == true) { 2335 1.29 chuck 
/* clip and go... */ 2336 1.10 mrg entry = first_entry; 2337 1.311 para UVM_MAP_CLIP_START(map, entry, start); 2338 1.10 mrg /* critical! prevents stale hint */ 2339 1.82 thorpej SAVE_HINT(map, entry, entry->prev); 2340 1.10 mrg } else { 2341 1.10 mrg entry = first_entry->next; 2342 1.10 mrg } 2343 1.10 mrg 2344 1.10 mrg /* 2345 1.371 ad * save the free space hint 2346 1.10 mrg */ 2347 1.10 mrg 2348 1.220 yamt if (map->first_free != &map->header && map->first_free->start >= start) 2349 1.10 mrg map->first_free = entry->prev; 2350 1.10 mrg 2351 1.10 mrg /* 2352 1.10 mrg * note: we now re-use first_entry for a different task. we remove 2353 1.10 mrg * a number of map entries from the map and save them in a linked 2354 1.10 mrg * list headed by "first_entry". once we remove them from the map 2355 1.10 mrg * the caller should unlock the map and drop the references to the 2356 1.10 mrg * backing objects [c.f. uvm_unmap_detach]. the object is to 2357 1.100 wiz * separate unmapping from reference dropping. why? 2358 1.10 mrg * [1] the map has to be locked for unmapping 2359 1.10 mrg * [2] the map need not be locked for reference dropping 2360 1.10 mrg * [3] dropping references may trigger pager I/O, and if we hit 2361 1.10 mrg * a pager that does synchronous I/O we may have to wait for it. 2362 1.10 mrg * [4] we would like all waiting for I/O to occur with maps unlocked 2363 1.98 chs * so that we don't block other threads. 2364 1.10 mrg */ 2365 1.99 chs 2366 1.10 mrg first_entry = NULL; 2367 1.106 chs *entry_list = NULL; 2368 1.10 mrg 2369 1.10 mrg /* 2370 1.98 chs * break up the area into map entry sized regions and unmap. note 2371 1.10 mrg * that all mappings have to be removed before we can even consider 2372 1.10 mrg * dropping references to amaps or VM objects (otherwise we could end 2373 1.10 mrg * up with a mapping to a page on the free list which would be very bad) 2374 1.10 mrg */ 2375 1.10 mrg 2376 1.10 mrg while ((entry != &map->header) && (entry->start < end)) { 2377 1.311 para KASSERT((entry->flags & UVM_MAP_STATIC) == 0); 2378 1.174 yamt 2379 1.311 para UVM_MAP_CLIP_END(map, entry, end); 2380 1.10 mrg next = entry->next; 2381 1.10 mrg len = entry->end - entry->start; 2382 1.81 thorpej 2383 1.10 mrg /* 2384 1.10 mrg * unwire before removing addresses from the pmap; otherwise 2385 1.10 mrg * unwiring will put the entries back into the pmap (XXX). 2386 1.10 mrg */ 2387 1.1 mrg 2388 1.106 chs if (VM_MAPENT_ISWIRED(entry)) { 2389 1.10 mrg uvm_map_entry_unwire(map, entry); 2390 1.106 chs } 2391 1.187 yamt if (flags & UVM_FLAG_VAONLY) { 2392 1.187 yamt 2393 1.187 yamt /* nothing */ 2394 1.187 yamt 2395 1.187 yamt } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2396 1.10 mrg 2397 1.106 chs /* 2398 1.106 chs * if the map is non-pageable, any pages mapped there 2399 1.106 chs * must be wired and entered with pmap_kenter_pa(), 2400 1.106 chs * and we should free any such pages immediately. 2401 1.287 joerg * this is mostly used for kmem_map. 2402 1.106 chs */ 2403 1.292 rmind KASSERT(vm_map_pmap(map) == pmap_kernel()); 2404 1.99 chs 2405 1.323 para uvm_km_pgremove_intrsafe(map, entry->start, entry->end); 2406 1.106 chs } else if (UVM_ET_ISOBJ(entry) && 2407 1.106 chs UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2408 1.300 yamt panic("%s: kernel object %p %p\n", 2409 1.300 yamt __func__, map, entry); 2410 1.106 chs } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2411 1.29 chuck /* 2412 1.298 rmind * remove mappings the standard way. 
lock object 2413 1.298 rmind * and/or amap to ensure vm_page state does not 2414 1.298 rmind * change while in pmap_remove(). 2415 1.139 enami */ 2416 1.99 chs 2417 1.376 ad #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 2418 1.372 ad uvm_map_lock_entry(entry, RW_WRITER); 2419 1.376 ad #else 2420 1.376 ad uvm_map_lock_entry(entry, RW_READER); 2421 1.376 ad #endif 2422 1.29 chuck pmap_remove(map->pmap, entry->start, entry->end); 2423 1.371 ad 2424 1.371 ad /* 2425 1.371 ad * note: if map is dying, leave pmap_update() for 2426 1.378 ad * later. if the map is to be reused (exec) then 2427 1.378 ad * pmap_update() will be called. if the map is 2428 1.378 ad * being disposed of (exit) then pmap_destroy() 2429 1.378 ad * will be called. 2430 1.371 ad */ 2431 1.371 ad 2432 1.371 ad if ((map->flags & VM_MAP_DYING) == 0) { 2433 1.371 ad pmap_update(vm_map_pmap(map)); 2434 1.371 ad } else { 2435 1.371 ad KASSERT(vm_map_pmap(map) != pmap_kernel()); 2436 1.371 ad } 2437 1.371 ad 2438 1.298 rmind uvm_map_unlock_entry(entry); 2439 1.10 mrg } 2440 1.10 mrg 2441 1.331 christos #if defined(UVMDEBUG) 2442 1.323 para /* 2443 1.323 para * check if there's remaining mapping, 2444 1.323 para * which is a bug in caller. 2445 1.323 para */ 2446 1.177 yamt 2447 1.323 para vaddr_t va; 2448 1.323 para for (va = entry->start; va < entry->end; 2449 1.323 para va += PAGE_SIZE) { 2450 1.323 para if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2451 1.323 para panic("%s: %#"PRIxVADDR" has mapping", 2452 1.323 para __func__, va); 2453 1.177 yamt } 2454 1.323 para } 2455 1.187 yamt 2456 1.333 christos if (VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) { 2457 1.405 skrll uvm_km_check_empty(map, entry->start, entry->end); 2458 1.177 yamt } 2459 1.331 christos #endif /* defined(UVMDEBUG) */ 2460 1.177 yamt 2461 1.10 mrg /* 2462 1.98 chs * remove entry from map and put it on our list of entries 2463 1.106 chs * that we've nuked. then go to next entry. 2464 1.10 mrg */ 2465 1.99 chs 2466 1.353 pgoyette UVMHIST_LOG(maphist, " removed map entry %#jx", 2467 1.353 pgoyette (uintptr_t)entry, 0, 0, 0); 2468 1.82 thorpej 2469 1.82 thorpej /* critical! prevents stale hint */ 2470 1.82 thorpej SAVE_HINT(map, entry, entry->prev); 2471 1.82 thorpej 2472 1.10 mrg uvm_map_entry_unlink(map, entry); 2473 1.146 yamt KASSERT(map->size >= len); 2474 1.10 mrg map->size -= len; 2475 1.131 atatat entry->prev = NULL; 2476 1.10 mrg entry->next = first_entry; 2477 1.10 mrg first_entry = entry; 2478 1.106 chs entry = next; 2479 1.10 mrg } 2480 1.292 rmind 2481 1.222 yamt uvm_map_check(map, "unmap_remove leave"); 2482 1.144 yamt 2483 1.10 mrg /* 2484 1.10 mrg * now we've cleaned up the map and are ready for the caller to drop 2485 1.98 chs * references to the mapped objects. 2486 1.10 mrg */ 2487 1.10 mrg 2488 1.10 mrg *entry_list = first_entry; 2489 1.10 mrg UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2490 1.180 yamt 2491 1.180 yamt if (map->flags & VM_MAP_WANTVA) { 2492 1.238 ad mutex_enter(&map->misc_lock); 2493 1.180 yamt map->flags &= ~VM_MAP_WANTVA; 2494 1.238 ad cv_broadcast(&map->cv); 2495 1.238 ad mutex_exit(&map->misc_lock); 2496 1.180 yamt } 2497 1.1 mrg } 2498 1.1 mrg 2499 1.1 mrg /* 2500 1.1 mrg * uvm_unmap_detach: drop references in a chain of map entries 2501 1.1 mrg * 2502 1.1 mrg * => we will free the map entries as we traverse the list. 
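 *
 * For illustration, the usual calling pattern (a sketch of what
 * callers such as uvm_unmap() do) is roughly:
 *
 *      struct vm_map_entry *dead_entries;
 *
 *      vm_map_lock(map);
 *      uvm_unmap_remove(map, start, end, &dead_entries, 0);
 *      vm_map_unlock(map);
 *      if (dead_entries != NULL)
 *              uvm_unmap_detach(dead_entries, 0);
 *
 * so that dropping object and amap references, which may sleep or
 * start I/O, happens only after the map lock has been released.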
2503 1.1 mrg */ 2504 1.1 mrg 2505 1.10 mrg void 2506 1.138 enami uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2507 1.1 mrg { 2508 1.99 chs struct vm_map_entry *next_entry; 2509 1.385 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 2510 1.1 mrg 2511 1.10 mrg while (first_entry) { 2512 1.85 chs KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2513 1.10 mrg UVMHIST_LOG(maphist, 2514 1.353 pgoyette " detach %#jx: amap=%#jx, obj=%#jx, submap?=%jd", 2515 1.353 pgoyette (uintptr_t)first_entry, 2516 1.353 pgoyette (uintptr_t)first_entry->aref.ar_amap, 2517 1.353 pgoyette (uintptr_t)first_entry->object.uvm_obj, 2518 1.29 chuck UVM_ET_ISSUBMAP(first_entry)); 2519 1.1 mrg 2520 1.10 mrg /* 2521 1.10 mrg * drop reference to amap, if we've got one 2522 1.10 mrg */ 2523 1.10 mrg 2524 1.10 mrg if (first_entry->aref.ar_amap) 2525 1.85 chs uvm_map_unreference_amap(first_entry, flags); 2526 1.10 mrg 2527 1.10 mrg /* 2528 1.10 mrg * drop reference to our backing object, if we've got one 2529 1.10 mrg */ 2530 1.85 chs 2531 1.120 chs KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2532 1.120 chs if (UVM_ET_ISOBJ(first_entry) && 2533 1.120 chs first_entry->object.uvm_obj->pgops->pgo_detach) { 2534 1.120 chs (*first_entry->object.uvm_obj->pgops->pgo_detach) 2535 1.120 chs (first_entry->object.uvm_obj); 2536 1.10 mrg } 2537 1.10 mrg next_entry = first_entry->next; 2538 1.10 mrg uvm_mapent_free(first_entry); 2539 1.10 mrg first_entry = next_entry; 2540 1.10 mrg } 2541 1.10 mrg UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2542 1.1 mrg } 2543 1.1 mrg 2544 1.1 mrg /* 2545 1.1 mrg * E X T R A C T I O N F U N C T I O N S 2546 1.1 mrg */ 2547 1.1 mrg 2548 1.98 chs /* 2549 1.1 mrg * uvm_map_reserve: reserve space in a vm_map for future use. 2550 1.1 mrg * 2551 1.98 chs * => we reserve space in a map by putting a dummy map entry in the 2552 1.1 mrg * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2553 1.1 mrg * => map should be unlocked (we will write lock it) 2554 1.1 mrg * => we return true if we were able to reserve space 2555 1.1 mrg * => XXXCDC: should be inline? 2556 1.1 mrg */ 2557 1.1 mrg 2558 1.10 mrg int 2559 1.138 enami uvm_map_reserve(struct vm_map *map, vsize_t size, 2560 1.138 enami vaddr_t offset /* hint for pmap_prefer */, 2561 1.243 yamt vsize_t align /* alignment */, 2562 1.210 yamt vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2563 1.324 matt uvm_flag_t flags /* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */) 2564 1.1 mrg { 2565 1.385 skrll UVMHIST_FUNC(__func__); 2566 1.385 skrll UVMHIST_CALLARGS(maphist, "(map=%#jx, size=%#jx, offset=%#jx, addr=%#jx)", 2567 1.353 pgoyette (uintptr_t)map, size, offset, (uintptr_t)raddr); 2568 1.85 chs 2569 1.10 mrg size = round_page(size); 2570 1.85 chs 2571 1.10 mrg /* 2572 1.10 mrg * reserve some virtual space. 2573 1.10 mrg */ 2574 1.85 chs 2575 1.243 yamt if (uvm_map(map, raddr, size, NULL, offset, align, 2576 1.10 mrg UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2577 1.210 yamt UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2578 1.10 mrg UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2579 1.234 thorpej return (false); 2580 1.98 chs } 2581 1.85 chs 2582 1.353 pgoyette UVMHIST_LOG(maphist, "<- done (*raddr=%#jx)", *raddr,0,0,0); 2583 1.234 thorpej return (true); 2584 1.1 mrg } 2585 1.1 mrg 2586 1.1 mrg /* 2587 1.98 chs * uvm_map_replace: replace a reserved (blank) area of memory with 2588 1.1 mrg * real mappings. 
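 * (This is the second half of the reserve/replace pattern: for
 * example, uvm_map_extract() below reserves blank space in the
 * destination map and later fills it in with uvm_map_replace().)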
2589 1.1 mrg * 2590 1.98 chs * => caller must WRITE-LOCK the map 2591 1.234 thorpej * => we return true if replacement was a success 2592 1.1 mrg * => we expect the newents chain to have nnewents entrys on it and 2593 1.1 mrg * we expect newents->prev to point to the last entry on the list 2594 1.1 mrg * => note newents is allowed to be NULL 2595 1.1 mrg */ 2596 1.1 mrg 2597 1.275 yamt static int 2598 1.138 enami uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2599 1.275 yamt struct vm_map_entry *newents, int nnewents, vsize_t nsize, 2600 1.275 yamt struct vm_map_entry **oldentryp) 2601 1.10 mrg { 2602 1.99 chs struct vm_map_entry *oldent, *last; 2603 1.1 mrg 2604 1.222 yamt uvm_map_check(map, "map_replace entry"); 2605 1.144 yamt 2606 1.10 mrg /* 2607 1.10 mrg * first find the blank map entry at the specified address 2608 1.10 mrg */ 2609 1.85 chs 2610 1.10 mrg if (!uvm_map_lookup_entry(map, start, &oldent)) { 2611 1.234 thorpej return (false); 2612 1.10 mrg } 2613 1.85 chs 2614 1.10 mrg /* 2615 1.10 mrg * check to make sure we have a proper blank entry 2616 1.10 mrg */ 2617 1.1 mrg 2618 1.311 para if (end < oldent->end) { 2619 1.311 para UVM_MAP_CLIP_END(map, oldent, end); 2620 1.210 yamt } 2621 1.98 chs if (oldent->start != start || oldent->end != end || 2622 1.10 mrg oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2623 1.234 thorpej return (false); 2624 1.10 mrg } 2625 1.1 mrg 2626 1.1 mrg #ifdef DIAGNOSTIC 2627 1.99 chs 2628 1.10 mrg /* 2629 1.10 mrg * sanity check the newents chain 2630 1.10 mrg */ 2631 1.99 chs 2632 1.10 mrg { 2633 1.99 chs struct vm_map_entry *tmpent = newents; 2634 1.10 mrg int nent = 0; 2635 1.275 yamt vsize_t sz = 0; 2636 1.24 eeh vaddr_t cur = start; 2637 1.10 mrg 2638 1.10 mrg while (tmpent) { 2639 1.10 mrg nent++; 2640 1.275 yamt sz += tmpent->end - tmpent->start; 2641 1.10 mrg if (tmpent->start < cur) 2642 1.10 mrg panic("uvm_map_replace1"); 2643 1.275 yamt if (tmpent->start >= tmpent->end || tmpent->end > end) { 2644 1.286 matt panic("uvm_map_replace2: " 2645 1.334 matt "tmpent->start=%#"PRIxVADDR 2646 1.334 matt ", tmpent->end=%#"PRIxVADDR 2647 1.334 matt ", end=%#"PRIxVADDR, 2648 1.286 matt tmpent->start, tmpent->end, end); 2649 1.10 mrg } 2650 1.10 mrg cur = tmpent->end; 2651 1.10 mrg if (tmpent->next) { 2652 1.10 mrg if (tmpent->next->prev != tmpent) 2653 1.10 mrg panic("uvm_map_replace3"); 2654 1.10 mrg } else { 2655 1.10 mrg if (newents->prev != tmpent) 2656 1.10 mrg panic("uvm_map_replace4"); 2657 1.10 mrg } 2658 1.10 mrg tmpent = tmpent->next; 2659 1.10 mrg } 2660 1.10 mrg if (nent != nnewents) 2661 1.10 mrg panic("uvm_map_replace5"); 2662 1.275 yamt if (sz != nsize) 2663 1.275 yamt panic("uvm_map_replace6"); 2664 1.10 mrg } 2665 1.10 mrg #endif 2666 1.10 mrg 2667 1.10 mrg /* 2668 1.10 mrg * map entry is a valid blank! replace it. (this does all the 2669 1.10 mrg * work of map entry link/unlink...). 
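 *
 * (Bookkeeping sketch: the nnewents new entries take the place of
 * the single blank entry, so map->nentries grows by nnewents - 1,
 * and map->size shrinks by the part of [start, end) that the new
 * entries do not cover, i.e. (end - start) - nsize.)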
2670 1.10 mrg */ 2671 1.10 mrg 2672 1.10 mrg if (newents) { 2673 1.99 chs last = newents->prev; 2674 1.10 mrg 2675 1.10 mrg /* critical: flush stale hints out of map */ 2676 1.82 thorpej SAVE_HINT(map, map->hint, newents); 2677 1.10 mrg if (map->first_free == oldent) 2678 1.10 mrg map->first_free = last; 2679 1.10 mrg 2680 1.10 mrg last->next = oldent->next; 2681 1.10 mrg last->next->prev = last; 2682 1.144 yamt 2683 1.144 yamt /* Fix RB tree */ 2684 1.144 yamt uvm_rb_remove(map, oldent); 2685 1.144 yamt 2686 1.10 mrg newents->prev = oldent->prev; 2687 1.10 mrg newents->prev->next = newents; 2688 1.10 mrg map->nentries = map->nentries + (nnewents - 1); 2689 1.10 mrg 2690 1.144 yamt /* Fixup the RB tree */ 2691 1.144 yamt { 2692 1.144 yamt int i; 2693 1.144 yamt struct vm_map_entry *tmp; 2694 1.144 yamt 2695 1.144 yamt tmp = newents; 2696 1.144 yamt for (i = 0; i < nnewents && tmp; i++) { 2697 1.144 yamt uvm_rb_insert(map, tmp); 2698 1.144 yamt tmp = tmp->next; 2699 1.144 yamt } 2700 1.144 yamt } 2701 1.10 mrg } else { 2702 1.10 mrg /* NULL list of new entries: just remove the old one */ 2703 1.221 yamt clear_hints(map, oldent); 2704 1.10 mrg uvm_map_entry_unlink(map, oldent); 2705 1.10 mrg } 2706 1.275 yamt map->size -= end - start - nsize; 2707 1.10 mrg 2708 1.222 yamt uvm_map_check(map, "map_replace leave"); 2709 1.10 mrg 2710 1.10 mrg /* 2711 1.209 yamt * now we can free the old blank entry and return. 2712 1.10 mrg */ 2713 1.1 mrg 2714 1.253 yamt *oldentryp = oldent; 2715 1.234 thorpej return (true); 2716 1.1 mrg } 2717 1.1 mrg 2718 1.1 mrg /* 2719 1.1 mrg * uvm_map_extract: extract a mapping from a map and put it somewhere 2720 1.1 mrg * (maybe removing the old mapping) 2721 1.1 mrg * 2722 1.1 mrg * => maps should be unlocked (we will write lock them) 2723 1.1 mrg * => returns 0 on success, error code otherwise 2724 1.1 mrg * => start must be page aligned 2725 1.1 mrg * => len must be page sized 2726 1.1 mrg * => flags: 2727 1.1 mrg * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2728 1.1 mrg * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2729 1.1 mrg * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2730 1.1 mrg * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2731 1.337 christos * UVM_EXTRACT_PROT_ALL: set prot to UVM_PROT_ALL as we go 2732 1.1 mrg * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2733 1.1 mrg * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2734 1.1 mrg * be used from within the kernel in a kernel level map <<< 2735 1.1 mrg */ 2736 1.1 mrg 2737 1.10 mrg int 2738 1.138 enami uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2739 1.138 enami struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2740 1.10 mrg { 2741 1.163 mycroft vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2742 1.99 chs struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2743 1.99 chs *deadentry, *oldentry; 2744 1.253 yamt struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2745 1.325 martin vsize_t elen __unused; 2746 1.10 mrg int nchain, error, copy_ok; 2747 1.275 yamt vsize_t nsize; 2748 1.385 skrll UVMHIST_FUNC(__func__); 2749 1.385 skrll UVMHIST_CALLARGS(maphist,"(srcmap=%#jx,start=%#jx, len=%#jx", 2750 1.353 pgoyette (uintptr_t)srcmap, start, len, 0); 2751 1.353 pgoyette UVMHIST_LOG(maphist," ...,dstmap=%#jx, flags=%#jx)", 2752 1.353 pgoyette (uintptr_t)dstmap, flags, 0, 0); 2753 1.10 mrg 2754 1.10 mrg /* 2755 1.10 mrg * step 0: sanity check: 
start must be on a page boundary, length 2756 1.10 mrg * must be page sized. can't ask for CONTIG/QREF if you asked for 2757 1.10 mrg * REMOVE. 2758 1.10 mrg */ 2759 1.10 mrg 2760 1.404 riastrad KASSERTMSG((start & PAGE_MASK) == 0, "start=0x%"PRIxVADDR, start); 2761 1.404 riastrad KASSERTMSG((len & PAGE_MASK) == 0, "len=0x%"PRIxVADDR, len); 2762 1.85 chs KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2763 1.85 chs (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2764 1.10 mrg 2765 1.10 mrg /* 2766 1.10 mrg * step 1: reserve space in the target map for the extracted area 2767 1.10 mrg */ 2768 1.10 mrg 2769 1.210 yamt if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2770 1.210 yamt dstaddr = vm_map_min(dstmap); 2771 1.380 riastrad if (!uvm_map_reserve(dstmap, len, start, 2772 1.324 matt atop(start) & uvmexp.colormask, &dstaddr, 2773 1.324 matt UVM_FLAG_COLORMATCH)) 2774 1.210 yamt return (ENOMEM); 2775 1.324 matt KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0); 2776 1.210 yamt *dstaddrp = dstaddr; /* pass address back to caller */ 2777 1.353 pgoyette UVMHIST_LOG(maphist, " dstaddr=%#jx", dstaddr,0,0,0); 2778 1.210 yamt } else { 2779 1.210 yamt dstaddr = *dstaddrp; 2780 1.210 yamt } 2781 1.10 mrg 2782 1.10 mrg /* 2783 1.98 chs * step 2: setup for the extraction process loop by init'ing the 2784 1.10 mrg * map entry chain, locking src map, and looking up the first useful 2785 1.10 mrg * entry in the map. 2786 1.10 mrg */ 2787 1.1 mrg 2788 1.10 mrg end = start + len; 2789 1.10 mrg newend = dstaddr + len; 2790 1.10 mrg chain = endchain = NULL; 2791 1.10 mrg nchain = 0; 2792 1.275 yamt nsize = 0; 2793 1.10 mrg vm_map_lock(srcmap); 2794 1.10 mrg 2795 1.10 mrg if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2796 1.10 mrg 2797 1.10 mrg /* "start" is within an entry */ 2798 1.10 mrg if (flags & UVM_EXTRACT_QREF) { 2799 1.85 chs 2800 1.10 mrg /* 2801 1.10 mrg * for quick references we don't clip the entry, so 2802 1.10 mrg * the entry may map space "before" the starting 2803 1.10 mrg * virtual address... this is the "fudge" factor 2804 1.10 mrg * (which can be non-zero only the first time 2805 1.10 mrg * through the "while" loop in step 3). 2806 1.10 mrg */ 2807 1.85 chs 2808 1.10 mrg fudge = start - entry->start; 2809 1.10 mrg } else { 2810 1.85 chs 2811 1.10 mrg /* 2812 1.10 mrg * normal reference: we clip the map to fit (thus 2813 1.10 mrg * fudge is zero) 2814 1.10 mrg */ 2815 1.85 chs 2816 1.311 para UVM_MAP_CLIP_START(srcmap, entry, start); 2817 1.82 thorpej SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2818 1.10 mrg fudge = 0; 2819 1.10 mrg } 2820 1.85 chs } else { 2821 1.1 mrg 2822 1.10 mrg /* "start" is not within an entry ... skip to next entry */ 2823 1.10 mrg if (flags & UVM_EXTRACT_CONTIG) { 2824 1.10 mrg error = EINVAL; 2825 1.10 mrg goto bad; /* definite hole here ... */ 2826 1.10 mrg } 2827 1.1 mrg 2828 1.10 mrg entry = entry->next; 2829 1.10 mrg fudge = 0; 2830 1.10 mrg } 2831 1.85 chs 2832 1.10 mrg /* save values from srcmap for step 6 */ 2833 1.10 mrg orig_entry = entry; 2834 1.10 mrg orig_fudge = fudge; 2835 1.1 mrg 2836 1.10 mrg /* 2837 1.10 mrg * step 3: now start looping through the map entries, extracting 2838 1.10 mrg * as we go. 
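 *
 * (Worked example with invented numbers: if the first source entry
 * starts 0x1000 bytes before "start" and UVM_EXTRACT_QREF is set,
 * then fudge = 0x1000 and the entry is not clipped, so the first
 * copied entry gets
 *
 *      newentry->start = dstaddr + (entry->start + fudge) - start
 *                      = dstaddr
 *
 * and each later entry lands at dstaddr plus its offset from
 * "start".)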
2839 1.10 mrg */ 2840 1.1 mrg 2841 1.10 mrg while (entry->start < end && entry != &srcmap->header) { 2842 1.85 chs 2843 1.10 mrg /* if we are not doing a quick reference, clip it */ 2844 1.10 mrg if ((flags & UVM_EXTRACT_QREF) == 0) 2845 1.311 para UVM_MAP_CLIP_END(srcmap, entry, end); 2846 1.10 mrg 2847 1.10 mrg /* clear needs_copy (allow chunking) */ 2848 1.10 mrg if (UVM_ET_ISNEEDSCOPY(entry)) { 2849 1.212 yamt amap_copy(srcmap, entry, 2850 1.212 yamt AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2851 1.10 mrg if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2852 1.10 mrg error = ENOMEM; 2853 1.10 mrg goto bad; 2854 1.10 mrg } 2855 1.85 chs 2856 1.10 mrg /* amap_copy could clip (during chunk)! update fudge */ 2857 1.10 mrg if (fudge) { 2858 1.163 mycroft fudge = start - entry->start; 2859 1.10 mrg orig_fudge = fudge; 2860 1.10 mrg } 2861 1.10 mrg } 2862 1.1 mrg 2863 1.10 mrg /* calculate the offset of this from "start" */ 2864 1.10 mrg oldoffset = (entry->start + fudge) - start; 2865 1.1 mrg 2866 1.10 mrg /* allocate a new map entry */ 2867 1.126 bouyer newentry = uvm_mapent_alloc(dstmap, 0); 2868 1.10 mrg if (newentry == NULL) { 2869 1.10 mrg error = ENOMEM; 2870 1.10 mrg goto bad; 2871 1.10 mrg } 2872 1.10 mrg 2873 1.10 mrg /* set up new map entry */ 2874 1.10 mrg newentry->next = NULL; 2875 1.10 mrg newentry->prev = endchain; 2876 1.10 mrg newentry->start = dstaddr + oldoffset; 2877 1.10 mrg newentry->end = 2878 1.10 mrg newentry->start + (entry->end - (entry->start + fudge)); 2879 1.37 chs if (newentry->end > newend || newentry->end < newentry->start) 2880 1.10 mrg newentry->end = newend; 2881 1.10 mrg newentry->object.uvm_obj = entry->object.uvm_obj; 2882 1.10 mrg if (newentry->object.uvm_obj) { 2883 1.10 mrg if (newentry->object.uvm_obj->pgops->pgo_reference) 2884 1.10 mrg newentry->object.uvm_obj->pgops-> 2885 1.10 mrg pgo_reference(newentry->object.uvm_obj); 2886 1.354 mrg newentry->offset = entry->offset + fudge; 2887 1.10 mrg } else { 2888 1.10 mrg newentry->offset = 0; 2889 1.10 mrg } 2890 1.10 mrg newentry->etype = entry->etype; 2891 1.337 christos if (flags & UVM_EXTRACT_PROT_ALL) { 2892 1.337 christos newentry->protection = newentry->max_protection = 2893 1.337 christos UVM_PROT_ALL; 2894 1.337 christos } else { 2895 1.337 christos newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2896 1.337 christos entry->max_protection : entry->protection; 2897 1.337 christos newentry->max_protection = entry->max_protection; 2898 1.337 christos } 2899 1.10 mrg newentry->inheritance = entry->inheritance; 2900 1.10 mrg newentry->wired_count = 0; 2901 1.10 mrg newentry->aref.ar_amap = entry->aref.ar_amap; 2902 1.10 mrg if (newentry->aref.ar_amap) { 2903 1.34 chuck newentry->aref.ar_pageoff = 2904 1.34 chuck entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2905 1.85 chs uvm_map_reference_amap(newentry, AMAP_SHARED | 2906 1.10 mrg ((flags & UVM_EXTRACT_QREF) ? 
AMAP_REFALL : 0)); 2907 1.10 mrg } else { 2908 1.34 chuck newentry->aref.ar_pageoff = 0; 2909 1.10 mrg } 2910 1.10 mrg newentry->advice = entry->advice; 2911 1.245 yamt if ((flags & UVM_EXTRACT_QREF) != 0) { 2912 1.245 yamt newentry->flags |= UVM_MAP_NOMERGE; 2913 1.245 yamt } 2914 1.10 mrg 2915 1.10 mrg /* now link it on the chain */ 2916 1.10 mrg nchain++; 2917 1.275 yamt nsize += newentry->end - newentry->start; 2918 1.10 mrg if (endchain == NULL) { 2919 1.10 mrg chain = endchain = newentry; 2920 1.10 mrg } else { 2921 1.10 mrg endchain->next = newentry; 2922 1.10 mrg endchain = newentry; 2923 1.10 mrg } 2924 1.10 mrg 2925 1.10 mrg /* end of 'while' loop! */ 2926 1.98 chs if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2927 1.10 mrg (entry->next == &srcmap->header || 2928 1.10 mrg entry->next->start != entry->end)) { 2929 1.10 mrg error = EINVAL; 2930 1.10 mrg goto bad; 2931 1.10 mrg } 2932 1.10 mrg entry = entry->next; 2933 1.10 mrg fudge = 0; 2934 1.10 mrg } 2935 1.10 mrg 2936 1.10 mrg /* 2937 1.10 mrg * step 4: close off chain (in format expected by uvm_map_replace) 2938 1.10 mrg */ 2939 1.10 mrg 2940 1.10 mrg if (chain) 2941 1.10 mrg chain->prev = endchain; 2942 1.10 mrg 2943 1.10 mrg /* 2944 1.10 mrg * step 5: attempt to lock the dest map so we can pmap_copy. 2945 1.98 chs * note usage of copy_ok: 2946 1.10 mrg * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2947 1.10 mrg * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2948 1.10 mrg */ 2949 1.85 chs 2950 1.234 thorpej if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2951 1.10 mrg copy_ok = 1; 2952 1.10 mrg if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2953 1.275 yamt nchain, nsize, &resentry)) { 2954 1.10 mrg if (srcmap != dstmap) 2955 1.10 mrg vm_map_unlock(dstmap); 2956 1.10 mrg error = EIO; 2957 1.10 mrg goto bad; 2958 1.10 mrg } 2959 1.10 mrg } else { 2960 1.10 mrg copy_ok = 0; 2961 1.411 andvar /* replace deferred until step 7 */ 2962 1.10 mrg } 2963 1.10 mrg 2964 1.10 mrg /* 2965 1.10 mrg * step 6: traverse the srcmap a second time to do the following: 2966 1.10 mrg * - if we got a lock on the dstmap do pmap_copy 2967 1.10 mrg * - if UVM_EXTRACT_REMOVE remove the entries 2968 1.10 mrg * we make use of orig_entry and orig_fudge (saved in step 2) 2969 1.10 mrg */ 2970 1.10 mrg 2971 1.10 mrg if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2972 1.10 mrg 2973 1.10 mrg /* purge possible stale hints from srcmap */ 2974 1.10 mrg if (flags & UVM_EXTRACT_REMOVE) { 2975 1.82 thorpej SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2976 1.220 yamt if (srcmap->first_free != &srcmap->header && 2977 1.220 yamt srcmap->first_free->start >= start) 2978 1.10 mrg srcmap->first_free = orig_entry->prev; 2979 1.10 mrg } 2980 1.10 mrg 2981 1.10 mrg entry = orig_entry; 2982 1.10 mrg fudge = orig_fudge; 2983 1.10 mrg deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2984 1.10 mrg 2985 1.10 mrg while (entry->start < end && entry != &srcmap->header) { 2986 1.10 mrg if (copy_ok) { 2987 1.74 thorpej oldoffset = (entry->start + fudge) - start; 2988 1.90 chs elen = MIN(end, entry->end) - 2989 1.74 thorpej (entry->start + fudge); 2990 1.74 thorpej pmap_copy(dstmap->pmap, srcmap->pmap, 2991 1.74 thorpej dstaddr + oldoffset, elen, 2992 1.74 thorpej entry->start + fudge); 2993 1.10 mrg } 2994 1.10 mrg 2995 1.74 thorpej /* we advance "entry" in the following if statement */ 2996 1.10 mrg if (flags & UVM_EXTRACT_REMOVE) { 2997 1.376 ad #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 2998 1.372 
ad uvm_map_lock_entry(entry, RW_WRITER); 2999 1.376 ad #else 3000 1.376 ad uvm_map_lock_entry(entry, RW_READER); 3001 1.376 ad #endif 3002 1.98 chs pmap_remove(srcmap->pmap, entry->start, 3003 1.20 chuck entry->end); 3004 1.298 rmind uvm_map_unlock_entry(entry); 3005 1.139 enami oldentry = entry; /* save entry */ 3006 1.139 enami entry = entry->next; /* advance */ 3007 1.20 chuck uvm_map_entry_unlink(srcmap, oldentry); 3008 1.20 chuck /* add to dead list */ 3009 1.20 chuck oldentry->next = deadentry; 3010 1.20 chuck deadentry = oldentry; 3011 1.139 enami } else { 3012 1.139 enami entry = entry->next; /* advance */ 3013 1.10 mrg } 3014 1.10 mrg 3015 1.10 mrg /* end of 'while' loop */ 3016 1.10 mrg fudge = 0; 3017 1.10 mrg } 3018 1.105 chris pmap_update(srcmap->pmap); 3019 1.10 mrg 3020 1.10 mrg /* 3021 1.10 mrg * unlock dstmap. we will dispose of deadentry in 3022 1.10 mrg * step 7 if needed 3023 1.10 mrg */ 3024 1.85 chs 3025 1.10 mrg if (copy_ok && srcmap != dstmap) 3026 1.10 mrg vm_map_unlock(dstmap); 3027 1.10 mrg 3028 1.99 chs } else { 3029 1.99 chs deadentry = NULL; 3030 1.10 mrg } 3031 1.10 mrg 3032 1.10 mrg /* 3033 1.10 mrg * step 7: we are done with the source map, unlock. if copy_ok 3034 1.10 mrg * is 0 then we have not replaced the dummy mapping in dstmap yet 3035 1.10 mrg * and we need to do so now. 3036 1.10 mrg */ 3037 1.10 mrg 3038 1.10 mrg vm_map_unlock(srcmap); 3039 1.10 mrg if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 3040 1.10 mrg uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 3041 1.10 mrg 3042 1.10 mrg /* now do the replacement if we didn't do it in step 5 */ 3043 1.10 mrg if (copy_ok == 0) { 3044 1.10 mrg vm_map_lock(dstmap); 3045 1.10 mrg error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 3046 1.275 yamt nchain, nsize, &resentry); 3047 1.10 mrg vm_map_unlock(dstmap); 3048 1.10 mrg 3049 1.234 thorpej if (error == false) { 3050 1.10 mrg error = EIO; 3051 1.10 mrg goto bad2; 3052 1.10 mrg } 3053 1.10 mrg } 3054 1.144 yamt 3055 1.253 yamt if (resentry != NULL) 3056 1.253 yamt uvm_mapent_free(resentry); 3057 1.253 yamt 3058 1.139 enami return (0); 3059 1.10 mrg 3060 1.10 mrg /* 3061 1.10 mrg * bad: failure recovery 3062 1.10 mrg */ 3063 1.10 mrg bad: 3064 1.10 mrg vm_map_unlock(srcmap); 3065 1.10 mrg bad2: /* src already unlocked */ 3066 1.10 mrg if (chain) 3067 1.10 mrg uvm_unmap_detach(chain, 3068 1.10 mrg (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 3069 1.144 yamt 3070 1.253 yamt if (resentry != NULL) 3071 1.253 yamt uvm_mapent_free(resentry); 3072 1.253 yamt 3073 1.210 yamt if ((flags & UVM_EXTRACT_RESERVED) == 0) { 3074 1.210 yamt uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ 3075 1.210 yamt } 3076 1.139 enami return (error); 3077 1.10 mrg } 3078 1.10 mrg 3079 1.10 mrg /* end of extraction functions */ 3080 1.1 mrg 3081 1.1 mrg /* 3082 1.1 mrg * uvm_map_submap: punch down part of a map into a submap 3083 1.1 mrg * 3084 1.1 mrg * => only the kernel_map is allowed to be submapped 3085 1.1 mrg * => the purpose of submapping is to break up the locking granularity 3086 1.1 mrg * of a larger map 3087 1.1 mrg * => the range specified must have been mapped previously with a uvm_map() 3088 1.1 mrg * call [with uobj==NULL] to create a blank map entry in the main map. 3089 1.1 mrg * [And it had better still be blank!] 3090 1.1 mrg * => maps which contain submaps should never be copied or forked. 3091 1.98 chs * => to remove a submap, use uvm_unmap() on the main map 3092 1.1 mrg * and then uvm_map_deallocate() the submap. 
3093 1.1 mrg * => main map must be unlocked. 3094 1.1 mrg * => submap must have been init'd and have a zero reference count. 3095 1.1 mrg * [need not be locked as we don't actually reference it] 3096 1.1 mrg */ 3097 1.85 chs 3098 1.10 mrg int 3099 1.138 enami uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3100 1.138 enami struct vm_map *submap) 3101 1.10 mrg { 3102 1.99 chs struct vm_map_entry *entry; 3103 1.94 chs int error; 3104 1.1 mrg 3105 1.10 mrg vm_map_lock(map); 3106 1.85 chs VM_MAP_RANGE_CHECK(map, start, end); 3107 1.1 mrg 3108 1.10 mrg if (uvm_map_lookup_entry(map, start, &entry)) { 3109 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3110 1.311 para UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 3111 1.94 chs } else { 3112 1.10 mrg entry = NULL; 3113 1.10 mrg } 3114 1.1 mrg 3115 1.98 chs if (entry != NULL && 3116 1.10 mrg entry->start == start && entry->end == end && 3117 1.10 mrg entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3118 1.10 mrg !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3119 1.29 chuck entry->etype |= UVM_ET_SUBMAP; 3120 1.10 mrg entry->object.sub_map = submap; 3121 1.10 mrg entry->offset = 0; 3122 1.10 mrg uvm_map_reference(submap); 3123 1.94 chs error = 0; 3124 1.10 mrg } else { 3125 1.94 chs error = EINVAL; 3126 1.10 mrg } 3127 1.10 mrg vm_map_unlock(map); 3128 1.174 yamt 3129 1.94 chs return error; 3130 1.1 mrg } 3131 1.1 mrg 3132 1.175 yamt /* 3133 1.344 joerg * uvm_map_protect_user: change map protection on behalf of the user. 3134 1.344 joerg * Enforces PAX settings as necessary. 3135 1.344 joerg */ 3136 1.344 joerg int 3137 1.344 joerg uvm_map_protect_user(struct lwp *l, vaddr_t start, vaddr_t end, 3138 1.344 joerg vm_prot_t new_prot) 3139 1.344 joerg { 3140 1.344 joerg int error; 3141 1.344 joerg 3142 1.344 joerg if ((error = PAX_MPROTECT_VALIDATE(l, new_prot))) 3143 1.344 joerg return error; 3144 1.344 joerg 3145 1.344 joerg return uvm_map_protect(&l->l_proc->p_vmspace->vm_map, start, end, 3146 1.344 joerg new_prot, false); 3147 1.344 joerg } 3148 1.344 joerg 3149 1.344 joerg 3150 1.344 joerg /* 3151 1.1 mrg * uvm_map_protect: change map protection 3152 1.1 mrg * 3153 1.1 mrg * => set_max means set max_protection. 3154 1.1 mrg * => map must be unlocked. 3155 1.1 mrg */ 3156 1.1 mrg 3157 1.139 enami #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 3158 1.36 mycroft ~VM_PROT_WRITE : VM_PROT_ALL) 3159 1.1 mrg 3160 1.10 mrg int 3161 1.138 enami uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3162 1.233 thorpej vm_prot_t new_prot, bool set_max) 3163 1.10 mrg { 3164 1.99 chs struct vm_map_entry *current, *entry; 3165 1.94 chs int error = 0; 3166 1.385 skrll UVMHIST_FUNC(__func__); 3167 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_prot=%#jx)", 3168 1.353 pgoyette (uintptr_t)map, start, end, new_prot); 3169 1.85 chs 3170 1.10 mrg vm_map_lock(map); 3171 1.10 mrg VM_MAP_RANGE_CHECK(map, start, end); 3172 1.10 mrg if (uvm_map_lookup_entry(map, start, &entry)) { 3173 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3174 1.10 mrg } else { 3175 1.10 mrg entry = entry->next; 3176 1.10 mrg } 3177 1.10 mrg 3178 1.1 mrg /* 3179 1.10 mrg * make a first pass to check for protection violations. 
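 *
 * for example (hypothetical entry, for illustration only): an entry
 * with max_protection = VM_PROT_READ|VM_PROT_EXECUTE rejects
 * new_prot = VM_PROT_READ|VM_PROT_WRITE with EACCES, because
 * (new_prot & max_protection) != new_prot.  submap entries fail with
 * EINVAL, and adding VM_PROT_EXECUTE to a vnode mapping from a file
 * system mounted MNT_NOEXEC also fails with EACCES.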
3180 1.1 mrg */ 3181 1.1 mrg 3182 1.10 mrg current = entry; 3183 1.10 mrg while ((current != &map->header) && (current->start < end)) { 3184 1.65 thorpej if (UVM_ET_ISSUBMAP(current)) { 3185 1.94 chs error = EINVAL; 3186 1.65 thorpej goto out; 3187 1.65 thorpej } 3188 1.10 mrg if ((new_prot & current->max_protection) != new_prot) { 3189 1.94 chs error = EACCES; 3190 1.65 thorpej goto out; 3191 1.112 thorpej } 3192 1.112 thorpej /* 3193 1.112 thorpej * Don't allow VM_PROT_EXECUTE to be set on entries that 3194 1.112 thorpej * point to vnodes that are associated with a NOEXEC file 3195 1.112 thorpej * system. 3196 1.112 thorpej */ 3197 1.112 thorpej if (UVM_ET_ISOBJ(current) && 3198 1.112 thorpej UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 3199 1.112 thorpej struct vnode *vp = 3200 1.112 thorpej (struct vnode *) current->object.uvm_obj; 3201 1.112 thorpej 3202 1.112 thorpej if ((new_prot & VM_PROT_EXECUTE) != 0 && 3203 1.112 thorpej (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 3204 1.112 thorpej error = EACCES; 3205 1.112 thorpej goto out; 3206 1.112 thorpej } 3207 1.10 mrg } 3208 1.224 elad 3209 1.65 thorpej current = current->next; 3210 1.10 mrg } 3211 1.10 mrg 3212 1.10 mrg /* go back and fix up protections (no need to clip this time). */ 3213 1.10 mrg 3214 1.10 mrg current = entry; 3215 1.10 mrg while ((current != &map->header) && (current->start < end)) { 3216 1.10 mrg vm_prot_t old_prot; 3217 1.85 chs 3218 1.311 para UVM_MAP_CLIP_END(map, current, end); 3219 1.10 mrg old_prot = current->protection; 3220 1.10 mrg if (set_max) 3221 1.10 mrg current->protection = 3222 1.10 mrg (current->max_protection = new_prot) & old_prot; 3223 1.10 mrg else 3224 1.10 mrg current->protection = new_prot; 3225 1.10 mrg 3226 1.10 mrg /* 3227 1.98 chs * update physical map if necessary. worry about copy-on-write 3228 1.10 mrg * here -- CHECK THIS XXX 3229 1.10 mrg */ 3230 1.10 mrg 3231 1.10 mrg if (current->protection != old_prot) { 3232 1.29 chuck /* update pmap! */ 3233 1.376 ad #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 3234 1.372 ad uvm_map_lock_entry(current, RW_WRITER); 3235 1.376 ad #else 3236 1.376 ad uvm_map_lock_entry(current, RW_READER); 3237 1.376 ad #endif 3238 1.29 chuck pmap_protect(map->pmap, current->start, current->end, 3239 1.358 maxv current->protection & MASK(current)); 3240 1.298 rmind uvm_map_unlock_entry(current); 3241 1.109 thorpej 3242 1.109 thorpej /* 3243 1.109 thorpej * If this entry points at a vnode, and the 3244 1.109 thorpej * protection includes VM_PROT_EXECUTE, mark 3245 1.111 thorpej * the vnode as VEXECMAP. 3246 1.109 thorpej */ 3247 1.109 thorpej if (UVM_ET_ISOBJ(current)) { 3248 1.109 thorpej struct uvm_object *uobj = 3249 1.109 thorpej current->object.uvm_obj; 3250 1.109 thorpej 3251 1.109 thorpej if (UVM_OBJ_IS_VNODE(uobj) && 3252 1.241 ad (current->protection & VM_PROT_EXECUTE)) { 3253 1.110 thorpej vn_markexec((struct vnode *) uobj); 3254 1.241 ad } 3255 1.109 thorpej } 3256 1.65 thorpej } 3257 1.10 mrg 3258 1.65 thorpej /* 3259 1.65 thorpej * If the map is configured to lock any future mappings, 3260 1.65 thorpej * wire this entry now if the old protection was VM_PROT_NONE 3261 1.65 thorpej * and the new protection is not VM_PROT_NONE. 
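 *
 * a hypothetical userland sequence that reaches this path (sketch
 * only, assuming the usual mlockall/mmap/mprotect bindings):
 *
 *	mlockall(MCL_FUTURE);			sets VM_MAP_WIREFUTURE
 *	p = mmap(NULL, len, PROT_NONE, ...);	left unwired while it
 *						is VM_PROT_NONE
 *	mprotect(p, len, PROT_READ|PROT_WRITE);	wired here via
 *						uvm_map_pageable()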
3262 1.65 thorpej */ 3263 1.65 thorpej 3264 1.65 thorpej if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3265 1.358 maxv VM_MAPENT_ISWIRED(current) == 0 && 3266 1.65 thorpej old_prot == VM_PROT_NONE && 3267 1.65 thorpej new_prot != VM_PROT_NONE) { 3268 1.360 chs 3269 1.360 chs /* 3270 1.360 chs * We must call pmap_update() here because the 3271 1.360 chs * pmap_protect() call above might have removed some 3272 1.360 chs * pmap entries and uvm_map_pageable() might create 3273 1.360 chs * some new pmap entries that rely on the prior 3274 1.360 chs * removals being completely finished. 3275 1.360 chs */ 3276 1.360 chs 3277 1.360 chs pmap_update(map->pmap); 3278 1.360 chs 3279 1.358 maxv if (uvm_map_pageable(map, current->start, 3280 1.358 maxv current->end, false, 3281 1.94 chs UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3282 1.99 chs 3283 1.65 thorpej /* 3284 1.65 thorpej * If locking the entry fails, remember the 3285 1.65 thorpej * error if it's the first one. Note we 3286 1.65 thorpej * still continue setting the protection in 3287 1.94 chs * the map, but will return the error 3288 1.94 chs * condition regardless. 3289 1.65 thorpej * 3290 1.65 thorpej * XXX Ignore what the actual error is, 3291 1.65 thorpej * XXX just call it a resource shortage 3292 1.65 thorpej * XXX so that it doesn't get confused 3293 1.65 thorpej * XXX what uvm_map_protect() itself would 3294 1.65 thorpej * XXX normally return. 3295 1.65 thorpej */ 3296 1.99 chs 3297 1.94 chs error = ENOMEM; 3298 1.65 thorpej } 3299 1.10 mrg } 3300 1.10 mrg current = current->next; 3301 1.10 mrg } 3302 1.105 chris pmap_update(map->pmap); 3303 1.85 chs 3304 1.65 thorpej out: 3305 1.10 mrg vm_map_unlock(map); 3306 1.174 yamt 3307 1.353 pgoyette UVMHIST_LOG(maphist, "<- done, error=%jd",error,0,0,0); 3308 1.94 chs return error; 3309 1.1 mrg } 3310 1.1 mrg 3311 1.1 mrg #undef MASK 3312 1.1 mrg 3313 1.98 chs /* 3314 1.1 mrg * uvm_map_inherit: set inheritance code for range of addrs in map. 3315 1.1 mrg * 3316 1.1 mrg * => map must be unlocked 3317 1.1 mrg * => note that the inherit code is used during a "fork". see fork 3318 1.1 mrg * code for details. 
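 *
 * a minimal sketch of how this is reached from userland (assuming the
 * standard minherit(2) binding; addr and len are hypothetical):
 *
 *	minherit(addr, len, MAP_INHERIT_SHARE);
 *
 * after which a child created by fork shares the range with its
 * parent instead of receiving a copy-on-write copy; MAP_INHERIT_NONE
 * leaves the range unmapped in the child, and MAP_INHERIT_ZERO gives
 * the child fresh zero-fill memory.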
3319 1.1 mrg */ 3320 1.1 mrg 3321 1.10 mrg int 3322 1.138 enami uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3323 1.138 enami vm_inherit_t new_inheritance) 3324 1.10 mrg { 3325 1.99 chs struct vm_map_entry *entry, *temp_entry; 3326 1.385 skrll UVMHIST_FUNC(__func__); 3327 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_inh=%#jx)", 3328 1.353 pgoyette (uintptr_t)map, start, end, new_inheritance); 3329 1.10 mrg 3330 1.10 mrg switch (new_inheritance) { 3331 1.80 wiz case MAP_INHERIT_NONE: 3332 1.80 wiz case MAP_INHERIT_COPY: 3333 1.80 wiz case MAP_INHERIT_SHARE: 3334 1.330 christos case MAP_INHERIT_ZERO: 3335 1.10 mrg break; 3336 1.10 mrg default: 3337 1.10 mrg UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3338 1.94 chs return EINVAL; 3339 1.10 mrg } 3340 1.1 mrg 3341 1.10 mrg vm_map_lock(map); 3342 1.10 mrg VM_MAP_RANGE_CHECK(map, start, end); 3343 1.10 mrg if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3344 1.10 mrg entry = temp_entry; 3345 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3346 1.10 mrg } else { 3347 1.10 mrg entry = temp_entry->next; 3348 1.10 mrg } 3349 1.10 mrg while ((entry != &map->header) && (entry->start < end)) { 3350 1.311 para UVM_MAP_CLIP_END(map, entry, end); 3351 1.10 mrg entry->inheritance = new_inheritance; 3352 1.10 mrg entry = entry->next; 3353 1.10 mrg } 3354 1.10 mrg vm_map_unlock(map); 3355 1.10 mrg UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3356 1.94 chs return 0; 3357 1.41 mrg } 3358 1.41 mrg 3359 1.98 chs /* 3360 1.41 mrg * uvm_map_advice: set advice code for range of addrs in map. 3361 1.41 mrg * 3362 1.41 mrg * => map must be unlocked 3363 1.41 mrg */ 3364 1.41 mrg 3365 1.41 mrg int 3366 1.138 enami uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3367 1.41 mrg { 3368 1.99 chs struct vm_map_entry *entry, *temp_entry; 3369 1.385 skrll UVMHIST_FUNC(__func__); 3370 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_adv=%#jx)", 3371 1.353 pgoyette (uintptr_t)map, start, end, new_advice); 3372 1.41 mrg 3373 1.41 mrg vm_map_lock(map); 3374 1.41 mrg VM_MAP_RANGE_CHECK(map, start, end); 3375 1.41 mrg if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3376 1.41 mrg entry = temp_entry; 3377 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3378 1.41 mrg } else { 3379 1.41 mrg entry = temp_entry->next; 3380 1.41 mrg } 3381 1.61 thorpej 3382 1.61 thorpej /* 3383 1.61 thorpej * XXXJRT: disallow holes? 
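 *
 * a minimal sketch of a caller (assuming the standard madvise(2)
 * binding; addr and len are hypothetical):
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);
 *
 * only MADV_NORMAL, MADV_RANDOM and MADV_SEQUENTIAL are stored on map
 * entries by the loop below; anything else is rejected with EINVAL.
 * (other advice values, e.g. MADV_WILLNEED, are handled by separate
 * routines such as uvm_map_willneed() further down.)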
3384 1.61 thorpej */ 3385 1.61 thorpej 3386 1.41 mrg while ((entry != &map->header) && (entry->start < end)) { 3387 1.311 para UVM_MAP_CLIP_END(map, entry, end); 3388 1.41 mrg 3389 1.41 mrg switch (new_advice) { 3390 1.41 mrg case MADV_NORMAL: 3391 1.41 mrg case MADV_RANDOM: 3392 1.41 mrg case MADV_SEQUENTIAL: 3393 1.41 mrg /* nothing special here */ 3394 1.41 mrg break; 3395 1.41 mrg 3396 1.41 mrg default: 3397 1.50 mrg vm_map_unlock(map); 3398 1.41 mrg UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3399 1.94 chs return EINVAL; 3400 1.41 mrg } 3401 1.41 mrg entry->advice = new_advice; 3402 1.41 mrg entry = entry->next; 3403 1.41 mrg } 3404 1.41 mrg 3405 1.41 mrg vm_map_unlock(map); 3406 1.41 mrg UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3407 1.94 chs return 0; 3408 1.1 mrg } 3409 1.1 mrg 3410 1.1 mrg /* 3411 1.271 yamt * uvm_map_willneed: apply MADV_WILLNEED 3412 1.271 yamt */ 3413 1.271 yamt 3414 1.271 yamt int 3415 1.271 yamt uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end) 3416 1.271 yamt { 3417 1.271 yamt struct vm_map_entry *entry; 3418 1.385 skrll UVMHIST_FUNC(__func__); 3419 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx)", 3420 1.353 pgoyette (uintptr_t)map, start, end, 0); 3421 1.271 yamt 3422 1.271 yamt vm_map_lock_read(map); 3423 1.271 yamt VM_MAP_RANGE_CHECK(map, start, end); 3424 1.271 yamt if (!uvm_map_lookup_entry(map, start, &entry)) { 3425 1.271 yamt entry = entry->next; 3426 1.271 yamt } 3427 1.271 yamt while (entry->start < end) { 3428 1.271 yamt struct vm_amap * const amap = entry->aref.ar_amap; 3429 1.271 yamt struct uvm_object * const uobj = entry->object.uvm_obj; 3430 1.271 yamt 3431 1.271 yamt KASSERT(entry != &map->header); 3432 1.271 yamt KASSERT(start < entry->end); 3433 1.271 yamt /* 3434 1.296 yamt * For now, we handle only the easy but commonly-requested case. 3435 1.296 yamt * ie. start prefetching of backing uobj pages. 3436 1.271 yamt * 3437 1.296 yamt * XXX It might be useful to pmap_enter() the already-in-core 3438 1.296 yamt * pages by inventing a "weak" mode for uvm_fault() which would 3439 1.296 yamt * only do the PGO_LOCKED pgo_get(). 3440 1.271 yamt */ 3441 1.271 yamt if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) { 3442 1.271 yamt off_t offset; 3443 1.271 yamt off_t size; 3444 1.271 yamt 3445 1.271 yamt offset = entry->offset; 3446 1.271 yamt if (start < entry->start) { 3447 1.271 yamt offset += entry->start - start; 3448 1.271 yamt } 3449 1.271 yamt size = entry->offset + (entry->end - entry->start); 3450 1.271 yamt if (entry->end < end) { 3451 1.271 yamt size -= end - entry->end; 3452 1.271 yamt } 3453 1.271 yamt uvm_readahead(uobj, offset, size); 3454 1.271 yamt } 3455 1.271 yamt entry = entry->next; 3456 1.271 yamt } 3457 1.271 yamt vm_map_unlock_read(map); 3458 1.271 yamt UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3459 1.271 yamt return 0; 3460 1.271 yamt } 3461 1.271 yamt 3462 1.271 yamt /* 3463 1.1 mrg * uvm_map_pageable: sets the pageability of a range in a map. 3464 1.1 mrg * 3465 1.56 thorpej * => wires map entries. should not be used for transient page locking. 3466 1.56 thorpej * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3467 1.216 drochner * => regions specified as not pageable require lock-down (wired) memory 3468 1.1 mrg * and page tables. 
3469 1.59 thorpej * => map must never be read-locked 3470 1.234 thorpej * => if islocked is true, map is already write-locked 3471 1.59 thorpej * => we always unlock the map, since we must downgrade to a read-lock 3472 1.59 thorpej * to call uvm_fault_wire() 3473 1.1 mrg * => XXXCDC: check this and try and clean it up. 3474 1.1 mrg */ 3475 1.1 mrg 3476 1.19 kleink int 3477 1.138 enami uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3478 1.233 thorpej bool new_pageable, int lockflags) 3479 1.1 mrg { 3480 1.99 chs struct vm_map_entry *entry, *start_entry, *failed_entry; 3481 1.10 mrg int rv; 3482 1.60 thorpej #ifdef DIAGNOSTIC 3483 1.60 thorpej u_int timestamp_save; 3484 1.60 thorpej #endif 3485 1.385 skrll UVMHIST_FUNC(__func__); 3486 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_pageable=%ju)", 3487 1.353 pgoyette (uintptr_t)map, start, end, new_pageable); 3488 1.85 chs KASSERT(map->flags & VM_MAP_PAGEABLE); 3489 1.45 thorpej 3490 1.64 thorpej if ((lockflags & UVM_LK_ENTER) == 0) 3491 1.59 thorpej vm_map_lock(map); 3492 1.10 mrg VM_MAP_RANGE_CHECK(map, start, end); 3493 1.10 mrg 3494 1.98 chs /* 3495 1.10 mrg * only one pageability change may take place at one time, since 3496 1.10 mrg * uvm_fault_wire assumes it will be called only once for each 3497 1.10 mrg * wiring/unwiring. therefore, we have to make sure we're actually 3498 1.10 mrg * changing the pageability for the entire region. we do so before 3499 1.98 chs * making any changes. 3500 1.10 mrg */ 3501 1.10 mrg 3502 1.234 thorpej if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3503 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) 3504 1.64 thorpej vm_map_unlock(map); 3505 1.85 chs 3506 1.94 chs UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3507 1.94 chs return EFAULT; 3508 1.10 mrg } 3509 1.10 mrg entry = start_entry; 3510 1.10 mrg 3511 1.359 kre if (start == end) { /* nothing required */ 3512 1.359 kre if ((lockflags & UVM_LK_EXIT) == 0) 3513 1.359 kre vm_map_unlock(map); 3514 1.359 kre 3515 1.359 kre UVMHIST_LOG(maphist,"<- done (nothing)",0,0,0,0); 3516 1.359 kre return 0; 3517 1.359 kre } 3518 1.359 kre 3519 1.98 chs /* 3520 1.100 wiz * handle wiring and unwiring separately. 3521 1.10 mrg */ 3522 1.1 mrg 3523 1.56 thorpej if (new_pageable) { /* unwire */ 3524 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3525 1.85 chs 3526 1.10 mrg /* 3527 1.10 mrg * unwiring. first ensure that the range to be unwired is 3528 1.98 chs * really wired down and that there are no holes. 3529 1.10 mrg */ 3530 1.85 chs 3531 1.10 mrg while ((entry != &map->header) && (entry->start < end)) { 3532 1.10 mrg if (entry->wired_count == 0 || 3533 1.10 mrg (entry->end < end && 3534 1.55 thorpej (entry->next == &map->header || 3535 1.55 thorpej entry->next->start > entry->end))) { 3536 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) 3537 1.64 thorpej vm_map_unlock(map); 3538 1.94 chs UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3539 1.94 chs return EINVAL; 3540 1.10 mrg } 3541 1.10 mrg entry = entry->next; 3542 1.10 mrg } 3543 1.10 mrg 3544 1.98 chs /* 3545 1.56 thorpej * POSIX 1003.1b - a single munlock call unlocks a region, 3546 1.56 thorpej * regardless of the number of mlock calls made on that 3547 1.56 thorpej * region. 
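 *
 * illustrative userland sequence (hypothetical addresses, assuming
 * the usual mlock/munlock bindings):
 *
 *	mlock(addr, len);
 *	mlock(addr, len);
 *	munlock(addr, len);
 *
 * after the single munlock the whole range is unwired, even though
 * mlock was applied to it twice.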
3548 1.10 mrg */ 3549 1.85 chs 3550 1.10 mrg entry = start_entry; 3551 1.10 mrg while ((entry != &map->header) && (entry->start < end)) { 3552 1.311 para UVM_MAP_CLIP_END(map, entry, end); 3553 1.56 thorpej if (VM_MAPENT_ISWIRED(entry)) 3554 1.10 mrg uvm_map_entry_unwire(map, entry); 3555 1.10 mrg entry = entry->next; 3556 1.10 mrg } 3557 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) 3558 1.64 thorpej vm_map_unlock(map); 3559 1.10 mrg UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3560 1.94 chs return 0; 3561 1.10 mrg } 3562 1.10 mrg 3563 1.10 mrg /* 3564 1.10 mrg * wire case: in two passes [XXXCDC: ugly block of code here] 3565 1.10 mrg * 3566 1.10 mrg * 1: holding the write lock, we create any anonymous maps that need 3567 1.10 mrg * to be created. then we clip each map entry to the region to 3568 1.98 chs * be wired and increment its wiring count. 3569 1.10 mrg * 3570 1.10 mrg * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3571 1.56 thorpej * in the pages for any newly wired area (wired_count == 1). 3572 1.10 mrg * 3573 1.10 mrg * downgrading to a read lock for uvm_fault_wire avoids a possible 3574 1.10 mrg * deadlock with another thread that may have faulted on one of 3575 1.10 mrg * the pages to be wired (it would mark the page busy, blocking 3576 1.10 mrg * us, then in turn block on the map lock that we hold). because 3577 1.10 mrg * of problems in the recursive lock package, we cannot upgrade 3578 1.10 mrg * to a write lock in vm_map_lookup. thus, any actions that 3579 1.10 mrg * require the write lock must be done beforehand. because we 3580 1.10 mrg * keep the read lock on the map, the copy-on-write status of the 3581 1.10 mrg * entries we modify here cannot change. 3582 1.10 mrg */ 3583 1.10 mrg 3584 1.10 mrg while ((entry != &map->header) && (entry->start < end)) { 3585 1.55 thorpej if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3586 1.85 chs 3587 1.85 chs /* 3588 1.10 mrg * perform actions of vm_map_lookup that need the 3589 1.10 mrg * write lock on the map: create an anonymous map 3590 1.10 mrg * for a copy-on-write region, or an anonymous map 3591 1.29 chuck * for a zero-fill region. (XXXCDC: submap case 3592 1.29 chuck * ok?) 3593 1.10 mrg */ 3594 1.85 chs 3595 1.29 chuck if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3596 1.98 chs if (UVM_ET_ISNEEDSCOPY(entry) && 3597 1.117 chs ((entry->max_protection & VM_PROT_WRITE) || 3598 1.54 thorpej (entry->object.uvm_obj == NULL))) { 3599 1.212 yamt amap_copy(map, entry, 0, start, end); 3600 1.10 mrg /* XXXCDC: wait OK? */ 3601 1.10 mrg } 3602 1.10 mrg } 3603 1.55 thorpej } 3604 1.311 para UVM_MAP_CLIP_START(map, entry, start); 3605 1.311 para UVM_MAP_CLIP_END(map, entry, end); 3606 1.10 mrg entry->wired_count++; 3607 1.10 mrg 3608 1.10 mrg /* 3609 1.98 chs * Check for holes 3610 1.10 mrg */ 3611 1.85 chs 3612 1.54 thorpej if (entry->protection == VM_PROT_NONE || 3613 1.54 thorpej (entry->end < end && 3614 1.54 thorpej (entry->next == &map->header || 3615 1.54 thorpej entry->next->start > entry->end))) { 3616 1.85 chs 3617 1.10 mrg /* 3618 1.10 mrg * found one. amap creation actions do not need to 3619 1.98 chs * be undone, but the wired counts need to be restored. 
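 *
 * for example (hypothetical layout): wiring [0x1000, 0x5000) over
 * entries [0x1000, 0x3000) and [0x4000, 0x5000) trips this check at
 * the first entry, since entry->end (0x3000) is below end (0x5000)
 * while entry->next->start (0x4000) is above entry->end.  the loop
 * below then walks backwards, dropping the wired_count it had already
 * raised on the entries seen so far, and the call fails with EINVAL.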
3620 1.10 mrg */ 3621 1.85 chs 3622 1.10 mrg while (entry != &map->header && entry->end > start) { 3623 1.10 mrg entry->wired_count--; 3624 1.10 mrg entry = entry->prev; 3625 1.10 mrg } 3626 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) 3627 1.64 thorpej vm_map_unlock(map); 3628 1.10 mrg UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3629 1.94 chs return EINVAL; 3630 1.10 mrg } 3631 1.10 mrg entry = entry->next; 3632 1.10 mrg } 3633 1.10 mrg 3634 1.10 mrg /* 3635 1.10 mrg * Pass 2. 3636 1.10 mrg */ 3637 1.51 thorpej 3638 1.60 thorpej #ifdef DIAGNOSTIC 3639 1.60 thorpej timestamp_save = map->timestamp; 3640 1.60 thorpej #endif 3641 1.60 thorpej vm_map_busy(map); 3642 1.249 yamt vm_map_unlock(map); 3643 1.10 mrg 3644 1.10 mrg rv = 0; 3645 1.10 mrg entry = start_entry; 3646 1.10 mrg while (entry != &map->header && entry->start < end) { 3647 1.51 thorpej if (entry->wired_count == 1) { 3648 1.44 thorpej rv = uvm_fault_wire(map, entry->start, entry->end, 3649 1.216 drochner entry->max_protection, 1); 3650 1.10 mrg if (rv) { 3651 1.94 chs 3652 1.51 thorpej /* 3653 1.51 thorpej * wiring failed. break out of the loop. 3654 1.51 thorpej * we'll clean up the map below, once we 3655 1.51 thorpej * have a write lock again. 3656 1.51 thorpej */ 3657 1.94 chs 3658 1.51 thorpej break; 3659 1.10 mrg } 3660 1.10 mrg } 3661 1.10 mrg entry = entry->next; 3662 1.10 mrg } 3663 1.10 mrg 3664 1.139 enami if (rv) { /* failed? */ 3665 1.85 chs 3666 1.52 thorpej /* 3667 1.52 thorpej * Get back to an exclusive (write) lock. 3668 1.52 thorpej */ 3669 1.85 chs 3670 1.249 yamt vm_map_lock(map); 3671 1.60 thorpej vm_map_unbusy(map); 3672 1.60 thorpej 3673 1.60 thorpej #ifdef DIAGNOSTIC 3674 1.252 yamt if (timestamp_save + 1 != map->timestamp) 3675 1.60 thorpej panic("uvm_map_pageable: stale map"); 3676 1.60 thorpej #endif 3677 1.10 mrg 3678 1.51 thorpej /* 3679 1.51 thorpej * first drop the wiring count on all the entries 3680 1.51 thorpej * which haven't actually been wired yet. 3681 1.51 thorpej */ 3682 1.85 chs 3683 1.54 thorpej failed_entry = entry; 3684 1.54 thorpej while (entry != &map->header && entry->start < end) { 3685 1.51 thorpej entry->wired_count--; 3686 1.54 thorpej entry = entry->next; 3687 1.54 thorpej } 3688 1.51 thorpej 3689 1.51 thorpej /* 3690 1.54 thorpej * now, unwire all the entries that were successfully 3691 1.54 thorpej * wired above. 3692 1.51 thorpej */ 3693 1.85 chs 3694 1.54 thorpej entry = start_entry; 3695 1.54 thorpej while (entry != failed_entry) { 3696 1.54 thorpej entry->wired_count--; 3697 1.55 thorpej if (VM_MAPENT_ISWIRED(entry) == 0) 3698 1.54 thorpej uvm_map_entry_unwire(map, entry); 3699 1.54 thorpej entry = entry->next; 3700 1.54 thorpej } 3701 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) 3702 1.64 thorpej vm_map_unlock(map); 3703 1.353 pgoyette UVMHIST_LOG(maphist, "<- done (RV=%jd)", rv,0,0,0); 3704 1.139 enami return (rv); 3705 1.10 mrg } 3706 1.51 thorpej 3707 1.64 thorpej if ((lockflags & UVM_LK_EXIT) == 0) { 3708 1.64 thorpej vm_map_unbusy(map); 3709 1.64 thorpej } else { 3710 1.85 chs 3711 1.64 thorpej /* 3712 1.64 thorpej * Get back to an exclusive (write) lock. 
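 *
 * in outline, the locking pattern of the wire case is (a sketch of
 * the surrounding code, not a separate interface):
 *
 *	vm_map_busy(map);
 *	vm_map_unlock(map);
 *	... uvm_fault_wire() each newly wired entry ...
 *	vm_map_lock(map);	(re-taken only when rolling back a
 *				failure or when the caller asked for
 *				UVM_LK_EXIT)
 *	vm_map_unbusy(map);
 *
 * the busy mark stands in for the read lock described earlier: other
 * threads cannot take the map's exclusive lock while we fault the
 * pages in with the map unlocked.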
3713 1.64 thorpej */ 3714 1.85 chs 3715 1.249 yamt vm_map_lock(map); 3716 1.64 thorpej vm_map_unbusy(map); 3717 1.64 thorpej } 3718 1.64 thorpej 3719 1.10 mrg UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3720 1.94 chs return 0; 3721 1.1 mrg } 3722 1.1 mrg 3723 1.1 mrg /* 3724 1.54 thorpej * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3725 1.54 thorpej * all mapped regions. 3726 1.54 thorpej * 3727 1.54 thorpej * => map must not be locked. 3728 1.54 thorpej * => if no flags are specified, all regions are unwired. 3729 1.54 thorpej * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3730 1.54 thorpej */ 3731 1.54 thorpej 3732 1.54 thorpej int 3733 1.138 enami uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3734 1.54 thorpej { 3735 1.99 chs struct vm_map_entry *entry, *failed_entry; 3736 1.54 thorpej vsize_t size; 3737 1.54 thorpej int rv; 3738 1.60 thorpej #ifdef DIAGNOSTIC 3739 1.60 thorpej u_int timestamp_save; 3740 1.60 thorpej #endif 3741 1.385 skrll UVMHIST_FUNC(__func__); 3742 1.385 skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,flags=%#jx)", (uintptr_t)map, flags, 3743 1.353 pgoyette 0, 0); 3744 1.54 thorpej 3745 1.85 chs KASSERT(map->flags & VM_MAP_PAGEABLE); 3746 1.54 thorpej 3747 1.54 thorpej vm_map_lock(map); 3748 1.54 thorpej 3749 1.54 thorpej /* 3750 1.54 thorpej * handle wiring and unwiring separately. 3751 1.54 thorpej */ 3752 1.54 thorpej 3753 1.54 thorpej if (flags == 0) { /* unwire */ 3754 1.99 chs 3755 1.54 thorpej /* 3756 1.56 thorpej * POSIX 1003.1b -- munlockall unlocks all regions, 3757 1.56 thorpej * regardless of how many times mlockall has been called. 3758 1.54 thorpej */ 3759 1.99 chs 3760 1.54 thorpej for (entry = map->header.next; entry != &map->header; 3761 1.54 thorpej entry = entry->next) { 3762 1.56 thorpej if (VM_MAPENT_ISWIRED(entry)) 3763 1.56 thorpej uvm_map_entry_unwire(map, entry); 3764 1.54 thorpej } 3765 1.238 ad map->flags &= ~VM_MAP_WIREFUTURE; 3766 1.54 thorpej vm_map_unlock(map); 3767 1.54 thorpej UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3768 1.94 chs return 0; 3769 1.54 thorpej } 3770 1.54 thorpej 3771 1.54 thorpej if (flags & MCL_FUTURE) { 3772 1.99 chs 3773 1.54 thorpej /* 3774 1.54 thorpej * must wire all future mappings; remember this. 3775 1.54 thorpej */ 3776 1.99 chs 3777 1.238 ad map->flags |= VM_MAP_WIREFUTURE; 3778 1.54 thorpej } 3779 1.54 thorpej 3780 1.54 thorpej if ((flags & MCL_CURRENT) == 0) { 3781 1.99 chs 3782 1.54 thorpej /* 3783 1.54 thorpej * no more work to do! 3784 1.54 thorpej */ 3785 1.99 chs 3786 1.54 thorpej UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3787 1.54 thorpej vm_map_unlock(map); 3788 1.94 chs return 0; 3789 1.54 thorpej } 3790 1.54 thorpej 3791 1.54 thorpej /* 3792 1.54 thorpej * wire case: in three passes [XXXCDC: ugly block of code here] 3793 1.54 thorpej * 3794 1.54 thorpej * 1: holding the write lock, count all pages mapped by non-wired 3795 1.54 thorpej * entries. if this would cause us to go over our limit, we fail. 3796 1.54 thorpej * 3797 1.54 thorpej * 2: still holding the write lock, we create any anonymous maps that 3798 1.54 thorpej * need to be created. then we increment its wiring count. 3799 1.54 thorpej * 3800 1.54 thorpej * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3801 1.56 thorpej * in the pages for any newly wired area (wired_count == 1). 
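 *
 * the usual caller of this routine is mlockall(2) (sketch; flags as
 * defined for that call):
 *
 *	mlockall(MCL_CURRENT | MCL_FUTURE);
 *
 * pass 1 below adds up the not-yet-wired, non-VM_PROT_NONE bytes and
 * fails with ENOMEM if wiring them would push uvmexp.wired past
 * uvmexp.wiredmax, or past the caller-supplied "limit" argument
 * (normally the process's locked-memory resource limit).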
3802 1.54 thorpej * 3803 1.54 thorpej * downgrading to a read lock for uvm_fault_wire avoids a possible 3804 1.54 thorpej * deadlock with another thread that may have faulted on one of 3805 1.54 thorpej * the pages to be wired (it would mark the page busy, blocking 3806 1.54 thorpej * us, then in turn block on the map lock that we hold). because 3807 1.54 thorpej * of problems in the recursive lock package, we cannot upgrade 3808 1.54 thorpej * to a write lock in vm_map_lookup. thus, any actions that 3809 1.54 thorpej * require the write lock must be done beforehand. because we 3810 1.54 thorpej * keep the read lock on the map, the copy-on-write status of the 3811 1.54 thorpej * entries we modify here cannot change. 3812 1.54 thorpej */ 3813 1.54 thorpej 3814 1.54 thorpej for (size = 0, entry = map->header.next; entry != &map->header; 3815 1.54 thorpej entry = entry->next) { 3816 1.54 thorpej if (entry->protection != VM_PROT_NONE && 3817 1.55 thorpej VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3818 1.54 thorpej size += entry->end - entry->start; 3819 1.54 thorpej } 3820 1.54 thorpej } 3821 1.54 thorpej 3822 1.54 thorpej if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3823 1.54 thorpej vm_map_unlock(map); 3824 1.94 chs return ENOMEM; 3825 1.54 thorpej } 3826 1.54 thorpej 3827 1.54 thorpej if (limit != 0 && 3828 1.54 thorpej (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3829 1.54 thorpej vm_map_unlock(map); 3830 1.94 chs return ENOMEM; 3831 1.54 thorpej } 3832 1.54 thorpej 3833 1.54 thorpej /* 3834 1.54 thorpej * Pass 2. 3835 1.54 thorpej */ 3836 1.54 thorpej 3837 1.54 thorpej for (entry = map->header.next; entry != &map->header; 3838 1.54 thorpej entry = entry->next) { 3839 1.54 thorpej if (entry->protection == VM_PROT_NONE) 3840 1.54 thorpej continue; 3841 1.55 thorpej if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3842 1.99 chs 3843 1.54 thorpej /* 3844 1.54 thorpej * perform actions of vm_map_lookup that need the 3845 1.54 thorpej * write lock on the map: create an anonymous map 3846 1.54 thorpej * for a copy-on-write region, or an anonymous map 3847 1.54 thorpej * for a zero-fill region. (XXXCDC: submap case 3848 1.54 thorpej * ok?) 3849 1.54 thorpej */ 3850 1.99 chs 3851 1.54 thorpej if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3852 1.98 chs if (UVM_ET_ISNEEDSCOPY(entry) && 3853 1.117 chs ((entry->max_protection & VM_PROT_WRITE) || 3854 1.54 thorpej (entry->object.uvm_obj == NULL))) { 3855 1.212 yamt amap_copy(map, entry, 0, entry->start, 3856 1.212 yamt entry->end); 3857 1.54 thorpej /* XXXCDC: wait OK? */ 3858 1.54 thorpej } 3859 1.54 thorpej } 3860 1.55 thorpej } 3861 1.54 thorpej entry->wired_count++; 3862 1.54 thorpej } 3863 1.54 thorpej 3864 1.54 thorpej /* 3865 1.54 thorpej * Pass 3. 3866 1.54 thorpej */ 3867 1.54 thorpej 3868 1.60 thorpej #ifdef DIAGNOSTIC 3869 1.60 thorpej timestamp_save = map->timestamp; 3870 1.60 thorpej #endif 3871 1.60 thorpej vm_map_busy(map); 3872 1.249 yamt vm_map_unlock(map); 3873 1.54 thorpej 3874 1.94 chs rv = 0; 3875 1.54 thorpej for (entry = map->header.next; entry != &map->header; 3876 1.54 thorpej entry = entry->next) { 3877 1.54 thorpej if (entry->wired_count == 1) { 3878 1.54 thorpej rv = uvm_fault_wire(map, entry->start, entry->end, 3879 1.216 drochner entry->max_protection, 1); 3880 1.54 thorpej if (rv) { 3881 1.99 chs 3882 1.54 thorpej /* 3883 1.54 thorpej * wiring failed. break out of the loop. 
3884 1.54 thorpej * we'll clean up the map below, once we 3885 1.54 thorpej * have a write lock again. 3886 1.54 thorpej */ 3887 1.99 chs 3888 1.54 thorpej break; 3889 1.54 thorpej } 3890 1.54 thorpej } 3891 1.54 thorpej } 3892 1.54 thorpej 3893 1.99 chs if (rv) { 3894 1.99 chs 3895 1.54 thorpej /* 3896 1.54 thorpej * Get back an exclusive (write) lock. 3897 1.54 thorpej */ 3898 1.99 chs 3899 1.249 yamt vm_map_lock(map); 3900 1.60 thorpej vm_map_unbusy(map); 3901 1.60 thorpej 3902 1.60 thorpej #ifdef DIAGNOSTIC 3903 1.252 yamt if (timestamp_save + 1 != map->timestamp) 3904 1.60 thorpej panic("uvm_map_pageable_all: stale map"); 3905 1.60 thorpej #endif 3906 1.54 thorpej 3907 1.54 thorpej /* 3908 1.54 thorpej * first drop the wiring count on all the entries 3909 1.54 thorpej * which haven't actually been wired yet. 3910 1.67 thorpej * 3911 1.67 thorpej * Skip VM_PROT_NONE entries like we did above. 3912 1.54 thorpej */ 3913 1.99 chs 3914 1.54 thorpej failed_entry = entry; 3915 1.54 thorpej for (/* nothing */; entry != &map->header; 3916 1.67 thorpej entry = entry->next) { 3917 1.67 thorpej if (entry->protection == VM_PROT_NONE) 3918 1.67 thorpej continue; 3919 1.54 thorpej entry->wired_count--; 3920 1.67 thorpej } 3921 1.54 thorpej 3922 1.54 thorpej /* 3923 1.54 thorpej * now, unwire all the entries that were successfully 3924 1.54 thorpej * wired above. 3925 1.67 thorpej * 3926 1.67 thorpej * Skip VM_PROT_NONE entries like we did above. 3927 1.54 thorpej */ 3928 1.99 chs 3929 1.54 thorpej for (entry = map->header.next; entry != failed_entry; 3930 1.54 thorpej entry = entry->next) { 3931 1.67 thorpej if (entry->protection == VM_PROT_NONE) 3932 1.67 thorpej continue; 3933 1.54 thorpej entry->wired_count--; 3934 1.67 thorpej if (VM_MAPENT_ISWIRED(entry)) 3935 1.54 thorpej uvm_map_entry_unwire(map, entry); 3936 1.54 thorpej } 3937 1.54 thorpej vm_map_unlock(map); 3938 1.353 pgoyette UVMHIST_LOG(maphist,"<- done (RV=%jd)", rv,0,0,0); 3939 1.54 thorpej return (rv); 3940 1.54 thorpej } 3941 1.54 thorpej 3942 1.60 thorpej vm_map_unbusy(map); 3943 1.54 thorpej 3944 1.54 thorpej UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3945 1.94 chs return 0; 3946 1.54 thorpej } 3947 1.54 thorpej 3948 1.54 thorpej /* 3949 1.61 thorpej * uvm_map_clean: clean out a map range 3950 1.1 mrg * 3951 1.1 mrg * => valid flags: 3952 1.61 thorpej * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3953 1.1 mrg * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3954 1.1 mrg * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3955 1.1 mrg * if (flags & PGO_FREE): any cached pages are freed after clean 3956 1.1 mrg * => returns an error if any part of the specified range isn't mapped 3957 1.98 chs * => never a need to flush amap layer since the anonymous memory has 3958 1.61 thorpej * no permanent home, but may deactivate pages there 3959 1.61 thorpej * => called from sys_msync() and sys_madvise() 3960 1.406 chs * => caller must not have map locked 3961 1.1 mrg */ 3962 1.1 mrg 3963 1.10 mrg int 3964 1.138 enami uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3965 1.10 mrg { 3966 1.99 chs struct vm_map_entry *current, *entry; 3967 1.61 thorpej struct uvm_object *uobj; 3968 1.61 thorpej struct vm_amap *amap; 3969 1.375 ad struct vm_anon *anon; 3970 1.61 thorpej struct vm_page *pg; 3971 1.61 thorpej vaddr_t offset; 3972 1.24 eeh vsize_t size; 3973 1.188 dbj voff_t uoff; 3974 1.106 chs int error, refs; 3975 1.385 skrll UVMHIST_FUNC(__func__); 3976 1.385 
skrll UVMHIST_CALLARGS(maphist,"(map=%#jx,start=%#jx,end=%#jx,flags=%#jx)", 3977 1.385 skrll (uintptr_t)map, start, end, flags); 3978 1.85 chs 3979 1.85 chs KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3980 1.85 chs (PGO_FREE|PGO_DEACTIVATE)); 3981 1.61 thorpej 3982 1.406 chs vm_map_lock(map); 3983 1.10 mrg VM_MAP_RANGE_CHECK(map, start, end); 3984 1.406 chs if (!uvm_map_lookup_entry(map, start, &entry)) { 3985 1.406 chs vm_map_unlock(map); 3986 1.94 chs return EFAULT; 3987 1.10 mrg } 3988 1.10 mrg 3989 1.10 mrg /* 3990 1.186 chs * Make a first pass to check for holes and wiring problems. 3991 1.10 mrg */ 3992 1.85 chs 3993 1.10 mrg for (current = entry; current->start < end; current = current->next) { 3994 1.10 mrg if (UVM_ET_ISSUBMAP(current)) { 3995 1.406 chs vm_map_unlock(map); 3996 1.94 chs return EINVAL; 3997 1.10 mrg } 3998 1.186 chs if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3999 1.406 chs vm_map_unlock(map); 4000 1.186 chs return EBUSY; 4001 1.186 chs } 4002 1.90 chs if (end <= current->end) { 4003 1.90 chs break; 4004 1.90 chs } 4005 1.90 chs if (current->end != current->next->start) { 4006 1.406 chs vm_map_unlock(map); 4007 1.94 chs return EFAULT; 4008 1.10 mrg } 4009 1.10 mrg } 4010 1.10 mrg 4011 1.406 chs vm_map_busy(map); 4012 1.406 chs vm_map_unlock(map); 4013 1.94 chs error = 0; 4014 1.90 chs for (current = entry; start < end; current = current->next) { 4015 1.283 uebayasi amap = current->aref.ar_amap; /* upper layer */ 4016 1.283 uebayasi uobj = current->object.uvm_obj; /* lower layer */ 4017 1.85 chs KASSERT(start >= current->start); 4018 1.1 mrg 4019 1.10 mrg /* 4020 1.61 thorpej * No amap cleaning necessary if: 4021 1.61 thorpej * 4022 1.61 thorpej * (1) There's no amap. 4023 1.61 thorpej * 4024 1.61 thorpej * (2) We're not deactivating or freeing pages. 4025 1.10 mrg */ 4026 1.85 chs 4027 1.90 chs if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4028 1.61 thorpej goto flush_object; 4029 1.61 thorpej 4030 1.61 thorpej offset = start - current->start; 4031 1.90 chs size = MIN(end, current->end) - start; 4032 1.303 rmind 4033 1.372 ad amap_lock(amap, RW_WRITER); 4034 1.90 chs for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 4035 1.61 thorpej anon = amap_lookup(¤t->aref, offset); 4036 1.61 thorpej if (anon == NULL) 4037 1.61 thorpej continue; 4038 1.61 thorpej 4039 1.298 rmind KASSERT(anon->an_lock == amap->am_lock); 4040 1.192 yamt pg = anon->an_page; 4041 1.63 thorpej if (pg == NULL) { 4042 1.63 thorpej continue; 4043 1.63 thorpej } 4044 1.332 chs if (pg->flags & PG_BUSY) { 4045 1.332 chs continue; 4046 1.332 chs } 4047 1.63 thorpej 4048 1.61 thorpej switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4049 1.85 chs 4050 1.61 thorpej /* 4051 1.115 chs * In these first 3 cases, we just deactivate the page. 4052 1.61 thorpej */ 4053 1.85 chs 4054 1.61 thorpej case PGO_CLEANIT|PGO_FREE: 4055 1.61 thorpej case PGO_CLEANIT|PGO_DEACTIVATE: 4056 1.61 thorpej case PGO_DEACTIVATE: 4057 1.68 thorpej deactivate_it: 4058 1.61 thorpej /* 4059 1.115 chs * skip the page if it's loaned or wired, 4060 1.115 chs * since it shouldn't be on a paging queue 4061 1.115 chs * at all in these cases. 
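 *
 * for illustration, a synchronous msync(2)-style flush of a range
 * could be requested as (sketch only; the exact flag combination
 * built by sys_msync() is not shown here):
 *
 *	error = uvm_map_clean(map, start, end, PGO_CLEANIT | PGO_SYNCIO);
 *
 * with PGO_DEACTIVATE or PGO_FREE added when the caller also wants
 * the cached pages deactivated or freed after the clean, as listed in
 * the function's header comment above.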
4062 1.61 thorpej */ 4063 1.85 chs 4064 1.115 chs if (pg->loan_count != 0 || 4065 1.115 chs pg->wire_count != 0) { 4066 1.61 thorpej continue; 4067 1.61 thorpej } 4068 1.85 chs KASSERT(pg->uanon == anon); 4069 1.369 ad uvm_pagelock(pg); 4070 1.61 thorpej uvm_pagedeactivate(pg); 4071 1.369 ad uvm_pageunlock(pg); 4072 1.61 thorpej continue; 4073 1.61 thorpej 4074 1.61 thorpej case PGO_FREE: 4075 1.85 chs 4076 1.68 thorpej /* 4077 1.68 thorpej * If there are multiple references to 4078 1.68 thorpej * the amap, just deactivate the page. 4079 1.68 thorpej */ 4080 1.85 chs 4081 1.68 thorpej if (amap_refs(amap) > 1) 4082 1.68 thorpej goto deactivate_it; 4083 1.68 thorpej 4084 1.115 chs /* skip the page if it's wired */ 4085 1.62 thorpej if (pg->wire_count != 0) { 4086 1.62 thorpej continue; 4087 1.62 thorpej } 4088 1.66 thorpej amap_unadd(¤t->aref, offset); 4089 1.61 thorpej refs = --anon->an_ref; 4090 1.298 rmind if (refs == 0) { 4091 1.375 ad uvm_anfree(anon); 4092 1.298 rmind } 4093 1.61 thorpej continue; 4094 1.61 thorpej } 4095 1.61 thorpej } 4096 1.375 ad amap_unlock(amap); 4097 1.1 mrg 4098 1.61 thorpej flush_object: 4099 1.10 mrg /* 4100 1.33 chuck * flush pages if we've got a valid backing object. 4101 1.116 chs * note that we must always clean object pages before 4102 1.116 chs * freeing them since otherwise we could reveal stale 4103 1.116 chs * data from files. 4104 1.10 mrg */ 4105 1.1 mrg 4106 1.188 dbj uoff = current->offset + (start - current->start); 4107 1.90 chs size = MIN(end, current->end) - start; 4108 1.61 thorpej if (uobj != NULL) { 4109 1.372 ad rw_enter(uobj->vmobjlock, RW_WRITER); 4110 1.136 thorpej if (uobj->pgops->pgo_put != NULL) 4111 1.188 dbj error = (uobj->pgops->pgo_put)(uobj, uoff, 4112 1.188 dbj uoff + size, flags | PGO_CLEANIT); 4113 1.136 thorpej else 4114 1.136 thorpej error = 0; 4115 1.10 mrg } 4116 1.10 mrg start += size; 4117 1.10 mrg } 4118 1.406 chs vm_map_unbusy(map); 4119 1.406 chs return error; 4120 1.1 mrg } 4121 1.1 mrg 4122 1.1 mrg 4123 1.1 mrg /* 4124 1.1 mrg * uvm_map_checkprot: check protection in map 4125 1.1 mrg * 4126 1.1 mrg * => must allow specified protection in a fully allocated region. 4127 1.1 mrg * => map must be read or write locked by caller. 4128 1.1 mrg */ 4129 1.1 mrg 4130 1.233 thorpej bool 4131 1.138 enami uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4132 1.138 enami vm_prot_t protection) 4133 1.10 mrg { 4134 1.99 chs struct vm_map_entry *entry; 4135 1.99 chs struct vm_map_entry *tmp_entry; 4136 1.10 mrg 4137 1.94 chs if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 4138 1.234 thorpej return (false); 4139 1.94 chs } 4140 1.94 chs entry = tmp_entry; 4141 1.94 chs while (start < end) { 4142 1.94 chs if (entry == &map->header) { 4143 1.234 thorpej return (false); 4144 1.94 chs } 4145 1.85 chs 4146 1.10 mrg /* 4147 1.10 mrg * no holes allowed 4148 1.10 mrg */ 4149 1.10 mrg 4150 1.94 chs if (start < entry->start) { 4151 1.234 thorpej return (false); 4152 1.94 chs } 4153 1.10 mrg 4154 1.10 mrg /* 4155 1.10 mrg * check protection associated with entry 4156 1.10 mrg */ 4157 1.1 mrg 4158 1.94 chs if ((entry->protection & protection) != protection) { 4159 1.234 thorpej return (false); 4160 1.94 chs } 4161 1.94 chs start = entry->end; 4162 1.94 chs entry = entry->next; 4163 1.94 chs } 4164 1.234 thorpej return (true); 4165 1.1 mrg } 4166 1.1 mrg 4167 1.1 mrg /* 4168 1.1 mrg * uvmspace_alloc: allocate a vmspace structure. 
4169 1.1 mrg * 4170 1.1 mrg * - structure includes vm_map and pmap 4171 1.1 mrg * - XXX: no locking on this structure 4172 1.1 mrg * - refcnt set to 1, rest must be init'd by caller 4173 1.1 mrg */ 4174 1.10 mrg struct vmspace * 4175 1.327 martin uvmspace_alloc(vaddr_t vmin, vaddr_t vmax, bool topdown) 4176 1.10 mrg { 4177 1.10 mrg struct vmspace *vm; 4178 1.385 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 4179 1.10 mrg 4180 1.410 ad vm = kmem_alloc(sizeof(*vm), KM_SLEEP); 4181 1.327 martin uvmspace_init(vm, NULL, vmin, vmax, topdown); 4182 1.353 pgoyette UVMHIST_LOG(maphist,"<- done (vm=%#jx)", (uintptr_t)vm, 0, 0, 0); 4183 1.15 thorpej return (vm); 4184 1.15 thorpej } 4185 1.15 thorpej 4186 1.15 thorpej /* 4187 1.15 thorpej * uvmspace_init: initialize a vmspace structure. 4188 1.15 thorpej * 4189 1.15 thorpej * - XXX: no locking on this structure 4190 1.132 matt * - refcnt set to 1, rest must be init'd by caller 4191 1.15 thorpej */ 4192 1.15 thorpej void 4193 1.327 martin uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, 4194 1.327 martin vaddr_t vmax, bool topdown) 4195 1.15 thorpej { 4196 1.385 skrll UVMHIST_FUNC(__func__); 4197 1.385 skrll UVMHIST_CALLARGS(maphist, "(vm=%#jx, pmap=%#jx, vmin=%#jx, vmax=%#jx", 4198 1.353 pgoyette (uintptr_t)vm, (uintptr_t)pmap, vmin, vmax); 4199 1.353 pgoyette UVMHIST_LOG(maphist, " topdown=%ju)", topdown, 0, 0, 0); 4200 1.334 matt 4201 1.23 perry memset(vm, 0, sizeof(*vm)); 4202 1.199 christos uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 4203 1.327 martin | (topdown ? VM_MAP_TOPDOWN : 0) 4204 1.131 atatat ); 4205 1.15 thorpej if (pmap) 4206 1.15 thorpej pmap_reference(pmap); 4207 1.15 thorpej else 4208 1.15 thorpej pmap = pmap_create(); 4209 1.15 thorpej vm->vm_map.pmap = pmap; 4210 1.10 mrg vm->vm_refcnt = 1; 4211 1.15 thorpej UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4212 1.1 mrg } 4213 1.1 mrg 4214 1.1 mrg /* 4215 1.168 junyoung * uvmspace_share: share a vmspace between two processes 4216 1.1 mrg * 4217 1.1 mrg * - used for vfork, threads(?) 
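 *
 * reference-counting sketch (illustrative, using only the routines in
 * this file): uvmspace_alloc() returns a vmspace with vm_refcnt == 1;
 * uvmspace_share() points p2 at p1's vmspace after taking an extra
 * reference via uvmspace_addref(); each uvmspace_free() drops one
 * reference and only the last one tears down the map and pmap.
 *
 *	vm = uvmspace_alloc(vmin, vmax, topdown);	refcnt 1
 *	uvmspace_addref(vm);				refcnt 2
 *	uvmspace_free(vm);				refcnt 1
 *	uvmspace_free(vm);				map and pmap destroyed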
4218 1.1 mrg */ 4219 1.1 mrg 4220 1.10 mrg void 4221 1.138 enami uvmspace_share(struct proc *p1, struct proc *p2) 4222 1.1 mrg { 4223 1.139 enami 4224 1.215 yamt uvmspace_addref(p1->p_vmspace); 4225 1.10 mrg p2->p_vmspace = p1->p_vmspace; 4226 1.1 mrg } 4227 1.1 mrg 4228 1.282 rmind #if 0 4229 1.282 rmind 4230 1.1 mrg /* 4231 1.1 mrg * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 4232 1.1 mrg * 4233 1.1 mrg * - XXX: no locking on vmspace 4234 1.1 mrg */ 4235 1.1 mrg 4236 1.10 mrg void 4237 1.138 enami uvmspace_unshare(struct lwp *l) 4238 1.10 mrg { 4239 1.128 thorpej struct proc *p = l->l_proc; 4240 1.10 mrg struct vmspace *nvm, *ovm = p->p_vmspace; 4241 1.85 chs 4242 1.10 mrg if (ovm->vm_refcnt == 1) 4243 1.10 mrg /* nothing to do: vmspace isn't shared in the first place */ 4244 1.10 mrg return; 4245 1.85 chs 4246 1.10 mrg /* make a new vmspace, still holding old one */ 4247 1.10 mrg nvm = uvmspace_fork(ovm); 4248 1.10 mrg 4249 1.254 ad kpreempt_disable(); 4250 1.128 thorpej pmap_deactivate(l); /* unbind old vmspace */ 4251 1.98 chs p->p_vmspace = nvm; 4252 1.128 thorpej pmap_activate(l); /* switch to new vmspace */ 4253 1.254 ad kpreempt_enable(); 4254 1.13 thorpej 4255 1.10 mrg uvmspace_free(ovm); /* drop reference to old vmspace */ 4256 1.1 mrg } 4257 1.1 mrg 4258 1.282 rmind #endif 4259 1.282 rmind 4260 1.1 mrg /* 4261 1.1 mrg * uvmspace_exec: the process wants to exec a new program 4262 1.1 mrg */ 4263 1.1 mrg 4264 1.10 mrg void 4265 1.327 martin uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown) 4266 1.1 mrg { 4267 1.128 thorpej struct proc *p = l->l_proc; 4268 1.10 mrg struct vmspace *nvm, *ovm = p->p_vmspace; 4269 1.302 martin struct vm_map *map; 4270 1.373 ad int flags; 4271 1.1 mrg 4272 1.317 martin KASSERT(ovm != NULL); 4273 1.294 matt #ifdef __HAVE_CPU_VMSPACE_EXEC 4274 1.294 matt cpu_vmspace_exec(l, start, end); 4275 1.294 matt #endif 4276 1.1 mrg 4277 1.302 martin /* 4278 1.427 riastrad * If p is the only process using the vmspace, we can safely 4279 1.427 riastrad * recycle it for the program that is being exec'd, rather than 4280 1.427 riastrad * allocate a new vmspace -- but we have to make sure it's 4281 1.427 riastrad * empty first. 4282 1.10 mrg */ 4283 1.427 riastrad map = &ovm->vm_map; 4284 1.427 riastrad if (ovm->vm_refcnt == 1 && map->nentries != 0) { 4285 1.10 mrg /* 4286 1.10 mrg * SYSV SHM semantics require us to kill all segments on an exec 4287 1.10 mrg */ 4288 1.336 pgoyette if (uvm_shmexit && ovm->vm_shm) 4289 1.336 pgoyette (*uvm_shmexit)(ovm); 4290 1.54 thorpej 4291 1.54 thorpej /* 4292 1.54 thorpej * POSIX 1003.1b -- "lock future mappings" is revoked 4293 1.54 thorpej * when a process execs another program image. 4294 1.54 thorpej */ 4295 1.238 ad map->flags &= ~VM_MAP_WIREFUTURE; 4296 1.10 mrg 4297 1.10 mrg /* 4298 1.378 ad * now unmap the old program. 4299 1.380 riastrad * 4300 1.378 ad * XXX set VM_MAP_DYING for the duration, so pmap_update() 4301 1.378 ad * is not called until the pmap has been totally cleared out 4302 1.378 ad * after pmap_remove_all(), or it can confuse some pmap 4303 1.378 ad * implementations. it would be nice to handle this by 4304 1.378 ad * deferring the pmap_update() while it is known the address 4305 1.378 ad * space is not visible to any user LWP other than curlwp, 4306 1.378 ad * but there isn't an elegant way of inferring that right 4307 1.378 ad * now. 4308 1.10 mrg */ 4309 1.373 ad flags = pmap_remove_all(map->pmap) ? 
UVM_FLAG_VAONLY : 0; 4310 1.378 ad map->flags |= VM_MAP_DYING; 4311 1.373 ad uvm_unmap1(map, vm_map_min(map), vm_map_max(map), flags); 4312 1.378 ad map->flags &= ~VM_MAP_DYING; 4313 1.378 ad pmap_update(map->pmap); 4314 1.144 yamt KASSERT(map->header.prev == &map->header); 4315 1.144 yamt KASSERT(map->nentries == 0); 4316 1.427 riastrad } 4317 1.93 eeh 4318 1.427 riastrad if (ovm->vm_refcnt == 1) { 4319 1.93 eeh /* 4320 1.427 riastrad * The vmspace is not shared and is empty (if it 4321 1.427 riastrad * weren't, we would have emptied it above). 4322 1.427 riastrad * 4323 1.427 riastrad * Resize the map and set topdown as appropriate. 4324 1.93 eeh */ 4325 1.427 riastrad KASSERT(map->nentries == 0); 4326 1.184 chs vm_map_setmin(map, start); 4327 1.184 chs vm_map_setmax(map, end); 4328 1.427 riastrad if (topdown) { 4329 1.427 riastrad map->flags |= VM_MAP_TOPDOWN; 4330 1.427 riastrad } else { 4331 1.427 riastrad map->flags &= ~VM_MAP_TOPDOWN; 4332 1.427 riastrad } 4333 1.10 mrg } else { 4334 1.10 mrg /* 4335 1.10 mrg * p's vmspace is being shared, so we can't reuse it for p since 4336 1.10 mrg * it is still being used for others. allocate a new vmspace 4337 1.10 mrg * for p 4338 1.10 mrg */ 4339 1.327 martin nvm = uvmspace_alloc(start, end, topdown); 4340 1.1 mrg 4341 1.10 mrg /* 4342 1.10 mrg * install new vmspace and drop our ref to the old one. 4343 1.10 mrg */ 4344 1.254 ad kpreempt_disable(); 4345 1.128 thorpej pmap_deactivate(l); 4346 1.10 mrg p->p_vmspace = nvm; 4347 1.128 thorpej pmap_activate(l); 4348 1.254 ad kpreempt_enable(); 4349 1.13 thorpej 4350 1.10 mrg uvmspace_free(ovm); 4351 1.10 mrg } 4352 1.1 mrg } 4353 1.1 mrg 4354 1.1 mrg /* 4355 1.368 msaitoh * uvmspace_addref: add a reference to a vmspace. 4356 1.215 yamt */ 4357 1.215 yamt 4358 1.215 yamt void 4359 1.215 yamt uvmspace_addref(struct vmspace *vm) 4360 1.215 yamt { 4361 1.215 yamt 4362 1.371 ad KASSERT((vm->vm_map.flags & VM_MAP_DYING) == 0); 4363 1.215 yamt KASSERT(vm->vm_refcnt > 0); 4364 1.371 ad atomic_inc_uint(&vm->vm_refcnt); 4365 1.215 yamt } 4366 1.215 yamt 4367 1.215 yamt /* 4368 1.1 mrg * uvmspace_free: free a vmspace data structure 4369 1.1 mrg */ 4370 1.1 mrg 4371 1.10 mrg void 4372 1.138 enami uvmspace_free(struct vmspace *vm) 4373 1.1 mrg { 4374 1.99 chs struct vm_map_entry *dead_entries; 4375 1.171 pk struct vm_map *map = &vm->vm_map; 4376 1.373 ad int flags; 4377 1.172 he 4378 1.385 skrll UVMHIST_FUNC(__func__); 4379 1.385 skrll UVMHIST_CALLARGS(maphist,"(vm=%#jx) ref=%jd", (uintptr_t)vm, 4380 1.385 skrll vm->vm_refcnt, 0, 0); 4381 1.392 riastrad 4382 1.393 riastrad membar_release(); 4383 1.371 ad if (atomic_dec_uint_nv(&vm->vm_refcnt) > 0) 4384 1.120 chs return; 4385 1.393 riastrad membar_acquire(); 4386 1.99 chs 4387 1.120 chs /* 4388 1.120 chs * at this point, there should be no other references to the map. 4389 1.120 chs * delete all of the mappings, then destroy the pmap. 4390 1.120 chs */ 4391 1.99 chs 4392 1.120 chs map->flags |= VM_MAP_DYING; 4393 1.373 ad flags = pmap_remove_all(map->pmap) ? UVM_FLAG_VAONLY : 0; 4394 1.336 pgoyette 4395 1.120 chs /* Get rid of any SYSV shared memory segments. 
*/ 4396 1.336 pgoyette if (uvm_shmexit && vm->vm_shm != NULL) 4397 1.336 pgoyette (*uvm_shmexit)(vm); 4398 1.314 rmind 4399 1.120 chs if (map->nentries) { 4400 1.421 riastrad vm_map_lock(map); 4401 1.184 chs uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map), 4402 1.373 ad &dead_entries, flags); 4403 1.421 riastrad vm_map_unlock(map); 4404 1.120 chs if (dead_entries != NULL) 4405 1.120 chs uvm_unmap_detach(dead_entries, 0); 4406 1.10 mrg } 4407 1.146 yamt KASSERT(map->nentries == 0); 4408 1.146 yamt KASSERT(map->size == 0); 4409 1.314 rmind 4410 1.239 ad mutex_destroy(&map->misc_lock); 4411 1.239 ad rw_destroy(&map->lock); 4412 1.255 ad cv_destroy(&map->cv); 4413 1.120 chs pmap_destroy(map->pmap); 4414 1.410 ad kmem_free(vm, sizeof(*vm)); 4415 1.1 mrg } 4416 1.1 mrg 4417 1.329 christos static struct vm_map_entry * 4418 1.329 christos uvm_mapent_clone(struct vm_map *new_map, struct vm_map_entry *old_entry, 4419 1.329 christos int flags) 4420 1.329 christos { 4421 1.329 christos struct vm_map_entry *new_entry; 4422 1.329 christos 4423 1.329 christos new_entry = uvm_mapent_alloc(new_map, 0); 4424 1.329 christos /* old_entry -> new_entry */ 4425 1.329 christos uvm_mapent_copy(old_entry, new_entry); 4426 1.329 christos 4427 1.329 christos /* new pmap has nothing wired in it */ 4428 1.329 christos new_entry->wired_count = 0; 4429 1.329 christos 4430 1.329 christos /* 4431 1.329 christos * gain reference to object backing the map (can't 4432 1.329 christos * be a submap, already checked this case). 4433 1.329 christos */ 4434 1.329 christos 4435 1.329 christos if (new_entry->aref.ar_amap) 4436 1.329 christos uvm_map_reference_amap(new_entry, flags); 4437 1.329 christos 4438 1.329 christos if (new_entry->object.uvm_obj && 4439 1.329 christos new_entry->object.uvm_obj->pgops->pgo_reference) 4440 1.329 christos new_entry->object.uvm_obj->pgops->pgo_reference( 4441 1.329 christos new_entry->object.uvm_obj); 4442 1.329 christos 4443 1.329 christos /* insert entry at end of new_map's entry list */ 4444 1.329 christos uvm_map_entry_link(new_map, new_map->header.prev, 4445 1.329 christos new_entry); 4446 1.329 christos 4447 1.329 christos return new_entry; 4448 1.329 christos } 4449 1.329 christos 4450 1.329 christos /* 4451 1.329 christos * share the mapping: this means we want the old and 4452 1.329 christos * new entries to share amaps and backing objects. 4453 1.329 christos */ 4454 1.329 christos static void 4455 1.329 christos uvm_mapent_forkshared(struct vm_map *new_map, struct vm_map *old_map, 4456 1.329 christos struct vm_map_entry *old_entry) 4457 1.329 christos { 4458 1.329 christos /* 4459 1.329 christos * if the old_entry needs a new amap (due to prev fork) 4460 1.329 christos * then we need to allocate it now so that we have 4461 1.329 christos * something we own to share with the new_entry. [in 4462 1.329 christos * other words, we need to clear needs_copy] 4463 1.329 christos */ 4464 1.329 christos 4465 1.329 christos if (UVM_ET_ISNEEDSCOPY(old_entry)) { 4466 1.329 christos /* get our own amap, clears needs_copy */ 4467 1.329 christos amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 4468 1.329 christos 0, 0); 4469 1.329 christos /* XXXCDC: WAITOK??? 
*/ 4470 1.329 christos } 4471 1.329 christos 4472 1.329 christos uvm_mapent_clone(new_map, old_entry, AMAP_SHARED); 4473 1.329 christos } 4474 1.329 christos 4475 1.329 christos 4476 1.329 christos static void 4477 1.329 christos uvm_mapent_forkcopy(struct vm_map *new_map, struct vm_map *old_map, 4478 1.329 christos struct vm_map_entry *old_entry) 4479 1.329 christos { 4480 1.329 christos struct vm_map_entry *new_entry; 4481 1.329 christos 4482 1.329 christos /* 4483 1.329 christos * copy-on-write the mapping (using mmap's 4484 1.329 christos * MAP_PRIVATE semantics) 4485 1.329 christos * 4486 1.329 christos * allocate new_entry, adjust reference counts. 4487 1.329 christos * (note that new references are read-only). 4488 1.329 christos */ 4489 1.329 christos 4490 1.329 christos new_entry = uvm_mapent_clone(new_map, old_entry, 0); 4491 1.329 christos 4492 1.329 christos new_entry->etype |= 4493 1.329 christos (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4494 1.329 christos 4495 1.329 christos /* 4496 1.329 christos * the new entry will need an amap. it will either 4497 1.329 christos * need to be copied from the old entry or created 4498 1.329 christos * from scratch (if the old entry does not have an 4499 1.329 christos * amap). can we defer this process until later 4500 1.329 christos * (by setting "needs_copy") or do we need to copy 4501 1.329 christos * the amap now? 4502 1.329 christos * 4503 1.329 christos * we must copy the amap now if any of the following 4504 1.329 christos * conditions hold: 4505 1.329 christos * 1. the old entry has an amap and that amap is 4506 1.329 christos * being shared. this means that the old (parent) 4507 1.329 christos * process is sharing the amap with another 4508 1.329 christos * process. if we do not clear needs_copy here 4509 1.329 christos * we will end up in a situation where both the 4510 1.394 andvar * parent and child process are referring to the 4511 1.329 christos * same amap with "needs_copy" set. if the 4512 1.329 christos * parent write-faults, the fault routine will 4513 1.329 christos * clear "needs_copy" in the parent by allocating 4514 1.329 christos * a new amap. this is wrong because the 4515 1.329 christos * parent is supposed to be sharing the old amap 4516 1.329 christos * and the new amap will break that. 4517 1.329 christos * 4518 1.329 christos * 2. if the old entry has an amap and a non-zero 4519 1.329 christos * wire count then we are going to have to call 4520 1.329 christos * amap_cow_now to avoid page faults in the 4521 1.329 christos * parent process. since amap_cow_now requires 4522 1.329 christos * "needs_copy" to be clear we might as well 4523 1.329 christos * clear it here as well. 4524 1.329 christos * 4525 1.329 christos */ 4526 1.329 christos 4527 1.329 christos if (old_entry->aref.ar_amap != NULL) { 4528 1.329 christos if ((amap_flags(old_entry->aref.ar_amap) & AMAP_SHARED) != 0 || 4529 1.329 christos VM_MAPENT_ISWIRED(old_entry)) { 4530 1.329 christos 4531 1.329 christos amap_copy(new_map, new_entry, 4532 1.329 christos AMAP_COPY_NOCHUNK, 0, 0); 4533 1.329 christos /* XXXCDC: M_WAITOK ... ok? */ 4534 1.329 christos } 4535 1.329 christos } 4536 1.329 christos 4537 1.329 christos /* 4538 1.329 christos * if the parent's entry is wired down, then the 4539 1.329 christos * parent process does not want page faults on 4540 1.329 christos * access to that memory. this means that we 4541 1.329 christos * cannot do copy-on-write because we can't write 4542 1.329 christos * protect the old entry. 
in this case we 4543 1.329 christos * resolve all copy-on-write faults now, using 4544 1.329 christos * amap_cow_now. note that we have already 4545 1.329 christos * allocated any needed amap (above). 4546 1.329 christos */ 4547 1.329 christos 4548 1.329 christos if (VM_MAPENT_ISWIRED(old_entry)) { 4549 1.329 christos 4550 1.329 christos /* 4551 1.329 christos * resolve all copy-on-write faults now 4552 1.329 christos * (note that there is nothing to do if 4553 1.329 christos * the old mapping does not have an amap). 4554 1.329 christos */ 4555 1.329 christos if (old_entry->aref.ar_amap) 4556 1.329 christos amap_cow_now(new_map, new_entry); 4557 1.329 christos 4558 1.329 christos } else { 4559 1.329 christos /* 4560 1.329 christos * setup mappings to trigger copy-on-write faults 4561 1.329 christos * we must write-protect the parent if it has 4562 1.329 christos * an amap and it is not already "needs_copy"... 4563 1.329 christos * if it is already "needs_copy" then the parent 4564 1.329 christos * has already been write-protected by a previous 4565 1.329 christos * fork operation. 4566 1.329 christos */ 4567 1.329 christos if (old_entry->aref.ar_amap && 4568 1.329 christos !UVM_ET_ISNEEDSCOPY(old_entry)) { 4569 1.329 christos if (old_entry->max_protection & VM_PROT_WRITE) { 4570 1.376 ad #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 4571 1.372 ad uvm_map_lock_entry(old_entry, RW_WRITER); 4572 1.376 ad #else 4573 1.376 ad uvm_map_lock_entry(old_entry, RW_READER); 4574 1.376 ad #endif 4575 1.329 christos pmap_protect(old_map->pmap, 4576 1.329 christos old_entry->start, old_entry->end, 4577 1.329 christos old_entry->protection & ~VM_PROT_WRITE); 4578 1.362 mlelstv uvm_map_unlock_entry(old_entry); 4579 1.329 christos } 4580 1.329 christos old_entry->etype |= UVM_ET_NEEDSCOPY; 4581 1.329 christos } 4582 1.329 christos } 4583 1.329 christos } 4584 1.329 christos 4585 1.1 mrg /* 4586 1.330 christos * zero the mapping: the new entry will be zero initialized 4587 1.330 christos */ 4588 1.330 christos static void 4589 1.330 christos uvm_mapent_forkzero(struct vm_map *new_map, struct vm_map *old_map, 4590 1.330 christos struct vm_map_entry *old_entry) 4591 1.330 christos { 4592 1.330 christos struct vm_map_entry *new_entry; 4593 1.330 christos 4594 1.330 christos new_entry = uvm_mapent_clone(new_map, old_entry, 0); 4595 1.330 christos 4596 1.330 christos new_entry->etype |= 4597 1.330 christos (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4598 1.330 christos 4599 1.330 christos if (new_entry->aref.ar_amap) { 4600 1.330 christos uvm_map_unreference_amap(new_entry, 0); 4601 1.330 christos new_entry->aref.ar_pageoff = 0; 4602 1.330 christos new_entry->aref.ar_amap = NULL; 4603 1.330 christos } 4604 1.330 christos 4605 1.330 christos if (UVM_ET_ISOBJ(new_entry)) { 4606 1.330 christos if (new_entry->object.uvm_obj->pgops->pgo_detach) 4607 1.330 christos new_entry->object.uvm_obj->pgops->pgo_detach( 4608 1.330 christos new_entry->object.uvm_obj); 4609 1.330 christos new_entry->object.uvm_obj = NULL; 4610 1.390 chs new_entry->offset = 0; 4611 1.330 christos new_entry->etype &= ~UVM_ET_OBJ; 4612 1.330 christos } 4613 1.330 christos } 4614 1.330 christos 4615 1.330 christos /* 4616 1.1 mrg * F O R K - m a i n e n t r y p o i n t 4617 1.1 mrg */ 4618 1.1 mrg /* 4619 1.1 mrg * uvmspace_fork: fork a process' main map 4620 1.1 mrg * 4621 1.1 mrg * => create a new vmspace for child process from parent. 4622 1.1 mrg * => parent's map must not be locked. 
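 * => each entry is handled according to its inheritance mode, which
 *    userland can set per-mapping with minherit(2): MAP_INHERIT_NONE
 *    drops the mapping in the child, MAP_INHERIT_SHARE shares the
 *    amap/object, MAP_INHERIT_COPY marks it copy-on-write, and
 *    MAP_INHERIT_ZERO gives the child zero-filled memory.  a minimal
 *    userland sketch (illustrative only, not part of this file;
 *    assumes <sys/mman.h>, <unistd.h> and <assert.h>):
 *
 *	char *p = mmap(NULL, len, PROT_READ|PROT_WRITE,
 *	    MAP_ANON|MAP_PRIVATE, -1, 0);
 *	p[0] = 'x';
 *	minherit(p, len, MAP_INHERIT_ZERO);
 *	if (fork() == 0) {
 *		assert(p[0] == '\0');	(the child sees zero-fill, not 'x')
 *		_exit(0);
 *	}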
4623 1.1 mrg */ 4624 1.1 mrg 4625 1.10 mrg struct vmspace * 4626 1.138 enami uvmspace_fork(struct vmspace *vm1) 4627 1.10 mrg { 4628 1.10 mrg struct vmspace *vm2; 4629 1.99 chs struct vm_map *old_map = &vm1->vm_map; 4630 1.99 chs struct vm_map *new_map; 4631 1.99 chs struct vm_map_entry *old_entry; 4632 1.385 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 4633 1.1 mrg 4634 1.10 mrg vm_map_lock(old_map); 4635 1.1 mrg 4636 1.327 martin vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), 4637 1.327 martin vm1->vm_map.flags & VM_MAP_TOPDOWN); 4638 1.23 perry memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4639 1.235 christos (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy); 4640 1.10 mrg new_map = &vm2->vm_map; /* XXX */ 4641 1.10 mrg 4642 1.10 mrg old_entry = old_map->header.next; 4643 1.162 pooka new_map->size = old_map->size; 4644 1.10 mrg 4645 1.10 mrg /* 4646 1.10 mrg * go entry-by-entry 4647 1.10 mrg */ 4648 1.1 mrg 4649 1.10 mrg while (old_entry != &old_map->header) { 4650 1.1 mrg 4651 1.10 mrg /* 4652 1.10 mrg * first, some sanity checks on the old entry 4653 1.10 mrg */ 4654 1.99 chs 4655 1.94 chs KASSERT(!UVM_ET_ISSUBMAP(old_entry)); 4656 1.94 chs KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || 4657 1.94 chs !UVM_ET_ISNEEDSCOPY(old_entry)); 4658 1.1 mrg 4659 1.10 mrg switch (old_entry->inheritance) { 4660 1.80 wiz case MAP_INHERIT_NONE: 4661 1.10 mrg /* 4662 1.162 pooka * drop the mapping, modify size 4663 1.10 mrg */ 4664 1.162 pooka new_map->size -= old_entry->end - old_entry->start; 4665 1.10 mrg break; 4666 1.10 mrg 4667 1.80 wiz case MAP_INHERIT_SHARE: 4668 1.329 christos uvm_mapent_forkshared(new_map, old_map, old_entry); 4669 1.10 mrg break; 4670 1.10 mrg 4671 1.80 wiz case MAP_INHERIT_COPY: 4672 1.329 christos uvm_mapent_forkcopy(new_map, old_map, old_entry); 4673 1.329 christos break; 4674 1.10 mrg 4675 1.330 christos case MAP_INHERIT_ZERO: 4676 1.330 christos uvm_mapent_forkzero(new_map, old_map, old_entry); 4677 1.330 christos break; 4678 1.329 christos default: 4679 1.329 christos KASSERT(0); 4680 1.10 mrg break; 4681 1.329 christos } 4682 1.10 mrg old_entry = old_entry->next; 4683 1.1 mrg } 4684 1.1 mrg 4685 1.268 ad pmap_update(old_map->pmap); 4686 1.98 chs vm_map_unlock(old_map); 4687 1.1 mrg 4688 1.336 pgoyette if (uvm_shmfork && vm1->vm_shm) 4689 1.336 pgoyette (*uvm_shmfork)(vm1, vm2); 4690 1.39 thorpej 4691 1.39 thorpej #ifdef PMAP_FORK 4692 1.39 thorpej pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 4693 1.1 mrg #endif 4694 1.1 mrg 4695 1.10 mrg UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4696 1.139 enami return (vm2); 4697 1.1 mrg } 4698 1.1 mrg 4699 1.1 mrg 4700 1.174 yamt /* 4701 1.194 yamt * uvm_mapent_trymerge: try to merge an entry with its neighbors. 4702 1.194 yamt * 4703 1.194 yamt * => called with map locked. 4704 1.194 yamt * => return non zero if successfully merged. 
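 * => a neighbor is only merged when it is virtually adjacent, has no
 *    conflicting amap reference, is compatible in etype, protection,
 *    inheritance, advice and wired_count, and (for an object-backed
 *    entry) is offset-contiguous; e.g. for the forward merge below:
 *
 *	next->start == entry->end &&
 *	entry->offset + (entry->end - entry->start) == next->offset
 *
 *    (this summarizes the checks made in the function body.)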
4705 1.194 yamt */ 4706 1.194 yamt 4707 1.194 yamt int 4708 1.194 yamt uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags) 4709 1.194 yamt { 4710 1.194 yamt struct uvm_object *uobj; 4711 1.194 yamt struct vm_map_entry *next; 4712 1.194 yamt struct vm_map_entry *prev; 4713 1.195 yamt vsize_t size; 4714 1.194 yamt int merged = 0; 4715 1.233 thorpej bool copying; 4716 1.194 yamt int newetype; 4717 1.194 yamt 4718 1.194 yamt if (entry->aref.ar_amap != NULL) { 4719 1.194 yamt return 0; 4720 1.194 yamt } 4721 1.194 yamt if ((entry->flags & UVM_MAP_NOMERGE) != 0) { 4722 1.194 yamt return 0; 4723 1.194 yamt } 4724 1.194 yamt 4725 1.194 yamt uobj = entry->object.uvm_obj; 4726 1.195 yamt size = entry->end - entry->start; 4727 1.194 yamt copying = (flags & UVM_MERGE_COPYING) != 0; 4728 1.194 yamt newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype; 4729 1.194 yamt 4730 1.194 yamt next = entry->next; 4731 1.194 yamt if (next != &map->header && 4732 1.194 yamt next->start == entry->end && 4733 1.194 yamt ((copying && next->aref.ar_amap != NULL && 4734 1.194 yamt amap_refs(next->aref.ar_amap) == 1) || 4735 1.194 yamt (!copying && next->aref.ar_amap == NULL)) && 4736 1.194 yamt UVM_ET_ISCOMPATIBLE(next, newetype, 4737 1.194 yamt uobj, entry->flags, entry->protection, 4738 1.194 yamt entry->max_protection, entry->inheritance, entry->advice, 4739 1.195 yamt entry->wired_count) && 4740 1.195 yamt (uobj == NULL || entry->offset + size == next->offset)) { 4741 1.194 yamt int error; 4742 1.194 yamt 4743 1.194 yamt if (copying) { 4744 1.195 yamt error = amap_extend(next, size, 4745 1.194 yamt AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS); 4746 1.194 yamt } else { 4747 1.194 yamt error = 0; 4748 1.194 yamt } 4749 1.194 yamt if (error == 0) { 4750 1.197 yamt if (uobj) { 4751 1.197 yamt if (uobj->pgops->pgo_detach) { 4752 1.197 yamt uobj->pgops->pgo_detach(uobj); 4753 1.197 yamt } 4754 1.194 yamt } 4755 1.194 yamt 4756 1.194 yamt entry->end = next->end; 4757 1.221 yamt clear_hints(map, next); 4758 1.194 yamt uvm_map_entry_unlink(map, next); 4759 1.194 yamt if (copying) { 4760 1.194 yamt entry->aref = next->aref; 4761 1.194 yamt entry->etype &= ~UVM_ET_NEEDSCOPY; 4762 1.194 yamt } 4763 1.222 yamt uvm_map_check(map, "trymerge forwardmerge"); 4764 1.311 para uvm_mapent_free(next); 4765 1.194 yamt merged++; 4766 1.194 yamt } 4767 1.194 yamt } 4768 1.194 yamt 4769 1.194 yamt prev = entry->prev; 4770 1.194 yamt if (prev != &map->header && 4771 1.194 yamt prev->end == entry->start && 4772 1.194 yamt ((copying && !merged && prev->aref.ar_amap != NULL && 4773 1.194 yamt amap_refs(prev->aref.ar_amap) == 1) || 4774 1.194 yamt (!copying && prev->aref.ar_amap == NULL)) && 4775 1.194 yamt UVM_ET_ISCOMPATIBLE(prev, newetype, 4776 1.194 yamt uobj, entry->flags, entry->protection, 4777 1.194 yamt entry->max_protection, entry->inheritance, entry->advice, 4778 1.195 yamt entry->wired_count) && 4779 1.196 yamt (uobj == NULL || 4780 1.196 yamt prev->offset + prev->end - prev->start == entry->offset)) { 4781 1.194 yamt int error; 4782 1.194 yamt 4783 1.194 yamt if (copying) { 4784 1.195 yamt error = amap_extend(prev, size, 4785 1.194 yamt AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS); 4786 1.194 yamt } else { 4787 1.194 yamt error = 0; 4788 1.194 yamt } 4789 1.194 yamt if (error == 0) { 4790 1.197 yamt if (uobj) { 4791 1.197 yamt if (uobj->pgops->pgo_detach) { 4792 1.197 yamt uobj->pgops->pgo_detach(uobj); 4793 1.197 yamt } 4794 1.197 yamt entry->offset = prev->offset; 4795 1.194 yamt } 4796 1.194 
yamt 4797 1.194 yamt entry->start = prev->start; 4798 1.221 yamt clear_hints(map, prev); 4799 1.194 yamt uvm_map_entry_unlink(map, prev); 4800 1.194 yamt if (copying) { 4801 1.194 yamt entry->aref = prev->aref; 4802 1.194 yamt entry->etype &= ~UVM_ET_NEEDSCOPY; 4803 1.194 yamt } 4804 1.222 yamt uvm_map_check(map, "trymerge backmerge"); 4805 1.311 para uvm_mapent_free(prev); 4806 1.194 yamt merged++; 4807 1.194 yamt } 4808 1.194 yamt } 4809 1.194 yamt 4810 1.194 yamt return merged; 4811 1.194 yamt } 4812 1.194 yamt 4813 1.211 yamt /* 4814 1.211 yamt * uvm_map_setup: init map 4815 1.211 yamt * 4816 1.211 yamt * => map must not be in service yet. 4817 1.211 yamt */ 4818 1.211 yamt 4819 1.211 yamt void 4820 1.211 yamt uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) 4821 1.211 yamt { 4822 1.211 yamt 4823 1.263 matt rb_tree_init(&map->rb_tree, &uvm_map_tree_ops); 4824 1.211 yamt map->header.next = map->header.prev = &map->header; 4825 1.211 yamt map->nentries = 0; 4826 1.211 yamt map->size = 0; 4827 1.211 yamt map->ref_count = 1; 4828 1.211 yamt vm_map_setmin(map, vmin); 4829 1.211 yamt vm_map_setmax(map, vmax); 4830 1.211 yamt map->flags = flags; 4831 1.211 yamt map->first_free = &map->header; 4832 1.211 yamt map->hint = &map->header; 4833 1.211 yamt map->timestamp = 0; 4834 1.238 ad map->busy = NULL; 4835 1.238 ad 4836 1.240 ad rw_init(&map->lock); 4837 1.238 ad cv_init(&map->cv, "vm_map"); 4838 1.314 rmind mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE); 4839 1.211 yamt } 4840 1.211 yamt 4841 1.211 yamt /* 4842 1.211 yamt * U N M A P - m a i n e n t r y p o i n t 4843 1.211 yamt */ 4844 1.211 yamt 4845 1.211 yamt /* 4846 1.211 yamt * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop") 4847 1.211 yamt * 4848 1.211 yamt * => caller must check alignment and size 4849 1.211 yamt * => map must be unlocked (we will lock it) 4850 1.211 yamt * => flags is UVM_FLAG_QUANTUM or 0. 4851 1.211 yamt */ 4852 1.211 yamt 4853 1.211 yamt void 4854 1.211 yamt uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4855 1.211 yamt { 4856 1.211 yamt struct vm_map_entry *dead_entries; 4857 1.385 skrll UVMHIST_FUNC(__func__); 4858 1.385 skrll UVMHIST_CALLARGS(maphist, " (map=%#jx, start=%#jx, end=%#jx)", 4859 1.385 skrll (uintptr_t)map, start, end, 0); 4860 1.211 yamt 4861 1.364 mrg KASSERTMSG(start < end, 4862 1.364 mrg "%s: map %p: start %#jx < end %#jx", __func__, map, 4863 1.364 mrg (uintmax_t)start, (uintmax_t)end); 4864 1.246 xtraeme if (map == kernel_map) { 4865 1.244 yamt LOCKDEBUG_MEM_CHECK((void *)start, end - start); 4866 1.246 xtraeme } 4867 1.351 chs 4868 1.211 yamt /* 4869 1.211 yamt * work now done by helper functions. wipe the pmap's and then 4870 1.211 yamt * detach from the dead entries... 
4871 1.211 yamt */ 4872 1.211 yamt vm_map_lock(map); 4873 1.311 para uvm_unmap_remove(map, start, end, &dead_entries, flags); 4874 1.211 yamt vm_map_unlock(map); 4875 1.211 yamt 4876 1.211 yamt if (dead_entries != NULL) 4877 1.211 yamt uvm_unmap_detach(dead_entries, 0); 4878 1.211 yamt 4879 1.211 yamt UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 4880 1.211 yamt } 4881 1.211 yamt 4882 1.211 yamt 4883 1.211 yamt /* 4884 1.211 yamt * uvm_map_reference: add reference to a map 4885 1.211 yamt * 4886 1.371 ad * => map need not be locked 4887 1.211 yamt */ 4888 1.211 yamt 4889 1.211 yamt void 4890 1.211 yamt uvm_map_reference(struct vm_map *map) 4891 1.211 yamt { 4892 1.371 ad 4893 1.371 ad atomic_inc_uint(&map->ref_count); 4894 1.211 yamt } 4895 1.211 yamt 4896 1.298 rmind void 4897 1.372 ad uvm_map_lock_entry(struct vm_map_entry *entry, krw_t op) 4898 1.298 rmind { 4899 1.298 rmind 4900 1.299 rmind if (entry->aref.ar_amap != NULL) { 4901 1.372 ad amap_lock(entry->aref.ar_amap, op); 4902 1.299 rmind } 4903 1.298 rmind if (UVM_ET_ISOBJ(entry)) { 4904 1.372 ad rw_enter(entry->object.uvm_obj->vmobjlock, op); 4905 1.298 rmind } 4906 1.298 rmind } 4907 1.298 rmind 4908 1.298 rmind void 4909 1.298 rmind uvm_map_unlock_entry(struct vm_map_entry *entry) 4910 1.298 rmind { 4911 1.298 rmind 4912 1.299 rmind if (UVM_ET_ISOBJ(entry)) { 4913 1.372 ad rw_exit(entry->object.uvm_obj->vmobjlock); 4914 1.299 rmind } 4915 1.298 rmind if (entry->aref.ar_amap != NULL) { 4916 1.298 rmind amap_unlock(entry->aref.ar_amap); 4917 1.298 rmind } 4918 1.298 rmind } 4919 1.298 rmind 4920 1.383 thorpej #define UVM_VOADDR_TYPE_MASK 0x3UL 4921 1.383 thorpej #define UVM_VOADDR_TYPE_UOBJ 0x1UL 4922 1.383 thorpej #define UVM_VOADDR_TYPE_ANON 0x2UL 4923 1.383 thorpej #define UVM_VOADDR_OBJECT_MASK ~UVM_VOADDR_TYPE_MASK 4924 1.383 thorpej 4925 1.383 thorpej #define UVM_VOADDR_GET_TYPE(voa) \ 4926 1.383 thorpej ((voa)->object & UVM_VOADDR_TYPE_MASK) 4927 1.383 thorpej #define UVM_VOADDR_GET_OBJECT(voa) \ 4928 1.383 thorpej ((voa)->object & UVM_VOADDR_OBJECT_MASK) 4929 1.383 thorpej #define UVM_VOADDR_SET_OBJECT(voa, obj, type) \ 4930 1.383 thorpej do { \ 4931 1.383 thorpej KASSERT(((uintptr_t)(obj) & UVM_VOADDR_TYPE_MASK) == 0); \ 4932 1.383 thorpej (voa)->object = ((uintptr_t)(obj)) | (type); \ 4933 1.383 thorpej } while (/*CONSTCOND*/0) 4934 1.383 thorpej 4935 1.383 thorpej #define UVM_VOADDR_GET_UOBJ(voa) \ 4936 1.383 thorpej ((struct uvm_object *)UVM_VOADDR_GET_OBJECT(voa)) 4937 1.383 thorpej #define UVM_VOADDR_SET_UOBJ(voa, uobj) \ 4938 1.383 thorpej UVM_VOADDR_SET_OBJECT(voa, uobj, UVM_VOADDR_TYPE_UOBJ) 4939 1.383 thorpej 4940 1.383 thorpej #define UVM_VOADDR_GET_ANON(voa) \ 4941 1.383 thorpej ((struct vm_anon *)UVM_VOADDR_GET_OBJECT(voa)) 4942 1.383 thorpej #define UVM_VOADDR_SET_ANON(voa, anon) \ 4943 1.383 thorpej UVM_VOADDR_SET_OBJECT(voa, anon, UVM_VOADDR_TYPE_ANON) 4944 1.383 thorpej 4945 1.379 thorpej /* 4946 1.379 thorpej * uvm_voaddr_acquire: returns the virtual object address corresponding 4947 1.379 thorpej * to the specified virtual address. 4948 1.379 thorpej * 4949 1.379 thorpej * => resolves COW so the true page identity is tracked. 
4950 1.379 thorpej * 4951 1.379 thorpej * => acquires a reference on the page's owner (uvm_object or vm_anon) 4952 1.379 thorpej */ 4953 1.379 thorpej bool 4954 1.379 thorpej uvm_voaddr_acquire(struct vm_map * const map, vaddr_t const va, 4955 1.379 thorpej struct uvm_voaddr * const voaddr) 4956 1.379 thorpej { 4957 1.379 thorpej struct vm_map_entry *entry; 4958 1.379 thorpej struct vm_anon *anon = NULL; 4959 1.379 thorpej bool result = false; 4960 1.379 thorpej bool exclusive = false; 4961 1.379 thorpej void (*unlock_fn)(struct vm_map *); 4962 1.379 thorpej 4963 1.385 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 4964 1.386 skrll UVMHIST_LOG(maphist,"(map=%#jx,va=%#jx)", (uintptr_t)map, va, 0, 0); 4965 1.379 thorpej 4966 1.379 thorpej const vaddr_t start = trunc_page(va); 4967 1.379 thorpej const vaddr_t end = round_page(va+1); 4968 1.379 thorpej 4969 1.379 thorpej lookup_again: 4970 1.379 thorpej if (__predict_false(exclusive)) { 4971 1.379 thorpej vm_map_lock(map); 4972 1.379 thorpej unlock_fn = vm_map_unlock; 4973 1.379 thorpej } else { 4974 1.379 thorpej vm_map_lock_read(map); 4975 1.379 thorpej unlock_fn = vm_map_unlock_read; 4976 1.379 thorpej } 4977 1.379 thorpej 4978 1.379 thorpej if (__predict_false(!uvm_map_lookup_entry(map, start, &entry))) { 4979 1.379 thorpej unlock_fn(map); 4980 1.379 thorpej UVMHIST_LOG(maphist,"<- done (no entry)",0,0,0,0); 4981 1.379 thorpej return false; 4982 1.379 thorpej } 4983 1.379 thorpej 4984 1.379 thorpej if (__predict_false(entry->protection == VM_PROT_NONE)) { 4985 1.379 thorpej unlock_fn(map); 4986 1.379 thorpej UVMHIST_LOG(maphist,"<- done (PROT_NONE)",0,0,0,0); 4987 1.379 thorpej return false; 4988 1.379 thorpej } 4989 1.379 thorpej 4990 1.379 thorpej /* 4991 1.379 thorpej * We have a fast path for the common case of "no COW resolution 4992 1.379 thorpej * needed" whereby we have taken a read lock on the map and if 4993 1.379 thorpej * we don't encounter any need to create a vm_anon then great! 4994 1.379 thorpej * But if we do, we loop around again, instead taking an exclusive 4995 1.379 thorpej * lock so that we can perform the fault. 4996 1.379 thorpej * 4997 1.379 thorpej * In the event that we have to resolve the fault, we do nearly the 4998 1.379 thorpej * same work as uvm_map_pageable() does: 4999 1.379 thorpej * 5000 1.379 thorpej * 1: holding the write lock, we create any anonymous maps that need 5001 1.379 thorpej * to be created. however, we do NOT need to clip the map entries 5002 1.379 thorpej * in this case. 5003 1.379 thorpej * 5004 1.379 thorpej * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 5005 1.379 thorpej * in the page (assuming the entry is not already wired). this 5006 1.379 thorpej * is done because we need the vm_anon to be present. 5007 1.379 thorpej */ 5008 1.379 thorpej if (__predict_true(!VM_MAPENT_ISWIRED(entry))) { 5009 1.379 thorpej 5010 1.379 thorpej bool need_fault = false; 5011 1.379 thorpej 5012 1.379 thorpej /* 5013 1.379 thorpej * perform the action of vm_map_lookup that need the 5014 1.379 thorpej * write lock on the map: create an anonymous map for 5015 1.379 thorpej * a copy-on-write region, or an anonymous map for 5016 1.379 thorpej * a zero-fill region. 
5017 1.379 thorpej */ 5018 1.379 thorpej if (__predict_false(UVM_ET_ISSUBMAP(entry))) { 5019 1.379 thorpej unlock_fn(map); 5020 1.379 thorpej UVMHIST_LOG(maphist,"<- done (submap)",0,0,0,0); 5021 1.379 thorpej return false; 5022 1.379 thorpej } 5023 1.379 thorpej if (__predict_false(UVM_ET_ISNEEDSCOPY(entry) && 5024 1.379 thorpej ((entry->max_protection & VM_PROT_WRITE) || 5025 1.379 thorpej (entry->object.uvm_obj == NULL)))) { 5026 1.379 thorpej if (!exclusive) { 5027 1.379 thorpej /* need to take the slow path */ 5028 1.379 thorpej KASSERT(unlock_fn == vm_map_unlock_read); 5029 1.379 thorpej vm_map_unlock_read(map); 5030 1.379 thorpej exclusive = true; 5031 1.379 thorpej goto lookup_again; 5032 1.379 thorpej } 5033 1.379 thorpej need_fault = true; 5034 1.379 thorpej amap_copy(map, entry, 0, start, end); 5035 1.379 thorpej /* XXXCDC: wait OK? */ 5036 1.379 thorpej } 5037 1.379 thorpej 5038 1.379 thorpej /* 5039 1.379 thorpej * do a quick check to see if the fault has already 5040 1.379 thorpej * been resolved to the upper layer. 5041 1.379 thorpej */ 5042 1.379 thorpej if (__predict_true(entry->aref.ar_amap != NULL && 5043 1.379 thorpej need_fault == false)) { 5044 1.379 thorpej amap_lock(entry->aref.ar_amap, RW_WRITER); 5045 1.379 thorpej anon = amap_lookup(&entry->aref, start - entry->start); 5046 1.379 thorpej if (__predict_true(anon != NULL)) { 5047 1.379 thorpej /* amap unlocked below */ 5048 1.379 thorpej goto found_anon; 5049 1.379 thorpej } 5050 1.379 thorpej amap_unlock(entry->aref.ar_amap); 5051 1.379 thorpej need_fault = true; 5052 1.379 thorpej } 5053 1.379 thorpej 5054 1.379 thorpej /* 5055 1.379 thorpej * we predict this test as false because if we reach 5056 1.379 thorpej * this point, then we are likely dealing with a 5057 1.379 thorpej * shared memory region backed by a uvm_object, in 5058 1.379 thorpej * which case a fault to create the vm_anon is not 5059 1.379 thorpej * necessary. 5060 1.379 thorpej */ 5061 1.379 thorpej if (__predict_false(need_fault)) { 5062 1.379 thorpej if (exclusive) { 5063 1.379 thorpej vm_map_busy(map); 5064 1.379 thorpej vm_map_unlock(map); 5065 1.379 thorpej unlock_fn = vm_map_unbusy; 5066 1.379 thorpej } 5067 1.379 thorpej 5068 1.379 thorpej if (uvm_fault_wire(map, start, end, 5069 1.379 thorpej entry->max_protection, 1)) { 5070 1.379 thorpej /* wiring failed */ 5071 1.379 thorpej unlock_fn(map); 5072 1.379 thorpej UVMHIST_LOG(maphist,"<- done (wire failed)", 5073 1.379 thorpej 0,0,0,0); 5074 1.379 thorpej return false; 5075 1.379 thorpej } 5076 1.379 thorpej 5077 1.379 thorpej /* 5078 1.379 thorpej * now that we have resolved the fault, we can unwire 5079 1.379 thorpej * the page. 
5080 1.379 thorpej */ 5081 1.379 thorpej if (exclusive) { 5082 1.379 thorpej vm_map_lock(map); 5083 1.379 thorpej vm_map_unbusy(map); 5084 1.379 thorpej unlock_fn = vm_map_unlock; 5085 1.379 thorpej } 5086 1.379 thorpej 5087 1.379 thorpej uvm_fault_unwire_locked(map, start, end); 5088 1.379 thorpej } 5089 1.379 thorpej } 5090 1.379 thorpej 5091 1.379 thorpej /* check the upper layer */ 5092 1.379 thorpej if (entry->aref.ar_amap) { 5093 1.379 thorpej amap_lock(entry->aref.ar_amap, RW_WRITER); 5094 1.379 thorpej anon = amap_lookup(&entry->aref, start - entry->start); 5095 1.379 thorpej if (anon) { 5096 1.379 thorpej found_anon: KASSERT(anon->an_lock == entry->aref.ar_amap->am_lock); 5097 1.379 thorpej anon->an_ref++; 5098 1.382 thorpej rw_obj_hold(anon->an_lock); 5099 1.379 thorpej KASSERT(anon->an_ref != 0); 5100 1.383 thorpej UVM_VOADDR_SET_ANON(voaddr, anon); 5101 1.379 thorpej voaddr->offset = va & PAGE_MASK; 5102 1.379 thorpej result = true; 5103 1.379 thorpej } 5104 1.379 thorpej amap_unlock(entry->aref.ar_amap); 5105 1.379 thorpej } 5106 1.379 thorpej 5107 1.379 thorpej /* check the lower layer */ 5108 1.379 thorpej if (!result && UVM_ET_ISOBJ(entry)) { 5109 1.379 thorpej struct uvm_object *uobj = entry->object.uvm_obj; 5110 1.379 thorpej 5111 1.379 thorpej KASSERT(uobj != NULL); 5112 1.379 thorpej (*uobj->pgops->pgo_reference)(uobj); 5113 1.383 thorpej UVM_VOADDR_SET_UOBJ(voaddr, uobj); 5114 1.379 thorpej voaddr->offset = entry->offset + (va - entry->start); 5115 1.379 thorpej result = true; 5116 1.379 thorpej } 5117 1.379 thorpej 5118 1.379 thorpej unlock_fn(map); 5119 1.379 thorpej 5120 1.379 thorpej if (result) { 5121 1.379 thorpej UVMHIST_LOG(maphist, 5122 1.386 skrll "<- done OK (type=%jd,owner=%#jx,offset=%#jx)", 5123 1.383 thorpej UVM_VOADDR_GET_TYPE(voaddr), 5124 1.383 thorpej UVM_VOADDR_GET_OBJECT(voaddr), 5125 1.383 thorpej voaddr->offset, 0); 5126 1.379 thorpej } else { 5127 1.379 thorpej UVMHIST_LOG(maphist,"<- done (failed)",0,0,0,0); 5128 1.379 thorpej } 5129 1.379 thorpej 5130 1.379 thorpej return result; 5131 1.379 thorpej } 5132 1.379 thorpej 5133 1.379 thorpej /* 5134 1.379 thorpej * uvm_voaddr_release: release the references held by the 5135 1.379 thorpej * virtual object address.
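 * => a hypothetical caller pairs this with uvm_voaddr_acquire and
 *    uvm_voaddr_compare roughly as follows (sketch only; "map",
 *    "addr1" and "addr2" are placeholders):
 *
 *	struct uvm_voaddr va1, va2;
 *	bool same;
 *
 *	if (!uvm_voaddr_acquire(map, addr1, &va1))
 *		return ...;
 *	if (!uvm_voaddr_acquire(map, addr2, &va2)) {
 *		uvm_voaddr_release(&va1);
 *		return ...;
 *	}
 *	same = (uvm_voaddr_compare(&va1, &va2) == 0);
 *	uvm_voaddr_release(&va2);
 *	uvm_voaddr_release(&va1);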
5136 1.379 thorpej */ 5137 1.379 thorpej void 5138 1.379 thorpej uvm_voaddr_release(struct uvm_voaddr * const voaddr) 5139 1.379 thorpej { 5140 1.379 thorpej 5141 1.383 thorpej switch (UVM_VOADDR_GET_TYPE(voaddr)) { 5142 1.383 thorpej case UVM_VOADDR_TYPE_UOBJ: { 5143 1.383 thorpej struct uvm_object * const uobj = UVM_VOADDR_GET_UOBJ(voaddr); 5144 1.379 thorpej 5145 1.379 thorpej KASSERT(uobj != NULL); 5146 1.379 thorpej KASSERT(uobj->pgops->pgo_detach != NULL); 5147 1.379 thorpej (*uobj->pgops->pgo_detach)(uobj); 5148 1.379 thorpej break; 5149 1.379 thorpej } 5150 1.379 thorpej case UVM_VOADDR_TYPE_ANON: { 5151 1.383 thorpej struct vm_anon * const anon = UVM_VOADDR_GET_ANON(voaddr); 5152 1.382 thorpej krwlock_t *lock; 5153 1.379 thorpej 5154 1.379 thorpej KASSERT(anon != NULL); 5155 1.382 thorpej rw_enter((lock = anon->an_lock), RW_WRITER); 5156 1.379 thorpej KASSERT(anon->an_ref > 0); 5157 1.382 thorpej if (--anon->an_ref == 0) { 5158 1.382 thorpej uvm_anfree(anon); 5159 1.379 thorpej } 5160 1.382 thorpej rw_exit(lock); 5161 1.382 thorpej rw_obj_free(lock); 5162 1.379 thorpej break; 5163 1.379 thorpej } 5164 1.379 thorpej default: 5165 1.379 thorpej panic("uvm_voaddr_release: bad type"); 5166 1.379 thorpej } 5167 1.379 thorpej memset(voaddr, 0, sizeof(*voaddr)); 5168 1.379 thorpej } 5169 1.379 thorpej 5170 1.379 thorpej /* 5171 1.379 thorpej * uvm_voaddr_compare: compare two uvm_voaddr objects. 5172 1.379 thorpej * 5173 1.379 thorpej * => memcmp() semantics 5174 1.379 thorpej */ 5175 1.379 thorpej int 5176 1.379 thorpej uvm_voaddr_compare(const struct uvm_voaddr * const voaddr1, 5177 1.379 thorpej const struct uvm_voaddr * const voaddr2) 5178 1.379 thorpej { 5179 1.383 thorpej const uintptr_t type1 = UVM_VOADDR_GET_TYPE(voaddr1); 5180 1.383 thorpej const uintptr_t type2 = UVM_VOADDR_GET_TYPE(voaddr2); 5181 1.379 thorpej 5182 1.383 thorpej KASSERT(type1 == UVM_VOADDR_TYPE_UOBJ || 5183 1.383 thorpej type1 == UVM_VOADDR_TYPE_ANON); 5184 1.379 thorpej 5185 1.383 thorpej KASSERT(type2 == UVM_VOADDR_TYPE_UOBJ || 5186 1.383 thorpej type2 == UVM_VOADDR_TYPE_ANON); 5187 1.380 riastrad 5188 1.383 thorpej if (type1 < type2) 5189 1.379 thorpej return -1; 5190 1.383 thorpej if (type1 > type2) 5191 1.379 thorpej return 1; 5192 1.380 riastrad 5193 1.383 thorpej const uintptr_t addr1 = UVM_VOADDR_GET_OBJECT(voaddr1); 5194 1.383 thorpej const uintptr_t addr2 = UVM_VOADDR_GET_OBJECT(voaddr2); 5195 1.379 thorpej 5196 1.379 thorpej if (addr1 < addr2) 5197 1.379 thorpej return -1; 5198 1.379 thorpej if (addr1 > addr2) 5199 1.379 thorpej return 1; 5200 1.380 riastrad 5201 1.379 thorpej if (voaddr1->offset < voaddr2->offset) 5202 1.379 thorpej return -1; 5203 1.379 thorpej if (voaddr1->offset > voaddr2->offset) 5204 1.379 thorpej return 1; 5205 1.380 riastrad 5206 1.379 thorpej return 0; 5207 1.379 thorpej } 5208 1.379 thorpej 5209 1.270 pooka #if defined(DDB) || defined(DEBUGPRINT) 5210 1.280 thorpej 5211 1.280 thorpej /* 5212 1.280 thorpej * uvm_map_printit: actually prints the map 5213 1.280 thorpej */ 5214 1.280 thorpej 5215 1.280 thorpej void 5216 1.280 thorpej uvm_map_printit(struct vm_map *map, bool full, 5217 1.280 thorpej void (*pr)(const char *, ...)) 5218 1.280 thorpej { 5219 1.280 thorpej struct vm_map_entry *entry; 5220 1.280 thorpej 5221 1.334 matt (*pr)("MAP %p: [%#lx->%#lx]\n", map, vm_map_min(map), 5222 1.280 thorpej vm_map_max(map)); 5223 1.334 matt (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=%#x\n", 5224 1.280 thorpej map->nentries, map->size, map->ref_count, map->timestamp, 5225 
1.280 thorpej map->flags); 5226 1.280 thorpej (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap, 5227 1.280 thorpej pmap_resident_count(map->pmap), pmap_wired_count(map->pmap)); 5228 1.280 thorpej if (!full) 5229 1.280 thorpej return; 5230 1.280 thorpej for (entry = map->header.next; entry != &map->header; 5231 1.280 thorpej entry = entry->next) { 5232 1.334 matt (*pr)(" - %p: %#lx->%#lx: obj=%p/%#llx, amap=%p/%d\n", 5233 1.280 thorpej entry, entry->start, entry->end, entry->object.uvm_obj, 5234 1.280 thorpej (long long)entry->offset, entry->aref.ar_amap, 5235 1.280 thorpej entry->aref.ar_pageoff); 5236 1.280 thorpej (*pr)( 5237 1.280 thorpej "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 5238 1.397 riastrad "wc=%d, adv=%d%s\n", 5239 1.280 thorpej (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 5240 1.280 thorpej (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 5241 1.280 thorpej (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 5242 1.280 thorpej entry->protection, entry->max_protection, 5243 1.397 riastrad entry->inheritance, entry->wired_count, entry->advice, 5244 1.397 riastrad entry == map->first_free ? " (first_free)" : ""); 5245 1.280 thorpej } 5246 1.280 thorpej } 5247 1.280 thorpej 5248 1.247 yamt void 5249 1.247 yamt uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 5250 1.247 yamt { 5251 1.247 yamt struct vm_map *map; 5252 1.247 yamt 5253 1.247 yamt for (map = kernel_map;;) { 5254 1.247 yamt struct vm_map_entry *entry; 5255 1.247 yamt 5256 1.247 yamt if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) { 5257 1.247 yamt break; 5258 1.247 yamt } 5259 1.247 yamt (*pr)("%p is %p+%zu from VMMAP %p\n", 5260 1.247 yamt (void *)addr, (void *)entry->start, 5261 1.247 yamt (size_t)(addr - (uintptr_t)entry->start), map); 5262 1.247 yamt if (!UVM_ET_ISSUBMAP(entry)) { 5263 1.247 yamt break; 5264 1.247 yamt } 5265 1.247 yamt map = entry->object.sub_map; 5266 1.247 yamt } 5267 1.247 yamt } 5268 1.280 thorpej 5269 1.280 thorpej #endif /* DDB || DEBUGPRINT */ 5270 1.288 drochner 5271 1.288 drochner #ifndef __USER_VA0_IS_SAFE 5272 1.288 drochner static int 5273 1.290 drochner sysctl_user_va0_disable(SYSCTLFN_ARGS) 5274 1.288 drochner { 5275 1.288 drochner struct sysctlnode node; 5276 1.288 drochner int t, error; 5277 1.288 drochner 5278 1.288 drochner node = *rnode; 5279 1.288 drochner node.sysctl_data = &t; 5280 1.290 drochner t = user_va0_disable; 5281 1.288 drochner error = sysctl_lookup(SYSCTLFN_CALL(&node)); 5282 1.288 drochner if (error || newp == NULL) 5283 1.288 drochner return (error); 5284 1.288 drochner 5285 1.290 drochner if (!t && user_va0_disable && 5286 1.316 elad kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0, 5287 1.316 elad NULL, NULL, NULL)) 5288 1.288 drochner return EPERM; 5289 1.288 drochner 5290 1.290 drochner user_va0_disable = !!t; 5291 1.288 drochner return 0; 5292 1.288 drochner } 5293 1.335 christos #endif 5294 1.335 christos 5295 1.335 christos static int 5296 1.335 christos fill_vmentry(struct lwp *l, struct proc *p, struct kinfo_vmentry *kve, 5297 1.335 christos struct vm_map *m, struct vm_map_entry *e) 5298 1.335 christos { 5299 1.335 christos #ifndef _RUMPKERNEL 5300 1.335 christos int error; 5301 1.335 christos 5302 1.335 christos memset(kve, 0, sizeof(*kve)); 5303 1.335 christos KASSERT(e != NULL); 5304 1.335 christos if (UVM_ET_ISOBJ(e)) { 5305 1.335 christos struct uvm_object *uobj = e->object.uvm_obj; 5306 1.335 christos KASSERT(uobj != NULL); 5307 1.335 christos kve->kve_ref_count = uobj->uo_refs; 5308 1.335 christos 
kve->kve_count = uobj->uo_npages; 5309 1.335 christos if (UVM_OBJ_IS_VNODE(uobj)) { 5310 1.335 christos struct vattr va; 5311 1.335 christos struct vnode *vp = (struct vnode *)uobj; 5312 1.335 christos vn_lock(vp, LK_SHARED | LK_RETRY); 5313 1.335 christos error = VOP_GETATTR(vp, &va, l->l_cred); 5314 1.335 christos VOP_UNLOCK(vp); 5315 1.335 christos kve->kve_type = KVME_TYPE_VNODE; 5316 1.335 christos if (error == 0) { 5317 1.335 christos kve->kve_vn_size = vp->v_size; 5318 1.335 christos kve->kve_vn_type = (int)vp->v_type; 5319 1.335 christos kve->kve_vn_mode = va.va_mode; 5320 1.335 christos kve->kve_vn_rdev = va.va_rdev; 5321 1.335 christos kve->kve_vn_fileid = va.va_fileid; 5322 1.335 christos kve->kve_vn_fsid = va.va_fsid; 5323 1.335 christos error = vnode_to_path(kve->kve_path, 5324 1.335 christos sizeof(kve->kve_path) / 2, vp, l, p); 5325 1.335 christos } 5326 1.335 christos } else if (UVM_OBJ_IS_KERN_OBJECT(uobj)) { 5327 1.335 christos kve->kve_type = KVME_TYPE_KERN; 5328 1.335 christos } else if (UVM_OBJ_IS_DEVICE(uobj)) { 5329 1.335 christos kve->kve_type = KVME_TYPE_DEVICE; 5330 1.335 christos } else if (UVM_OBJ_IS_AOBJ(uobj)) { 5331 1.335 christos kve->kve_type = KVME_TYPE_ANON; 5332 1.335 christos } else { 5333 1.335 christos kve->kve_type = KVME_TYPE_OBJECT; 5334 1.335 christos } 5335 1.335 christos } else if (UVM_ET_ISSUBMAP(e)) { 5336 1.335 christos struct vm_map *map = e->object.sub_map; 5337 1.335 christos KASSERT(map != NULL); 5338 1.335 christos kve->kve_ref_count = map->ref_count; 5339 1.335 christos kve->kve_count = map->nentries; 5340 1.335 christos kve->kve_type = KVME_TYPE_SUBMAP; 5341 1.335 christos } else 5342 1.335 christos kve->kve_type = KVME_TYPE_UNKNOWN; 5343 1.335 christos 5344 1.335 christos kve->kve_start = e->start; 5345 1.335 christos kve->kve_end = e->end; 5346 1.335 christos kve->kve_offset = e->offset; 5347 1.335 christos kve->kve_wired_count = e->wired_count; 5348 1.335 christos kve->kve_inheritance = e->inheritance; 5349 1.363 riastrad kve->kve_attributes = 0; /* unused */ 5350 1.335 christos kve->kve_advice = e->advice; 5351 1.335 christos #define PROT(p) (((p) & VM_PROT_READ) ? KVME_PROT_READ : 0) | \ 5352 1.335 christos (((p) & VM_PROT_WRITE) ? KVME_PROT_WRITE : 0) | \ 5353 1.335 christos (((p) & VM_PROT_EXECUTE) ? KVME_PROT_EXEC : 0) 5354 1.335 christos kve->kve_protection = PROT(e->protection); 5355 1.335 christos kve->kve_max_protection = PROT(e->max_protection); 5356 1.335 christos kve->kve_flags |= (e->etype & UVM_ET_COPYONWRITE) 5357 1.335 christos ? KVME_FLAG_COW : 0; 5358 1.335 christos kve->kve_flags |= (e->etype & UVM_ET_NEEDSCOPY) 5359 1.335 christos ? KVME_FLAG_NEEDS_COPY : 0; 5360 1.335 christos kve->kve_flags |= (m->flags & VM_MAP_TOPDOWN) 5361 1.335 christos ? KVME_FLAG_GROWS_DOWN : KVME_FLAG_GROWS_UP; 5362 1.335 christos kve->kve_flags |= (m->flags & VM_MAP_PAGEABLE) 5363 1.335 christos ? 
KVME_FLAG_PAGEABLE : 0; 5364 1.335 christos #endif 5365 1.335 christos return 0; 5366 1.335 christos } 5367 1.335 christos 5368 1.335 christos static int 5369 1.335 christos fill_vmentries(struct lwp *l, pid_t pid, u_int elem_size, void *oldp, 5370 1.335 christos size_t *oldlenp) 5371 1.335 christos { 5372 1.335 christos int error; 5373 1.335 christos struct proc *p; 5374 1.338 christos struct kinfo_vmentry *vme; 5375 1.335 christos struct vmspace *vm; 5376 1.335 christos struct vm_map *map; 5377 1.335 christos struct vm_map_entry *entry; 5378 1.335 christos char *dp; 5379 1.338 christos size_t count, vmesize; 5380 1.335 christos 5381 1.343 christos if (elem_size == 0 || elem_size > 2 * sizeof(*vme)) 5382 1.343 christos return EINVAL; 5383 1.343 christos 5384 1.343 christos if (oldp) { 5385 1.357 kamil if (*oldlenp > 10UL * 1024UL * 1024UL) 5386 1.343 christos return E2BIG; 5387 1.343 christos count = *oldlenp / elem_size; 5388 1.343 christos if (count == 0) 5389 1.343 christos return ENOMEM; 5390 1.343 christos vmesize = count * sizeof(*vme); 5391 1.343 christos } else 5392 1.343 christos vmesize = 0; 5393 1.335 christos 5394 1.335 christos if ((error = proc_find_locked(l, &p, pid)) != 0) 5395 1.335 christos return error; 5396 1.335 christos 5397 1.343 christos vme = NULL; 5398 1.343 christos count = 0; 5399 1.343 christos 5400 1.335 christos if ((error = proc_vmspace_getref(p, &vm)) != 0) 5401 1.335 christos goto out; 5402 1.335 christos 5403 1.335 christos map = &vm->vm_map; 5404 1.335 christos vm_map_lock_read(map); 5405 1.335 christos 5406 1.335 christos dp = oldp; 5407 1.338 christos if (oldp) 5408 1.338 christos vme = kmem_alloc(vmesize, KM_SLEEP); 5409 1.335 christos for (entry = map->header.next; entry != &map->header; 5410 1.335 christos entry = entry->next) { 5411 1.352 pgoyette if (oldp && (dp - (char *)oldp) < vmesize) { 5412 1.338 christos error = fill_vmentry(l, p, &vme[count], map, entry); 5413 1.335 christos if (error) 5414 1.338 christos goto out; 5415 1.335 christos dp += elem_size; 5416 1.335 christos } 5417 1.335 christos count++; 5418 1.335 christos } 5419 1.335 christos vm_map_unlock_read(map); 5420 1.335 christos uvmspace_free(vm); 5421 1.338 christos 5422 1.335 christos out: 5423 1.335 christos if (pid != -1) 5424 1.335 christos mutex_exit(p->p_lock); 5425 1.335 christos if (error == 0) { 5426 1.355 riastrad const u_int esize = uimin(sizeof(*vme), elem_size); 5427 1.338 christos dp = oldp; 5428 1.338 christos for (size_t i = 0; i < count; i++) { 5429 1.352 pgoyette if (oldp && (dp - (char *)oldp) < vmesize) { 5430 1.338 christos error = sysctl_copyout(l, &vme[i], dp, esize); 5431 1.338 christos if (error) 5432 1.338 christos break; 5433 1.338 christos dp += elem_size; 5434 1.338 christos } else 5435 1.338 christos break; 5436 1.338 christos } 5437 1.335 christos count *= elem_size; 5438 1.335 christos if (oldp != NULL && *oldlenp < count) 5439 1.335 christos error = ENOSPC; 5440 1.335 christos *oldlenp = count; 5441 1.335 christos } 5442 1.338 christos if (vme) 5443 1.338 christos kmem_free(vme, vmesize); 5444 1.335 christos return error; 5445 1.335 christos } 5446 1.335 christos 5447 1.335 christos static int 5448 1.335 christos sysctl_vmproc(SYSCTLFN_ARGS) 5449 1.335 christos { 5450 1.335 christos int error; 5451 1.335 christos 5452 1.335 christos if (namelen == 1 && name[0] == CTL_QUERY) 5453 1.335 christos return (sysctl_query(SYSCTLFN_CALL(rnode))); 5454 1.335 christos 5455 1.335 christos if (namelen == 0) 5456 1.335 christos return EINVAL; 5457 1.335 christos 
5458 1.335 christos switch (name[0]) { 5459 1.335 christos case VM_PROC_MAP: 5460 1.335 christos if (namelen != 3) 5461 1.335 christos return EINVAL; 5462 1.335 christos sysctl_unlock(); 5463 1.343 christos error = fill_vmentries(l, name[1], name[2], oldp, oldlenp); 5464 1.335 christos sysctl_relock(); 5465 1.335 christos return error; 5466 1.335 christos default: 5467 1.335 christos return EINVAL; 5468 1.335 christos } 5469 1.335 christos } 5470 1.288 drochner 5471 1.288 drochner SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup") 5472 1.288 drochner { 5473 1.288 drochner 5474 1.335 christos sysctl_createv(clog, 0, NULL, NULL, 5475 1.335 christos CTLFLAG_PERMANENT, 5476 1.335 christos CTLTYPE_STRUCT, "proc", 5477 1.335 christos SYSCTL_DESCR("Process vm information"), 5478 1.335 christos sysctl_vmproc, 0, NULL, 0, 5479 1.335 christos CTL_VM, VM_PROC, CTL_EOL); 5480 1.335 christos #ifndef __USER_VA0_IS_SAFE 5481 1.288 drochner sysctl_createv(clog, 0, NULL, NULL, 5482 1.288 drochner CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 5483 1.289 drochner CTLTYPE_INT, "user_va0_disable", 5484 1.288 drochner SYSCTL_DESCR("Disable VA 0"), 5485 1.290 drochner sysctl_user_va0_disable, 0, &user_va0_disable, 0, 5486 1.288 drochner CTL_VM, CTL_CREATE, CTL_EOL); 5487 1.335 christos #endif 5488 1.288 drochner } 5489
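
/*
 * The VM_PROC_MAP sysctl handled above can be consumed from userland
 * roughly as follows.  This is an illustrative sketch only, assuming
 * the struct kinfo_vmentry, VM_PROC and VM_PROC_MAP definitions
 * exported through <sys/sysctl.h> (plus <stdio.h>, <stdlib.h>,
 * <inttypes.h> and <unistd.h>); error handling and the usual
 * over-allocation to cope with a map that grows between the two
 * calls are omitted:
 *
 *	int mib[5] = { CTL_VM, VM_PROC, VM_PROC_MAP, getpid(),
 *	    sizeof(struct kinfo_vmentry) };
 *	size_t len = 0;
 *
 *	sysctl(mib, 5, NULL, &len, NULL, 0);	(size the buffer)
 *	struct kinfo_vmentry *kve = malloc(len);
 *	sysctl(mib, 5, kve, &len, NULL, 0);	(fetch the entries)
 *	for (size_t i = 0; i < len / sizeof(*kve); i++)
 *		printf("%#" PRIx64 "-%#" PRIx64 " %s\n",
 *		    (uint64_t)kve[i].kve_start, (uint64_t)kve[i].kve_end,
 *		    kve[i].kve_path);
 *	free(kve);
 */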