1 /* $NetBSD: uvm_amap.c,v 1.129 2023/09/10 14:54:34 ad Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /* 29 * uvm_amap.c: amap operations 30 */ 31 32 /* 33 * this file contains functions that perform operations on amaps. see 34 * uvm_amap.h for a brief explanation of the role of amaps in uvm. 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.129 2023/09/10 14:54:34 ad Exp $"); 39 40 #include "opt_uvmhist.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/kmem.h> 46 #include <sys/pool.h> 47 #include <sys/atomic.h> 48 49 #include <uvm/uvm.h> 50 #include <uvm/uvm_swap.h> 51 52 /* 53 * cache for allocation of vm_map structures. note that in order to 54 * avoid an endless loop, the amap cache's allocator cannot allocate 55 * memory from an amap (it currently goes through the kernel uobj, so 56 * we are ok). 57 */ 58 static struct pool_cache uvm_amap_cache; 59 static kmutex_t amap_list_lock __cacheline_aligned; 60 static LIST_HEAD(, vm_amap) amap_list; 61 62 /* 63 * local functions 64 */ 65 66 static int 67 amap_roundup_slots(int slots) 68 { 69 70 return kmem_roundup_size(slots * sizeof(int)) / sizeof(int); 71 } 72 73 #ifdef UVM_AMAP_PPREF 74 /* 75 * what is ppref? ppref is an _optional_ amap feature which is used 76 * to keep track of reference counts on a per-page basis. it is enabled 77 * when UVM_AMAP_PPREF is defined. 78 * 79 * when enabled, an array of ints is allocated for the pprefs. this 80 * array is allocated only when a partial reference is added to the 81 * map (either by unmapping part of the amap, or gaining a reference 82 * to only a part of an amap). if the allocation of the array fails 83 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate 84 * that we tried to do ppref's but couldn't alloc the array so just 85 * give up (after all, this is an optional feature!). 86 * 87 * the array is divided into page sized "chunks." for chunks of length 1, 88 * the chunk reference count plus one is stored in that chunk's slot. 89 * for chunks of length > 1 the first slot contains (the reference count 90 * plus one) * -1. 
[the negative value indicates that the length is 91 * greater than one.] the second slot of the chunk contains the length 92 * of the chunk. here is an example: 93 * 94 * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1 95 * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x 96 * <----------><-><----><-------><----><-><-------> 97 * (x = don't care) 98 * 99 * this allows us to allow one int to contain the ref count for the whole 100 * chunk. note that the "plus one" part is needed because a reference 101 * count of zero is neither positive or negative (need a way to tell 102 * if we've got one zero or a bunch of them). 103 * 104 * here are some in-line functions to help us. 105 */ 106 107 /* 108 * pp_getreflen: get the reference and length for a specific offset 109 * 110 * => ppref's amap must be locked 111 */ 112 static inline void 113 pp_getreflen(int *ppref, int offset, int *refp, int *lenp) 114 { 115 116 if (ppref[offset] > 0) { /* chunk size must be 1 */ 117 *refp = ppref[offset] - 1; /* don't forget to adjust */ 118 *lenp = 1; 119 } else { 120 *refp = (ppref[offset] * -1) - 1; 121 *lenp = ppref[offset+1]; 122 } 123 } 124 125 /* 126 * pp_setreflen: set the reference and length for a specific offset 127 * 128 * => ppref's amap must be locked 129 */ 130 static inline void 131 pp_setreflen(int *ppref, int offset, int ref, int len) 132 { 133 if (len == 0) 134 return; 135 if (len == 1) { 136 ppref[offset] = ref + 1; 137 } else { 138 ppref[offset] = (ref + 1) * -1; 139 ppref[offset+1] = len; 140 } 141 } 142 #endif /* UVM_AMAP_PPREF */ 143 144 /* 145 * amap_alloc1: allocate an amap, but do not initialise the overlay. 146 * 147 * => Note: lock is not set. 148 */ 149 static struct vm_amap * 150 amap_alloc1(int slots, int padslots, int flags) 151 { 152 const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0; 153 const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP; 154 struct vm_amap *amap; 155 krwlock_t *newlock, *oldlock; 156 int totalslots; 157 158 amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK); 159 if (amap == NULL) { 160 return NULL; 161 } 162 KASSERT(amap->am_lock != NULL); 163 KASSERT(amap->am_nused == 0); 164 165 /* Try to privatize the lock if currently shared. */ 166 if (rw_obj_refcnt(amap->am_lock) > 1) { 167 newlock = rw_obj_tryalloc(); 168 if (newlock != NULL) { 169 oldlock = amap->am_lock; 170 mutex_enter(&amap_list_lock); 171 amap->am_lock = newlock; 172 mutex_exit(&amap_list_lock); 173 rw_obj_free(oldlock); 174 } 175 } 176 177 totalslots = amap_roundup_slots(slots + padslots); 178 amap->am_ref = 1; 179 amap->am_flags = 0; 180 #ifdef UVM_AMAP_PPREF 181 amap->am_ppref = NULL; 182 #endif 183 amap->am_maxslot = totalslots; 184 amap->am_nslot = slots; 185 186 /* 187 * Note: since allocations are likely big, we expect to reduce the 188 * memory fragmentation by allocating them in separate blocks. 
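 *
 * The three arrays allocated below mirror one another: am_anon[slot]
 * holds the anon pointer (or NULL) for each slot, am_slots[] is a
 * dense list of the slots currently in use, and am_bckptr[slot] says
 * where that slot sits inside am_slots[].  For every active slot the
 * invariant am_slots[am_bckptr[slot]] == slot holds; amap_add(),
 * amap_unadd() and amap_wiperange() below maintain it.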
189 */
190 amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
191 if (amap->am_slots == NULL)
192 goto fail1;
193
194 amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
195 if (amap->am_bckptr == NULL)
196 goto fail2;
197
198 amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
199 kmflags);
200 if (amap->am_anon == NULL)
201 goto fail3;
202
203 return amap;
204
205 fail3:
206 kmem_free(amap->am_bckptr, totalslots * sizeof(int));
207 fail2:
208 kmem_free(amap->am_slots, totalslots * sizeof(int));
209 fail1:
210 pool_cache_put(&uvm_amap_cache, amap);
211
212 /*
213 * XXX hack to tell the pagedaemon how many pages we need,
214 * since we can need more than it would normally free.
215 */
216 if (nowait) {
217 extern u_int uvm_extrapages;
218 atomic_add_int(&uvm_extrapages,
219 ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
220 totalslots) >> PAGE_SHIFT);
221 }
222 return NULL;
223 }
224
225 /*
226 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
227 *
228 * => caller should ensure sz is a multiple of PAGE_SIZE
229 * => reference count to new amap is set to one
230 * => new amap is returned unlocked
231 */
232
233 struct vm_amap *
234 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
235 {
236 struct vm_amap *amap;
237 int slots, padslots;
238 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
239
240 AMAP_B2SLOT(slots, sz);
241 AMAP_B2SLOT(padslots, padsz);
242
243 amap = amap_alloc1(slots, padslots, waitf);
244 if (amap) {
245 memset(amap->am_anon, 0,
246 amap->am_maxslot * sizeof(struct vm_anon *));
247 }
248
249 UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
250 sz, 0, 0);
251 return(amap);
252 }
253
254 /*
255 * amap_ctor: pool_cache constructor for new amaps
256 *
257 * => carefully synchronize with amap_swap_off()
258 */
259 static int
260 amap_ctor(void *arg, void *obj, int flags)
261 {
262 struct vm_amap *amap = obj;
263
264 if ((flags & PR_NOWAIT) != 0) {
265 amap->am_lock = rw_obj_tryalloc();
266 if (amap->am_lock == NULL) {
267 return ENOMEM;
268 }
269 } else {
270 amap->am_lock = rw_obj_alloc();
271 }
272 amap->am_nused = 0;
273 amap->am_flags = 0;
274
275 mutex_enter(&amap_list_lock);
276 LIST_INSERT_HEAD(&amap_list, amap, am_list);
277 mutex_exit(&amap_list_lock);
278 return 0;
279 }
280
281 /*
282 * amap_dtor: pool_cache destructor for amaps
283 *
284 * => carefully synchronize with amap_swap_off()
285 */
286 static void
287 amap_dtor(void *arg, void *obj)
288 {
289 struct vm_amap *amap = obj;
290
291 KASSERT(amap->am_nused == 0);
292
293 mutex_enter(&amap_list_lock);
294 LIST_REMOVE(amap, am_list);
295 mutex_exit(&amap_list_lock);
296 rw_obj_free(amap->am_lock);
297 }
298
299 /*
300 * uvm_amap_init: initialize the amap system.
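 *
 * Because the pool cache keeps constructed objects around, an amap
 * taken from the cache normally still has the am_lock allocated by
 * amap_ctor() and is still on amap_list; amap_alloc1() above only
 * re-initialises the remaining fields (and gives the amap a private
 * lock again if the cached one is still shared).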
301 */ 302 void 303 uvm_amap_init(void) 304 { 305 306 mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE); 307 308 pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 309 COHERENCY_UNIT, 0, 0, "amappl", NULL, IPL_NONE, 310 amap_ctor, amap_dtor, NULL); 311 } 312 313 /* 314 * amap_free: free an amap 315 * 316 * => the amap must be unlocked 317 * => the amap should have a zero reference count and be empty 318 */ 319 void 320 amap_free(struct vm_amap *amap) 321 { 322 int slots; 323 324 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 325 326 KASSERT(amap->am_ref == 0); 327 KASSERT(amap->am_nused == 0); 328 KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0); 329 slots = amap->am_maxslot; 330 kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots)); 331 kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr)); 332 kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon)); 333 #ifdef UVM_AMAP_PPREF 334 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) 335 kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref)); 336 #endif 337 pool_cache_put(&uvm_amap_cache, amap); 338 UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap, 339 0, 0, 0); 340 } 341 342 /* 343 * amap_extend: extend the size of an amap (if needed) 344 * 345 * => called from uvm_map when we want to extend an amap to cover 346 * a new mapping (rather than allocate a new one) 347 * => amap should be unlocked (we will lock it) 348 * => to safely extend an amap it should have a reference count of 349 * one (thus it can't be shared) 350 */ 351 int 352 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags) 353 { 354 struct vm_amap *amap = entry->aref.ar_amap; 355 int slotoff = entry->aref.ar_pageoff; 356 int slotmapped, slotadd, slotneed, slotadded, slotalloc; 357 int slotadj, slotarea, slotendoff; 358 int oldnslots; 359 #ifdef UVM_AMAP_PPREF 360 int *newppref, *oldppref; 361 #endif 362 int i, *newsl, *newbck, *oldsl, *oldbck; 363 struct vm_anon **newover, **oldover; 364 const km_flag_t kmflags = 365 (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP; 366 367 UVMHIST_FUNC(__func__); 368 UVMHIST_CALLARGS(maphist, " (entry=%#jx, addsize=%#jx, flags=%#jx)", 369 (uintptr_t)entry, addsize, flags, 0); 370 371 /* 372 * first, determine how many slots we need in the amap. don't 373 * forget that ar_pageoff could be non-zero: this means that 374 * there are some unused slots before us in the amap. 375 */ 376 377 amap_lock(amap, RW_WRITER); 378 KASSERT(amap_refs(amap) == 1); /* amap can't be shared */ 379 AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */ 380 AMAP_B2SLOT(slotadd, addsize); /* slots to add */ 381 if (flags & AMAP_EXTEND_FORWARDS) { 382 slotneed = slotoff + slotmapped + slotadd; 383 slotadj = 0; 384 slotarea = 0; 385 } else { 386 slotneed = slotadd + slotmapped; 387 slotadj = slotadd - slotoff; 388 slotarea = amap->am_maxslot - slotmapped; 389 } 390 391 /* 392 * Because this amap only has 1 ref, we know that there is 393 * only one vm_map_entry pointing to it, and the one entry is 394 * using slots between slotoff and slotoff + slotmapped. If 395 * we have been using ppref then we know that only slots in 396 * the one map entry's range can have anons, since ppref 397 * allowed us to free any anons outside that range as other map 398 * entries which used this amap were removed. 
But without ppref, 399 * we couldn't know which slots were still needed by other map 400 * entries, so we couldn't free any anons as we removed map 401 * entries, and so any slot from 0 to am_nslot can have an 402 * anon. But now that we know there is only one map entry 403 * left and we know its range, we can free up any anons 404 * outside that range. This is necessary because the rest of 405 * this function assumes that there are no anons in the amap 406 * outside of the one map entry's range. 407 */ 408 409 slotendoff = slotoff + slotmapped; 410 if (amap->am_ppref == PPREF_NONE) { 411 amap_wiperange(amap, 0, slotoff); 412 amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff); 413 } 414 for (i = 0; i < slotoff; i++) { 415 KASSERT(amap->am_anon[i] == NULL); 416 } 417 for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) { 418 KASSERT(amap->am_anon[i] == NULL); 419 } 420 421 /* 422 * case 1: we already have enough slots in the map and thus 423 * only need to bump the reference counts on the slots we are 424 * adding. 425 */ 426 427 if (flags & AMAP_EXTEND_FORWARDS) { 428 if (amap->am_nslot >= slotneed) { 429 #ifdef UVM_AMAP_PPREF 430 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 431 amap_pp_adjref(amap, slotoff + slotmapped, 432 slotadd, 1); 433 } 434 #endif 435 amap_unlock(amap); 436 UVMHIST_LOG(maphist, 437 "<- done (case 1f), amap = %#jx, sltneed=%jd", 438 (uintptr_t)amap, slotneed, 0, 0); 439 return 0; 440 } 441 } else { 442 if (slotadj <= 0) { 443 slotoff -= slotadd; 444 entry->aref.ar_pageoff = slotoff; 445 #ifdef UVM_AMAP_PPREF 446 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 447 amap_pp_adjref(amap, slotoff, slotadd, 1); 448 } 449 #endif 450 amap_unlock(amap); 451 UVMHIST_LOG(maphist, 452 "<- done (case 1b), amap = %#jx, sltneed=%jd", 453 (uintptr_t)amap, slotneed, 0, 0); 454 return 0; 455 } 456 } 457 458 /* 459 * case 2: we pre-allocated slots for use and we just need to 460 * bump nslot up to take account for these slots. 461 */ 462 463 if (amap->am_maxslot >= slotneed) { 464 if (flags & AMAP_EXTEND_FORWARDS) { 465 #ifdef UVM_AMAP_PPREF 466 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 467 if ((slotoff + slotmapped) < amap->am_nslot) 468 amap_pp_adjref(amap, 469 slotoff + slotmapped, 470 (amap->am_nslot - 471 (slotoff + slotmapped)), 1); 472 pp_setreflen(amap->am_ppref, amap->am_nslot, 1, 473 slotneed - amap->am_nslot); 474 } 475 #endif 476 amap->am_nslot = slotneed; 477 amap_unlock(amap); 478 479 /* 480 * no need to zero am_anon since that was done at 481 * alloc time and we never shrink an allocation. 482 */ 483 484 UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, " 485 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0); 486 return 0; 487 } else { 488 #ifdef UVM_AMAP_PPREF 489 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 490 /* 491 * Slide up the ref counts on the pages that 492 * are actually in use. 493 */ 494 memmove(amap->am_ppref + slotarea, 495 amap->am_ppref + slotoff, 496 slotmapped * sizeof(int)); 497 /* 498 * Mark the (adjusted) gap at the front as 499 * referenced/not referenced. 500 */ 501 pp_setreflen(amap->am_ppref, 502 0, 0, slotarea - slotadd); 503 pp_setreflen(amap->am_ppref, 504 slotarea - slotadd, 1, slotadd); 505 } 506 #endif 507 508 /* 509 * Slide the anon pointers up and clear out 510 * the space we just made. 
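 *
 * Illustrative example (numbers made up): with am_maxslot = 8,
 * slotoff = 1, slotmapped = 3 and slotadd = 2, slotarea is 5, so
 * the three live anons slide from slots 1..3 up to slots 5..7,
 * ar_pageoff becomes slotarea - slotadd = 3, and slots 3..4 are
 * the space just added in front of the mapping.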
511 */ 512 memmove(amap->am_anon + slotarea, 513 amap->am_anon + slotoff, 514 slotmapped * sizeof(struct vm_anon*)); 515 memset(amap->am_anon + slotoff, 0, 516 (slotarea - slotoff) * sizeof(struct vm_anon *)); 517 518 /* 519 * Slide the backpointers up, but don't bother 520 * wiping out the old slots. 521 */ 522 memmove(amap->am_bckptr + slotarea, 523 amap->am_bckptr + slotoff, 524 slotmapped * sizeof(int)); 525 526 /* 527 * Adjust all the useful active slot numbers. 528 */ 529 for (i = 0; i < amap->am_nused; i++) 530 amap->am_slots[i] += (slotarea - slotoff); 531 532 /* 533 * We just filled all the empty space in the 534 * front of the amap by activating a few new 535 * slots. 536 */ 537 amap->am_nslot = amap->am_maxslot; 538 entry->aref.ar_pageoff = slotarea - slotadd; 539 amap_unlock(amap); 540 541 UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, " 542 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0); 543 return 0; 544 } 545 } 546 547 /* 548 * Case 3: we need to allocate a new amap and copy all the amap 549 * data over from old amap to the new one. Drop the lock before 550 * performing allocation. 551 * 552 * Note: since allocations are likely big, we expect to reduce the 553 * memory fragmentation by allocating them in separate blocks. 554 */ 555 556 amap_unlock(amap); 557 558 if (slotneed >= UVM_AMAP_LARGE) { 559 return E2BIG; 560 } 561 562 slotalloc = amap_roundup_slots(slotneed); 563 #ifdef UVM_AMAP_PPREF 564 newppref = NULL; 565 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 566 /* Will be handled later if fails. */ 567 newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags); 568 } 569 #endif 570 newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags); 571 newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags); 572 newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags); 573 if (newsl == NULL || newbck == NULL || newover == NULL) { 574 #ifdef UVM_AMAP_PPREF 575 if (newppref != NULL) { 576 kmem_free(newppref, slotalloc * sizeof(*newppref)); 577 } 578 #endif 579 if (newsl != NULL) { 580 kmem_free(newsl, slotalloc * sizeof(*newsl)); 581 } 582 if (newbck != NULL) { 583 kmem_free(newbck, slotalloc * sizeof(*newbck)); 584 } 585 if (newover != NULL) { 586 kmem_free(newover, slotalloc * sizeof(*newover)); 587 } 588 return ENOMEM; 589 } 590 amap_lock(amap, RW_WRITER); 591 KASSERT(amap->am_maxslot < slotneed); 592 593 /* 594 * Copy everything over to new allocated areas. 
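 *
 * For a forwards extension the live data keeps its old offsets and
 * the new space simply appears after it; for a backwards extension
 * the live data is copied to the tail of the new arrays (at slotarea)
 * so that the space being added ends up in front of the mapping.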
595 */ 596 597 slotadded = slotalloc - amap->am_nslot; 598 if (!(flags & AMAP_EXTEND_FORWARDS)) 599 slotarea = slotalloc - slotmapped; 600 601 /* do am_slots */ 602 oldsl = amap->am_slots; 603 if (flags & AMAP_EXTEND_FORWARDS) 604 memcpy(newsl, oldsl, sizeof(int) * amap->am_nused); 605 else 606 for (i = 0; i < amap->am_nused; i++) 607 newsl[i] = oldsl[i] + slotarea - slotoff; 608 amap->am_slots = newsl; 609 610 /* do am_anon */ 611 oldover = amap->am_anon; 612 if (flags & AMAP_EXTEND_FORWARDS) { 613 memcpy(newover, oldover, 614 sizeof(struct vm_anon *) * amap->am_nslot); 615 memset(newover + amap->am_nslot, 0, 616 sizeof(struct vm_anon *) * slotadded); 617 } else { 618 memcpy(newover + slotarea, oldover + slotoff, 619 sizeof(struct vm_anon *) * slotmapped); 620 memset(newover, 0, 621 sizeof(struct vm_anon *) * slotarea); 622 } 623 amap->am_anon = newover; 624 625 /* do am_bckptr */ 626 oldbck = amap->am_bckptr; 627 if (flags & AMAP_EXTEND_FORWARDS) 628 memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot); 629 else 630 memcpy(newbck + slotarea, oldbck + slotoff, 631 sizeof(int) * slotmapped); 632 amap->am_bckptr = newbck; 633 634 #ifdef UVM_AMAP_PPREF 635 /* do ppref */ 636 oldppref = amap->am_ppref; 637 if (newppref) { 638 if (flags & AMAP_EXTEND_FORWARDS) { 639 memcpy(newppref, oldppref, 640 sizeof(int) * amap->am_nslot); 641 memset(newppref + amap->am_nslot, 0, 642 sizeof(int) * slotadded); 643 } else { 644 memcpy(newppref + slotarea, oldppref + slotoff, 645 sizeof(int) * slotmapped); 646 } 647 amap->am_ppref = newppref; 648 if ((flags & AMAP_EXTEND_FORWARDS) && 649 (slotoff + slotmapped) < amap->am_nslot) 650 amap_pp_adjref(amap, slotoff + slotmapped, 651 (amap->am_nslot - (slotoff + slotmapped)), 1); 652 if (flags & AMAP_EXTEND_FORWARDS) 653 pp_setreflen(newppref, amap->am_nslot, 1, 654 slotneed - amap->am_nslot); 655 else { 656 pp_setreflen(newppref, 0, 0, 657 slotalloc - slotneed); 658 pp_setreflen(newppref, slotalloc - slotneed, 1, 659 slotneed - slotmapped); 660 } 661 } else { 662 if (amap->am_ppref) 663 amap->am_ppref = PPREF_NONE; 664 } 665 #endif 666 667 /* update master values */ 668 if (flags & AMAP_EXTEND_FORWARDS) 669 amap->am_nslot = slotneed; 670 else { 671 entry->aref.ar_pageoff = slotarea - slotadd; 672 amap->am_nslot = slotalloc; 673 } 674 oldnslots = amap->am_maxslot; 675 amap->am_maxslot = slotalloc; 676 amap_unlock(amap); 677 678 kmem_free(oldsl, oldnslots * sizeof(*oldsl)); 679 kmem_free(oldbck, oldnslots * sizeof(*oldbck)); 680 kmem_free(oldover, oldnslots * sizeof(*oldover)); 681 #ifdef UVM_AMAP_PPREF 682 if (oldppref && oldppref != PPREF_NONE) 683 kmem_free(oldppref, oldnslots * sizeof(*oldppref)); 684 #endif 685 UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd", 686 (uintptr_t)amap, slotneed, 0, 0); 687 return 0; 688 } 689 690 /* 691 * amap_share_protect: change protection of anons in a shared amap 692 * 693 * for shared amaps, given the current data structure layout, it is 694 * not possible for us to directly locate all maps referencing the 695 * shared anon (to change the protection). in order to protect data 696 * in shared maps we use pmap_page_protect(). [this is useful for IPC 697 * mechanisms like map entry passing that may want to write-protect 698 * all mappings of a shared amap.] we traverse am_anon or am_slots 699 * depending on the current state of the amap. 
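 * whichever walk touches fewer entries wins: scanning am_anon costs
 * one check per slot covered by the entry, while scanning am_slots
 * costs one check per anon currently in use, so below we compare the
 * entry's slot count against am_nused and take the smaller walk.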
700 * 701 * => entry's map and amap must be locked by the caller 702 */ 703 void 704 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot) 705 { 706 struct vm_amap *amap = entry->aref.ar_amap; 707 u_int slots, lcv, slot, stop; 708 struct vm_anon *anon; 709 710 KASSERT(rw_write_held(amap->am_lock)); 711 712 AMAP_B2SLOT(slots, (entry->end - entry->start)); 713 stop = entry->aref.ar_pageoff + slots; 714 715 if (slots < amap->am_nused) { 716 /* 717 * Cheaper to traverse am_anon. 718 */ 719 for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) { 720 anon = amap->am_anon[lcv]; 721 if (anon == NULL) { 722 continue; 723 } 724 if (anon->an_page) { 725 pmap_page_protect(anon->an_page, prot); 726 } 727 } 728 return; 729 } 730 731 /* 732 * Cheaper to traverse am_slots. 733 */ 734 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 735 slot = amap->am_slots[lcv]; 736 if (slot < entry->aref.ar_pageoff || slot >= stop) { 737 continue; 738 } 739 anon = amap->am_anon[slot]; 740 if (anon->an_page) { 741 pmap_page_protect(anon->an_page, prot); 742 } 743 } 744 } 745 746 /* 747 * amap_wipeout: wipeout all anon's in an amap; then free the amap! 748 * 749 * => Called from amap_unref(), when reference count drops to zero. 750 * => amap must be locked. 751 */ 752 753 void 754 amap_wipeout(struct vm_amap *amap) 755 { 756 u_int lcv; 757 758 UVMHIST_FUNC(__func__); 759 UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0); 760 761 KASSERT(rw_write_held(amap->am_lock)); 762 KASSERT(amap->am_ref == 0); 763 764 if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) { 765 /* 766 * Note: amap_swap_off() will call us again. 767 */ 768 amap_unlock(amap); 769 return; 770 } 771 772 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 773 struct vm_anon *anon; 774 u_int slot; 775 776 slot = amap->am_slots[lcv]; 777 anon = amap->am_anon[slot]; 778 KASSERT(anon != NULL); 779 KASSERT(anon->an_ref != 0); 780 781 KASSERT(anon->an_lock == amap->am_lock); 782 UVMHIST_LOG(maphist," processing anon %#jx, ref=%jd", 783 (uintptr_t)anon, anon->an_ref, 0, 0); 784 785 /* 786 * Drop the reference. 787 */ 788 789 if (__predict_true(--anon->an_ref == 0)) { 790 uvm_anfree(anon); 791 } 792 if (__predict_false((lcv & 31) == 31)) { 793 preempt_point(); 794 } 795 } 796 797 /* 798 * Finally, destroy the amap. 799 */ 800 801 amap->am_nused = 0; 802 amap_unlock(amap); 803 amap_free(amap); 804 UVMHIST_LOG(maphist,"<- done!", 0,0,0,0); 805 } 806 807 /* 808 * amap_copy: ensure that a map entry's "needs_copy" flag is false 809 * by copying the amap if necessary. 810 * 811 * => an entry with a null amap pointer will get a new (blank) one. 812 * => the map that the map entry belongs to must be locked by caller. 813 * => the amap currently attached to "entry" (if any) must be unlocked. 814 * => if canchunk is true, then we may clip the entry into a chunk 815 * => "startva" and "endva" are used only if canchunk is true. they are 816 * used to limit chunking (e.g. if you have a large space that you 817 * know you are going to need to allocate amaps for, there is no point 818 * in allowing that to be chunked) 819 */ 820 821 void 822 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags, 823 vaddr_t startva, vaddr_t endva) 824 { 825 const int waitf = (flags & AMAP_COPY_NOWAIT) ? 
UVM_FLAG_NOWAIT : 0;
826 struct vm_amap *amap, *srcamap;
827 u_int slots, lcv;
828 krwlock_t *oldlock;
829 vsize_t len;
830
831 UVMHIST_FUNC(__func__);
832 UVMHIST_CALLARGS(maphist, " (map=%#jx, entry=%#jx, flags=%#jx)",
833 (uintptr_t)map, (uintptr_t)entry, flags, -2);
834
835 KASSERT(map != kernel_map); /* we use nointr pool */
836
837 srcamap = entry->aref.ar_amap;
838 len = entry->end - entry->start;
839
840 /*
841 * Is there an amap to copy? If not, create one.
842 */
843
844 if (srcamap == NULL) {
845 const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
846
847 /*
848 * Check to see if we have a large amap that we can
849 * chunk. We align startva/endva to chunk-sized
850 * boundaries and then clip to them.
851 */
852
853 if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
854 vsize_t chunksize;
855
856 /* Convert slots to bytes. */
857 chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
858 startva = (startva / chunksize) * chunksize;
859 endva = roundup(endva, chunksize);
860 UVMHIST_LOG(maphist,
861 " chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
862 entry->start, entry->end, startva, endva);
863 UVM_MAP_CLIP_START(map, entry, startva);
864
865 /* Watch out for endva wrap-around! */
866 if (endva >= startva) {
867 UVM_MAP_CLIP_END(map, entry, endva);
868 }
869 }
870
871 if ((flags & AMAP_COPY_NOMERGE) == 0 &&
872 uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
873 return;
874 }
875
876 UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
877 entry->start, entry->end, 0, 0);
878
879 /*
880 * Allocate an initialised amap and install it.
881 * Note: we must update the length after clipping.
882 */
883 len = entry->end - entry->start;
884 entry->aref.ar_pageoff = 0;
885 entry->aref.ar_amap = amap_alloc(len, 0, waitf);
886 if (entry->aref.ar_amap != NULL) {
887 entry->etype &= ~UVM_ET_NEEDSCOPY;
888 }
889 return;
890 }
891
892 /*
893 * First check and see if we are the only map entry referencing
894 * the amap we currently have. If so, then just take it over instead
895 * of copying it. Note that we are reading am_ref without lock held
896 * as the value can only be one if we have the only reference
897 * to the amap (via our locked map). If the value is greater than
898 * one, then allocate a new amap and re-check the value.
899 */
900
901 if (srcamap->am_ref == 1) {
902 entry->etype &= ~UVM_ET_NEEDSCOPY;
903 UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
904 0, 0, 0, 0);
905 return;
906 }
907
908 UVMHIST_LOG(maphist," amap=%#jx, ref=%jd, must copy it",
909 (uintptr_t)srcamap, srcamap->am_ref, 0, 0);
910
911 /*
912 * Allocate a new amap (note: not initialised, etc).
913 */
914
915 AMAP_B2SLOT(slots, len);
916 amap = amap_alloc1(slots, 0, waitf);
917 if (amap == NULL) {
918 UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0);
919 return;
920 }
921
922 /*
923 * Make the new amap share the source amap's lock, and then lock
924 * both. We must do this before we set am_nused != 0, otherwise
925 * amap_swap_off() can become interested in the amap.
926 */
927
928 oldlock = amap->am_lock;
929 mutex_enter(&amap_list_lock);
930 amap->am_lock = srcamap->am_lock;
931 mutex_exit(&amap_list_lock);
932 rw_obj_hold(amap->am_lock);
933 rw_obj_free(oldlock);
934
935 amap_lock(srcamap, RW_WRITER);
936
937 /*
938 * Re-check the reference count with the lock held. If it has
939 * dropped to one - we can take over the existing amap.
940 */
941
942 if (srcamap->am_ref == 1) {
943 /* Just take over the existing amap.
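 * With the lock held and am_ref == 1, our (locked) map entry holds
 * the only reference, so nothing else can be using srcamap; clearing
 * NEEDSCOPY and keeping srcamap is enough.  The amap allocated above
 * was never made visible, so dropping its initial reference and
 * freeing it directly is safe.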
*/ 944 entry->etype &= ~UVM_ET_NEEDSCOPY; 945 amap_unlock(srcamap); 946 /* Destroy the new (unused) amap. */ 947 amap->am_ref--; 948 amap_free(amap); 949 return; 950 } 951 952 /* 953 * Copy the slots. Zero the padded part. 954 */ 955 956 UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0); 957 for (lcv = 0 ; lcv < slots; lcv++) { 958 amap->am_anon[lcv] = 959 srcamap->am_anon[entry->aref.ar_pageoff + lcv]; 960 if (amap->am_anon[lcv] == NULL) 961 continue; 962 KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock); 963 KASSERT(amap->am_anon[lcv]->an_ref > 0); 964 KASSERT(amap->am_nused < amap->am_maxslot); 965 amap->am_anon[lcv]->an_ref++; 966 amap->am_bckptr[lcv] = amap->am_nused; 967 amap->am_slots[amap->am_nused] = lcv; 968 amap->am_nused++; 969 } 970 memset(&amap->am_anon[lcv], 0, 971 (amap->am_maxslot - lcv) * sizeof(struct vm_anon *)); 972 973 /* 974 * Drop our reference to the old amap (srcamap) and unlock. 975 * Since the reference count on srcamap is greater than one, 976 * (we checked above), it cannot drop to zero while it is locked. 977 */ 978 979 srcamap->am_ref--; 980 KASSERT(srcamap->am_ref > 0); 981 982 if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) { 983 srcamap->am_flags &= ~AMAP_SHARED; 984 } 985 #ifdef UVM_AMAP_PPREF 986 if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) { 987 amap_pp_adjref(srcamap, entry->aref.ar_pageoff, 988 len >> PAGE_SHIFT, -1); 989 } 990 #endif 991 992 amap_unlock(srcamap); 993 994 /* 995 * Install new amap. 996 */ 997 998 entry->aref.ar_pageoff = 0; 999 entry->aref.ar_amap = amap; 1000 entry->etype &= ~UVM_ET_NEEDSCOPY; 1001 UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0); 1002 } 1003 1004 /* 1005 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2) 1006 * 1007 * called during fork(2) when the parent process has a wired map 1008 * entry. in that case we want to avoid write-protecting pages 1009 * in the parent's map (e.g. like what you'd do for a COW page) 1010 * so we resolve the COW here. 1011 * 1012 * => assume parent's entry was wired, thus all pages are resident. 1013 * => assume pages that are loaned out (loan_count) are already mapped 1014 * read-only in all maps, and thus no need for us to worry about them 1015 * => assume both parent and child vm_map's are locked 1016 * => caller passes child's map/entry in to us 1017 * => if we run out of memory we will unlock the amap and sleep _with_ the 1018 * parent and child vm_map's locked(!). we have to do this since 1019 * we are in the middle of a fork(2) and we can't let the parent 1020 * map change until we are done copying all the map entrys. 1021 * => XXXCDC: out of memory should cause fork to fail, but there is 1022 * currently no easy way to do this (needs fix) 1023 */ 1024 1025 void 1026 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry) 1027 { 1028 struct vm_amap *amap = entry->aref.ar_amap; 1029 struct vm_anon *anon, *nanon; 1030 struct vm_page *pg, *npg; 1031 u_int lcv, slot; 1032 1033 /* 1034 * note that if we unlock the amap then we must ReStart the "lcv" for 1035 * loop because some other process could reorder the anon's in the 1036 * am_anon[] array on us while the lock is dropped. 1037 */ 1038 1039 ReStart: 1040 amap_lock(amap, RW_WRITER); 1041 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 1042 slot = amap->am_slots[lcv]; 1043 anon = amap->am_anon[slot]; 1044 KASSERT(anon->an_lock == amap->am_lock); 1045 1046 /* 1047 * If anon has only one reference - we must have already 1048 * copied it. 
This can happen if we needed to sleep waiting
1049 * for memory in a previous run through this loop. The new
1050 * page might even have been paged out, since it is not wired.
1051 */
1052
1053 if (anon->an_ref == 1) {
1054 KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
1055 continue;
1056 }
1057
1058 /*
1059 * The old page must be resident since the parent is wired.
1060 */
1061
1062 pg = anon->an_page;
1063 KASSERT(pg != NULL);
1064 KASSERT(pg->wire_count > 0);
1065
1066 /*
1067 * If the page is loaned then it must already be mapped
1068 * read-only and we don't need to copy it.
1069 */
1070
1071 if (pg->loan_count != 0) {
1072 continue;
1073 }
1074 KASSERT(pg->uanon == anon);
1075 KASSERT(pg->uobject == NULL);
1076
1077 /*
1078 * If the page is busy, then we have to unlock, wait for
1079 * it and then restart.
1080 */
1081
1082 if (pg->flags & PG_BUSY) {
1083 uvm_pagewait(pg, amap->am_lock, "cownow");
1084 goto ReStart;
1085 }
1086
1087 /*
1088 * Perform a copy-on-write.
1089 * First - get a new anon and a page.
1090 */
1091
1092 nanon = uvm_analloc();
1093 if (nanon) {
1094 nanon->an_lock = amap->am_lock;
1095 npg = uvm_pagealloc(NULL, 0, nanon, 0);
1096 } else {
1097 npg = NULL;
1098 }
1099 if (nanon == NULL || npg == NULL) {
1100 amap_unlock(amap);
1101 if (nanon) {
1102 nanon->an_lock = NULL;
1103 nanon->an_ref--;
1104 KASSERT(nanon->an_ref == 0);
1105 uvm_anfree(nanon);
1106 }
1107 uvm_wait("cownowpage");
1108 goto ReStart;
1109 }
1110
1111 /*
1112 * Copy the data and replace anon with the new one.
1113 * Also, set up its lock to share the amap's lock.
1114 */
1115
1116 uvm_pagecopy(pg, npg);
1117 anon->an_ref--;
1118 KASSERT(anon->an_ref > 0);
1119 amap->am_anon[slot] = nanon;
1120
1121 /*
1122 * Drop PG_BUSY on new page. Since its owner was write
1123 * locked all this time - it cannot be PG_RELEASED or
1124 * waited on.
1125 */
1126 uvm_pagelock(npg);
1127 uvm_pageactivate(npg);
1128 uvm_pageunlock(npg);
1129 npg->flags &= ~(PG_BUSY|PG_FAKE);
1130 UVM_PAGE_OWN(npg, NULL);
1131 }
1132 amap_unlock(amap);
1133 }
1134
1135 /*
1136 * amap_splitref: split a single reference into two separate references
1137 *
1138 * => called from uvm_map's clip routines
1139 * => origref's map should be locked
1140 * => origref->ar_amap should be unlocked (we will lock)
1141 */
1142 void
1143 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1144 {
1145 struct vm_amap *amap = origref->ar_amap;
1146 u_int leftslots;
1147
1148 KASSERT(splitref->ar_amap == origref->ar_amap);
1149 AMAP_B2SLOT(leftslots, offset);
1150 KASSERT(leftslots != 0);
1151
1152 amap_lock(amap, RW_WRITER);
1153 KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1154
1155 #ifdef UVM_AMAP_PPREF
1156 /* Establish ppref before we add a duplicate reference to the amap. */
1157 if (amap->am_ppref == NULL) {
1158 amap_pp_establish(amap, origref->ar_pageoff);
1159 }
1160 #endif
1161 /* Note: not a share reference. */
1162 amap->am_ref++;
1163 splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1164 amap_unlock(amap);
1165 }
1166
1167 #ifdef UVM_AMAP_PPREF
1168
1169 /*
1170 * amap_pp_establish: add a ppref array to an amap, if possible.
1171 *
1172 * => amap should be locked by caller.
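 *
 * The two pp_setreflen() calls below seed the array with (at most)
 * two chunks: slots [0, offset) get a reference count of zero and
 * [offset, am_nslot) carry the amap's current am_ref.  If the
 * KM_NOSLEEP allocation fails we just record PPREF_NONE and do
 * without per-page reference counts.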
1173 */ 1174 void 1175 amap_pp_establish(struct vm_amap *amap, vaddr_t offset) 1176 { 1177 const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref); 1178 1179 KASSERT(rw_write_held(amap->am_lock)); 1180 1181 amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP); 1182 if (amap->am_ppref == NULL) { 1183 /* Failure - just do not use ppref. */ 1184 amap->am_ppref = PPREF_NONE; 1185 return; 1186 } 1187 pp_setreflen(amap->am_ppref, 0, 0, offset); 1188 pp_setreflen(amap->am_ppref, offset, amap->am_ref, 1189 amap->am_nslot - offset); 1190 } 1191 1192 /* 1193 * amap_pp_adjref: adjust reference count to a part of an amap using the 1194 * per-page reference count array. 1195 * 1196 * => caller must check that ppref != PPREF_NONE before calling. 1197 * => map and amap must be locked. 1198 */ 1199 void 1200 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval) 1201 { 1202 int stopslot, *ppref, lcv, prevlcv; 1203 int ref, len, prevref, prevlen; 1204 1205 KASSERT(rw_write_held(amap->am_lock)); 1206 1207 stopslot = curslot + slotlen; 1208 ppref = amap->am_ppref; 1209 prevlcv = 0; 1210 1211 /* 1212 * Advance to the correct place in the array, fragment if needed. 1213 */ 1214 1215 for (lcv = 0 ; lcv < curslot ; lcv += len) { 1216 pp_getreflen(ppref, lcv, &ref, &len); 1217 if (lcv + len > curslot) { /* goes past start? */ 1218 pp_setreflen(ppref, lcv, ref, curslot - lcv); 1219 pp_setreflen(ppref, curslot, ref, len - (curslot -lcv)); 1220 len = curslot - lcv; /* new length of entry @ lcv */ 1221 } 1222 prevlcv = lcv; 1223 } 1224 if (lcv == 0) { 1225 /* 1226 * Ensure that the "prevref == ref" test below always 1227 * fails, since we are starting from the beginning of 1228 * the ppref array; that is, there is no previous chunk. 1229 */ 1230 prevref = -1; 1231 prevlen = 0; 1232 } else { 1233 pp_getreflen(ppref, prevlcv, &prevref, &prevlen); 1234 } 1235 1236 /* 1237 * Now adjust reference counts in range. Merge the first 1238 * changed entry with the last unchanged entry if possible. 1239 */ 1240 KASSERT(lcv == curslot); 1241 for (/* lcv already set */; lcv < stopslot ; lcv += len) { 1242 pp_getreflen(ppref, lcv, &ref, &len); 1243 if (lcv + len > stopslot) { /* goes past end? */ 1244 pp_setreflen(ppref, lcv, ref, stopslot - lcv); 1245 pp_setreflen(ppref, stopslot, ref, 1246 len - (stopslot - lcv)); 1247 len = stopslot - lcv; 1248 } 1249 ref += adjval; 1250 KASSERT(ref >= 0); 1251 KASSERT(ref <= amap->am_ref); 1252 if (lcv == prevlcv + prevlen && ref == prevref) { 1253 pp_setreflen(ppref, prevlcv, ref, prevlen + len); 1254 } else { 1255 pp_setreflen(ppref, lcv, ref, len); 1256 } 1257 if (ref == 0) { 1258 amap_wiperange(amap, lcv, len); 1259 } 1260 } 1261 } 1262 1263 /* 1264 * amap_wiperange: wipe out a range of an amap. 1265 * Note: different from amap_wipeout because the amap is kept intact. 1266 * 1267 * => Both map and amap must be locked by caller. 1268 */ 1269 void 1270 amap_wiperange(struct vm_amap *amap, int slotoff, int slots) 1271 { 1272 u_int lcv, stop, slotend; 1273 bool byanon; 1274 1275 KASSERT(rw_write_held(amap->am_lock)); 1276 1277 /* 1278 * We can either traverse the amap by am_anon or by am_slots. 1279 * Determine which way is less expensive. 
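 * When walking am_slots[], removing the current anon swaps the last
 * am_slots[] entry into the position being examined, so the loop
 * below re-checks the same index and shrinks "stop" instead of
 * advancing "lcv".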
1280 */ 1281 1282 if (slots < amap->am_nused) { 1283 byanon = true; 1284 lcv = slotoff; 1285 stop = slotoff + slots; 1286 slotend = 0; 1287 } else { 1288 byanon = false; 1289 lcv = 0; 1290 stop = amap->am_nused; 1291 slotend = slotoff + slots; 1292 } 1293 1294 while (lcv < stop) { 1295 struct vm_anon *anon; 1296 u_int curslot, ptr, last; 1297 1298 if (byanon) { 1299 curslot = lcv++; /* lcv advances here */ 1300 if (amap->am_anon[curslot] == NULL) 1301 continue; 1302 } else { 1303 curslot = amap->am_slots[lcv]; 1304 if (curslot < slotoff || curslot >= slotend) { 1305 lcv++; /* lcv advances here */ 1306 continue; 1307 } 1308 stop--; /* drop stop, since anon will be removed */ 1309 } 1310 anon = amap->am_anon[curslot]; 1311 KASSERT(anon->an_lock == amap->am_lock); 1312 1313 /* 1314 * Remove anon from the amap. 1315 */ 1316 1317 amap->am_anon[curslot] = NULL; 1318 ptr = amap->am_bckptr[curslot]; 1319 last = amap->am_nused - 1; 1320 if (ptr != last) { 1321 amap->am_slots[ptr] = amap->am_slots[last]; 1322 amap->am_bckptr[amap->am_slots[ptr]] = ptr; 1323 } 1324 amap->am_nused--; 1325 1326 /* 1327 * Drop its reference count. 1328 */ 1329 1330 KASSERT(anon->an_lock == amap->am_lock); 1331 if (--anon->an_ref == 0) { 1332 uvm_anfree(anon); 1333 } 1334 } 1335 } 1336 1337 #endif 1338 1339 #if defined(VMSWAP) 1340 1341 /* 1342 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots. 1343 * 1344 * => called with swap_syscall_lock held. 1345 * => note that we don't always traverse all anons. 1346 * eg. amaps being wiped out, released anons. 1347 * => return true if failed. 1348 */ 1349 1350 bool 1351 amap_swap_off(int startslot, int endslot) 1352 { 1353 struct vm_amap *am; 1354 struct vm_amap *am_next; 1355 struct vm_amap marker_prev; 1356 struct vm_amap marker_next; 1357 bool rv = false; 1358 1359 #if defined(DIAGNOSTIC) 1360 memset(&marker_prev, 0, sizeof(marker_prev)); 1361 memset(&marker_next, 0, sizeof(marker_next)); 1362 #endif /* defined(DIAGNOSTIC) */ 1363 1364 mutex_enter(&amap_list_lock); 1365 for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) { 1366 int i; 1367 1368 LIST_INSERT_BEFORE(am, &marker_prev, am_list); 1369 LIST_INSERT_AFTER(am, &marker_next, am_list); 1370 1371 /* amap_list_lock prevents the lock pointer from changing. */ 1372 if (!amap_lock_try(am, RW_WRITER)) { 1373 (void)kpause("amapswpo", false, 1, &amap_list_lock); 1374 am_next = LIST_NEXT(&marker_prev, am_list); 1375 if (am_next == &marker_next) { 1376 am_next = LIST_NEXT(am_next, am_list); 1377 } else { 1378 KASSERT(LIST_NEXT(am_next, am_list) == 1379 &marker_next); 1380 } 1381 LIST_REMOVE(&marker_prev, am_list); 1382 LIST_REMOVE(&marker_next, am_list); 1383 continue; 1384 } 1385 1386 mutex_exit(&amap_list_lock); 1387 1388 /* If am_nused == 0, the amap could be free - careful. 
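 * While paging the anon in we set AMAP_SWAPOFF so that a concurrent
 * amap_wipeout() backs off (see the check at the top of that
 * function); uvm_anon_pagein() drops the amap lock, so after
 * re-locking we finish any deferred wipeout ourselves if the
 * reference count has reached zero.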
*/ 1389 for (i = 0; i < am->am_nused; i++) { 1390 int slot; 1391 int swslot; 1392 struct vm_anon *anon; 1393 1394 slot = am->am_slots[i]; 1395 anon = am->am_anon[slot]; 1396 KASSERT(anon->an_lock == am->am_lock); 1397 1398 swslot = anon->an_swslot; 1399 if (swslot < startslot || endslot <= swslot) { 1400 continue; 1401 } 1402 1403 am->am_flags |= AMAP_SWAPOFF; 1404 1405 rv = uvm_anon_pagein(am, anon); 1406 amap_lock(am, RW_WRITER); 1407 1408 am->am_flags &= ~AMAP_SWAPOFF; 1409 if (amap_refs(am) == 0) { 1410 amap_wipeout(am); 1411 am = NULL; 1412 break; 1413 } 1414 if (rv) { 1415 break; 1416 } 1417 i = 0; 1418 } 1419 1420 if (am) { 1421 amap_unlock(am); 1422 } 1423 1424 mutex_enter(&amap_list_lock); 1425 KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next || 1426 LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) == 1427 &marker_next); 1428 am_next = LIST_NEXT(&marker_next, am_list); 1429 LIST_REMOVE(&marker_prev, am_list); 1430 LIST_REMOVE(&marker_next, am_list); 1431 } 1432 mutex_exit(&amap_list_lock); 1433 1434 return rv; 1435 } 1436 1437 #endif /* defined(VMSWAP) */ 1438 1439 /* 1440 * amap_lookup: look up a page in an amap. 1441 * 1442 * => amap should be locked by caller. 1443 */ 1444 struct vm_anon * 1445 amap_lookup(struct vm_aref *aref, vaddr_t offset) 1446 { 1447 struct vm_amap *amap = aref->ar_amap; 1448 struct vm_anon *an; 1449 u_int slot; 1450 1451 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1452 KASSERT(rw_lock_held(amap->am_lock)); 1453 1454 AMAP_B2SLOT(slot, offset); 1455 slot += aref->ar_pageoff; 1456 an = amap->am_anon[slot]; 1457 1458 UVMHIST_LOG(maphist, 1459 "<- done (amap=%#jx, offset=%#jx, result=%#jx)", 1460 (uintptr_t)amap, offset, (uintptr_t)an, 0); 1461 1462 KASSERT(slot < amap->am_nslot); 1463 KASSERT(an == NULL || an->an_ref != 0); 1464 KASSERT(an == NULL || an->an_lock == amap->am_lock); 1465 return an; 1466 } 1467 1468 /* 1469 * amap_lookups: look up a range of pages in an amap. 1470 * 1471 * => amap should be locked by caller. 1472 */ 1473 void 1474 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons, 1475 int npages) 1476 { 1477 struct vm_amap *amap = aref->ar_amap; 1478 u_int slot; 1479 1480 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1481 KASSERT(rw_lock_held(amap->am_lock)); 1482 1483 AMAP_B2SLOT(slot, offset); 1484 slot += aref->ar_pageoff; 1485 1486 UVMHIST_LOG(maphist, " slot=%u, npages=%d, nslot=%d", 1487 slot, npages, amap->am_nslot, 0); 1488 1489 KASSERT((slot + (npages - 1)) < amap->am_nslot); 1490 memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *)); 1491 1492 #if defined(DIAGNOSTIC) 1493 for (int i = 0; i < npages; i++) { 1494 struct vm_anon * const an = anons[i]; 1495 if (an == NULL) { 1496 continue; 1497 } 1498 KASSERT(an->an_ref != 0); 1499 KASSERT(an->an_lock == amap->am_lock); 1500 } 1501 #endif 1502 UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0); 1503 } 1504 1505 /* 1506 * amap_add: add (or replace) a page to an amap. 1507 * 1508 * => amap should be locked by caller. 1509 * => anon must have the lock associated with this amap. 
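 *
 * A rough sketch of a typical caller (hypothetical, modelled on the
 * amap_cow_now() pattern above; the real fault-time callers live in
 * uvm_fault.c):
 *
 *	amap_lock(amap, RW_WRITER);
 *	anon = uvm_analloc();
 *	if (anon != NULL) {
 *		anon->an_lock = amap->am_lock;
 *		amap_add(&entry->aref, va - entry->start, anon, false);
 *	}
 *	amap_unlock(amap);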
1510 */ 1511 void 1512 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon, 1513 bool replace) 1514 { 1515 struct vm_amap *amap = aref->ar_amap; 1516 u_int slot; 1517 1518 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1519 KASSERT(rw_write_held(amap->am_lock)); 1520 KASSERT(anon->an_lock == amap->am_lock); 1521 1522 AMAP_B2SLOT(slot, offset); 1523 slot += aref->ar_pageoff; 1524 KASSERT(slot < amap->am_nslot); 1525 1526 if (replace) { 1527 struct vm_anon *oanon = amap->am_anon[slot]; 1528 1529 KASSERT(oanon != NULL); 1530 if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) { 1531 pmap_page_protect(oanon->an_page, VM_PROT_NONE); 1532 /* 1533 * XXX: suppose page is supposed to be wired somewhere? 1534 */ 1535 } 1536 } else { 1537 KASSERT(amap->am_anon[slot] == NULL); 1538 KASSERT(amap->am_nused < amap->am_maxslot); 1539 amap->am_bckptr[slot] = amap->am_nused; 1540 amap->am_slots[amap->am_nused] = slot; 1541 amap->am_nused++; 1542 } 1543 amap->am_anon[slot] = anon; 1544 UVMHIST_LOG(maphist, 1545 "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)", 1546 (uintptr_t)amap, offset, (uintptr_t)anon, replace); 1547 } 1548 1549 /* 1550 * amap_unadd: remove a page from an amap. 1551 * 1552 * => amap should be locked by caller. 1553 */ 1554 void 1555 amap_unadd(struct vm_aref *aref, vaddr_t offset) 1556 { 1557 struct vm_amap *amap = aref->ar_amap; 1558 u_int slot, ptr, last; 1559 1560 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1561 KASSERT(rw_write_held(amap->am_lock)); 1562 1563 AMAP_B2SLOT(slot, offset); 1564 slot += aref->ar_pageoff; 1565 KASSERT(slot < amap->am_nslot); 1566 KASSERT(amap->am_anon[slot] != NULL); 1567 KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock); 1568 1569 amap->am_anon[slot] = NULL; 1570 ptr = amap->am_bckptr[slot]; 1571 1572 last = amap->am_nused - 1; 1573 if (ptr != last) { 1574 /* Move the last entry to keep the slots contiguous. */ 1575 amap->am_slots[ptr] = amap->am_slots[last]; 1576 amap->am_bckptr[amap->am_slots[ptr]] = ptr; 1577 } 1578 amap->am_nused--; 1579 UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)", 1580 (uintptr_t)amap, slot,0, 0); 1581 } 1582 1583 /* 1584 * amap_adjref_anons: adjust the reference count(s) on amap and its anons. 1585 */ 1586 static void 1587 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len, 1588 int refv, bool all) 1589 { 1590 1591 #ifdef UVM_AMAP_PPREF 1592 KASSERT(rw_write_held(amap->am_lock)); 1593 1594 /* 1595 * We must establish the ppref array before changing am_ref 1596 * so that the ppref values match the current amap refcount. 1597 */ 1598 1599 if (amap->am_ppref == NULL) { 1600 amap_pp_establish(amap, offset); 1601 } 1602 #endif 1603 1604 amap->am_ref += refv; 1605 1606 #ifdef UVM_AMAP_PPREF 1607 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 1608 amap_pp_adjref(amap, offset, len, refv); 1609 } 1610 #endif 1611 amap_unlock(amap); 1612 } 1613 1614 /* 1615 * amap_ref: gain a reference to an amap. 1616 * 1617 * => amap must not be locked (we will lock). 1618 * => "offset" and "len" are in units of pages. 1619 * => Called at fork time to gain the child's reference. 1620 */ 1621 void 1622 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags) 1623 { 1624 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1625 1626 amap_lock(amap, RW_WRITER); 1627 if (flags & AMAP_SHARED) { 1628 amap->am_flags |= AMAP_SHARED; 1629 } 1630 amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0); 1631 1632 UVMHIST_LOG(maphist,"<- done! 
amap=%#jx", (uintptr_t)amap, 0, 0, 0); 1633 } 1634 1635 /* 1636 * amap_unref: remove a reference to an amap. 1637 * 1638 * => All pmap-level references to this amap must be already removed. 1639 * => Called from uvm_unmap_detach(); entry is already removed from the map. 1640 * => We will lock amap, so it must be unlocked. 1641 */ 1642 void 1643 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all) 1644 { 1645 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1646 1647 amap_lock(amap, RW_WRITER); 1648 1649 UVMHIST_LOG(maphist," amap=%#jx refs=%d, nused=%d", 1650 (uintptr_t)amap, amap->am_ref, amap->am_nused, 0); 1651 KASSERT(amap->am_ref > 0); 1652 1653 if (amap->am_ref == 1) { 1654 1655 /* 1656 * If the last reference - wipeout and destroy the amap. 1657 */ 1658 amap->am_ref--; 1659 amap_wipeout(amap); 1660 UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0); 1661 return; 1662 } 1663 1664 /* 1665 * Otherwise, drop the reference count(s) on anons. 1666 */ 1667 1668 if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) { 1669 amap->am_flags &= ~AMAP_SHARED; 1670 } 1671 amap_adjref_anons(amap, offset, len, -1, all); 1672 1673 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1674 } 1675