/*	$NetBSD: uvm_amap.c,v 1.106 2012/03/30 02:25:24 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.106 2012/03/30 02:25:24 chs Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * cache for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap cache's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
static struct pool_cache uvm_amap_cache;
static kmutex_t amap_list_lock;
static LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static inline void
amap_list_insert(struct vm_amap *amap)
{

        mutex_enter(&amap_list_lock);
        LIST_INSERT_HEAD(&amap_list, amap, am_list);
        mutex_exit(&amap_list_lock);
}

static inline void
amap_list_remove(struct vm_amap *amap)
{

        mutex_enter(&amap_list_lock);
        LIST_REMOVE(amap, am_list);
        mutex_exit(&amap_list_lock);
}

static int
amap_roundup_slots(int slots)
{

        return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
}

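/*
 * Example (illustrative numbers only; the actual rounding depends on
 * the kmem allocator's bucket sizes): with 4-byte ints and an allocator
 * that rounds a 40-byte request up to 64 bytes, amap_roundup_slots(10)
 * would return 64 / 4 = 16.  The extra slots become am_maxslot padding
 * that amap_extend() can later consume without reallocating.
 */
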
#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the allocation of the array fails
 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this encoding allows one int to contain the ref count for the whole
 * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * whether we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */
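
/*
 * A worked decode of the sample array above (not from the original
 * source): ppref[0] is -3, which is negative, so the chunk spans more
 * than one slot; pp_getreflen() below yields ref = (-3 * -1) - 1 = 2
 * and len = ppref[1] = 4, i.e. four pages each with a reference count
 * of 2.  Re-encoding with pp_setreflen(ppref, 0, 2, 4) stores
 * (2 + 1) * -1 = -3 back into ppref[0] and 4 into ppref[1].
 */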

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

        if (ppref[offset] > 0) {                /* chunk size must be 1 */
                *refp = ppref[offset] - 1;      /* don't forget to adjust */
                *lenp = 1;
        } else {
                *refp = (ppref[offset] * -1) - 1;
                *lenp = ppref[offset+1];
        }
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
        if (len == 0)
                return;
        if (len == 1) {
                ppref[offset] = ref + 1;
        } else {
                ppref[offset] = (ref + 1) * -1;
                ppref[offset+1] = len;
        }
}
#endif /* UVM_AMAP_PPREF */

/*
 * amap_alloc1: allocate an amap, but do not initialise the overlay.
 *
 * => Note: lock is not set.
 */
static struct vm_amap *
amap_alloc1(int slots, int padslots, int flags)
{
        const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
        const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
        struct vm_amap *amap;
        int totalslots;

        amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
        if (amap == NULL) {
                return NULL;
        }
        totalslots = amap_roundup_slots(slots + padslots);
        amap->am_lock = NULL;
        amap->am_ref = 1;
        amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
        amap->am_ppref = NULL;
#endif
        amap->am_maxslot = totalslots;
        amap->am_nslot = slots;
        amap->am_nused = 0;

        /*
         * Note: since these allocations are likely to be big, we expect
         * to reduce memory fragmentation by allocating them in separate
         * blocks.
         */
        amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
        if (amap->am_slots == NULL)
                goto fail1;

        amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
        if (amap->am_bckptr == NULL)
                goto fail2;

        amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
            kmflags);
        if (amap->am_anon == NULL)
                goto fail3;

        return amap;

fail3:
        kmem_free(amap->am_bckptr, totalslots * sizeof(int));
fail2:
        kmem_free(amap->am_slots, totalslots * sizeof(int));
fail1:
        pool_cache_put(&uvm_amap_cache, amap);

        /*
         * XXX hack to tell the pagedaemon how many pages we need,
         * since we may need more than it would normally free.
         */
        if (nowait) {
                extern u_int uvm_extrapages;
                atomic_add_int(&uvm_extrapages,
                    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
                    totalslots) >> PAGE_SHIFT);
        }
        return NULL;
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
        struct vm_amap *amap;
        int slots, padslots;
        UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

        AMAP_B2SLOT(slots, sz);
        AMAP_B2SLOT(padslots, padsz);

        amap = amap_alloc1(slots, padslots, waitf);
        if (amap) {
                memset(amap->am_anon, 0,
                    amap->am_maxslot * sizeof(struct vm_anon *));
                amap->am_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                amap_list_insert(amap);
        }

        UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
        return(amap);
}

/*
 * uvm_amap_init: initialize the amap system.
 */
void
uvm_amap_init(void)
{

        mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);

        pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
            "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
        int slots;

        UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

        KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
        KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
        if (amap->am_lock != NULL) {
                KASSERT(!mutex_owned(amap->am_lock));
                mutex_obj_free(amap->am_lock);
        }
        slots = amap->am_maxslot;
        kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
        kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
        kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
#ifdef UVM_AMAP_PPREF
        if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
                kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
#endif
        pool_cache_put(&uvm_amap_cache, amap);
        UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
        struct vm_amap *amap = entry->aref.ar_amap;
        int slotoff = entry->aref.ar_pageoff;
        int slotmapped, slotadd, slotneed, slotadded, slotalloc;
        int slotadj, slotspace;
        int oldnslots;
#ifdef UVM_AMAP_PPREF
        int *newppref, *oldppref;
#endif
        int i, *newsl, *newbck, *oldsl, *oldbck;
        struct vm_anon **newover, **oldover, *tofree;
        const km_flag_t kmflags =
            (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;

        UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

        UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
            entry, addsize, flags, 0);

        /*
         * first, determine how many slots we need in the amap.  don't
         * forget that ar_pageoff could be non-zero: this means that
         * there are some unused slots before us in the amap.
         */

        amap_lock(amap);
        KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
        AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
        AMAP_B2SLOT(slotadd, addsize);                  /* slots to add */
        if (flags & AMAP_EXTEND_FORWARDS) {
                slotneed = slotoff + slotmapped + slotadd;
                slotadj = 0;
                slotspace = 0;
        }
        else {
                slotneed = slotadd + slotmapped;
                slotadj = slotadd - slotoff;
                slotspace = amap->am_maxslot - slotmapped;
        }
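
        /*
         * Illustration (hypothetical numbers): if the entry maps 4 slots
         * starting at ar_pageoff 2 and we extend forwards by 3 pages,
         * slotneed = 2 + 4 + 3 = 9.  Extending the same entry backwards
         * by 3 pages instead gives slotneed = 3 + 4 = 7 and
         * slotadj = 3 - 2 = 1, i.e. one more free slot is needed in front
         * of the mapping than ar_pageoff currently provides.
         */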
        tofree = NULL;

        /*
         * case 1: we already have enough slots in the map and thus
         * only need to bump the reference counts on the slots we are
         * adding.
         */

        if (flags & AMAP_EXTEND_FORWARDS) {
                if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
                        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                                amap_pp_adjref(amap, slotoff + slotmapped,
                                    slotadd, 1, &tofree);
                        }
#endif
                        uvm_anon_freelst(amap, tofree);
                        UVMHIST_LOG(maphist,
                            "<- done (case 1f), amap = 0x%x, sltneed=%d",
                            amap, slotneed, 0, 0);
                        return 0;
                }
        } else {
                if (slotadj <= 0) {
                        slotoff -= slotadd;
                        entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
                        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                                amap_pp_adjref(amap, slotoff, slotadd, 1,
                                    &tofree);
                        }
#endif
                        uvm_anon_freelst(amap, tofree);
                        UVMHIST_LOG(maphist,
                            "<- done (case 1b), amap = 0x%x, sltneed=%d",
                            amap, slotneed, 0, 0);
                        return 0;
                }
        }

        /*
         * case 2: we pre-allocated slots for use and we just need to
         * bump nslot up to account for these slots.
         */

        if (amap->am_maxslot >= slotneed) {
                if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
                        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                                if ((slotoff + slotmapped) < amap->am_nslot)
                                        amap_pp_adjref(amap,
                                            slotoff + slotmapped,
                                            (amap->am_nslot -
                                            (slotoff + slotmapped)), 1,
                                            &tofree);
                                pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
                                    slotneed - amap->am_nslot);
                        }
#endif
                        amap->am_nslot = slotneed;
                        uvm_anon_freelst(amap, tofree);

                        /*
                         * no need to zero am_anon since that was done at
                         * alloc time and we never shrink an allocation.
                         */

                        UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
                            "slotneed=%d", amap, slotneed, 0, 0);
                        return 0;
                } else {
#ifdef UVM_AMAP_PPREF
                        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                                /*
                                 * Slide up the ref counts on the pages that
                                 * are actually in use.
                                 */
                                memmove(amap->am_ppref + slotspace,
                                    amap->am_ppref + slotoff,
                                    slotmapped * sizeof(int));
                                /*
                                 * Mark the (adjusted) gap at the front as
                                 * referenced/not referenced.
                                 */
                                pp_setreflen(amap->am_ppref,
                                    0, 0, slotspace - slotadd);
                                pp_setreflen(amap->am_ppref,
                                    slotspace - slotadd, 1, slotadd);
                        }
#endif

                        /*
                         * Slide the anon pointers up and clear out
                         * the space we just made.
                         */
                        memmove(amap->am_anon + slotspace,
                            amap->am_anon + slotoff,
                            slotmapped * sizeof(struct vm_anon*));
                        memset(amap->am_anon + slotoff, 0,
                            (slotspace - slotoff) * sizeof(struct vm_anon *));

                        /*
                         * Slide the backpointers up, but don't bother
                         * wiping out the old slots.
                         */
                        memmove(amap->am_bckptr + slotspace,
                            amap->am_bckptr + slotoff,
                            slotmapped * sizeof(int));

                        /*
                         * Adjust all the useful active slot numbers.
                         */
                        for (i = 0; i < amap->am_nused; i++)
                                amap->am_slots[i] += (slotspace - slotoff);

                        /*
                         * We just filled all the empty space in the
                         * front of the amap by activating a few new
                         * slots.
                         */
                        amap->am_nslot = amap->am_maxslot;
                        entry->aref.ar_pageoff = slotspace - slotadd;
                        amap_unlock(amap);

                        UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
                            "slotneed=%d", amap, slotneed, 0, 0);
                        return 0;
                }
        }

        /*
         * Case 3: we need to allocate a new amap and copy all the amap
         * data over from the old amap to the new one.  Drop the lock
         * before performing allocation.
         *
         * Note: since these allocations are likely to be big, we expect
         * to reduce memory fragmentation by allocating them in separate
         * blocks.
         */

        amap_unlock(amap);

        if (slotneed >= UVM_AMAP_LARGE) {
                return E2BIG;
        }

        slotalloc = amap_roundup_slots(slotneed);
#ifdef UVM_AMAP_PPREF
        newppref = NULL;
        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                /* Will be handled later if it fails. */
                newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
        }
#endif
        newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
        newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
        newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
        if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
                if (newppref != NULL) {
                        kmem_free(newppref, slotalloc * sizeof(*newppref));
                }
#endif
                if (newsl != NULL) {
                        kmem_free(newsl, slotalloc * sizeof(*newsl));
                }
                if (newbck != NULL) {
                        kmem_free(newbck, slotalloc * sizeof(*newbck));
                }
                if (newover != NULL) {
                        kmem_free(newover, slotalloc * sizeof(*newover));
                }
                return ENOMEM;
        }
        amap_lock(amap);
        KASSERT(amap->am_maxslot < slotneed);

        /*
         * Copy everything over to the newly allocated areas.
         */

        slotadded = slotalloc - amap->am_nslot;
        if (!(flags & AMAP_EXTEND_FORWARDS))
                slotspace = slotalloc - slotmapped;

        /* do am_slots */
        oldsl = amap->am_slots;
        if (flags & AMAP_EXTEND_FORWARDS)
                memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
        else
                for (i = 0; i < amap->am_nused; i++)
                        newsl[i] = oldsl[i] + slotspace - slotoff;
        amap->am_slots = newsl;

        /* do am_anon */
        oldover = amap->am_anon;
        if (flags & AMAP_EXTEND_FORWARDS) {
                memcpy(newover, oldover,
                    sizeof(struct vm_anon *) * amap->am_nslot);
                memset(newover + amap->am_nslot, 0,
                    sizeof(struct vm_anon *) * slotadded);
        } else {
                memcpy(newover + slotspace, oldover + slotoff,
                    sizeof(struct vm_anon *) * slotmapped);
                memset(newover, 0,
                    sizeof(struct vm_anon *) * slotspace);
        }
        amap->am_anon = newover;

        /* do am_bckptr */
        oldbck = amap->am_bckptr;
        if (flags & AMAP_EXTEND_FORWARDS)
                memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
        else
                memcpy(newbck + slotspace, oldbck + slotoff,
                    sizeof(int) * slotmapped);
        amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
        /* do ppref */
        oldppref = amap->am_ppref;
        if (newppref) {
                if (flags & AMAP_EXTEND_FORWARDS) {
                        memcpy(newppref, oldppref,
                            sizeof(int) * amap->am_nslot);
                        memset(newppref + amap->am_nslot, 0,
                            sizeof(int) * slotadded);
                } else {
                        memcpy(newppref + slotspace, oldppref + slotoff,
                            sizeof(int) * slotmapped);
                }
                amap->am_ppref = newppref;
                if ((flags & AMAP_EXTEND_FORWARDS) &&
                    (slotoff + slotmapped) < amap->am_nslot)
                        amap_pp_adjref(amap, slotoff + slotmapped,
                            (amap->am_nslot - (slotoff + slotmapped)), 1,
                            &tofree);
                if (flags & AMAP_EXTEND_FORWARDS)
                        pp_setreflen(newppref, amap->am_nslot, 1,
                            slotneed - amap->am_nslot);
                else {
                        pp_setreflen(newppref, 0, 0,
                            slotalloc - slotneed);
                        pp_setreflen(newppref, slotalloc - slotneed, 1,
                            slotneed - slotmapped);
                }
        } else {
                if (amap->am_ppref)
                        amap->am_ppref = PPREF_NONE;
        }
#endif

        /* update master values */
        if (flags & AMAP_EXTEND_FORWARDS)
                amap->am_nslot = slotneed;
        else {
                entry->aref.ar_pageoff = slotspace - slotadd;
                amap->am_nslot = slotalloc;
        }
        oldnslots = amap->am_maxslot;
        amap->am_maxslot = slotalloc;

        uvm_anon_freelst(amap, tofree);

        kmem_free(oldsl, oldnslots * sizeof(*oldsl));
        kmem_free(oldbck, oldnslots * sizeof(*oldbck));
        kmem_free(oldover, oldnslots * sizeof(*oldover));
#ifdef UVM_AMAP_PPREF
        if (oldppref && oldppref != PPREF_NONE)
                kmem_free(oldppref, oldnslots * sizeof(*oldppref));
#endif
        UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
            amap, slotneed, 0, 0);
        return 0;
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
        struct vm_amap *amap = entry->aref.ar_amap;
        u_int slots, lcv, slot, stop;
        struct vm_anon *anon;

        KASSERT(mutex_owned(amap->am_lock));

        AMAP_B2SLOT(slots, (entry->end - entry->start));
        stop = entry->aref.ar_pageoff + slots;

        if (slots < amap->am_nused) {
                /*
                 * Cheaper to traverse am_anon.
                 */
                for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
                        anon = amap->am_anon[lcv];
                        if (anon == NULL) {
                                continue;
                        }
                        if (anon->an_page) {
                                pmap_page_protect(anon->an_page, prot);
                        }
                }
                return;
        }

        /*
         * Cheaper to traverse am_slots.
         */
        for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
                slot = amap->am_slots[lcv];
                if (slot < entry->aref.ar_pageoff || slot >= stop) {
                        continue;
                }
                anon = amap->am_anon[slot];
                if (anon->an_page) {
                        pmap_page_protect(anon->an_page, prot);
                }
        }
}

/*
 * amap_wipeout: wipe out all anons in an amap; then free the amap!
 *
 * => Called from amap_unref(), when reference count drops to zero.
 * => amap must be locked.
 */

void
amap_wipeout(struct vm_amap *amap)
{
        struct vm_anon *tofree = NULL;
        u_int lcv;

        UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
        UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);

        KASSERT(mutex_owned(amap->am_lock));
        KASSERT(amap->am_ref == 0);

        if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
                /*
                 * Note: amap_swap_off() will call us again.
                 */
                amap_unlock(amap);
                return;
        }
        amap_list_remove(amap);

        for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
                struct vm_anon *anon;
                u_int slot;

                slot = amap->am_slots[lcv];
                anon = amap->am_anon[slot];
                KASSERT(anon != NULL && anon->an_ref != 0);

                KASSERT(anon->an_lock == amap->am_lock);
                UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
                    anon->an_ref, 0, 0);

                /*
                 * Drop the reference.  Defer freeing.
                 */

                if (--anon->an_ref == 0) {
                        anon->an_link = tofree;
                        tofree = anon;
                }
                if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
                        preempt();
                }
        }

        /*
         * Finally, destroy the amap.
         */

        amap->am_nused = 0;
        uvm_anon_freelst(amap, tofree);
        amap_free(amap);
        UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
    vaddr_t startva, vaddr_t endva)
{
        const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
        struct vm_amap *amap, *srcamap;
        struct vm_anon *tofree;
        u_int slots, lcv;
        vsize_t len;

        UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
        UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
                    map, entry, flags, 0);

        KASSERT(map != kernel_map);	/* we use nointr pool */

        srcamap = entry->aref.ar_amap;
        len = entry->end - entry->start;

        /*
         * Is there an amap to copy?  If not, create one.
         */

        if (srcamap == NULL) {
                const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;

                /*
                 * Check to see if we have a large amap that we can
                 * chunk.  We align startva/endva to chunk-sized
                 * boundaries and then clip to them.
                 */

                if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
                        vsize_t chunksize;

                        /* Convert slots to bytes. */
                        chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
                        startva = (startva / chunksize) * chunksize;
                        endva = roundup(endva, chunksize);
                        UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
                            " to 0x%x->0x%x", entry->start, entry->end, startva,
                            endva);
                        UVM_MAP_CLIP_START(map, entry, startva);

                        /* Watch out for endva wrap-around! */
                        if (endva >= startva) {
                                UVM_MAP_CLIP_END(map, entry, endva);
                        }
                }
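
                /*
                 * Illustration (hypothetical values): with 4 KB pages and
                 * UVM_AMAP_CHUNK slots per chunk, say 16, chunksize is
                 * 64 KB, so startva is rounded down and endva rounded up
                 * to 64 KB boundaries before clipping.  This keeps one
                 * huge mapping from being backed by a single enormous
                 * amap up front.
                 */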

                if ((flags & AMAP_COPY_NOMERGE) == 0 &&
                    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
                        return;
                }

                UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
                    entry->start, entry->end, 0, 0);

                /*
                 * Allocate an initialised amap and install it.
                 * Note: we must update the length after clipping.
                 */
                len = entry->end - entry->start;
                entry->aref.ar_pageoff = 0;
                entry->aref.ar_amap = amap_alloc(len, 0, waitf);
                if (entry->aref.ar_amap != NULL) {
                        entry->etype &= ~UVM_ET_NEEDSCOPY;
                }
                return;
        }

        /*
         * First, check and see if we are the only map entry referencing
         * the amap we currently have.  If so, then just take it over instead
         * of copying it.  Note that we are reading am_ref without the lock
         * held as the value can only be one if we have the only reference
         * to the amap (via our locked map).  If the value is greater than
         * one, then allocate the new amap and re-check the value.
         */

        if (srcamap->am_ref == 1) {
                entry->etype &= ~UVM_ET_NEEDSCOPY;
                UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
                    0, 0, 0, 0);
                return;
        }

        UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
            srcamap, srcamap->am_ref, 0, 0);

        /*
         * Allocate a new amap (note: not initialised, no lock set, etc).
         */

        AMAP_B2SLOT(slots, len);
        amap = amap_alloc1(slots, 0, waitf);
        if (amap == NULL) {
                UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
                return;
        }

        amap_lock(srcamap);

        /*
         * Re-check the reference count with the lock held.  If it has
         * dropped to one - we can take over the existing amap.
         */

        if (srcamap->am_ref == 1) {
                /* Just take over the existing amap. */
                entry->etype &= ~UVM_ET_NEEDSCOPY;
                amap_unlock(srcamap);
                /* Destroy the new (unused) amap. */
                amap->am_ref--;
                amap_free(amap);
                return;
        }

        /*
         * Copy the slots.  Zero the padded part.
         */

        UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
        for (lcv = 0 ; lcv < slots; lcv++) {
                amap->am_anon[lcv] =
                    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
                if (amap->am_anon[lcv] == NULL)
                        continue;
                KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
                KASSERT(amap->am_anon[lcv]->an_ref > 0);
                KASSERT(amap->am_nused < amap->am_maxslot);
                amap->am_anon[lcv]->an_ref++;
                amap->am_bckptr[lcv] = amap->am_nused;
                amap->am_slots[amap->am_nused] = lcv;
                amap->am_nused++;
        }
        memset(&amap->am_anon[lcv], 0,
            (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

        /*
         * Drop our reference to the old amap (srcamap) and unlock.
         * Since the reference count on srcamap is greater than one
         * (we checked above), it cannot drop to zero while it is locked.
         */
        srcamap->am_ref--;
        KASSERT(srcamap->am_ref > 0);

        if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
                srcamap->am_flags &= ~AMAP_SHARED;
        }
        tofree = NULL;
#ifdef UVM_AMAP_PPREF
        if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
                amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
                    len >> PAGE_SHIFT, -1, &tofree);
        }
#endif

        /*
         * If we referenced any anons, then share the source amap's lock.
         * Otherwise, we have nothing in common, so allocate a new one.
         */

        KASSERT(amap->am_lock == NULL);
        if (amap->am_nused != 0) {
                amap->am_lock = srcamap->am_lock;
                mutex_obj_hold(amap->am_lock);
        }
        uvm_anon_freelst(srcamap, tofree);

        if (amap->am_lock == NULL) {
                amap->am_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
        }
        amap_list_insert(amap);

        /*
         * Install new amap.
         */

        entry->aref.ar_pageoff = 0;
        entry->aref.ar_amap = amap;
        entry->etype &= ~UVM_ET_NEEDSCOPY;
        UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).    we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
        struct vm_amap *amap = entry->aref.ar_amap;
        struct vm_anon *anon, *nanon;
        struct vm_page *pg, *npg;
        u_int lcv, slot;

        /*
         * note that if we unlock the amap then we must restart the "lcv"
         * for-loop (at the ReStart label) because some other process could
         * reorder the anons in the am_anon[] array on us while the lock
         * is dropped.
         */

ReStart:
        amap_lock(amap);
        for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
                slot = amap->am_slots[lcv];
                anon = amap->am_anon[slot];
                KASSERT(anon->an_lock == amap->am_lock);

                /*
                 * If anon has only one reference - we must have already
                 * copied it.  This can happen if we needed to sleep waiting
                 * for memory in a previous run through this loop.  The new
                 * page might even have been paged out, since it is not wired.
                 */

                if (anon->an_ref == 1) {
                        KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
                        continue;
                }

                /*
                 * The old page must be resident since the parent is wired.
                 */

                pg = anon->an_page;
                KASSERT(pg != NULL);
                KASSERT(pg->wire_count > 0);

                /*
                 * If the page is loaned then it must already be mapped
                 * read-only and we don't need to copy it.
                 */

                if (pg->loan_count != 0) {
                        continue;
                }
                KASSERT(pg->uanon == anon && pg->uobject == NULL);

                /*
                 * If the page is busy, then we have to unlock, wait for
                 * it and then restart.
                 */

                if (pg->flags & PG_BUSY) {
                        pg->flags |= PG_WANTED;
                        UVM_UNLOCK_AND_WAIT(pg, amap->am_lock, false,
                            "cownow", 0);
                        goto ReStart;
                }

                /*
                 * Perform a copy-on-write.
                 * First - get a new anon and a page.
                 */

                nanon = uvm_analloc();
                if (nanon) {
                        nanon->an_lock = amap->am_lock;
                        npg = uvm_pagealloc(NULL, 0, nanon, 0);
                } else {
                        npg = NULL;
                }
                if (nanon == NULL || npg == NULL) {
                        amap_unlock(amap);
                        if (nanon) {
                                nanon->an_lock = NULL;
                                nanon->an_ref--;
                                KASSERT(nanon->an_ref == 0);
                                uvm_anon_free(nanon);
                        }
                        uvm_wait("cownowpage");
                        goto ReStart;
                }

                /*
                 * Copy the data and replace anon with the new one.
                 * Also, set up its lock (shared with the amap's lock).
                 */

                uvm_pagecopy(pg, npg);
                anon->an_ref--;
                KASSERT(anon->an_ref > 0);
                amap->am_anon[slot] = nanon;

                /*
                 * Drop PG_BUSY on new page.  Since its owner was locked all
                 * this time - it cannot be PG_RELEASED or PG_WANTED.
                 */

                mutex_enter(&uvm_pageqlock);
                uvm_pageactivate(npg);
                mutex_exit(&uvm_pageqlock);
                npg->flags &= ~(PG_BUSY|PG_FAKE);
                UVM_PAGE_OWN(npg, NULL);
        }
        amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
        struct vm_amap *amap = origref->ar_amap;
        u_int leftslots;

        KASSERT(splitref->ar_amap == origref->ar_amap);
        AMAP_B2SLOT(leftslots, offset);
        KASSERT(leftslots != 0);

        amap_lock(amap);
        KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);

#ifdef UVM_AMAP_PPREF
        /* Establish ppref before we add a duplicate reference to the amap. */
        if (amap->am_ppref == NULL) {
                amap_pp_establish(amap, origref->ar_pageoff);
        }
#endif
        /* Note: not a shared reference. */
        amap->am_ref++;
        splitref->ar_pageoff = origref->ar_pageoff + leftslots;
        amap_unlock(amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible.
 *
 * => amap should be locked by caller.
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
        const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);

        KASSERT(mutex_owned(amap->am_lock));

        amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
        if (amap->am_ppref == NULL) {
                /* Failure - just do not use ppref. */
                amap->am_ppref = PPREF_NONE;
                return;
        }
        pp_setreflen(amap->am_ppref, 0, 0, offset);
        pp_setreflen(amap->am_ppref, offset, amap->am_ref,
            amap->am_nslot - offset);
}
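
/*
 * Illustration (hypothetical numbers, using the ppref encoding described
 * above): for an amap with am_nslot = 8, am_ref = 2 and offset = 3, the
 * two pp_setreflen() calls above produce ppref[0] = -1, ppref[1] = 3
 * (ref 0, length 3) and ppref[3] = -3, ppref[4] = 5 (ref 2, length 5).
 */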

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked.
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval,
    struct vm_anon **tofree)
{
        int stopslot, *ppref, lcv, prevlcv;
        int ref, len, prevref, prevlen;

        KASSERT(mutex_owned(amap->am_lock));

        stopslot = curslot + slotlen;
        ppref = amap->am_ppref;
        prevlcv = 0;

        /*
         * Advance to the correct place in the array, fragment if needed.
         */

        for (lcv = 0 ; lcv < curslot ; lcv += len) {
                pp_getreflen(ppref, lcv, &ref, &len);
                if (lcv + len > curslot) {     /* goes past start? */
                        pp_setreflen(ppref, lcv, ref, curslot - lcv);
                        pp_setreflen(ppref, curslot, ref, len - (curslot - lcv));
                        len = curslot - lcv;   /* new length of entry @ lcv */
                }
                prevlcv = lcv;
        }
        if (lcv == 0) {
                /*
                 * Ensure that the "prevref == ref" test below always
                 * fails, since we are starting from the beginning of
                 * the ppref array; that is, there is no previous chunk.
                 */
                prevref = -1;
                prevlen = 0;
        } else {
                pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
        }

        /*
         * Now adjust reference counts in range.  Merge the first
         * changed entry with the last unchanged entry if possible.
         */
        KASSERT(lcv == curslot);
        for (/* lcv already set */; lcv < stopslot ; lcv += len) {
                pp_getreflen(ppref, lcv, &ref, &len);
                if (lcv + len > stopslot) {     /* goes past end? */
                        pp_setreflen(ppref, lcv, ref, stopslot - lcv);
                        pp_setreflen(ppref, stopslot, ref,
                            len - (stopslot - lcv));
                        len = stopslot - lcv;
                }
                ref += adjval;
                KASSERT(ref >= 0);
                KASSERT(ref <= amap->am_ref);
                if (lcv == prevlcv + prevlen && ref == prevref) {
                        pp_setreflen(ppref, prevlcv, ref, prevlen + len);
                } else {
                        pp_setreflen(ppref, lcv, ref, len);
                }
                if (ref == 0) {
                        amap_wiperange(amap, lcv, len, tofree);
                }
        }
}
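
/*
 * Worked example (hypothetical numbers): starting from a single chunk
 * covering slots 0..7 with ref 1 (ppref[0] = -2, ppref[1] = 8), the call
 * amap_pp_adjref(amap, 2, 3, 1, &tofree) first fragments the chunk into
 * (ref 1, len 2) and (ref 1, len 6), then splits again at the stop slot
 * and bumps only the middle piece, leaving (ref 1, len 2), (ref 2, len 3),
 * (ref 1, len 3).
 */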

/*
 * amap_wiperange: wipe out a range of an amap.
 * Note: different from amap_wipeout because the amap is kept intact.
 *
 * => Both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots,
    struct vm_anon **tofree)
{
        u_int lcv, stop, slotend;
        bool byanon;

        KASSERT(mutex_owned(amap->am_lock));

        /*
         * We can either traverse the amap by am_anon or by am_slots.
         * Determine which way is less expensive.
         */

        if (slots < amap->am_nused) {
                byanon = true;
                lcv = slotoff;
                stop = slotoff + slots;
                slotend = 0;
        } else {
                byanon = false;
                lcv = 0;
                stop = amap->am_nused;
                slotend = slotoff + slots;
        }

        while (lcv < stop) {
                struct vm_anon *anon;
                u_int curslot, ptr, last;

                if (byanon) {
                        curslot = lcv++;	/* lcv advances here */
                        if (amap->am_anon[curslot] == NULL)
                                continue;
                } else {
                        curslot = amap->am_slots[lcv];
                        if (curslot < slotoff || curslot >= slotend) {
                                lcv++;		/* lcv advances here */
                                continue;
                        }
                        stop--;	/* drop stop, since anon will be removed */
                }
                anon = amap->am_anon[curslot];
                KASSERT(anon->an_lock == amap->am_lock);

                /*
                 * Remove anon from the amap.
                 */

                amap->am_anon[curslot] = NULL;
                ptr = amap->am_bckptr[curslot];
                last = amap->am_nused - 1;
                if (ptr != last) {
                        amap->am_slots[ptr] = amap->am_slots[last];
                        amap->am_bckptr[amap->am_slots[ptr]] = ptr;
                }
                amap->am_nused--;

                /*
                 * Drop its reference count.
                 */

                KASSERT(anon->an_lock == amap->am_lock);
                if (--anon->an_ref == 0) {
                        /*
                         * Eliminated the last reference to an anon - defer
                         * freeing as uvm_anon_freelst() will unlock the amap.
                         */
                        anon->an_link = *tofree;
                        *tofree = anon;
                }
        }
}

#endif

#if defined(VMSWAP)

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons, e.g. when amaps
 *    are being wiped out, or when anons have been released.
 * => return true if failed.
 */

bool
amap_swap_off(int startslot, int endslot)
{
        struct vm_amap *am;
        struct vm_amap *am_next;
        struct vm_amap marker_prev;
        struct vm_amap marker_next;
        bool rv = false;

#if defined(DIAGNOSTIC)
        memset(&marker_prev, 0, sizeof(marker_prev));
        memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */
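
        /*
         * Walk amap_list with marker entries inserted around the current
         * amap.  The markers keep our place whenever we have to drop
         * amap_list_lock (to preempt, or while an anon is paged in), even
         * if the current amap is freed behind our back in the meantime.
         */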

        mutex_enter(&amap_list_lock);
        for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
                int i;

                LIST_INSERT_BEFORE(am, &marker_prev, am_list);
                LIST_INSERT_AFTER(am, &marker_next, am_list);

                if (!amap_lock_try(am)) {
                        mutex_exit(&amap_list_lock);
                        preempt();
                        mutex_enter(&amap_list_lock);
                        am_next = LIST_NEXT(&marker_prev, am_list);
                        if (am_next == &marker_next) {
                                am_next = LIST_NEXT(am_next, am_list);
                        } else {
                                KASSERT(LIST_NEXT(am_next, am_list) ==
                                    &marker_next);
                        }
                        LIST_REMOVE(&marker_prev, am_list);
                        LIST_REMOVE(&marker_next, am_list);
                        continue;
                }

                mutex_exit(&amap_list_lock);

                if (am->am_nused <= 0) {
                        amap_unlock(am);
                        goto next;
                }

                for (i = 0; i < am->am_nused; i++) {
                        int slot;
                        int swslot;
                        struct vm_anon *anon;

                        slot = am->am_slots[i];
                        anon = am->am_anon[slot];
                        KASSERT(anon->an_lock == am->am_lock);

                        swslot = anon->an_swslot;
                        if (swslot < startslot || endslot <= swslot) {
                                continue;
                        }

                        am->am_flags |= AMAP_SWAPOFF;

                        rv = uvm_anon_pagein(am, anon);
                        amap_lock(am);

                        am->am_flags &= ~AMAP_SWAPOFF;
                        if (amap_refs(am) == 0) {
                                amap_wipeout(am);
                                am = NULL;
                                break;
                        }
                        if (rv) {
                                break;
                        }
                        i = 0;
                }

                if (am) {
                        amap_unlock(am);
                }

next:
                mutex_enter(&amap_list_lock);
                KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
                    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
                    &marker_next);
                am_next = LIST_NEXT(&marker_next, am_list);
                LIST_REMOVE(&marker_prev, am_list);
                LIST_REMOVE(&marker_next, am_list);
        }
        mutex_exit(&amap_list_lock);

        return rv;
}

#endif /* defined(VMSWAP) */

/*
 * amap_lookup: look up a page in an amap.
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
        struct vm_amap *amap = aref->ar_amap;
        struct vm_anon *an;
        u_int slot;

        UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
        KASSERT(mutex_owned(amap->am_lock));

        AMAP_B2SLOT(slot, offset);
        slot += aref->ar_pageoff;
        an = amap->am_anon[slot];

        UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
            amap, offset, an, 0);

        KASSERT(slot < amap->am_nslot);
        KASSERT(an == NULL || an->an_ref != 0);
        KASSERT(an == NULL || an->an_lock == amap->am_lock);
        return an;
}

/*
 * amap_lookups: look up a range of pages in an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
    int npages)
{
        struct vm_amap *amap = aref->ar_amap;
        u_int slot;

        UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
        KASSERT(mutex_owned(amap->am_lock));

        AMAP_B2SLOT(slot, offset);
        slot += aref->ar_pageoff;

        UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
            slot, npages, amap->am_nslot, 0);

        KASSERT((slot + (npages - 1)) < amap->am_nslot);
        memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

#if defined(DIAGNOSTIC)
        for (int i = 0; i < npages; i++) {
                struct vm_anon * const an = anons[i];
                if (an == NULL) {
                        continue;
                }
                KASSERT(an->an_ref != 0);
                KASSERT(an->an_lock == amap->am_lock);
        }
#endif
        UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
}

/*
 * amap_add: add (or replace) a page to an amap.
 *
 * => amap should be locked by caller.
 * => anon must have the lock associated with this amap.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    bool replace)
{
        struct vm_amap *amap = aref->ar_amap;
        u_int slot;

        UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
        KASSERT(mutex_owned(amap->am_lock));
        KASSERT(anon->an_lock == amap->am_lock);

        AMAP_B2SLOT(slot, offset);
        slot += aref->ar_pageoff;
        KASSERT(slot < amap->am_nslot);

        if (replace) {
                struct vm_anon *oanon = amap->am_anon[slot];

                KASSERT(oanon != NULL);
                if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
                        pmap_page_protect(oanon->an_page, VM_PROT_NONE);
                        /*
                         * XXX: suppose page is supposed to be wired somewhere?
                         */
                }
        } else {
                KASSERT(amap->am_anon[slot] == NULL);
                KASSERT(amap->am_nused < amap->am_maxslot);
                amap->am_bckptr[slot] = amap->am_nused;
                amap->am_slots[amap->am_nused] = slot;
                amap->am_nused++;
        }
        amap->am_anon[slot] = anon;
        UVMHIST_LOG(maphist,
            "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
            amap, offset, anon, replace);
}
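
/*
 * The am_slots/am_bckptr bookkeeping above keeps the active slot numbers
 * packed: am_slots[0 .. am_nused-1] lists every occupied slot, and
 * am_bckptr[slot] records where that slot sits within am_slots.
 * Hypothetical example: after adding anons at slots 5 and 2, am_slots is
 * { 5, 2 }, am_bckptr[5] = 0, am_bckptr[2] = 1 and am_nused = 2.
 * amap_unadd() below then removes slot 5 by moving the last entry into
 * its place: am_slots = { 2 }, am_bckptr[2] = 0, am_nused = 1.
 */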

/*
 * amap_unadd: remove a page from an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
        struct vm_amap *amap = aref->ar_amap;
        u_int slot, ptr, last;

        UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
        KASSERT(mutex_owned(amap->am_lock));

        AMAP_B2SLOT(slot, offset);
        slot += aref->ar_pageoff;
        KASSERT(slot < amap->am_nslot);
        KASSERT(amap->am_anon[slot] != NULL);
        KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);

        amap->am_anon[slot] = NULL;
        ptr = amap->am_bckptr[slot];

        last = amap->am_nused - 1;
        if (ptr != last) {
                /* Move the last entry to keep the slots contiguous. */
                amap->am_slots[ptr] = amap->am_slots[last];
                amap->am_bckptr[amap->am_slots[ptr]] = ptr;
        }
        amap->am_nused--;
        UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot, 0, 0);
}

/*
 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
 */
static void
amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
    int refv, bool all)
{
        struct vm_anon *tofree = NULL;

#ifdef UVM_AMAP_PPREF
        KASSERT(mutex_owned(amap->am_lock));

        /*
         * We must establish the ppref array before changing am_ref
         * so that the ppref values match the current amap refcount.
         */

        if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
                amap_pp_establish(amap, offset);
        }
#endif

        amap->am_ref += refv;

#ifdef UVM_AMAP_PPREF
        if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
                if (all) {
                        amap_pp_adjref(amap, 0, amap->am_nslot, refv, &tofree);
                } else {
                        amap_pp_adjref(amap, offset, len, refv, &tofree);
                }
        }
#endif
        uvm_anon_freelst(amap, tofree);
}

/*
 * amap_ref: gain a reference to an amap.
 *
 * => amap must not be locked (we will lock).
 * => "offset" and "len" are in units of pages.
 * => Called at fork time to gain the child's reference.
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
        UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);

        amap_lock(amap);
        if (flags & AMAP_SHARED) {
                amap->am_flags |= AMAP_SHARED;
        }
        amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);

        UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
}

/*
 * amap_unref: remove a reference to an amap.
 *
 * => All pmap-level references to this amap must be already removed.
 * => Called from uvm_unmap_detach(); entry is already removed from the map.
 * => We will lock amap, so it must be unlocked.
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
{
        UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);

        amap_lock(amap);

        UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
            amap, amap->am_ref, amap->am_nused, 0);
        KASSERT(amap->am_ref > 0);

        if (amap->am_ref == 1) {

                /*
                 * If this is the last reference, wipe out and destroy
                 * the amap.
                 */
                amap->am_ref--;
                amap_wipeout(amap);
                UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
                return;
        }

        /*
         * Otherwise, drop the reference count(s) on anons.
         */

        if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
                amap->am_flags &= ~AMAP_SHARED;
        }
        amap_adjref_anons(amap, offset, len, -1, all);

        UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}