/*	$NetBSD: uvm_amap.c,v 1.129 2023/09/10 14:54:34 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.129 2023/09/10 14:54:34 ad Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>
/*
 * cache for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap cache's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
static struct pool_cache uvm_amap_cache;
static kmutex_t amap_list_lock __cacheline_aligned;
static LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static int
amap_roundup_slots(int slots)
{

	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
}
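
/*
 * Illustrative sketch (not part of the build): how the slot round-up
 * behaves.  The exact figure depends on the granularity that
 * kmem_roundup_size() uses, so the numbers below are assumptions,
 * not guarantees.
 */
#if 0
	/*
	 * Assume kmem_roundup_size() rounds a 1000-byte request up to
	 * a 1024-byte bucket.  Asking for 250 slots (250 * sizeof(int)
	 * == 1000 bytes) then yields 1024 / 4 == 256 usable slots: the
	 * allocator's padding becomes extra slots instead of waste.
	 */
	int slots = amap_roundup_slots(250);	/* 256 under the above */
#endif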

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the allocation of the array fails
 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole
 * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * one zero from a bunch of them).
 *
 * here are some in-line functions to help us.
 */

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 0)
		return;
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
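
/*
 * Illustrative sketch (not part of the build): encoding the worked
 * example from the comment above with pp_setreflen() and reading one
 * chunk back with pp_getreflen().
 */
#if 0
	int ppref[16], ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* slots 0-3:   ref 2 -> -3, 4 */
	pp_setreflen(ppref, 4, 3, 1);	/* slot 4:      ref 3 ->  4    */
	pp_setreflen(ppref, 5, 1, 2);	/* slots 5-6:   ref 1 -> -2, 2 */
	pp_setreflen(ppref, 7, 0, 3);	/* slots 7-9:   ref 0 -> -1, 3 */
	pp_setreflen(ppref, 10, 4, 2);	/* slots 10-11: ref 4 -> -5, 2 */
	pp_setreflen(ppref, 12, 0, 1);	/* slot 12:     ref 0 ->  1    */
	pp_setreflen(ppref, 13, 1, 3);	/* slots 13-15: ref 1 -> -2, 3 */

	pp_getreflen(ppref, 7, &ref, &len);	/* ref == 0, len == 3 */
#endif
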
#endif /* UVM_AMAP_PPREF */

/*
 * amap_alloc1: allocate an amap, but do not initialise the overlay.
 *
 * => Note: lock is not set.
 */
static struct vm_amap *
amap_alloc1(int slots, int padslots, int flags)
{
	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
	struct vm_amap *amap;
	krwlock_t *newlock, *oldlock;
	int totalslots;

	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
	if (amap == NULL) {
		return NULL;
	}
	KASSERT(amap->am_lock != NULL);
	KASSERT(amap->am_nused == 0);

	/* Try to privatize the lock if currently shared. */
	if (rw_obj_refcnt(amap->am_lock) > 1) {
		newlock = rw_obj_tryalloc();
		if (newlock != NULL) {
			oldlock = amap->am_lock;
			mutex_enter(&amap_list_lock);
			amap->am_lock = newlock;
			mutex_exit(&amap_list_lock);
			rw_obj_free(oldlock);
		}
	}

	totalslots = amap_roundup_slots(slots + padslots);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;

	/*
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */
	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
	    kmflags);
	if (amap->am_anon == NULL)
		goto fail3;

	return amap;

fail3:
	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
fail2:
	kmem_free(amap->am_slots, totalslots * sizeof(int));
fail1:
	pool_cache_put(&uvm_amap_cache, amap);

	/*
	 * XXX hack to tell the pagedaemon how many pages we need,
	 * since we can need more than it would normally free.
	 */
	if (nowait) {
		extern u_int uvm_extrapages;
		atomic_add_int(&uvm_extrapages,
		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
		    totalslots) >> PAGE_SHIFT);
	}
	return NULL;
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
	}

	UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
	    sz, 0, 0);
	return amap;
}
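
/*
 * Illustrative sketch (not part of the build): the basic contract of
 * amap_alloc().  The size must be page-aligned; the new amap comes
 * back unlocked with one reference; dropping that reference and
 * calling amap_free() mirrors what amap_copy() does with an unused
 * amap below.
 */
#if 0
	struct vm_amap *amap;

	amap = amap_alloc(4 * PAGE_SIZE, 0, 0);	/* 4 slots, may sleep */
	if (amap != NULL) {
		/* ... attach to a map entry via a struct vm_aref ... */
		amap->am_ref--;		/* normally done via amap_unref() */
		amap_free(amap);
	}
#endif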

/*
 * amap_ctor: pool_cache constructor for new amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static int
amap_ctor(void *arg, void *obj, int flags)
{
	struct vm_amap *amap = obj;

	if ((flags & PR_NOWAIT) != 0) {
		amap->am_lock = rw_obj_tryalloc();
		if (amap->am_lock == NULL) {
			return ENOMEM;
		}
	} else {
		amap->am_lock = rw_obj_alloc();
	}
	amap->am_nused = 0;
	amap->am_flags = 0;

	mutex_enter(&amap_list_lock);
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
	mutex_exit(&amap_list_lock);
	return 0;
}

/*
 * amap_dtor: pool_cache destructor for amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static void
amap_dtor(void *arg, void *obj)
{
	struct vm_amap *amap = obj;

	KASSERT(amap->am_nused == 0);

	mutex_enter(&amap_list_lock);
	LIST_REMOVE(amap, am_list);
	mutex_exit(&amap_list_lock);
	rw_obj_free(amap->am_lock);
}

/*
 * uvm_amap_init: initialize the amap system.
 */
void
uvm_amap_init(void)
{

	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);

	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap),
	    COHERENCY_UNIT, 0, 0, "amappl", NULL, IPL_NONE,
	    amap_ctor, amap_dtor, NULL);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	int slots;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0);
	KASSERT(amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
	slots = amap->am_maxslot;
	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
#endif
	pool_cache_put(&uvm_amap_cache, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
	    0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotarea, slotendoff;
	int oldnslots;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	const km_flag_t kmflags =
	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (entry=%#jx, addsize=%#jx, flags=%#jx)",
	    (uintptr_t)entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap, RW_WRITER);
	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotarea = 0;
	} else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotarea = amap->am_maxslot - slotmapped;
	}

	/*
	 * Because this amap only has 1 ref, we know that there is
	 * only one vm_map_entry pointing to it, and the one entry is
	 * using slots between slotoff and slotoff + slotmapped.  If
	 * we have been using ppref then we know that only slots in
	 * the one map entry's range can have anons, since ppref
	 * allowed us to free any anons outside that range as other map
	 * entries which used this amap were removed. But without ppref,
	 * we couldn't know which slots were still needed by other map
	 * entries, so we couldn't free any anons as we removed map
	 * entries, and so any slot from 0 to am_nslot can have an
	 * anon.  But now that we know there is only one map entry
	 * left and we know its range, we can free up any anons
	 * outside that range.  This is necessary because the rest of
	 * this function assumes that there are no anons in the amap
	 * outside of the one map entry's range.
	 */

	slotendoff = slotoff + slotmapped;
	if (amap->am_ppref == PPREF_NONE) {
		amap_wiperange(amap, 0, slotoff);
		amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
	}
	for (i = 0; i < slotoff; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}
	for (i = slotendoff; i < amap->am_nslot; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		} else {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotarea,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotarea - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotarea - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotarea,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotarea - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotarea,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotarea - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotarea - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * Case 3: we need to allocate a new amap and copy all the amap
	 * data over from old amap to the new one.  Drop the lock before
	 * performing allocation.
	 *
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */

	amap_unlock(amap);

	if (slotneed >= UVM_AMAP_LARGE) {
		return E2BIG;
	}

	slotalloc = amap_roundup_slots(slotneed);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		/* Will be handled later if fails. */
		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
	}
#endif
	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			kmem_free(newppref, slotalloc * sizeof(*newppref));
		}
#endif
		if (newsl != NULL) {
			kmem_free(newsl, slotalloc * sizeof(*newsl));
		}
		if (newbck != NULL) {
			kmem_free(newbck, slotalloc * sizeof(*newbck));
		}
		if (newover != NULL) {
			kmem_free(newover, slotalloc * sizeof(*newover));
		}
		return ENOMEM;
	}
	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * Copy everything over to new allocated areas.
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotarea = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotarea - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotarea, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotarea);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotarea, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotarea, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotarea - slotadd;
		amap->am_nslot = slotalloc;
	}
	oldnslots = amap->am_maxslot;
	amap->am_maxslot = slotalloc;
	amap_unlock(amap);

	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
	kmem_free(oldover, oldnslots * sizeof(*oldover));
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
	    (uintptr_t)amap, slotneed, 0, 0);
	return 0;
}
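
/*
 * Illustrative sketch (not part of the build): how a caller might grow
 * an unshared amap forwards by one page.  "entry" is a hypothetical
 * map entry; the amap must be unlocked and have exactly one reference,
 * as documented above.
 */
#if 0
	int error;

	error = amap_extend(entry, PAGE_SIZE, AMAP_EXTEND_FORWARDS);
	if (error == E2BIG) {
		/* result would reach UVM_AMAP_LARGE; allocate a new amap */
	} else if (error == ENOMEM) {
		/* could not grow the slot arrays */
	}
#endif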

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	u_int slots, lcv, slot, stop;
	struct vm_anon *anon;

	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/*
		 * Cheaper to traverse am_anon.
		 */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			anon = amap->am_anon[lcv];
			if (anon == NULL) {
				continue;
			}
			if (anon->an_page) {
				pmap_page_protect(anon->an_page, prot);
			}
		}
		return;
	}

	/*
	 * Cheaper to traverse am_slots.
	 */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop) {
			continue;
		}
		anon = amap->am_anon[slot];
		if (anon->an_page) {
			pmap_page_protect(anon->an_page, prot);
		}
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => Called from amap_unref(), when reference count drops to zero.
 * => amap must be locked.
 */

void
amap_wipeout(struct vm_amap *amap)
{
	u_int lcv;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);

	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(amap->am_ref == 0);

	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
		/*
		 * Note: amap_swap_off() will call us again.
		 */
		amap_unlock(amap);
		return;
	}

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		struct vm_anon *anon;
		u_int slot;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon != NULL);
		KASSERT(anon->an_ref != 0);

		KASSERT(anon->an_lock == amap->am_lock);
		UVMHIST_LOG(maphist,"  processing anon %#jx, ref=%jd",
		    (uintptr_t)anon, anon->an_ref, 0, 0);

		/*
		 * Drop the reference.
		 */

		if (__predict_true(--anon->an_ref == 0)) {
			uvm_anfree(anon);
		}
		if (__predict_false((lcv & 31) == 31)) {
			preempt_point();
		}
	}

	/*
	 * Finally, destroy the amap.
	 */

	amap->am_nused = 0;
	amap_unlock(amap);
	amap_free(amap);
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
    vaddr_t startva, vaddr_t endva)
{
	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
	struct vm_amap *amap, *srcamap;
	u_int slots, lcv;
	krwlock_t *oldlock;
	vsize_t len;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (map=%#jx, entry=%#jx, flags=%#jx)",
	    (uintptr_t)map, (uintptr_t)entry, flags, -2);

	KASSERT(map != kernel_map);	/* we use nointr pool */

	srcamap = entry->aref.ar_amap;
	len = entry->end - entry->start;

	/*
	 * Is there an amap to copy?  If not, create one.
	 */

	if (srcamap == NULL) {
		const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;

		/*
		 * Check to see if we have a large amap that we can
		 * chunk.  We align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
			vsize_t chunksize;

			/* Convert slots to bytes. */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist,
			    "  chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);

			/* Watch out for endva wrap-around! */
			if (endva >= startva) {
				UVM_MAP_CLIP_END(map, entry, endva);
			}
		}

		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
			return;
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
		    entry->start, entry->end, 0, 0);

		/*
		 * Allocate an initialised amap and install it.
		 * Note: we must update the length after clipping.
		 */
		len = entry->end - entry->start;
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(len, 0, waitf);
		if (entry->aref.ar_amap != NULL) {
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		}
		return;
	}

	/*
	 * First check and see if we are the only map entry referencing
	 * the amap we currently have.  If so, then just take it over instead
	 * of copying it.  Note that we are reading am_ref without the lock
	 * held as the value can only be one if we have the only reference
	 * to the amap (via our locked map).  If the value is greater than
	 * one, then allocate an amap and re-check the value.
	 */

	if (srcamap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	UVMHIST_LOG(maphist,"  amap=%#jx, ref=%jd, must copy it",
	    (uintptr_t)srcamap, srcamap->am_ref, 0, 0);

	/*
	 * Allocate a new amap (note: not initialised, etc).
	 */

	AMAP_B2SLOT(slots, len);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}

	/*
	 * Make the new amap share the source amap's lock, and then lock
	 * both.  We must do this before we set am_nused != 0, otherwise
	 * amap_swap_off() can become interested in the amap.
	 */

	oldlock = amap->am_lock;
	mutex_enter(&amap_list_lock);
	amap->am_lock = srcamap->am_lock;
	mutex_exit(&amap_list_lock);
	rw_obj_hold(amap->am_lock);
	rw_obj_free(oldlock);

	amap_lock(srcamap, RW_WRITER);

	/*
	 * Re-check the reference count with the lock held.  If it has
	 * dropped to one - we can take over the existing map.
	 */

	if (srcamap->am_ref == 1) {
		/* Just take over the existing amap. */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap_unlock(srcamap);
		/* Destroy the new (unused) amap. */
		amap->am_ref--;
		amap_free(amap);
		return;
	}

	/*
	 * Copy the slots.  Zero the padded part.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
		KASSERT(amap->am_anon[lcv]->an_ref > 0);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * Drop our reference to the old amap (srcamap) and unlock.
	 * Since the reference count on srcamap is greater than one,
	 * (we checked above), it cannot drop to zero while it is locked.
	 */

	srcamap->am_ref--;
	KASSERT(srcamap->am_ref > 0);

	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
		srcamap->am_flags &= ~AMAP_SHARED;
	}
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    len >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * Install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}
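
/*
 * Worked example of the chunking arithmetic above (the constants are
 * illustrative assumptions): with PAGE_SIZE = 4096 and UVM_AMAP_CHUNK
 * = 16, chunksize = 16 << 12 = 64 KiB.  startva = 0x12345000 rounds
 * down to 0x12340000 and endva = 0x1239b000 rounds up to 0x123a0000,
 * so later faults in the same chunk can reuse one amap instead of
 * allocating a huge one up front.
 */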

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).    we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;
	u_int lcv, slot;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap, RW_WRITER);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * If anon has only one reference - we must have already
		 * copied it.  This can happen if we needed to sleep waiting
		 * for memory in a previous run through this loop.  The new
		 * page might even have been paged out, since it is not wired.
		 */

		if (anon->an_ref == 1) {
			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
			continue;
		}

		/*
		 * The old page must be resident since the parent is wired.
		 */

		pg = anon->an_page;
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count > 0);

		/*
		 * If the page is loaned then it must already be mapped
		 * read-only and we don't need to copy it.
		 */

		if (pg->loan_count != 0) {
			continue;
		}
		KASSERT(pg->uanon == anon);
		KASSERT(pg->uobject == NULL);

		/*
		 * If the page is busy, then we have to unlock, wait for
		 * it and then restart.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, amap->am_lock, "cownow");
			goto ReStart;
		}

		/*
		 * Perform a copy-on-write.
		 * First - get a new anon and a page.
		 */

		nanon = uvm_analloc();
		if (nanon) {
			nanon->an_lock = amap->am_lock;
			npg = uvm_pagealloc(NULL, 0, nanon, 0);
		} else {
			npg = NULL;
		}
		if (nanon == NULL || npg == NULL) {
			amap_unlock(amap);
			if (nanon) {
				nanon->an_lock = NULL;
				nanon->an_ref--;
				KASSERT(nanon->an_ref == 0);
				uvm_anfree(nanon);
			}
			uvm_wait("cownowpage");
			goto ReStart;
		}

		/*
		 * Copy the data and replace anon with the new one.
		 * Also, set up its lock (shared with the amap's lock).
		 */

		uvm_pagecopy(pg, npg);
		anon->an_ref--;
		KASSERT(anon->an_ref > 0);
		amap->am_anon[slot] = nanon;

		/*
		 * Drop PG_BUSY on new page.  Since its owner was write
		 * locked all this time - it cannot be PG_RELEASED or
		 * waited on.
		 */
		uvm_pagelock(npg);
		uvm_pageactivate(npg);
		uvm_pageunlock(npg);
		npg->flags &= ~(PG_BUSY|PG_FAKE);
		UVM_PAGE_OWN(npg, NULL);
	}
	amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	struct vm_amap *amap = origref->ar_amap;
	u_int leftslots;

	KASSERT(splitref->ar_amap == origref->ar_amap);
	AMAP_B2SLOT(leftslots, offset);
	KASSERT(leftslots != 0);

	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);

#ifdef UVM_AMAP_PPREF
	/* Establish ppref before we add a duplicate reference to the amap. */
	if (amap->am_ppref == NULL) {
		amap_pp_establish(amap, origref->ar_pageoff);
	}
#endif
	/* Note: not a share reference. */
	amap->am_ref++;
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
	amap_unlock(amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible.
 *
 * => amap should be locked by caller.
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);

	KASSERT(rw_write_held(amap->am_lock));

	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
	if (amap->am_ppref == NULL) {
		/* Failure - just do not use ppref. */
		amap->am_ppref = PPREF_NONE;
		return;
	}
	pp_setreflen(amap->am_ppref, 0, 0, offset);
	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
	    amap->am_nslot - offset);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked.
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	KASSERT(rw_write_held(amap->am_lock));

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * Advance to the correct place in the array, fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv == 0) {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we are starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	} else {
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	}

	/*
	 * Now adjust reference counts in range.  Merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	KASSERT(lcv == curslot);
	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		KASSERT(ref >= 0);
		KASSERT(ref <= amap->am_ref);
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0) {
			amap_wiperange(amap, lcv, len);
		}
	}
}
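
/*
 * Worked example (illustrative): take an 8-slot amap whose ppref
 * encodes ref 2 over slots 0-7, i.e. ppref = { -3, 8, ... }.  Calling
 * amap_pp_adjref(amap, 2, 4, -1) first fragments the chunk at slot 2
 * (ref 2 over 0-1, ref 2 over 2-7), then at the stop slot 6 (ref 2
 * over 2-5, ref 2 over 6-7), and finally drops slots 2-5 to ref 1:
 *
 * actual REFS:  2  2  1  1  1  1  2  2
 *       ppref: -3  2 -2  4  x  x -3  2
 */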

/*
 * amap_wiperange: wipe out a range of an amap.
 * Note: different from amap_wipeout because the amap is kept intact.
 *
 * => Both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	u_int lcv, stop, slotend;
	bool byanon;

	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We can either traverse the amap by am_anon or by am_slots.
	 * Determine which way is less expensive.
	 */

	if (slots < amap->am_nused) {
		byanon = true;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = false;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		struct vm_anon *anon;
		u_int curslot, ptr, last;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * Remove anon from the amap.
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		last = amap->am_nused - 1;
		if (ptr != last) {
			amap->am_slots[ptr] = amap->am_slots[last];
			amap->am_bckptr[amap->am_slots[ptr]] = ptr;
		}
		amap->am_nused--;

		/*
		 * Drop its reference count.
		 */

		KASSERT(anon->an_lock == amap->am_lock);
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
}

#endif

#if defined(VMSWAP)

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return true if failed.
 */

bool
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	bool rv = false;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	mutex_enter(&amap_list_lock);
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		/* amap_list_lock prevents the lock pointer from changing. */
		if (!amap_lock_try(am, RW_WRITER)) {
			(void)kpause("amapswpo", false, 1, &amap_list_lock);
			am_next = LIST_NEXT(&marker_prev, am_list);
			if (am_next == &marker_next) {
				am_next = LIST_NEXT(am_next, am_list);
			} else {
				KASSERT(LIST_NEXT(am_next, am_list) ==
				    &marker_next);
			}
			LIST_REMOVE(&marker_prev, am_list);
			LIST_REMOVE(&marker_next, am_list);
			continue;
		}

		mutex_exit(&amap_list_lock);

		/* If am_nused == 0, the amap could be free - careful. */
		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			KASSERT(anon->an_lock == am->am_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(am, anon);
			amap_lock(am, RW_WRITER);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

		if (am) {
			amap_unlock(am);
		}

		mutex_enter(&amap_list_lock);
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}
	mutex_exit(&amap_list_lock);

	return rv;
}

#endif /* defined(VMSWAP) */

/*
 * amap_lookup: look up a page in an amap.
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	struct vm_anon *an;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	an = amap->am_anon[slot];

	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
	    (uintptr_t)amap, offset, (uintptr_t)an, 0);

	KASSERT(slot < amap->am_nslot);
	KASSERT(an == NULL || an->an_ref != 0);
	KASSERT(an == NULL || an->an_lock == amap->am_lock);
	return an;
}

/*
 * amap_lookups: look up a range of pages in an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
    int npages)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
	    slot, npages, amap->am_nslot, 0);

	KASSERT((slot + (npages - 1)) < amap->am_nslot);
	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

#if defined(DIAGNOSTIC)
	for (int i = 0; i < npages; i++) {
		struct vm_anon * const an = anons[i];
		if (an == NULL) {
			continue;
		}
		KASSERT(an->an_ref != 0);
		KASSERT(an->an_lock == amap->am_lock);
	}
#endif
	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
}
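
/*
 * Illustrative sketch (not part of the build): the usual lookup
 * pattern.  "aref", "entry" and "va" are hypothetical; the amap must
 * stay locked across the call and any returned anon shares the amap's
 * lock.
 */
#if 0
	struct vm_anon *anon;

	amap_lock(aref->ar_amap, RW_READER);
	anon = amap_lookup(aref, va - entry->start);
	if (anon != NULL) {
		/* this offset is backed by anonymous memory */
	}
	amap_unlock(aref->ar_amap);
#endif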

/*
 * amap_add: add (or replace) a page to an amap.
 *
 * => amap should be locked by caller.
 * => anon must have the lock associated with this amap.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    bool replace)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(anon->an_lock == amap->am_lock);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);

	if (replace) {
		struct vm_anon *oanon = amap->am_anon[slot];

		KASSERT(oanon != NULL);
		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {
		KASSERT(amap->am_anon[slot] == NULL);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
	    (uintptr_t)amap, offset, (uintptr_t)anon, replace);
}

/*
 * amap_unadd: remove a page from an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot, ptr, last;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);
	KASSERT(amap->am_anon[slot] != NULL);
	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	last = amap->am_nused - 1;
	if (ptr != last) {
		/* Move the last entry to keep the slots contiguous. */
		amap->am_slots[ptr] = amap->am_slots[last];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
	}
	amap->am_nused--;
	UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
	    (uintptr_t)amap, slot,0, 0);
}
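
/*
 * Worked example of the swap-with-last trick above (illustrative).
 * With am_nused == 3 and active slots {4, 9, 2} (so am_bckptr[4] == 0,
 * am_bckptr[9] == 1, am_bckptr[2] == 2), removing slot 9 moves the
 * last entry into its place:
 *
 *   am_slots:  {4, 9, 2} -> {4, 2}     (am_nused drops to 2)
 *   am_bckptr: bckptr[2] becomes 1, so slot 2 can still be found in
 *              O(1) the next time it is removed or reindexed.
 */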

/*
 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
 */
static void
amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
    int refv, bool all)
{

#ifdef UVM_AMAP_PPREF
	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We must establish the ppref array before changing am_ref
	 * so that the ppref values match the current amap refcount.
	 * No ppref is needed when the reference covers the whole amap.
	 */

	if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
		amap_pp_establish(amap, offset);
	}
#endif

	amap->am_ref += refv;

#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (all) {
			amap_pp_adjref(amap, 0, amap->am_nslot, refv);
		} else {
			amap_pp_adjref(amap, offset, len, refv);
		}
	}
#endif
	amap_unlock(amap);
}

/*
 * amap_ref: gain a reference to an amap.
 *
 * => amap must not be locked (we will lock).
 * => "offset" and "len" are in units of pages.
 * => Called at fork time to gain the child's reference.
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);
	if (flags & AMAP_SHARED) {
		amap->am_flags |= AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);

	UVMHIST_LOG(maphist,"<- done!  amap=%#jx", (uintptr_t)amap, 0, 0, 0);
}

/*
 * amap_unref: remove a reference to an amap.
 *
 * => All pmap-level references to this amap must be already removed.
 * => Called from uvm_unmap_detach(); entry is already removed from the map.
 * => We will lock amap, so it must be unlocked.
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);

	UVMHIST_LOG(maphist,"  amap=%#jx  refs=%d, nused=%d",
	    (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
	KASSERT(amap->am_ref > 0);

	if (amap->am_ref == 1) {
		/*
		 * If the last reference - wipeout and destroy the amap.
		 */
		amap->am_ref--;
		amap_wipeout(amap);
		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
		return;
	}

	/*
	 * Otherwise, drop the reference count(s) on anons.
	 */

	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
		amap->am_flags &= ~AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, -1, all);

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}
   1675