/*	$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and a K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PG_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or pg->interlock
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and pg->interlock must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying pg->interlock should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
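
/*
 * example (a sketch derived from the rules above, not an exhaustive
 * list of invariants): a uvm_object page that is simultaneously on
 * loan to an anon (O->A) and to the kernel (O->K) would show:
 *
 *	pg->uobject != NULL		the object is still the owner
 *	pg->uanon   != NULL		the borrowing anon
 *	(pg->flags & PG_ANON) == 0	the anon does not own the page
 *	pg->loan_count == 2		one count per outstanding loan
 *
 * if the object later disowns the page, pg->uobject becomes NULL and
 * the anon may adopt it, trading one loan_count for ownership (i.e.
 * loan_count is decremented and PG_ANON is set).
 */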

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int, bool);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap, RW_WRITER);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !rw_write_held(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !rw_write_held(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
	return (error);
}
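
/*
 * example usage (a sketch only; "uva" and the page count are
 * hypothetical): loaning four user pages into wired kernel pages,
 * and dropping the loans again with uvm_unloan():
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, uva, 4 * PAGE_SIZE,
 *	    pgs, UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... use the pages (read-only) ...
 *		uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 *	}
 *
 * uva and the length must be page aligned and mapped; otherwise
 * uvm_loan() fails (ENOENT/EINVAL) after dropping any partial loans.
 */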

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(rw_write_held(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(rw_write_held(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
		KASSERT(error != ENOLCK);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		rw_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => pages will be unbusied (if busied is true).
 * => fails with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(rw_write_held(pg->uobject->vmobjlock));
		KASSERT(busied == ((pg->flags & PG_BUSY) != 0));

		if (pg->wire_count > 0) {
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		uvm_pagelock(pg);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}

	if (busied) {
		uvm_page_unbusy(pgpp, npages);
	}

	if (error) {
		/*
		 * backout what we've done
		 */
		krwlock_t *slock = pgpp[0]->uobject->vmobjlock;

		rw_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		rw_enter(slock, RW_WRITER);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjchunk: helper for uvm_loanuobjpages()
 */
static int
uvm_loanuobjchunk(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **pgpp)
{
	int error, npages;

	rw_enter(uobj->vmobjlock, RW_WRITER);
 reget:
	npages = orignpages;
	error = (*uobj->pgops->pgo_get)(uobj, pgoff, pgpp, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
	switch (error) {
	case 0:
		KASSERT(npages == orignpages);

		/* check for released pages */
		rw_enter(uobj->vmobjlock, RW_WRITER);
		for (int i = 0; i < npages; i++) {
			KASSERT(pgpp[i]->uobject->vmobjlock == uobj->vmobjlock);
			if ((pgpp[i]->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				uvm_page_unbusy(pgpp, npages);
				goto reget;
			}
		}

		/* loan out pages.  they will be unbusied whatever happens. */
		error = uvm_loanpage(pgpp, npages, true);
		rw_exit(uobj->vmobjlock);
		if (error != 0) {
			memset(pgpp, 0, sizeof(pgpp[0]) * npages);
		}
		return error;

	case EAGAIN:
		kpause("loanuopg", false, hz/2, NULL);
		rw_enter(uobj->vmobjlock, RW_WRITER);
		goto reget;

	default:
		return error;
	}
}

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int npages,
    struct vm_page **pgpp)
{
	int ndone, error, chunk;

	KASSERT(npages > 0);

	memset(pgpp, 0, sizeof(pgpp[0]) * npages);
	for (ndone = 0; ndone < npages; ndone += chunk) {
		chunk = MIN(UVM_LOAN_GET_CHUNK, npages - ndone);
		error = uvm_loanuobjchunk(uobj, pgoff + (ndone << PAGE_SHIFT),
		    chunk, pgpp + ndone);
		if (error != 0) {
			if (ndone != 0) {
				uvm_unloan(pgpp, ndone, UVM_LOAN_TOPAGE);
			}
			break;
		}
	}

	return error;
}
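
/*
 * example usage (a sketch; "vp" and "off" are hypothetical): loaning
 * resident vnode pages to the kernel, e.g. for zero-copy reads:
 *
 *	struct uvm_object *uobj = &vp->v_uobj;
 *	struct vm_page *pgs[8];
 *	int error;
 *
 *	error = uvm_loanuobjpages(uobj, trunc_page(off), 8, pgs);
 *	if (error == 0) {
 *		... pgs[] now hold read-only loans ...
 *		uvm_unloan(pgs, 8, UVM_LOAN_TOPAGE);
 *	}
 *
 * the offset is relative to the start of the object and should be
 * page aligned; a wired page in the range fails the call with EBUSY.
 */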

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	rw_enter(uobj->vmobjlock, RW_WRITER);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap, RW_WRITER);
		uobj = pg->uobject;
		rw_enter(uobj->vmobjlock, RW_WRITER);

		/*
		 * verify that the page has not been released, and re-verify
		 * that the amap slot is still free.   if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * unbusy the page.
		 */

		if ((pg->flags & PG_RELEASED) == 0) {
			uvm_pagelock(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
			pg->flags &= ~PG_BUSY;
			UVM_PAGE_OWN(pg, NULL);
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
			}
			rw_exit(uobj->vmobjlock);
			return (0);
		}
	}

	/*
	 * for tmpfs vnodes, the page will be from a UAO rather than
	 * the vnode.  just check the locks match.
	 */

	KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);

	/*
	 * at this point we have the page we want ("pg") and we have
	 * all data structures locked.  do the loanout.  the page cannot
	 * be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1, false)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		rw_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		uvm_pagelock(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		pg->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(pg, NULL);
		rw_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	uvm_pageactivate(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	rw_exit(uobj->vmobjlock);
	rw_exit(anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	uvm_pagelock(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj);
	if (anon) {
		anon->an_ref--;
		uvm_anfree(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static krwlock_t uvm_loanzero_lock __cacheline_aligned;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	rw_enter(uvm_loanzero_object.vmobjlock, RW_WRITER);

	/*
	 * first, get ahold of our single zero page.
	 */

	pg = uvm_pagelookup(&uvm_loanzero_object, 0);
	if (__predict_false(pg == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			rw_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap, RW_WRITER);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&pg->interlock);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		mutex_exit(&pg->interlock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		rw_enter(anon->an_lock, RW_WRITER);
		anon->an_ref++;
		rw_exit(anon->an_lock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		rw_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	rw_exit(anon->an_lock);
	rw_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap, RW_WRITER);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
	amap_unlock(amap);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	krwlock_t *slock;

	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		mutex_enter(&pg->interlock);
		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (rw_tryenter(slock, RW_WRITER)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &pg->interlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PG_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PG_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->flags & PG_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->flags |= PG_ANON;
		}
		mutex_exit(&pg->interlock);
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			rw_exit(slock);
		}
	}
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = uvm_pagelookup(uobj, 0);
	KASSERT(pg != NULL);

	uvm_pagelock(pg);
	if (pg->uanon) {
		uvm_pageactivate(pg);
	} else {
		uvm_pagedequeue(pg);
	}
	uvm_pageunlock(pg);

	rw_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	rw_init(&uvm_loanzero_lock);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page may be busy; if it's busy, it will be unbusied
 * => return value:
 *	newly allocated page if succeeded
 *	NULL if failed (e.g. no memory for the replacement page)
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
	struct uvm_object *uobj __diagused = uobjpage->uobject;

	KASSERT(uobj != NULL);
	KASSERT(rw_write_held(uobj->vmobjlock));

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * then rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	/* uobj still locked */
	if ((uobjpage->flags & PG_BUSY) != 0) {
		uobjpage->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(uobjpage, NULL);
	}

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */

	uvm_pagelock2(uobjpage, pg);
	uvm_pagewakeup(uobjpage);
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	uvm_pageactivate(pg);
	uvm_pageunlock2(uobjpage, pg);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
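
/*
 * example caller pattern (a sketch; the restart step and the "loanbrk"
 * wmesg are illustrative, not taken from a real caller): a writer that
 * finds a uobj page loaned out swaps in a private copy before
 * modifying it:
 *
 *	if (pg->loan_count > 0) {
 *		struct vm_page *newpg = uvm_loanbreak(pg);
 *		if (newpg == NULL) {
 *			rw_exit(uobj->vmobjlock);
 *			uvm_wait("loanbrk");
 *			... redo the lookup and retry ...
 *		}
 *		pg = newpg;	(busy, no longer loaned, safe to write)
 *	}
 */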

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *newpg, *oldpg;
	unsigned oldstatus;

	KASSERT(rw_write_held(anon->an_lock));
	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
	KASSERT(anon->an_page->loan_count > 0);

	/* get new un-owned replacement page */
	newpg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (newpg == NULL) {
		return ENOMEM;
	}

	oldpg = anon->an_page;
	/* copy old -> new */
	uvm_pagecopy(oldpg, newpg);
	KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY);

	/* force reload */
	pmap_page_protect(oldpg, VM_PROT_NONE);
	oldstatus = uvm_pagegetdirty(anon->an_page);

	uvm_pagelock2(oldpg, newpg);
	if (uobj == NULL) {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 *
		 * PG_ANON is cleared here; the replacement page is
		 * marked PG_ANON below.
		 */
		KASSERT((oldpg->flags & PG_ANON) != 0);
		oldpg->flags &= ~PG_ANON;
		uvm_pagedequeue(oldpg);
	}
	oldpg->uanon = NULL;

	if (uobj) {
		/* if we were receiver of loan */
		KASSERT((oldpg->flags & PG_ANON) == 0);
		oldpg->loan_count--;
	}

	/* install new page in anon */
	anon->an_page = newpg;
	newpg->uanon = anon;
	newpg->flags |= PG_ANON;

	uvm_pageactivate(newpg);
	uvm_pageunlock2(oldpg, newpg);

	newpg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(newpg, NULL);

	if (uobj) {
		rw_exit(uobj->vmobjlock);
	}

	/* done! */
	kpreempt_disable();
	if (uobj == NULL) {
		CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1);
	}
	CPU_COUNT(CPU_COUNT_ANONDIRTY, 1);
	kpreempt_enable();
	return 0;
}
   1229