/*	$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.104 2020/06/11 22:21:05 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.  this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PG_ANON will not be set.  this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.  in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or pg->interlock
 * must be locked.  to modify pg->loan_count, both the owner of the page
 * and pg->interlock must be locked.  pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.  in order to prevent the owner
 * from dying pg->interlock should be locked.  this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 * 1. user-level write fault to a loaned page
 * 2. pageout of clean+inactive O->A loaned page
 * 3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).  this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
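
/*
 * a concrete example of the bookkeeping above (illustrative only):
 * a vnode page loaned both to an anon (O->A) and to the kernel (O->K)
 * at the same time has pg->loan_count == 2, with pg->uobject and
 * pg->uanon both set but PG_ANON clear, so the vnode still owns the
 * page.  if the vnode pager then flushes and "disowns" the page, the
 * orphan may be adopted by the anon: loan_count is decremented once
 * more and PG_ANON is set (see uvm_unloanpage() below).
 */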

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
		    int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
		    int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int, bool);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap, RW_WRITER);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.  check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !rw_write_held(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !rw_write_held(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0, 0, 0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result,
			    0, 0, 0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0, 0, 0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *		   UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
	    ((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 * smarter code (but it only happens on map entry
		 * boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0, 0, 0, 0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error, 0, 0, 0);
	return (error);
}
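
/*
 * illustrative sketch of the uvm_loan()/uvm_unloan() calling convention
 * for an ->K loan of user memory.  "uva" and "len" are a hypothetical
 * page-aligned window and error handling is minimal; the real consumers
 * (e.g. the direct-pipe write path) live outside this file.
 */
#ifdef notdef
static int
example_loan_user_pages(struct lwp *l, vaddr_t uva, vsize_t len,
    struct vm_page **pgs)
{
	int error;

	KASSERT((uva & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);

	/* on failure nothing stays loaned; on success pgs[] is filled */
	error = uvm_loan(&l->l_proc->p_vmspace->vm_map, uva, len, pgs,
	    UVM_LOAN_TOPAGE);
	if (error)
		return error;

	/* ... map the pages read-only into kernel VA and use them ... */

	uvm_unloan(pgs, len >> PAGE_SHIFT, UVM_LOAN_TOPAGE);
	return 0;
}
#endif	/* notdef */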

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(rw_write_held(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0, 0, 0, 0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.  we need to get the page
	 * resident so we can wire it.  uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(rw_write_held(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error, 0, 0, 0);
		KASSERT(error != ENOLCK);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?  sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg,
		    0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		rw_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0, 0, 0, 0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => pages will be unbusied (if busied is true).
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(rw_write_held(pg->uobject->vmobjlock));
		KASSERT(busied == ((pg->flags & PG_BUSY) != 0));

		if (pg->wire_count > 0) {
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		uvm_pagelock(pg);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}

	if (busied) {
		uvm_page_unbusy(pgpp, npages);
	}

	if (error) {
		/*
		 * backout what we've done
		 */
		krwlock_t *slock = pgpp[0]->uobject->vmobjlock;

		rw_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		rw_enter(slock, RW_WRITER);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjchunk: helper for uvm_loanuobjpages()
 */
static int
uvm_loanuobjchunk(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **pgpp)
{
	int error, npages;

	rw_enter(uobj->vmobjlock, RW_WRITER);
 reget:
	npages = orignpages;
	error = (*uobj->pgops->pgo_get)(uobj, pgoff, pgpp, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
	switch (error) {
	case 0:
		KASSERT(npages == orignpages);

		/* check for released pages */
		rw_enter(uobj->vmobjlock, RW_WRITER);
		for (int i = 0; i < npages; i++) {
			KASSERT(pgpp[i]->uobject->vmobjlock == uobj->vmobjlock);
			if ((pgpp[i]->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				uvm_page_unbusy(pgpp, npages);
				goto reget;
			}
		}

		/* loan out pages.  they will be unbusied whatever happens. */
		error = uvm_loanpage(pgpp, npages, true);
		rw_exit(uobj->vmobjlock);
		if (error != 0) {
			memset(pgpp, 0, sizeof(pgpp[0]) * npages);
		}
		return error;

	case EAGAIN:
		kpause("loanuopg", false, hz/2, NULL);
		rw_enter(uobj->vmobjlock, RW_WRITER);
		goto reget;

	default:
		return error;
	}
}

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int npages,
    struct vm_page **pgpp)
{
	int ndone, error, chunk;

	KASSERT(npages > 0);

	memset(pgpp, 0, sizeof(pgpp[0]) * npages);
	for (ndone = 0; ndone < npages; ndone += chunk) {
		chunk = MIN(UVM_LOAN_GET_CHUNK, npages - ndone);
		error = uvm_loanuobjchunk(uobj, pgoff + (ndone << PAGE_SHIFT),
		    chunk, pgpp + ndone);
		if (error != 0) {
			if (ndone != 0) {
				uvm_unloan(pgpp, ndone, UVM_LOAN_TOPAGE);
			}
			break;
		}
	}

	return error;
}
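
/*
 * illustrative sketch of an O->K loan taken straight from a uobj,
 * bypassing any map lookup.  "uobj", "off" and "npages" are hypothetical
 * and the consumer of the pages is elided.
 */
#ifdef notdef
static int
example_loan_uobj_pages(struct uvm_object *uobj, voff_t off, int npages,
    struct vm_page **pgs)
{
	int error;

	/* loans npages starting at off, paging them in as needed */
	error = uvm_loanuobjpages(uobj, off, npages, pgs);
	if (error)
		return error;		/* e.g. EBUSY on a wired page */

	/* ... read the loaned, read-only pages ... */

	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif	/* notdef */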

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	rw_enter(uobj->vmobjlock, RW_WRITER);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.  attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap, RW_WRITER);
		uobj = pg->uobject;
		rw_enter(uobj->vmobjlock, RW_WRITER);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.  if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * unbusy the page.
		 */

		if ((pg->flags & PG_RELEASED) == 0) {
			uvm_pagelock(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
			pg->flags &= ~PG_BUSY;
			UVM_PAGE_OWN(pg, NULL);
		}

		/*
		 * didn't get the lock?  release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
			}
			rw_exit(uobj->vmobjlock);
			return (0);
		}
	}

	/*
	 * for tmpfs vnodes, the page will be from a UAO rather than
	 * the vnode.  just check the locks match.
	 */

	KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);

	/*
	 * at this point we have the page we want ("pg") and we have
	 * all data structures locked.  do the loanout.  page can not
	 * be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1, false)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		rw_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.  check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.  the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		uvm_pagelock(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		pg->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(pg, NULL);
		rw_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	if (pg->wire_count > 0) {
		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	uvm_pageactivate(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	rw_exit(uobj->vmobjlock);
	rw_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0, 0, 0, 0);
	/*
	 * unlock everything and bail out.
	 */
	uvm_pagelock(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	if (anon) {
		anon->an_ref--;
		uvm_anfree(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static krwlock_t uvm_loanzero_lock __cacheline_aligned;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	rw_enter(uvm_loanzero_object.vmobjlock, RW_WRITER);

	/*
	 * first, get ahold of our single zero page.
	 */

	pg = uvm_pagelookup(&uvm_loanzero_object, 0);
	if (__predict_false(pg == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
		    UVM_PGA_ZERO)) == NULL) {
			rw_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap, RW_WRITER);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&pg->interlock);
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		mutex_exit(&pg->interlock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		rw_enter(&anon->an_lock, RW_WRITER);
		anon->an_ref++;
		rw_exit(&anon->an_lock);
		rw_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		rw_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_pagelock(pg);
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);
	rw_exit(&anon->an_lock);
	rw_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}

/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap, RW_WRITER);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
	amap_unlock(amap);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	krwlock_t *slock;

	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		mutex_enter(&pg->interlock);
		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (rw_tryenter(slock, RW_WRITER)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &pg->interlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PG_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PG_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->flags & PG_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->flags |= PG_ANON;
		}
		mutex_exit(&pg->interlock);
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			rw_exit(slock);
		}
	}
}
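
/*
 * a concrete example of the ordering problem handled above: the normal
 * locking order is owner lock (uobj/anon) -> pg->interlock, but
 * uvm_unloanpage() arrives holding only pg->interlock, and the owner
 * can change or vanish the moment pg->interlock is dropped.  hence the
 * rw_tryenter() loop with a kpause() back-off rather than a blocking
 * rw_enter().
 */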

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = uvm_pagelookup(uobj, 0);
	KASSERT(pg != NULL);

	uvm_pagelock(pg);
	if (pg->uanon) {
		uvm_pageactivate(pg);
	} else {
		uvm_pagedequeue(pg);
	}
	uvm_pageunlock(pg);

	rw_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	rw_init(&uvm_loanzero_lock);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page may be busy; if it's busy, it will be unbusied
 * => return value:
 *	newly allocated page if succeeded, NULL if the replacement page
 *	could not be allocated
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
	struct uvm_object *uobj __diagused = uobjpage->uobject;

	KASSERT(uobj != NULL);
	KASSERT(rw_write_held(uobj->vmobjlock));

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * then rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	/* uobj still locked */
	if ((uobjpage->flags & PG_BUSY) != 0) {
		uobjpage->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(uobjpage, NULL);
	}

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */

	uvm_pagelock2(uobjpage, pg);
	uvm_pagewakeup(uobjpage);
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	uvm_pageactivate(pg);
	uvm_pageunlock2(uobjpage, pg);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.  it can now replace uobjpage.
	 */

	return pg;
}
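
/*
 * illustrative sketch (fragment) of a uvm_loanbreak() caller, loosely
 * modeled on the write-fault path: "uobjpage" is a loaned page whose
 * owning "uobj" is write-locked; the wait-channel name and the restart
 * step are hypothetical.
 */
#ifdef notdef
	if (uobjpage->loan_count != 0) {
		pg = uvm_loanbreak(uobjpage);
		if (pg == NULL) {
			/* out of memory: back out, wait, redo the fault */
			rw_exit(uobj->vmobjlock);
			uvm_wait("loanbrk");
			return ERESTART;
		}
		/* "pg" is busy and has replaced uobjpage in uobj */
	}
#endif	/* notdef */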

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *newpg, *oldpg;
	unsigned oldstatus;

	KASSERT(rw_write_held(anon->an_lock));
	KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
	KASSERT(anon->an_page->loan_count > 0);

	/* get new un-owned replacement page */
	newpg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (newpg == NULL) {
		return ENOMEM;
	}

	oldpg = anon->an_page;
	/* copy old -> new */
	uvm_pagecopy(oldpg, newpg);
	KASSERT(uvm_pagegetdirty(newpg) == UVM_PAGE_STATUS_DIRTY);

	/* force reload */
	pmap_page_protect(oldpg, VM_PROT_NONE);
	oldstatus = uvm_pagegetdirty(anon->an_page);

	uvm_pagelock2(oldpg, newpg);
	if (uobj == NULL) {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 *
		 * PG_ANON is updated by the caller.
		 */
		KASSERT((oldpg->flags & PG_ANON) != 0);
		oldpg->flags &= ~PG_ANON;
		uvm_pagedequeue(oldpg);
	}
	oldpg->uanon = NULL;

	if (uobj) {
		/* if we were receiver of loan */
		KASSERT((oldpg->flags & PG_ANON) == 0);
		oldpg->loan_count--;
	}

	/* install new page in anon */
	anon->an_page = newpg;
	newpg->uanon = anon;
	newpg->flags |= PG_ANON;

	uvm_pageactivate(newpg);
	uvm_pageunlock2(oldpg, newpg);

	newpg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(newpg, NULL);

	if (uobj) {
		rw_exit(uobj->vmobjlock);
	}

	/* done! */
	kpreempt_disable();
	if (uobj == NULL) {
		CPU_COUNT(CPU_COUNT_ANONUNKNOWN + oldstatus, -1);
	}
	CPU_COUNT(CPU_COUNT_ANONDIRTY, 1);
	kpreempt_enable();
	return 0;
}