Home | History | Annotate | Line # | Download | only in uvm
uvm_pdaemon.c revision 1.1
      1 /*	$Id: uvm_pdaemon.c,v 1.1 1998/02/05 06:25:09 mrg Exp $	*/
      2 
      3 /*
      4  * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
      5  *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
      6  */
      7 /*
      8  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      9  * Copyright (c) 1991, 1993, The Regents of the University of California.
     10  *
     11  * All rights reserved.
     12  *
     13  * This code is derived from software contributed to Berkeley by
     14  * The Mach Operating System project at Carnegie-Mellon University.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. All advertising materials mentioning features or use of this software
     25  *    must display the following acknowledgement:
     26  *	This product includes software developed by Charles D. Cranor,
     27  *      Washington University, the University of California, Berkeley and
     28  *      its contributors.
     29  * 4. Neither the name of the University nor the names of its contributors
     30  *    may be used to endorse or promote products derived from this software
     31  *    without specific prior written permission.
     32  *
     33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     43  * SUCH DAMAGE.
     44  *
     45  *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
     46  *
     47  *
     48  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
     49  * All rights reserved.
     50  *
     51  * Permission to use, copy, modify and distribute this software and
     52  * its documentation is hereby granted, provided that both the copyright
     53  * notice and this permission notice appear in all copies of the
     54  * software, derivative works or modified versions, and any portions
     55  * thereof, and that both notices appear in supporting documentation.
     56  *
     57  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     58  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     59  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     60  *
     61  * Carnegie Mellon requests users of this software to return to
     62  *
     63  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     64  *  School of Computer Science
     65  *  Carnegie Mellon University
     66  *  Pittsburgh PA 15213-3890
     67  *
     68  * any improvements or extensions that they make and grant Carnegie the
     69  * rights to redistribute these changes.
     70  */
     71 
     72 /*
     73  * uvm_pdaemon.c: the page daemon
     74  */
     75 
     76 #include <sys/param.h>
     77 #include <sys/proc.h>
     78 #include <sys/systm.h>
     79 #include <sys/kernel.h>
     80 
     81 #include <vm/vm.h>
     82 #include <vm/vm_page.h>
     83 #include <vm/vm_kern.h>
     84 
     85 #include <uvm/uvm.h>
     86 
     87 UVMHIST_DECL(pdhist);
     88 
     89 /*
     90  * local prototypes
     91  */
     92 
     93 static void		uvmpd_scan __P((void));
     94 static boolean_t	uvmpd_scan_inactive __P((struct pglist *));
     95 static void		uvmpd_tune __P((void));
     96 
     97 
     98 /*
     99  * uvm_wait: wait (sleep) for the page daemon to free some pages
    100  *
    101  * => should be called with all locks released
    102  * => should _not_ be called by the page daemon (to avoid deadlock)
    103  */
    104 
    105 void uvm_wait(wmsg)
    106 
    107 char *wmsg;
    108 
    109 {
    110   int timo = 0;
    111   int s = splbio();
    112 
    113   /*
    114    * check for page daemon going to sleep (waiting for itself)
    115    */
    116 
    117   if (curproc == uvm.pagedaemon_proc) {
    118     /*
    119      * now we have a problem: the pagedaemon wants to go to sleep until
    120      * it frees more memory.   but how can it free more memory if it is
    121      * asleep?  that is a deadlock.   we have two options:
    122      *  [1] panic now
    123      *  [2] put a timeout on the sleep, thus causing the pagedaemon to
    124      *	    only pause (rather than sleep forever)
    125      *
    126      * note that option [2] will only help us if we get lucky and some
    127      * other process on the system breaks the deadlock by exiting or
    128      * freeing memory (thus allowing the pagedaemon to continue).
    129      * for now we panic if DEBUG is defined, otherwise we hope for the
    130      * best with option [2]  (better yet, this should never happen in
    131      * the first place!).
    132      */
    133 
    134     printf("pagedaemon: deadlock detected!\n");
    135     timo = hz >> 3;		/* set timeout */
    136 #if defined(DEBUG)
    137     panic("pagedaemon deadlock");	/* DEBUG: panic so we can debug it */
    138 #endif
    139   }
    140 
    141   simple_lock(&uvm.pagedaemon_lock);
    142   thread_wakeup(&uvm.pagedaemon);		/* wake the daemon! */
    143   UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg, timo);
    144 
    145   splx(s);
    146 }
    147 
    148 
    149 /*
    150  * uvmpd_tune: tune paging parameters
    151  *
    152  * => called whenever memory is added (or removed?) to the system
    153  * => caller must call with page queues locked
    154  */
    155 
    156 static void uvmpd_tune()
    157 
    158 {
    159   UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
    160 
    161   uvmexp.freemin = uvmexp.npages / 20;
    162   uvmexp.freemin = max(uvmexp.freemin, (16*1024)/PAGE_SIZE);  /* at least 16K */
    163   uvmexp.freemin = min(uvmexp.freemin, (256*1024)/PAGE_SIZE); /* at most 256K */
    164 
    165   uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
    166   if (uvmexp.freetarg <= uvmexp.freemin)
    167     uvmexp.freetarg = uvmexp.freemin + 1;
    168 
    169   /* uvmexp.inactarg: computed in main daemon loop */
    170 
    171   uvmexp.wiredmax = uvmexp.npages / 3;
    172   UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
    173 	      uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
    174 }
    175 
    176 /*
    177  * uvm_pageout: the main loop for the pagedaemon
    178  */
    179 
    180 void uvm_pageout()
    181 
    182 {
    183   int npages = 0;
    184   int s;
    185   struct uvm_aiodesc *aio, *nextaio;
    186   UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
    187 
    188   UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
    189 
    190   /*
    191    * ensure correct priority and set paging parameters...
    192    */
    193 
    194   uvm.pagedaemon_proc = curproc;
    195   (void) spl0();
    196   uvm_lock_pageq();
    197   npages = uvmexp.npages;
    198   uvmpd_tune();
    199   uvm_unlock_pageq();
    200 
    201   /*
    202    * main loop
    203    */
    204   while (TRUE) {
    205 
    206     /*
    207      * carefully attempt to go to sleep (without losing "wakeups"!).
    208      * we need splbio because we want to make sure the aio_done list
    209      * is totally empty before we go to sleep.
    210      */
    211 
    212     s = splbio();
    213     simple_lock(&uvm.pagedaemon_lock);
    214 
    215     /*
    216      * if we've got done aio's, then bypass the sleep
    217      */
    218 
    219     if (uvm.aio_done.tqh_first == NULL) {
    220       UVMHIST_LOG(maphist,"  <<SLEEPING>>",0,0,0,0);
    221       UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, &uvm.pagedaemon_lock, FALSE,
    222 			  "daemon_slp", 0);
    223       uvmexp.pdwoke++;
    224       UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
    225 
    226       /* relock pagedaemon_lock, still at splbio */
    227       simple_lock(&uvm.pagedaemon_lock);
    228     }
    229 
    230     /*
    231      * check for done aio structures
    232      */
    233 
    234     aio = uvm.aio_done.tqh_first;		/* save current list (if any)*/
    235     if (aio) {
    236       TAILQ_INIT(&uvm.aio_done);		/* zero global list */
    237     }
    238 
    239     simple_unlock(&uvm.pagedaemon_lock);	/* unlock */
    240     splx(s);					/* drop splbio */
    241 
    242     /*
    243      * first clear out any pending aios (to free space in case we
    244      * want to pageout more stuff).
    245      */
    246 
    247     for (/*null*/; aio != NULL ; aio = nextaio) {
    248 
    249       nextaio = aio->aioq.tqe_next;
    250       aio->aiodone(aio);
    251 
    252     }
    253 
    254     /*
    255      * now lock page queues and recompute inactive count
    256      */
    257     uvm_lock_pageq();
    258 
    259     if (npages != uvmexp.npages) {		/* check for new pages? */
    260       npages = uvmexp.npages;
    261       uvmpd_tune();
    262     }
    263 
    264     uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
    265     if (uvmexp.inactarg <= uvmexp.freetarg)
    266       uvmexp.inactarg = uvmexp.freetarg + 1;
    267 
    268     UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d, inact/itarg=%d/%d",
    269 	uvmexp.free, uvmexp.freetarg, uvmexp.inactive, uvmexp.inactarg);
    270 
    271     /*
    272      * scan if needed
    273      * [XXX: note we are reading uvm.free without locking]
    274      */
    275     if (uvmexp.free < uvmexp.freetarg || uvmexp.inactive < uvmexp.inactarg)
    276       uvmpd_scan();
    277 
    278     /*
    279      * done scan.  unlock page queues (the only lock we are holding).
    280      */
    281     uvm_unlock_pageq();
    282 
    283     /*
    284      * done!    restart loop.
    285      */
    286     thread_wakeup(&uvmexp.free);
    287   }
    288   /*NOTREACHED*/
    289 }
    290 
    291 /*
    292  * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
    293  * 	its own function for ease of reading.
    294  *
    295  * => called with page queues locked
    296  * => we work on meeting our free target by converting inactive pages
    297  *    into free pages.
    298  * => we handle the building of swap-backed clusters
    299  * => we return TRUE if we are exiting because we met our target
    300  */
    301 
    302 static boolean_t uvmpd_scan_inactive(pglst)
    303 /* walk one inactive queue: free clean pages, start pageout of dirty ones */
    304 struct pglist *pglst;	/* the inactive page queue to walk */
    305 
    306 {
    307   boolean_t retval = FALSE;	/* assume we haven't hit target */
    308   int s, free, result;		/* saved spl, copy of uvmexp.free, uvm_pager_put result */
    309   struct vm_page *p, *nextpg;	/* page under consideration, page to look at next */
    310   struct uvm_object *uobj;	/* p's object (locked) if object-owned, else NULL */
    311   struct vm_page *pps[MAXBSIZE/PAGE_SIZE], **ppsp;	/* cluster array for object pageout */
    312   int npages;			/* page count passed by reference to uvm_pager_put */
    313   struct vm_page *swpps[MAXBSIZE/PAGE_SIZE]; 		/* XXX: see below; pages of the swap cluster */
    314   int swnpages, swcpages;				/* XXX: see below; slots allocated, pages added so far */
    315   int swslot, oldslot;		/* first slot of cluster being built (0 = none), slot being replaced */
    316   struct vm_anon *anon;		/* p's anon (locked) if anon-owned or ownerless, else NULL */
    317   boolean_t swap_backed;	/* PQ_SWAPBACKED page, or final swap i/o with p == NULL */
    318   vm_offset_t start;		/* swslot for swap i/o, 0 for object pageout */
    319   UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
    320 
    321   /*
    322    * note: we currently keep swap-backed pages on a separate inactive
    323    * list from object-backed pages.   however, merging the two lists
    324    * back together again hasn't been ruled out.   thus, we keep our
    325    * swap cluster in "swpps" rather than in pps (allows us to mix clustering
    326    * types in the event of a mixed inactive queue).
    327    */
    328 
    329   /*
    330    * swslot is non-zero if we are building a swap cluster.  we want
    331    * to stay in the loop while we have a page to scan or we have
    332    * a swap-cluster to build.
    333    */
    334   swslot = 0;
    335   swnpages = swcpages = 0;
    336 
    337   for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {
    338 
    339     /*
    340      * note that p can be NULL iff we have traversed the whole
    341      * list and need to do one final swap-backed clustered pageout.
    342      */
    343     if (p) {
    344       /*
    345        * update our copy of "free" and see if we've met our target
    346        */
    347       s = splimp();		/* uvmexp.free is read at splimp with the free page queue locked */
    348       uvm_lock_fpageq();
    349       free = uvmexp.free;
    350       uvm_unlock_fpageq();
    351       splx(s);
    352 
    353       if (free >= uvmexp.freetarg) {
    354 	UVMHIST_LOG(pdhist,"  met free target: exit loop", 0, 0, 0, 0);
    355 	retval = TRUE;		/* hit the target! */
    356 	if (swslot == 0)
    357 	  break;	/* exit now if no swap-i/o pending */
    358 	p = NULL;	/* set p to null to signal final swap i/o */
    359       }
    360     }
    361 
    362     uobj = NULL;	/* be safe and shut gcc up */
    363     anon = NULL;	/* be safe and shut gcc up */
    364 
    365     if (p) {	/* if (we have a new page to consider) */
    366       /*
    367        * we are below target and have a new page to consider.
    368        */
    369       uvmexp.pdscans++;
    370       nextpg = p->pageq.tqe_next;	/* fetched now: every "continue" below relies on it */
    371 
    372       /*
    373        * move referenced pages back to active queue and skip to next page
    374        * (unlikely to happen since inactive pages shouldn't have any
    375        *  valid mappings and we cleared reference before deactivating).
    376        */
    377       if (pmap_is_referenced(PMAP_PGARG(p))) {
    378 	uvm_pageactivate(p);
    379 	uvmexp.pdreact++;
    380 	continue;
    381       }
    382 
    383       /*
    384        * first we attempt to lock the object that this page belongs to.
    385        * if our attempt fails we skip on to the next page (no harm done).
    386        * it is important to "try" locking the object as we are locking in the
    387        * wrong order (pageq -> object) and we don't want to get deadlocked.
    388        *
    389        * the only time we expect to see an ownerless page (i.e. a page
    390        * with no uobject and !PQ_ANON) is if an anon has loaned a page
    391        * from a uvm_object and the uvm_object has dropped the ownership.
    392        * in that case, the anon can "take over" the loaned page and
    393        * make it its own.
    394        */
    395 
    396       /* is page part of an anon or ownerless ? */
    397       if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
    398 
    399 	anon = p->uanon;
    400 
    401 #ifdef DIAGNOSTIC
    402         /* to be on inactive q, page must be part of _something_ */
    403         if (anon == NULL)
    404           panic("pagedaemon: page with no anon or object detected - loop 1");
    405 #endif
    406 
    407 	if (!simple_lock_try(&anon->an_lock))
    408 	  continue;		/* lock failed, skip this page */
    409 
    410         /* if the page is ownerless, claim it in the name of "anon"! */
    411         if ((p->pqflags & PQ_ANON) == 0) {
    412 #ifdef DIAGNOSTIC
    413           if (p->loan_count < 1)
    414             panic("pagedaemon: non-loaned ownerless page detected - loop 1");
    415 #endif
    416           p->loan_count--;	/* the anon becomes owner, so it no longer counts as a loan */
    417           p->pqflags |= PQ_ANON;      /* anon now owns it */
    418         }
    419 
    420 	if (p->flags & PG_BUSY) {
    421 	  simple_unlock(&anon->an_lock);
    422 	  uvmexp.pdbusy++;
    423 	  continue;		/* someone else owns page, skip it */
    424 	}
    425 
    426 	uvmexp.pdanscan++;
    427 
    428       } else {
    429 
    430 	uobj = p->uobject;
    431 
    432 	if (!simple_lock_try(&uobj->vmobjlock))
    433 	  continue;		/* lock failed, skip this page */
    434 
    435 	if (p->flags & PG_BUSY) {
    436 	  simple_unlock(&uobj->vmobjlock);
    437 	  uvmexp.pdbusy++;
    438 	  continue;		/* someone else owns page, skip it */
    439 	}
    440 
    441 	uvmexp.pdobscan++;
    442 
    443       }
    444 
    445       /*
    446        * we now have the object and the page queues locked.  the page is
    447        * not busy.   if the page is clean we can free it now and continue.
    448        * (exactly one of "anon" and "uobj" is non-NULL from here on.)
    449        */
    450       if (p->flags & PG_CLEAN) {
    451 	/* zap all mappings with pmap_page_protect... */
    452 	pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
    453 	uvm_pagefree(p);
    454 	uvmexp.pdfreed++;
    455 
    456 	if (anon) {
    457 #ifdef DIAGNOSTIC
    458 	  /*
    459 	   * an anonymous page can only be clean if it has valid
    460 	   * backing store.
    461 	   */
    462 	  if (anon->an_swslot == 0)
    463 	    panic("pagedaemon: clean anon page without backing store?");
    464 #endif
    465 	  anon->u.an_page = NULL;		/* remove from object */
    466 	  simple_unlock(&anon->an_lock);
    467 	} else {
    468 	  /* pagefree has already removed the page from the object */
    469 	  simple_unlock(&uobj->vmobjlock);
    470 	}
    471 	continue;
    472       }
    473 
    474       /*
    475        * the page we are looking at is dirty.   we must clean it before
    476        * it can be freed.  to do this we first mark the page busy so that
    477        * no one else will touch the page.   we write protect all the mappings
    478        * of the page so that no one touches it while it is in I/O.
    479        */
    480 
    481       swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
    482       uvmexp.pdpageouts++;
    483       p->flags |= PG_BUSY;		/* now we own it */
    484       UVM_PAGE_OWN(p, "scan_inactive");
    485       pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);
    486 
    487       /*
    488        * for swap-backed pages we need to (re)allocate swap space.
    489        */
    490       if (swap_backed) {
    491 
    492 	/*
    493 	 * free old swap slot (if any)
    494 	 */
    495 	if (anon) {
    496 	  if (anon->an_swslot) {
    497 	    uvm_swap_free(anon->an_swslot, 1);
    498 	    anon->an_swslot = 0;
    499 	  }
    500 	} else {
    501 	  oldslot = uao_set_swslot(uobj, p->offset/PAGE_SIZE, 0); /* remove */
    502 	  if (oldslot)
    503 	    uvm_swap_free(oldslot, 1); /* free */
    504 	}
    505 
    506 	/*
    507 	 * start new cluster (if necessary)
    508 	 */
    509 	if (swslot == 0) {
    510 	  swnpages = MAXBSIZE/PAGE_SIZE;	/* want this much */
    511 	  swslot = uvm_swap_alloc(&swnpages, TRUE);	/* swnpages is passed by reference */
    512 
    513 	  if (swslot == 0) {
    514 	    /* no swap?  give up!  (note: the old swap slot was already freed above) */
    515 	    p->flags &= ~PG_BUSY;
    516             UVM_PAGE_OWN(p, NULL);
    517 	    if (anon)
    518 	      simple_unlock(&anon->an_lock);
    519 	    else
    520 	      simple_unlock(&uobj->vmobjlock);
    521 	    continue;
    522 	  }
    523 	  swcpages = 0;	/* cluster is empty */
    524 	}
    525 
    526 	/*
    527 	 * add block to cluster: page i of the cluster gets slot swslot + i
    528 	 */
    529 	swpps[swcpages] = p;
    530 	if (anon)
    531 	  anon->an_swslot = swslot + swcpages;
    532 	else
    533 	  uao_set_swslot(uobj, p->offset/PAGE_SIZE, swslot + swcpages);
    534 	swcpages++;
    535 
    536 	/* done (swap-backed) */
    537       }
    538 
    539       /* end: if (p) [end of "if we have new page to consider"] */
    540     } else {
    541 
    542       swap_backed = TRUE; /* if p == NULL we must be doing a last swap i/o */
    543 
    544     }
    545 
    546     /*
    547      * now consider doing the pageout.
    548      *
    549      * for swap-backed pages, we do the pageout if we have either
    550      * filled the cluster (in which case swnpages == swcpages) or
    551      * run out of pages (p == NULL).
    552      *
    553      * for object pages, we always do the pageout.
    554      */
    555     if (swap_backed) {
    556 
    557       if (p) {	/* if we just added a page to cluster */
    558 	if (anon)
    559 	  simple_unlock(&anon->an_lock);
    560 	else
    561 	  simple_unlock(&uobj->vmobjlock);
    562 	if (swcpages < swnpages)	/* cluster not full yet? */
    563 	  continue;
    564       }
    565 
    566       /* starting I/O now... set up for it */
    567       npages = swcpages;
    568       ppsp = swpps;
    569       start = (vm_offset_t) swslot;	/* for swap-backed pages only */
    570 
    571       /* if this is final pageout we could have a few extra swap blocks */
    572       if (swcpages < swnpages) {
    573 	uvm_swap_free(swslot + swcpages, (swnpages - swcpages));	/* give back the unused tail */
    574       }
    575 
    576     } else {
    577 
    578       /* normal object pageout */
    579       ppsp = pps;
    580       npages = sizeof(pps) / sizeof(struct vm_page *);	/* number of entries in pps[] */
    581       start = 0;	/* not looked at because PGO_ALLPAGES is set */
    582 
    583     }
    584 
    585     /*
    586      * now do the pageout.
    587      *
    588      * for swap_backed pages we have already built the cluster.
    589      * for !swap_backed pages, uvm_pager_put will call the object's
    590      * "make put cluster" function to build a cluster on our behalf.
    591      *
    592      * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
    593      * it to free the cluster pages for us on a successful I/O (it always
    594      * does this for un-successful I/O requests).  this allows us to
    595      * do clustered pageout without having to deal with cluster pages
    596      * at this level.
    597      *
    598      * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
    599      *  IN:  locked: uobj (if !swap_backed), page queues
    600      * OUT:  locked: uobj (if !swap_backed && result != VM_PAGER_PEND)
    601      *      !locked: page queues, uobj (if swap_backed || VM_PAGER_PEND)
    602      *
    603      * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
    604      */
    605 
    606     /* locked: uobj (if !swap_backed), page queues */
    607     result = uvm_pager_put((swap_backed) ? NULL : uobj, p, &ppsp, &npages,
    608 			   PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
    609     /* locked: uobj (if !swap_backed && result != PEND) */
    610     /* unlocked: page queues, object (if swap_backed || result == PEND) */
    611 
    612     /*
    613      * if we did i/o to swap, zero swslot to indicate that we are
    614      * no longer building a swap-backed cluster.
    615      */
    616 
    617     if (swap_backed)
    618       swslot = 0;		/* done with this cluster */
    619 
    620     /*
    621      * first, we check for VM_PAGER_PEND which means that the async I/O
    622      * is in progress and the async I/O done routine will clean up
    623      * after us.   in this case we move on to the next page.
    624      *
    625      * there is a very remote chance that the pending async i/o can
    626      * finish _before_ we get here.   if that happens, our page "p"
    627      * may no longer be on the inactive queue.   so we verify this
    628      * when determining the next page (starting over at the head if
    629      * we've lost our inactive page).
    630      */
    631 
    632     if (result == VM_PAGER_PEND) {
    633       uvm_lock_pageq();				/* relock page queues */
    634       uvmexp.pdpending++;
    635       if (p) {
    636 	if (p->pqflags & PQ_INACTIVE)
    637 	  nextpg = p->pageq.tqe_next;		/* reload! */
    638 	else
    639 	  nextpg = pglst->tqh_first;		/* reload! */
    640       } else {
    641 	nextpg = NULL;				/* done list */
    642       }
    643       continue;
    644     }
    645 
    646     /*
    647      * clean up "p" if we have one
    648      */
    649 
    650     if (p) {
    651       /*
    652        * the I/O request to "p" is done and uvm_pager_put has freed
    653        * any cluster pages it may have allocated during I/O.  all
    654        * that is left for us to do is clean up page "p" (which is
    655        * still PG_BUSY).
    656        *
    657        * our result could be one of the following:
    658        *   VM_PAGER_OK: successful pageout
    659        *
    660        *   VM_PAGER_AGAIN: tmp resource shortage, we skip to next page
    661        *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we "reactivate"
    662        *		page to get it out of the way (it will eventually
    663        *		drift back into the inactive queue for a retry).
    664        *   VM_PAGER_UNLOCK: should never see this as it is only
    665        *		valid for "get" operations
    666        */
    667 
    668       /* relock p's object: page queues not locked yet, so no need for "try" */
    669       if (swap_backed) {	/* !swap_backed case: already locked... */
    670 	if (anon)
    671 	  simple_lock(&anon->an_lock);
    672 	else
    673 	  simple_lock(&uobj->vmobjlock);
    674       }
    675 
    676 #ifdef DIAGNOSTIC
    677       if (result == VM_PAGER_UNLOCK)
    678 	panic("pagedaemon: pageout returned invalid 'unlock' code");
    679 #endif
    680 
    681       /* handle PG_WANTED now */
    682       if (p->flags & PG_WANTED)
    683 	thread_wakeup(p);			/* still holding object lock */
    684       p->flags &= ~(PG_BUSY|PG_WANTED);
    685       UVM_PAGE_OWN(p, NULL);
    686 
    687       /* released during I/O? */
    688       if (p->flags & PG_RELEASED) {
    689 	if (anon) {
    690 	  anon->u.an_page = NULL;	/* remove page so we can get nextpg */
    691 	  simple_unlock(&anon->an_lock);/* XXX needed? */
    692 	  uvm_anfree(anon);		/* kills anon */
    693 	  pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
    694 	  anon = NULL;			/* so the unlock at the end of this branch is skipped */
    695 	  uvm_lock_pageq();
    696 	  nextpg = p->pageq.tqe_next;	/* read before p is freed */
    697 	  uvm_pagefree(p);			/* free released page */
    698 
    699 	} else {
    700 
    701 #ifdef DIAGNOSTIC
    702 	  if (uobj->pgops->pgo_releasepg == NULL)
    703 	    panic("pagedaemon: no pgo_releasepg function");
    704 #endif
    705 
    706 	  /*
    707 	   * pgo_releasepg nukes the page and gets "nextpg" for us.
    708 	   * it returns with the page queues locked (when given nextpg ptr).
    709 	   */
    710 	  if (!uobj->pgops->pgo_releasepg(p, &nextpg))
    711 	    uobj = NULL;			/* uobj died after release */
    712 
    713 	}
    714 
    715       } else {	/* page was not released during I/O */
    716 
    717 	uvm_lock_pageq();
    718 	nextpg = p->pageq.tqe_next;	/* read before p may be moved to the active queue */
    719 
    720 	if (result != VM_PAGER_OK) {
    721 
    722 	  /* pageout was a failure... */
    723 	  if (result != VM_PAGER_AGAIN)
    724 	    uvm_pageactivate(p);
    725 	  pmap_clear_reference(PMAP_PGARG(p));
    726 	  /* XXXCDC: if (swap_backed) FREE p's swap block? */
    727 
    728 	} else {
    729 
    730 	  /* pageout was a success... */
    731 	  pmap_clear_reference(PMAP_PGARG(p));
    732 	  pmap_clear_modify(PMAP_PGARG(p));
    733 	  p->flags |= PG_CLEAN;
    734 	  /* XXX: could free page here, but old pagedaemon does not */
    735 
    736 	}
    737       }
    738 
    739       /*
    740        * drop object lock (if there is an object left).   do a safety
    741        * check of nextpg to make sure it is on the inactive queue
    742        * (it should be since PG_BUSY pages on the inactive queue can't
    743        * be re-queued [note: not true for active queue]).
    744        */
    745 
    746       if (anon)
    747 	simple_unlock(&anon->an_lock);
    748       else if (uobj)
    749 	simple_unlock(&uobj->vmobjlock);
    750 
    751     } /* if (p) */ else {
    752 
    753       /* if p is null in this loop, make sure it stays null in next loop */
    754       nextpg = NULL;
    755 
    756     }
    757 
    758     if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
    759       printf("pagedaemon: invalid nextpg!   reverting to queue head\n");
    760       nextpg = pglst->tqh_first;	/* reload! */
    761     }
    762 
    763     continue;
    764   }		/* end of "inactive" 'for' loop */
    765   return(retval);	/* TRUE only if the free target was seen to be met */
    766 }
    767 
    768 /*
    769  * uvmpd_scan: scan the page queues and attempt to meet our targets.
    770  *
    771  * => called with pageq's locked
    772  */
    773 
    774 void uvmpd_scan()
    775 
    776 {
    777   int s, free, pages_freed, page_shortage;
    778   struct vm_page *p, *nextpg;
    779   struct uvm_object *uobj;
    780   boolean_t got_it;
    781   UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
    782 
    783   uvmexp.pdrevs++;		/* counter */
    784 
    785 #ifdef __GNUC__
    786   uobj = NULL;	/* XXX gcc */
    787 #endif
    788   /*
    789    * get current "free" page count
    790    */
    791   s = splimp();
    792   uvm_lock_fpageq();
    793   free = uvmexp.free;
    794   uvm_unlock_fpageq();
    795   splx(s);
    796 
    797 #ifndef __SWAP_BROKEN
    798   /*
    799    * swap out some processes if we are below our free target.
    800    * we need to unlock the page queues for this.
    801    */
    802   if (free < uvmexp.freetarg) {
    803 
    804     uvmexp.pdswout++;
    805     UVMHIST_LOG(pdhist,"  free %d < target %d: swapout", free, uvmexp.freetarg,
    806 		0,0);
    807     uvm_unlock_pageq();
    808     uvm_swapout_threads();
    809     pmap_update();		/* update so we can scan inactive q */
    810     uvm_lock_pageq();
    811 
    812   }
    813 #endif
    814 
    815   /*
    816    * now we want to work on meeting our targets.   first we work on our
    817    * free target by converting inactive pages into free pages.  then
    818    * we work on meeting our inactive target by converting active pages
    819    * to inactive ones.
    820    */
    821 
    822   UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);
    823   pages_freed = uvmexp.pdfreed;	/* so far... */
    824 
    825   /*
    826    * do loop #1!   alternate starting queue between swap and object based
    827    * on the low bit of uvmexp.pdrevs (which we bump by one each call).
    828    */
    829 
    830   got_it = FALSE;
    831   if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
    832     got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
    833   if (!got_it)
    834     got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
    835   if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
    836     (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
    837 
    838   /*
    839    * we have done the scan to get free pages.   now we work on meeting
    840    * our inactive target.
    841    */
    842 
    843   page_shortage = uvmexp.inactarg - uvmexp.inactive;
    844   pages_freed = uvmexp.pdfreed - pages_freed; /* # pages freed in loop */
    845   if (page_shortage <= 0 && pages_freed == 0)
    846     page_shortage = 1;
    847 
    848   UVMHIST_LOG(pdhist, "  second loop: page_shortage=%d", page_shortage,0,0,0);
    849   for (p = uvm.page_active.tqh_first ;
    850        p != NULL && page_shortage > 0 ; p = nextpg) {
    851 
    852     nextpg = p->pageq.tqe_next;
    853     if (p->flags & PG_BUSY)
    854       continue;			/* quick check before trying to lock */
    855 
    856     /*
    857      * lock owner
    858      */
    859     /* is page anon owned or ownerless? */
    860     if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
    861 
    862 #ifdef DIAGNOSTIC
    863       if (p->uanon == NULL)
    864         panic("pagedaemon: page with no anon or object detected - loop 2");
    865 #endif
    866 
    867       if (!simple_lock_try(&p->uanon->an_lock))
    868         continue;
    869 
    870       /* take over the page? */
    871       if ((p->pqflags & PQ_ANON) == 0) {
    872 
    873 #ifdef DIAGNOSTIC
    874         if (p->loan_count < 1)
    875           panic("pagedaemon: non-loaned ownerless page detected - loop 2");
    876 #endif
    877 
    878         p->loan_count--;
    879         p->pqflags |= PQ_ANON;
    880       }
    881 
    882     } else {
    883 
    884       if (!simple_lock_try(&p->uobject->vmobjlock))
    885         continue;
    886 
    887     }
    888 
    889     if ((p->flags & PG_BUSY) == 0) {
    890       pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
    891       /* no need to check wire_count as pg is "active" */
    892       uvm_pagedeactivate(p);
    893       uvmexp.pddeact++;
    894       page_shortage--;
    895     }
    896 
    897     if (p->pqflags & PQ_ANON)
    898       simple_unlock(&p->uanon->an_lock);
    899     else
    900       simple_unlock(&p->uobject->vmobjlock);
    901   }
    902 
    903   /*
    904    * done scan
    905    */
    906 }
    907