Home | History | Annotate | Line # | Download | only in uvm
uvm_pdaemon.c revision 1.3
      1 /*	$NetBSD: uvm_pdaemon.c,v 1.3 1998/02/07 02:35:11 chs Exp $	*/
      2 
      3 /*
      4  * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
      5  *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
      6  */
      7 /*
      8  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      9  * Copyright (c) 1991, 1993, The Regents of the University of California.
     10  *
     11  * All rights reserved.
     12  *
     13  * This code is derived from software contributed to Berkeley by
     14  * The Mach Operating System project at Carnegie-Mellon University.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. All advertising materials mentioning features or use of this software
     25  *    must display the following acknowledgement:
     26  *	This product includes software developed by Charles D. Cranor,
     27  *      Washington University, the University of California, Berkeley and
     28  *      its contributors.
     29  * 4. Neither the name of the University nor the names of its contributors
     30  *    may be used to endorse or promote products derived from this software
     31  *    without specific prior written permission.
     32  *
     33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     43  * SUCH DAMAGE.
     44  *
     45  *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
     46  *
     47  *
     48  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
     49  * All rights reserved.
     50  *
     51  * Permission to use, copy, modify and distribute this software and
     52  * its documentation is hereby granted, provided that both the copyright
     53  * notice and this permission notice appear in all copies of the
     54  * software, derivative works or modified versions, and any portions
     55  * thereof, and that both notices appear in supporting documentation.
     56  *
     57  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     58  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     59  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     60  *
     61  * Carnegie Mellon requests users of this software to return to
     62  *
     63  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     64  *  School of Computer Science
     65  *  Carnegie Mellon University
     66  *  Pittsburgh PA 15213-3890
     67  *
     68  * any improvements or extensions that they make and grant Carnegie the
     69  * rights to redistribute these changes.
     70  */
     71 
     72 /*
     73  * uvm_pdaemon.c: the page daemon
     74  */
     75 
     76 #include <sys/param.h>
     77 #include <sys/proc.h>
     78 #include <sys/systm.h>
     79 #include <sys/kernel.h>
     80 
     81 #include <vm/vm.h>
     82 #include <vm/vm_page.h>
     83 #include <vm/vm_kern.h>
     84 
     85 #include <uvm/uvm.h>
     86 
/*
 * pdhist: the history that the UVMHIST_CALLED/UVMHIST_LOG calls made by
 * the page daemon functions in this file refer to.
 */
UVMHIST_DECL(pdhist);

/*
 * local prototypes
 *
 * all three functions are private to this file; uvmpd_scan and
 * uvmpd_tune are called from uvm_pageout, and uvmpd_scan_inactive is
 * called from uvmpd_scan.
 */

static void		uvmpd_scan __P((void));
static boolean_t	uvmpd_scan_inactive __P((struct pglist *));
static void		uvmpd_tune __P((void));
     96 
     97 
     98 /*
     99  * uvm_wait: wait (sleep) for the page daemon to free some pages
    100  *
    101  * => should be called with all locks released
    102  * => should _not_ be called by the page daemon (to avoid deadlock)
    103  */
    104 
    105 void uvm_wait(wmsg)
    106 
    107 char *wmsg;
    108 
    109 {
    110   int timo = 0;
    111   int s = splbio();
    112 
    113   /*
    114    * check for page daemon going to sleep (waiting for itself)
    115    */
    116 
    117   if (curproc == uvm.pagedaemon_proc) {
    118     /*
    119      * now we have a problem: the pagedaemon wants to go to sleep until
    120      * it frees more memory.   but how can it free more memory if it is
    121      * asleep?  that is a deadlock.   we have two options:
    122      *  [1] panic now
    123      *  [2] put a timeout on the sleep, thus causing the pagedaemon to
    124      *	    only pause (rather than sleep forever)
    125      *
    126      * note that option [2] will only help us if we get lucky and some
    127      * other process on the system breaks the deadlock by exiting or
    128      * freeing memory (thus allowing the pagedaemon to continue).
    129      * for now we panic if DEBUG is defined, otherwise we hope for the
    130      * best with option [2]  (better yet, this should never happen in
    131      * the first place!).
    132      */
    133 
    134     printf("pagedaemon: deadlock detected!\n");
    135     timo = hz >> 3;		/* set timeout */
    136 #if defined(DEBUG)
    137     panic("pagedaemon deadlock");	/* DEBUG: panic so we can debug it */
    138 #endif
    139   }
    140 
    141   simple_lock(&uvm.pagedaemon_lock);
    142   thread_wakeup(&uvm.pagedaemon);		/* wake the daemon! */
    143   UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg, timo);
    144 
    145   splx(s);
    146 }
    147 
    148 
    149 /*
    150  * uvmpd_tune: tune paging parameters
    151  *
    152  * => called when ever memory is added (or removed?) to the system
    153  * => caller must call with page queues locked
    154  */
    155 
    156 static void uvmpd_tune()
    157 
    158 {
    159   UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
    160 
    161   uvmexp.freemin = uvmexp.npages / 20;
    162   uvmexp.freemin = max(uvmexp.freemin, (16*1024)/PAGE_SIZE);  /* at least 16K */
    163   uvmexp.freemin = min(uvmexp.freemin, (256*1024)/PAGE_SIZE); /* at most 256K */
    164 
    165   uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
    166   if (uvmexp.freetarg <= uvmexp.freemin)
    167     uvmexp.freetarg = uvmexp.freemin + 1;
    168 
    169   /* uvmexp.inactarg: computed in main daemon loop */
    170 
    171   uvmexp.wiredmax = uvmexp.npages / 3;
    172   UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
    173 	      uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
    174 }
    175 
    176 /*
    177  * uvm_pageout: the main loop for the pagedaemon
    178  */
    179 
    180 void uvm_pageout()
    181 
    182 {
    183   int npages = 0;
    184   int s;
    185   struct uvm_aiodesc *aio, *nextaio;
    186   UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
    187 
    188   UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
    189 
    190   /*
    191    * ensure correct priority and set paging parameters...
    192    */
    193 
    194   uvm.pagedaemon_proc = curproc;
    195   (void) spl0();
    196   uvm_lock_pageq();
    197   npages = uvmexp.npages;
    198   uvmpd_tune();
    199   uvm_unlock_pageq();
    200 
    201   /*
    202    * main loop
    203    */
    204   while (TRUE) {
    205 
    206     /*
    207      * carefully attempt to go to sleep (without losing "wakeups"!).
    208      * we need splbio because we want to make sure the aio_done list
    209      * is totally empty before we go to sleep.
    210      */
    211 
    212     s = splbio();
    213     simple_lock(&uvm.pagedaemon_lock);
    214 
    215     /*
    216      * if we've got done aio's, then bypass the sleep
    217      */
    218 
    219     if (uvm.aio_done.tqh_first == NULL) {
    220       UVMHIST_LOG(maphist,"  <<SLEEPING>>",0,0,0,0);
    221       UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, &uvm.pagedaemon_lock, FALSE,
    222 			  "daemon_slp", 0);
    223       uvmexp.pdwoke++;
    224       UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
    225 
    226       /* relock pagedaemon_lock, still at splbio */
    227       simple_lock(&uvm.pagedaemon_lock);
    228     }
    229 
    230     /*
    231      * check for done aio structures
    232      */
    233 
    234     aio = uvm.aio_done.tqh_first;		/* save current list (if any)*/
    235     if (aio) {
    236       TAILQ_INIT(&uvm.aio_done);		/* zero global list */
    237     }
    238 
    239     simple_unlock(&uvm.pagedaemon_lock);	/* unlock */
    240     splx(s);					/* drop splbio */
    241 
    242     /*
    243      * first clear out any pending aios (to free space in case we
    244      * want to pageout more stuff).
    245      */
    246 
    247     for (/*null*/; aio != NULL ; aio = nextaio) {
    248 
    249       uvmexp.paging -= aio->npages;
    250       nextaio = aio->aioq.tqe_next;
    251       aio->aiodone(aio);
    252 
    253     }
    254 
    255     /*
    256      * now lock page queues and recompute inactive count
    257      */
    258     uvm_lock_pageq();
    259 
    260     if (npages != uvmexp.npages) {		/* check for new pages? */
    261       npages = uvmexp.npages;
    262       uvmpd_tune();
    263     }
    264 
    265     uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
    266     if (uvmexp.inactarg <= uvmexp.freetarg)
    267       uvmexp.inactarg = uvmexp.freetarg + 1;
    268 
    269     UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d, inact/itarg=%d/%d",
    270 	uvmexp.free, uvmexp.freetarg, uvmexp.inactive, uvmexp.inactarg);
    271 
    272     /*
    273      * scan if needed
    274      * [XXX: note we are reading uvm.free without locking]
    275      */
    276     if (uvmexp.free < uvmexp.freetarg || uvmexp.inactive < uvmexp.inactarg)
    277       uvmpd_scan();
    278 
    279     /*
    280      * done scan.  unlock page queues (the only lock we are holding).
    281      */
    282     uvm_unlock_pageq();
    283 
    284     /*
    285      * done!    restart loop.
    286      */
    287     thread_wakeup(&uvmexp.free);
    288   }
    289   /*NOTREACHED*/
    290 }
    291 
/*
 * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
 * 	its own function for ease of reading.
 *
 * => pglst: the inactive page list to scan
 * => called with page queues locked
 * => returns with page queues locked (uvm_pager_put drops them, and
 *    every path that follows the call takes them again)
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return TRUE if we are exiting because we met our target
 */

static boolean_t uvmpd_scan_inactive(pglst)

struct pglist *pglst;

{
  boolean_t retval = FALSE;	/* assume we haven't hit target */
  int s, free, result;
  struct vm_page *p, *nextpg;
  struct uvm_object *uobj;
  /* pps: page array handed to uvm_pager_put for object pageouts */
  struct vm_page *pps[MAXBSIZE/PAGE_SIZE], **ppsp;
  int npages;
  /* swpps: the pages of the swap cluster currently being built */
  struct vm_page *swpps[MAXBSIZE/PAGE_SIZE]; 		/* XXX: see below */
  /* swnpages: slots in the cluster, swcpages: slots filled so far */
  int swnpages, swcpages;				/* XXX: see below */
  int swslot, oldslot;
  struct vm_anon *anon;
  boolean_t swap_backed;
  vm_offset_t start;
  UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);

  /*
   * note: we currently keep swap-backed pages on a separate inactive
   * list from object-backed pages.   however, merging the two lists
   * back together again hasn't been ruled out.   thus, we keep our
   * swap cluster in "swpps" rather than in pps (allows us to mix clustering
   * types in the event of a mixed inactive queue).
   */

  /*
   * swslot is non-zero if we are building a swap cluster.  we want
   * to stay in the loop while we have a page to scan or we have
   * a swap-cluster to build.
   */
  swslot = 0;
  swnpages = swcpages = 0;
  free = 0;

  for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {

    /*
     * note that p can be NULL iff we have traversed the whole
     * list and need to do one final swap-backed clustered pageout.
     */
    if (p) {
      /*
       * update our copy of "free" and see if we've met our target
       */
      s = splimp();
      uvm_lock_fpageq();
      free = uvmexp.free;
      uvm_unlock_fpageq();
      splx(s);

      if (free >= uvmexp.freetarg) {
	UVMHIST_LOG(pdhist,"  met free target: exit loop", 0, 0, 0, 0);
	retval = TRUE;		/* hit the target! */
	if (swslot == 0)
	  break;	/* exit now if no swap-i/o pending */
	p = NULL;	/* set p to null to signal final swap i/o */
      }
    }

    uobj = NULL;	/* be safe and shut gcc up */
    anon = NULL;	/* be safe and shut gcc up */

    if (p) {	/* if (we have a new page to consider) */
      /*
       * we are below target and have a new page to consider.
       */
      uvmexp.pdscans++;
      /* nextpg is set before any "continue" below can be reached */
      nextpg = p->pageq.tqe_next;

      /*
       * move referenced pages back to active queue and skip to next page
       * (unlikely to happen since inactive pages shouldn't have any
       *  valid mappings and we cleared reference before deactivating).
       */
      if (pmap_is_referenced(PMAP_PGARG(p))) {
	uvm_pageactivate(p);
	uvmexp.pdreact++;
	continue;
      }

      /*
       * first we attempt to lock the object that this page belongs to.
       * if our attempt fails we skip on to the next page (no harm done).
       * it is important to "try" locking the object as we are locking in the
       * wrong order (pageq -> object) and we don't want to get deadlocked.
       *
       * the only time we expect to see an ownerless page (i.e. a page
       * with no uobject and !PQ_ANON) is if an anon has loaned a page
       * from a uvm_object and the uvm_object has dropped the ownership.
       * in that case, the anon can "take over" the loaned page and
       * make it its own.
       */

      /* is page part of an anon or ownerless ? */
      if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {

	anon = p->uanon;

#ifdef DIAGNOSTIC
        /* to be on inactive q, page must be part of _something_ */
        if (anon == NULL)
          panic("pagedaemon: page with no anon or object detected - loop 1");
#endif

	if (!simple_lock_try(&anon->an_lock))
	  continue;		/* lock failed, skip this page */

        /* if the page is ownerless, claim it in the name of "anon"! */
        if ((p->pqflags & PQ_ANON) == 0) {
#ifdef DIAGNOSTIC
          if (p->loan_count < 1)
            panic("pagedaemon: non-loaned ownerless page detected - loop 1");
#endif
          p->loan_count--;
          p->pqflags |= PQ_ANON;      /* anon now owns it */
        }

	if (p->flags & PG_BUSY) {
	  simple_unlock(&anon->an_lock);
	  uvmexp.pdbusy++;
	  continue;		/* someone else owns page, skip it */
	}

	uvmexp.pdanscan++;

      } else {

	uobj = p->uobject;

	if (!simple_lock_try(&uobj->vmobjlock))
	  continue;		/* lock failed, skip this page */

	if (p->flags & PG_BUSY) {
	  simple_unlock(&uobj->vmobjlock);
	  uvmexp.pdbusy++;
	  continue;		/* someone else owns page, skip it */
	}

	uvmexp.pdobscan++;

      }

      /*
       * we now have the object and the page queues locked.  the page is
       * not busy.   if the page is clean we can free it now and continue.
       *
       * from here on exactly one of "anon" and "uobj" is non-NULL, and
       * it names the owner whose lock we hold.
       */

      if (p->flags & PG_CLEAN) {
	/* zap all mappings with pmap_page_protect... */
	pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
	uvm_pagefree(p);
	uvmexp.pdfreed++;

	if (anon) {
#ifdef DIAGNOSTIC
	  /*
	   * an anonymous page can only be clean if it has valid
	   * backing store.
	   */
	  if (anon->an_swslot == 0)
	    panic("pagedaemon: clean anon page without backing store?");
#endif
	  anon->u.an_page = NULL;		/* remove from object */
	  simple_unlock(&anon->an_lock);
	} else {
	  /* pagefree has already removed the page from the object */
	  simple_unlock(&uobj->vmobjlock);
	}
	continue;
      }

      /*
       * this page is dirty, skip it if we'll have met
       * our free target when all the current pageouts complete.
       */
      if (free + uvmexp.paging > uvmexp.freetarg)
      {
	if (anon) {
	  simple_unlock(&anon->an_lock);
	} else {
	  simple_unlock(&uobj->vmobjlock);
	}
	continue;
      }

      /*
       * the page we are looking at is dirty.   we must clean it before
       * it can be freed.  to do this we first mark the page busy so that
       * no one else will touch the page.   we write protect all the mappings
       * of the page so that no one touches it while it is in I/O.
       */

      swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
      uvmexp.pdpageouts++;
      p->flags |= PG_BUSY;		/* now we own it */
      UVM_PAGE_OWN(p, "scan_inactive");
      pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);

      /*
       * for swap-backed pages we need to (re)allocate swap space.
       */
      if (swap_backed) {

	/*
	 * free old swap slot (if any)
	 */
	if (anon) {
	  if (anon->an_swslot) {
	    uvm_swap_free(anon->an_swslot, 1);
	    anon->an_swslot = 0;
	  }
	} else {
	  oldslot = uao_set_swslot(uobj, p->offset/PAGE_SIZE, 0); /* remove */
	  if (oldslot)
	    uvm_swap_free(oldslot, 1); /* free */
	}

	/*
	 * start new cluster (if necessary)
	 */
	if (swslot == 0) {
	  swnpages = MAXBSIZE/PAGE_SIZE;	/* want this much */
	  swslot = uvm_swap_alloc(&swnpages, TRUE);

	  if (swslot == 0) {
	    /* no swap?  give up!   un-busy the page and move on. */
	    p->flags &= ~PG_BUSY;
            UVM_PAGE_OWN(p, NULL);
	    if (anon)
	      simple_unlock(&anon->an_lock);
	    else
	      simple_unlock(&uobj->vmobjlock);
	    continue;
	  }
	  swcpages = 0;	/* cluster is empty */
	}

	/*
	 * add block to cluster: the page takes the next unused slot
	 * (swslot + swcpages) of the cluster.
	 */
	swpps[swcpages] = p;
	if (anon)
	  anon->an_swslot = swslot + swcpages;
	else
	  uao_set_swslot(uobj, p->offset/PAGE_SIZE, swslot + swcpages);
	swcpages++;

	/* done (swap-backed) */
      }

      /* end: if (p) [end of "if we have new page to consider"] */
    } else {

      swap_backed = TRUE; /* if p == NULL we must be doing a last swap i/o */

    }

    /*
     * now consider doing the pageout.
     *
     * for swap-backed pages, we do the pageout if we have either
     * filled the cluster (in which case (swnpages == swcpages) or
     * run out of pages (p == NULL).
     *
     * for object pages, we always do the pageout.
     */
    if (swap_backed) {

      if (p) {	/* if we just added a page to cluster */
	if (anon)
	  simple_unlock(&anon->an_lock);
	else
	  simple_unlock(&uobj->vmobjlock);
	if (swcpages < swnpages)	/* cluster not full yet? */
	  continue;
      }

      /* starting I/O now... set up for it */
      npages = swcpages;
      ppsp = swpps;
      start = (vm_offset_t) swslot;	/* for swap-backed pages only */

      /* if this is final pageout we could have a few extra swap blocks */
      if (swcpages < swnpages) {
	uvm_swap_free(swslot + swcpages, (swnpages - swcpages));
      }

    } else {

      /* normal object pageout */
      ppsp = pps;
      npages = sizeof(pps) / sizeof(struct vm_page *);
      start = 0;	/* not looked at because PGO_ALLPAGES is set */

    }

    /*
     * now do the pageout.
     *
     * for swap_backed pages we have already built the cluster.
     * for !swap_backed pages, uvm_pager_put will call the object's
     * "make put cluster" function to build a cluster on our behalf.
     *
     * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
     * it to free the cluster pages for us on a successful I/O (it always
     * does this for un-successful I/O requests).  this allows us to
     * do clustered pageout without having to deal with cluster pages
     * at this level.
     *
     * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
     *  IN:  locked: uobj (if !swap_backed), page queues
     * OUT:  locked: uobj (if !swap_backed && result != VM_PAGER_PEND)
     *      !locked: page queues, uobj (if swap_backed || VM_PAGER_PEND)
     *
     * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
     */

    /* locked: uobj (if !swap_backed), page queues */
    result = uvm_pager_put((swap_backed) ? NULL : uobj, p, &ppsp, &npages,
			   PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
    /* locked: uobj (if !swap_backed && result != PEND) */
    /* unlocked: page queues, object (if swap_backed || result == PEND) */

    /*
     * if we did i/o to swap, zero swslot to indicate that we are
     * no longer building a swap-backed cluster.
     */

    if (swap_backed)
      swslot = 0;		/* done with this cluster */

    /*
     * first, we check for VM_PAGER_PEND which means that the async I/O
     * is in progress and the async I/O done routine will clean up
     * after us.   in this case we move on to the next page.
     *
     * there is a very remote chance that the pending async i/o can
     * finish _before_ we get here.   if that happens, our page "p"
     * may no longer be on the inactive queue.   so we verify this
     * when determining the next page (starting over at the head if
     * we've lost our inactive page).
     */

    if (result == VM_PAGER_PEND) {
      /* uvm_pageout subtracts these again as the aios complete */
      uvmexp.paging += npages;
      uvm_lock_pageq();				/* relock page queues */
      uvmexp.pdpending++;
      if (p) {
	if (p->pqflags & PQ_INACTIVE)
	  nextpg = p->pageq.tqe_next;		/* reload! */
	else
	  nextpg = pglst->tqh_first;		/* reload! */
      } else {
	nextpg = NULL;				/* done list */
      }
      continue;
    }

    /*
     * clean up "p" if we have one
     */

    if (p) {
      /*
       * the I/O request to "p" is done and uvm_pager_put has freed
       * any cluster pages it may have allocated during I/O.  all
       * that is left for us to do is clean up page "p" (which is
       * still PG_BUSY).
       *
       * our result could be one of the following:
       *   VM_PAGER_OK: successful pageout
       *
       *   VM_PAGER_AGAIN: tmp resource shortage, we skip to next page
       *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we "reactivate"
       *		page to get it out of the way (it will eventually
       *		drift back into the inactive queue for a retry).
       *   VM_PAGER_UNLOCK: should never see this as it is only
       *		valid for "get" operations
       */

      /* relock p's object: page queues not lock yet, so no need for "try" */
      if (swap_backed) {	/* !swap_backed case: already locked... */
	if (anon)
	  simple_lock(&anon->an_lock);
	else
	  simple_lock(&uobj->vmobjlock);
      }

#ifdef DIAGNOSTIC
      if (result == VM_PAGER_UNLOCK)
	panic("pagedaemon: pageout returned invalid 'unlock' code");
#endif

      /* handle PG_WANTED now */
      if (p->flags & PG_WANTED)
	thread_wakeup(p);			/* still holding object lock */
      p->flags &= ~(PG_BUSY|PG_WANTED);
      UVM_PAGE_OWN(p, NULL);

      /* released during I/O? */
      if (p->flags & PG_RELEASED) {
	if (anon) {
	  anon->u.an_page = NULL;	/* remove page so we can get nextpg */
	  simple_unlock(&anon->an_lock);/* XXX needed? */
	  uvm_anfree(anon);		/* kills anon */
	  pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
	  anon = NULL;			/* so we don't unlock it below */
	  uvm_lock_pageq();
	  nextpg = p->pageq.tqe_next;
	  uvm_pagefree(p);			/* free released page */

	} else {

#ifdef DIAGNOSTIC
	  if (uobj->pgops->pgo_releasepg == NULL)
	    panic("pagedaemon: no pgo_releasepg function");
#endif

	  /*
	   * pgo_releasepg nukes the page and gets "nextpg" for us.
	   * it returns with the page queues locked (when given nextpg ptr).
	   */
	  if (!uobj->pgops->pgo_releasepg(p, &nextpg))
	    uobj = NULL;			/* uobj died after release */

	  /*
	   * lock page queues here so that they're always locked
	   * at the end of the loop.
	   *
	   * NOTE(review): the comment above says pgo_releasepg already
	   * returns with the page queues locked when handed a nextpg
	   * pointer; if that is so, this takes the lock a second time.
	   * confirm against the pgo_releasepg implementations.
	   */
	  uvm_lock_pageq();
	}

      } else {	/* page was not released during I/O */

	uvm_lock_pageq();
	nextpg = p->pageq.tqe_next;

	if (result != VM_PAGER_OK) {

	  /* pageout was a failure... */
	  if (result != VM_PAGER_AGAIN)
	    uvm_pageactivate(p);
	  pmap_clear_reference(PMAP_PGARG(p));
	  /* XXXCDC: if (swap_backed) FREE p's swap block? */

	} else {

	  /* pageout was a success... */
	  pmap_clear_reference(PMAP_PGARG(p));
	  pmap_clear_modify(PMAP_PGARG(p));
	  p->flags |= PG_CLEAN;
	  /* XXX: could free page here, but old pagedaemon does not */

	}
      }

      /*
       * drop object lock (if there is an object left).   do a safety
       * check of nextpg to make sure it is on the inactive queue
       * (it should be since PG_BUSY pages on the inactive queue can't
       * be re-queued [note: not true for active queue]).
       */

      if (anon)
	simple_unlock(&anon->an_lock);
      else if (uobj)
	simple_unlock(&uobj->vmobjlock);

    } /* if (p) */ else {

      /* if p is null in this loop, make sure it stays null in next loop */
      nextpg = NULL;

      /*
       * lock page queues here just so they're always locked
       * at the end of the loop.
       */
      uvm_lock_pageq();
    }

    /* the safety check of nextpg described above */
    if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
      printf("pagedaemon: invalid nextpg!   reverting to queue head\n");
      nextpg = pglst->tqh_first;	/* reload! */
    }

  }		/* end of "inactive" 'for' loop */
  return(retval);
}
    793 
    794 /*
    795  * uvmpd_scan: scan the page queues and attempt to meet our targets.
    796  *
    797  * => called with pageq's locked
    798  */
    799 
    800 void uvmpd_scan()
    801 
    802 {
    803   int s, free, pages_freed, page_shortage;
    804   struct vm_page *p, *nextpg;
    805   struct uvm_object *uobj;
    806   boolean_t got_it;
    807   UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
    808 
    809   uvmexp.pdrevs++;		/* counter */
    810 
    811 #ifdef __GNUC__
    812   uobj = NULL;	/* XXX gcc */
    813 #endif
    814   /*
    815    * get current "free" page count
    816    */
    817   s = splimp();
    818   uvm_lock_fpageq();
    819   free = uvmexp.free;
    820   uvm_unlock_fpageq();
    821   splx(s);
    822 
    823 #ifndef __SWAP_BROKEN
    824   /*
    825    * swap out some processes if we are below our free target.
    826    * we need to unlock the page queues for this.
    827    */
    828   if (free < uvmexp.freetarg) {
    829 
    830     uvmexp.pdswout++;
    831     UVMHIST_LOG(pdhist,"  free %d < target %d: swapout", free, uvmexp.freetarg,
    832 		0,0);
    833     uvm_unlock_pageq();
    834     uvm_swapout_threads();
    835     pmap_update();		/* update so we can scan inactive q */
    836     uvm_lock_pageq();
    837 
    838   }
    839 #endif
    840 
    841   /*
    842    * now we want to work on meeting our targets.   first we work on our
    843    * free target by converting inactive pages into free pages.  then
    844    * we work on meeting our inactive target by converting active pages
    845    * to inactive ones.
    846    */
    847 
    848   UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);
    849   pages_freed = uvmexp.pdfreed;	/* so far... */
    850 
    851   /*
    852    * do loop #1!   alternate starting queue between swap and object based
    853    * on the low bit of uvmexp.pdrevs (which we bump by one each call).
    854    */
    855 
    856   got_it = FALSE;
    857   if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
    858     got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
    859   if (!got_it)
    860     got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
    861   if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
    862     (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
    863 
    864   /*
    865    * we have done the scan to get free pages.   now we work on meeting
    866    * our inactive target.
    867    */
    868 
    869   page_shortage = uvmexp.inactarg - uvmexp.inactive;
    870   pages_freed = uvmexp.pdfreed - pages_freed; /* # pages freed in loop */
    871   if (page_shortage <= 0 && pages_freed == 0)
    872     page_shortage = 1;
    873 
    874   UVMHIST_LOG(pdhist, "  second loop: page_shortage=%d", page_shortage,0,0,0);
    875   for (p = uvm.page_active.tqh_first ;
    876        p != NULL && page_shortage > 0 ; p = nextpg) {
    877 
    878     nextpg = p->pageq.tqe_next;
    879     if (p->flags & PG_BUSY)
    880       continue;			/* quick check before trying to lock */
    881 
    882     /*
    883      * lock owner
    884      */
    885     /* is page anon owned or ownerless? */
    886     if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
    887 
    888 #ifdef DIAGNOSTIC
    889       if (p->uanon == NULL)
    890         panic("pagedaemon: page with no anon or object detected - loop 2");
    891 #endif
    892 
    893       if (!simple_lock_try(&p->uanon->an_lock))
    894         continue;
    895 
    896       /* take over the page? */
    897       if ((p->pqflags & PQ_ANON) == 0) {
    898 
    899 #ifdef DIAGNOSTIC
    900         if (p->loan_count < 1)
    901           panic("pagedaemon: non-loaned ownerless page detected - loop 2");
    902 #endif
    903 
    904         p->loan_count--;
    905         p->pqflags |= PQ_ANON;
    906       }
    907 
    908     } else {
    909 
    910       if (!simple_lock_try(&p->uobject->vmobjlock))
    911         continue;
    912 
    913     }
    914 
    915     if ((p->flags & PG_BUSY) == 0) {
    916       pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
    917       /* no need to check wire_count as pg is "active" */
    918       uvm_pagedeactivate(p);
    919       uvmexp.pddeact++;
    920       page_shortage--;
    921     }
    922 
    923     if (p->pqflags & PQ_ANON)
    924       simple_unlock(&p->uanon->an_lock);
    925     else
    926       simple_unlock(&p->uobject->vmobjlock);
    927   }
    928 
    929   /*
    930    * done scan
    931    */
    932 }
    933