      1 /*	$NetBSD: uvm_vnode.c,v 1.6 1998/02/19 00:55:04 thorpej Exp $	*/
      2 
      3 /*
      4  * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
      5  *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
      6  */
      7 /*
      8  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      9  * Copyright (c) 1991, 1993
     10  *      The Regents of the University of California.
     11  * Copyright (c) 1990 University of Utah.
     12  *
     13  * All rights reserved.
     14  *
     15  * This code is derived from software contributed to Berkeley by
     16  * the Systems Programming Group of the University of Utah Computer
     17  * Science Department.
     18  *
     19  * Redistribution and use in source and binary forms, with or without
     20  * modification, are permitted provided that the following conditions
     21  * are met:
     22  * 1. Redistributions of source code must retain the above copyright
     23  *    notice, this list of conditions and the following disclaimer.
     24  * 2. Redistributions in binary form must reproduce the above copyright
     25  *    notice, this list of conditions and the following disclaimer in the
     26  *    documentation and/or other materials provided with the distribution.
     27  * 3. All advertising materials mentioning features or use of this software
     28  *    must display the following acknowledgement:
     29  *      This product includes software developed by Charles D. Cranor,
     30  *	Washington University, the University of California, Berkeley and
     31  *	its contributors.
     32  * 4. Neither the name of the University nor the names of its contributors
     33  *    may be used to endorse or promote products derived from this software
     34  *    without specific prior written permission.
     35  *
     36  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     39  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     46  * SUCH DAMAGE.
     47  *
     48  *      @(#)vnode_pager.c       8.8 (Berkeley) 2/13/94
     49  * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
     50  */
     51 
     52 #include "fs_nfs.h"
     53 #include "opt_uvmhist.h"
     54 
     55 /*
     56  * uvm_vnode.c: the vnode pager.
     57  */
     58 
     59 #include <sys/param.h>
     60 #include <sys/systm.h>
     61 #include <sys/mount.h>
     62 #include <sys/proc.h>
     63 #include <sys/malloc.h>
     64 #include <sys/vnode.h>
     65 
     66 #include <vm/vm.h>
     67 #include <vm/vm_page.h>
     68 #include <vm/vm_kern.h>
     69 
     70 #include <sys/syscallargs.h>
     71 
     72 #include <uvm/uvm.h>
     73 #include <uvm/uvm_vnode.h>
     74 
     75 /*
     76  * private global data structure
     77  *
     78  * we keep a list of writeable active vnode-backed VM objects for sync op.
     79  * we keep a simpleq of vnodes that are currently being sync'd.
     80  */
     81 
     82 LIST_HEAD(uvn_list_struct, uvm_vnode);
     83 static struct uvn_list_struct uvn_wlist;	/* writeable uvns */
     84 #if NCPU > 1
     85 static simple_lock_data_t uvn_wl_lock;		/* locks uvn_wlist */
     86 #endif
     87 
     88 SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
     89 static struct uvn_sq_struct uvn_sync_q;		/* sync'ing uvns */
     90 lock_data_t uvn_sync_lock;			/* locks sync operation */
     91 
     92 /*
     93  * functions
     94  */
     95 
     96 static int		   uvn_asyncget __P((struct uvm_object *, vm_offset_t,
     97 					    int));
     98 struct uvm_object 	  *uvn_attach __P((void *, vm_prot_t));
     99 static void		   uvn_cluster __P((struct uvm_object *, vm_offset_t,
    100 					   vm_offset_t *, vm_offset_t *));
    101 static void                uvn_detach __P((struct uvm_object *));
    102 static boolean_t           uvn_flush __P((struct uvm_object *, vm_offset_t,
    103 					 vm_offset_t, int));
    104 static int                 uvn_get __P((struct uvm_object *, vm_offset_t,
    105 					vm_page_t *, int *, int,
    106 					vm_prot_t, int, int));
    107 static void		   uvn_init __P((void));
    108 static int		   uvn_io __P((struct uvm_vnode *, vm_page_t *,
    109 				      int, int, int));
    110 static int		   uvn_put __P((struct uvm_object *, vm_page_t *,
    111 					int, boolean_t));
    112 static void                uvn_reference __P((struct uvm_object *));
    113 static boolean_t	   uvn_releasepg __P((struct vm_page *,
    114 					      struct vm_page **));
    115 
    116 /*
    117  * master pager structure
    118  */
    119 
    120 struct uvm_pagerops uvm_vnodeops = {
    121   uvn_init,
    122   uvn_attach,
    123   uvn_reference,
    124   uvn_detach,
    125   NULL,			/* no specialized fault routine required */
    126   uvn_flush,
    127   uvn_get,
    128   uvn_asyncget,
    129   uvn_put,
    130   uvn_cluster,
    131   uvm_mk_pcluster,	/* use generic version of this: see uvm_pager.c */
    132   uvm_shareprot,	/* !NULL: allow us in share maps */
    133   NULL,			/* AIO-DONE function (not until we have asyncio) */
    134   uvn_releasepg,
    135 };
    136 
    137 /*
    138  * the ops!
    139  */
    140 
    141 /*
    142  * uvn_init
    143  *
    144  * init pager private data structures.
    145  */
    146 
    147 static void uvn_init()
    148 
    149 {
    150   LIST_INIT(&uvn_wlist);
    151   simple_lock_init(&uvn_wl_lock);
    152   /* note: uvn_sync_q init'd in uvm_vnp_sync() */
    153   lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0);
    154 }
    155 
    156 /*
    157  * uvn_attach
    158  *
    159  * attach a vnode structure to a VM object.  if the vnode is already
    160  * attached, then just bump the reference count by one and return the
    161  * VM object.   if not already attached, attach and return the new VM obj.
     162  * the "accessprot" tells the max access the attaching thread wants
     163  * to have to our pages.
    164  *
    165  * => caller must _not_ already be holding the lock on the uvm_object.
    166  * => in fact, nothing should be locked so that we can sleep here.
    167  * => note that uvm_object is first thing in vnode structure, so their
    168  *    pointers are equiv.
    169  */
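
/*
 * usage note (illustrative sketch only, not part of this pager): a
 * hypothetical caller that wants a read/write mapping of vnode "vp"
 * would call with nothing locked, e.g.:
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uvn_attach((void *) vp, VM_PROT_READ|VM_PROT_WRITE);
 *	if (uobj == NULL)
 *		...fail the mapping (VOP_GETATTR failed or vnode is dying)...
 */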
    170 
    171 struct uvm_object *uvn_attach(arg, accessprot)
    172 
    173 void *arg;
    174 vm_prot_t accessprot;
    175 
    176 {
    177   struct vnode *vp = arg;
    178   struct uvm_vnode *uvn = &vp->v_uvm;
    179   struct vattr vattr;
    180   int oldflags, result;
    181   u_quad_t used_vnode_size;
    182   UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);
    183 
    184   UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
    185 
    186   /*
    187    * first get a lock on the uvn.
    188    */
    189   simple_lock(&uvn->u_obj.vmobjlock);
    190   while (uvn->u_flags & UVM_VNODE_BLOCKED) {
    191     uvn->u_flags |= UVM_VNODE_WANTED;
    192     UVMHIST_LOG(maphist, "  SLEEPING on blocked vn",0,0,0,0);
    193     UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE, "uvn_attach",0);
    194     simple_lock(&uvn->u_obj.vmobjlock);
    195     UVMHIST_LOG(maphist,"  WOKE UP",0,0,0,0);
    196   }
    197 
    198   /*
    199    * now we have lock and uvn must not be in a blocked state.
    200    * first check to see if it is already active, in which case
    201    * we can bump the reference count, check to see if we need to
    202    * add it to the writeable list, and then return.
    203    */
    204   if (uvn->u_flags & UVM_VNODE_VALID) {	/* already active? */
    205 
    206     /* regain VREF if we were persisting */
    207     if (uvn->u_obj.uo_refs == 0) {
    208       VREF(vp);
    209       UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)", 0,0,0,0);
    210     }
    211     uvn->u_obj.uo_refs++;		/* bump uvn ref! */
    212 
    213     /* check for new writeable uvn */
    214     if ((accessprot & VM_PROT_WRITE) != 0 &&
    215 	(uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
    216       simple_lock(&uvn_wl_lock);
    217       LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
    218       simple_unlock(&uvn_wl_lock);
    219       uvn->u_flags |= UVM_VNODE_WRITEABLE;	/* we are now on wlist! */
    220     }
    221 
    222     /* unlock and return */
    223     simple_unlock(&uvn->u_obj.vmobjlock);
    224     UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,0,0,0);
    225     return(&uvn->u_obj);
    226   }
    227 
    228   /*
    229    * need to call VOP_GETATTR() to get the attributes, but that could
    230    * block (due to I/O), so we want to unlock the object before calling.
    231    * however, we want to keep anyone else from playing with the object
    232    * while it is unlocked.   to do this we set UVM_VNODE_ALOCK which
    233    * prevents anyone from attaching to the vnode until we are done with
    234    * it.
    235    */
    236   uvn->u_flags = UVM_VNODE_ALOCK;
    237   simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
    238     /* XXX: curproc? */
    239   result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
    240 
    241    /*
    242     * make sure that the newsize fits within a vm_offset_t
    243     * XXX: need to revise addressing data types
    244     */
    245   used_vnode_size = vattr.va_size;
    246   if (used_vnode_size > (vm_offset_t) -PAGE_SIZE) {
    247 #ifdef DEBUG
    248     printf("uvn_attach: vn %p size truncated %qx->%x\n", vp, used_vnode_size,
    249 	   -PAGE_SIZE);
    250 #endif
    251     used_vnode_size = (vm_offset_t) -PAGE_SIZE;
    252   }
    253 
    254   /* relock object */
    255   simple_lock(&uvn->u_obj.vmobjlock);
    256 
    257   if (result != 0) {
    258     if (uvn->u_flags & UVM_VNODE_WANTED)
    259       wakeup(uvn);
    260     uvn->u_flags = 0;
    261     simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
    262     UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
    263     return(NULL);
    264   }
    265 
    266   /*
    267    * now set up the uvn.
    268    */
    269   uvn->u_obj.pgops = &uvm_vnodeops;
    270   TAILQ_INIT(&uvn->u_obj.memq);
    271   uvn->u_obj.uo_npages = 0;
    272   uvn->u_obj.uo_refs = 1;			/* just us... */
    273   oldflags = uvn->u_flags;
    274   uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
    275   uvn->u_nio = 0;
    276   uvn->u_size = used_vnode_size;
    277 
    278   /* if write access, we need to add it to the wlist */
    279   if (accessprot & VM_PROT_WRITE) {
    280     simple_lock(&uvn_wl_lock);
    281     LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
    282     simple_unlock(&uvn_wl_lock);
    283     uvn->u_flags |= UVM_VNODE_WRITEABLE;	/* we are on wlist! */
    284   }
    285 
    286   /*
    287    * add a reference to the vnode.   this reference will stay as long
    288    * as there is a valid mapping of the vnode.   dropped when the reference
    289    * count goes to zero [and we either free or persist].
    290    */
    291   VREF(vp);
    292   simple_unlock(&uvn->u_obj.vmobjlock);
    293   if (oldflags & UVM_VNODE_WANTED)
    294     wakeup(uvn);
    295 
    296   UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0);
    297   return(&uvn->u_obj);
    298 }
    299 
    300 
    301 /*
    302  * uvn_reference
    303  *
    304  * duplicate a reference to a VM object.  Note that the reference
    305  * count must already be at least one (the passed in reference) so
    306  * there is no chance of the uvn being killed or locked out here.
    307  *
    308  * => caller must call with object unlocked.
    309  * => caller must be using the same accessprot as was used at attach time
    310  */
    311 
    312 
    313 static void uvn_reference(uobj)
    314 
    315 struct uvm_object *uobj;
    316 
    317 {
    318 #ifdef DIAGNOSTIC
    319   struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
    320 #endif
    321   UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist);
    322 
    323   simple_lock(&uobj->vmobjlock);
    324 #ifdef DIAGNOSTIC
    325   if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
     326     printf("uvn_reference: ref=%d, flags=0x%x\n", uobj->uo_refs, uvn->u_flags);
    327     panic("uvn_reference: invalid state");
    328   }
    329 #endif
    330   uobj->uo_refs++;
    331   UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
    332 	uobj, uobj->uo_refs,0,0);
    333   simple_unlock(&uobj->vmobjlock);
    334 }
    335 
    336 /*
    337  * uvn_detach
    338  *
    339  * remove a reference to a VM object.
    340  *
    341  * => caller must call with object unlocked and map locked.
    342  * => this starts the detach process, but doesn't have to finish it
    343  *    (async i/o could still be pending).
    344  */
    345 
    346 static void uvn_detach(uobj)
    347 
    348 struct uvm_object *uobj;
    349 
    350 {
    351   struct uvm_vnode *uvn;
    352   struct vnode *vp;
    353   int oldflags;
    354   UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist);
    355 
    356   simple_lock(&uobj->vmobjlock);
    357 
    358   UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
    359   uobj->uo_refs--;			/* drop ref! */
    360   if (uobj->uo_refs) {			/* still more refs */
    361     simple_unlock(&uobj->vmobjlock);
    362     UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
    363     return;
    364   }
    365 
    366   /*
    367    * get other pointers ...
    368    */
    369 
    370   uvn = (struct uvm_vnode *) uobj;
    371   vp = (struct vnode *) uobj;
    372 
    373   /*
    374    * clear VTEXT flag now that there are no mappings left (VTEXT is used
    375    * to keep an active text file from being overwritten).
    376    */
    377   vp->v_flag &= ~VTEXT;
    378 
    379   /*
    380    * we just dropped the last reference to the uvn.   see if we can
    381    * let it "stick around".
    382    */
    383 
    384   if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
    385     uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES); /* won't block */
    386     vrele(vp);				/* drop vnode reference */
    387     simple_unlock(&uobj->vmobjlock);
    388     UVMHIST_LOG(maphist,"<- done/vrele!  (persist)", 0,0,0,0);
    389     return;
    390   }
    391 
    392   /*
    393    * its a goner!
    394    */
    395 
    396   UVMHIST_LOG(maphist,"  its a goner (flushing)!", 0,0,0,0);
    397 
    398   uvn->u_flags |= UVM_VNODE_DYING;
    399 
    400   /*
    401    * even though we may unlock in flush, no one can gain a reference
    402    * to us until we clear the "dying" flag [because it blocks
    403    * attaches].  we will not do that until after we've disposed of all
    404    * the pages with uvn_flush().  note that before the flush the only
    405    * pages that could be marked PG_BUSY are ones that are in async
    406    * pageout by the daemon.  (there can't be any pending "get"'s
    407    * because there are no references to the object).
    408    */
    409 
    410   (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
    411 
    412   UVMHIST_LOG(maphist,"  its a goner (done flush)!", 0,0,0,0);
    413 
    414   /*
    415    * given the structure of this pager, the above flush request will
    416    * create the following state: all the pages that were in the object
    417    * have either been free'd or they are marked PG_BUSY|PG_RELEASED.
    418    * the PG_BUSY bit was set either by us or the daemon for async I/O.
    419    * in either case, if we have pages left we can't kill the object
    420    * yet because i/o is pending.  in this case we set the "relkill"
    421    * flag which will cause pgo_releasepg to kill the object once all
    422    * the I/O's are done [pgo_releasepg will be called from the aiodone
    423    * routine or from the page daemon].
    424    */
    425 
    426   if (uobj->uo_npages) {		/* I/O pending.  iodone will free */
    427 #ifdef DIAGNOSTIC
    428     /*
    429      * XXXCDC: very unlikely to happen until we have async i/o so print
    430      * a little info message in case it does.
    431      */
    432     printf("uvn_detach: vn %p has pages left after flush - relkill mode\n",
    433 	   uobj);
    434 #endif
    435     uvn->u_flags |= UVM_VNODE_RELKILL;
    436     simple_unlock(&uobj->vmobjlock);
    437     UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0,0,0,0);
    438     return;
    439   }
    440 
    441   /*
    442    * kill object now.   note that we can't be on the sync q because
    443    * all references are gone.
    444    */
    445   if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
    446     simple_lock(&uvn_wl_lock);		/* protect uvn_wlist */
    447     LIST_REMOVE(uvn, u_wlist);
    448     simple_unlock(&uvn_wl_lock);
    449   }
    450 #ifdef DIAGNOSTIC
    451   if (uobj->memq.tqh_first != NULL)
     452     panic("uvn_deref: vnode VM object still has pages after syncio/free flush");
    453 #endif
    454   oldflags = uvn->u_flags;
    455   uvn->u_flags = 0;
    456   simple_unlock(&uobj->vmobjlock);
    457 
    458   /* wake up any sleepers */
    459   if (oldflags & UVM_VNODE_WANTED)
    460     wakeup(uvn);
    461 
    462   /*
    463    * drop our reference to the vnode.
    464    */
    465   vrele(vp);
    466   UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0);
    467 
    468   return;
    469 }
    470 
    471 /*
    472  * uvm_vnp_terminate: external hook to clear out a vnode's VM
    473  *
    474  * called in two cases:
    475  *  [1] when a persisting vnode vm object (i.e. one with a zero reference
    476  *      count) needs to be freed so that a vnode can be reused.  this
    477  *      happens under "getnewvnode" in vfs_subr.c.   if the vnode from
    478  *      the free list is still attached (i.e. not VBAD) then vgone is
    479  *	called.   as part of the vgone trace this should get called to
    480  *	free the vm object.   this is the common case.
    481  *  [2] when a filesystem is being unmounted by force (MNT_FORCE,
    482  *	"umount -f") the vgone() function is called on active vnodes
    483  *	on the mounted file systems to kill their data (the vnodes become
    484  *	"dead" ones [see src/sys/miscfs/deadfs/...]).  that results in a
    485  *	call here (even if the uvn is still in use -- i.e. has a non-zero
    486  *	reference count).  this case happens at "umount -f" and during a
    487  *	"reboot/halt" operation.
    488  *
    489  * => the caller must XLOCK and VOP_LOCK the vnode before calling us
    490  *	[protects us from getting a vnode that is already in the DYING
    491  *	 state...]
    492  * => unlike uvn_detach, this function must not return until all the
    493  *	uvn's pages are disposed of.
    494  * => in case [2] the uvn is still alive after this call, but all I/O
    495  *	ops will fail (due to the backing vnode now being "dead").  this
    496  *	will prob. kill any process using the uvn due to pgo_get failing.
    497  */
    498 
    499 void uvm_vnp_terminate(vp)
    500 
    501 struct vnode *vp;
    502 
    503 {
    504   struct uvm_vnode *uvn = &vp->v_uvm;
    505   int oldflags;
    506   UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist);
    507 
    508   /*
    509    * lock object and check if it is valid
    510    */
    511   simple_lock(&uvn->u_obj.vmobjlock);
    512   UVMHIST_LOG(maphist, "  vp=0x%x, ref=%d, flag=0x%x", vp, uvn->u_obj.uo_refs,
    513 	      uvn->u_flags, 0);
    514   if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
    515     simple_unlock(&uvn->u_obj.vmobjlock);
    516     UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0);
    517     return;
    518   }
    519 
    520   /*
    521    * must be a valid uvn that is not already dying (because XLOCK
     522    * protects us from that).   the uvn can't be in the ALOCK state
    523    * because it is valid, and uvn's that are in the ALOCK state haven't
    524    * been marked valid yet.
    525    */
    526 
    527 #ifdef DEBUG
    528   /*
    529    * debug check: are we yanking the vnode out from under our uvn?
    530    */
    531   if (uvn->u_obj.uo_refs) {
    532     printf("uvm_vnp_terminate(%p): terminating active vnode (refs=%d)\n",
    533            uvn, uvn->u_obj.uo_refs);
    534   }
    535 #endif
    536 
    537   /*
    538    * it is possible that the uvn was detached and is in the relkill
    539    * state [i.e. waiting for async i/o to finish so that releasepg can
    540    * kill object].  we take over the vnode now and cancel the relkill.
    541    * we want to know when the i/o is done so we can recycle right
    542    * away.   note that a uvn can only be in the RELKILL state if it
    543    * has a zero reference count.
    544    */
    545 
    546   if (uvn->u_flags & UVM_VNODE_RELKILL)
    547     uvn->u_flags &= ~UVM_VNODE_RELKILL;		/* cancel RELKILL */
    548 
    549   /*
    550    * block the uvn by setting the dying flag, and then flush the
    551    * pages.  (note that flush may unlock object while doing I/O, but
    552    * it will re-lock it before it returns control here).
    553    *
    554    * also, note that we tell I/O that we are already VOP_LOCK'd so
    555    * that uvn_io doesn't attempt to VOP_LOCK again.
    556    *
    557    * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated
    558    *	due to a forceful unmount might not be a good idea.  maybe we need
    559    *	a way to pass in this info to uvn_flush through a pager-defined
    560    *	PGO_ constant [currently there are none].
    561    */
    562   uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED;
    563 
    564   (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
    565 
    566   /*
    567    * as we just did a flush we expect all the pages to be gone or in
    568    * the process of going.   sleep to wait for the rest to go [via iosync].
    569    */
    570 
    571   while (uvn->u_obj.uo_npages) {
    572 #ifdef DIAGNOSTIC
    573     struct vm_page *pp;
    574     for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ;
    575 	 pp = pp->listq.tqe_next) {
    576       if ((pp->flags & PG_BUSY) == 0)
    577 	panic("uvm_vnp_terminate: detected unbusy page");
    578     }
    579     if (uvn->u_nio == 0)
    580       panic("uvm_vnp_terminate: no I/O to wait for?");
    581     printf("uvm_vnp_terminate: waiting for I/O to fin.\n");
    582     /*
    583      * XXXCDC: this is unlikely to happen without async i/o so we
    584      * put a printf in just to keep an eye on it.
    585      */
    586 #endif
    587     uvn->u_flags |= UVM_VNODE_IOSYNC;
    588     UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE,
    589 			"uvn_term",0);
    590     simple_lock(&uvn->u_obj.vmobjlock);
    591   }
    592 
    593   /*
    594    * done.   now we free the uvn if its reference count is zero
    595    * (true if we are zapping a persisting uvn).   however, if we are
    596    * terminating a uvn with active mappings we let it live ... future
    597    * calls down to the vnode layer will fail.
    598    */
    599 
    600   oldflags = uvn->u_flags;
    601   if (uvn->u_obj.uo_refs) {
    602 
    603     /*
     604      * uvn must live on in its dead-vnode state until all references
    605      * are gone.   restore flags.    clear CANPERSIST state.
    606      */
    607 
    608     uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
    609 		      UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
    610 
    611   } else {
    612 
    613     /*
    614      * free the uvn now.   note that the VREF reference is already gone
    615      * [it is dropped when we enter the persist state].
    616      */
    617     if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
    618       panic("uvm_vnp_terminate: io sync wanted bit set");
    619 
    620     if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
    621       simple_lock(&uvn_wl_lock);
    622       LIST_REMOVE(uvn, u_wlist);
    623       simple_unlock(&uvn_wl_lock);
    624     }
    625     uvn->u_flags = 0;		/* uvn is history, clear all bits */
    626   }
    627 
    628   if (oldflags & UVM_VNODE_WANTED)
    629     wakeup(uvn);		/* object lock still held */
    630 
    631   simple_unlock(&uvn->u_obj.vmobjlock);
    632   UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
    633 
    634 }
    635 
    636 /*
     637  * uvn_releasepg: handle a released page in a uvn
    638  *
    639  * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
    640  *	to dispose of.
     641  * => caller must handle the PG_WANTED case
    642  * => called with page's object locked, pageq's unlocked
    643  * => returns TRUE if page's object is still alive, FALSE if we
    644  *	killed the page's object.    if we return TRUE, then we
    645  *	return with the object locked.
    646  * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
    647  *				with the page queues locked [for pagedaemon]
    648  * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
     649  * => we kill the uvn if it is not referenced and we are supposed to
    650  *	kill it ("relkill").
    651  */
    652 
    653 boolean_t uvn_releasepg(pg, nextpgp)
    654 
    655 struct vm_page *pg;
    656 struct vm_page **nextpgp;	/* OUT */
    657 
    658 {
    659   struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject;
    660 #ifdef DIAGNOSTIC
    661   if ((pg->flags & PG_RELEASED) == 0)
    662     panic("uvn_releasepg: page not released!");
    663 #endif
    664 
    665   /*
    666    * dispose of the page [caller handles PG_WANTED]
    667    */
    668   pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
    669   uvm_lock_pageq();
    670   if (nextpgp)
    671     *nextpgp = pg->pageq.tqe_next;	/* next page for daemon */
    672   uvm_pagefree(pg);
    673   if (!nextpgp)
    674     uvm_unlock_pageq();
    675 
    676   /*
    677    * now see if we need to kill the object
    678    */
    679   if (uvn->u_flags & UVM_VNODE_RELKILL) {
    680     if (uvn->u_obj.uo_refs)
    681       panic("uvn_releasepg: kill flag set on referenced object!");
    682     if (uvn->u_obj.uo_npages == 0) {
    683       if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
    684 	simple_lock(&uvn_wl_lock);
    685 	LIST_REMOVE(uvn, u_wlist);
    686 	simple_unlock(&uvn_wl_lock);
    687       }
    688 #ifdef DIAGNOSTIC
    689       if (uvn->u_obj.memq.tqh_first)
    690 	panic("uvn_releasepg: pages in object with npages == 0");
    691 #endif
    692       if (uvn->u_flags & UVM_VNODE_WANTED)
    693 	wakeup(uvn);		/* still holding object lock */
    694       uvn->u_flags = 0;		/* DEAD! */
    695       simple_unlock(&uvn->u_obj.vmobjlock);
    696       return(FALSE);
    697     }
    698   }
    699   return(TRUE);
    700 }
    701 
    702 /*
    703  * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
    704  * through the buffer cache and allow I/O in any size.  These VOPs use
    705  * synchronous i/o.  [vs. VOP_STRATEGY which can be async, but doesn't
    706  * go through the buffer cache or allow I/O sizes larger than a
    707  * block].  we will eventually want to change this.
    708  *
    709  * issues to consider:
    710  *   uvm provides the uvm_aiodesc structure for async i/o management.
     711  * there are two tailq's in the uvm structure... one for pending async
    712  * i/o and one for "done" async i/o.   to do an async i/o one puts
    713  * an aiodesc on the "pending" list (protected by splbio()), starts the
    714  * i/o and returns VM_PAGER_PEND.    when the i/o is done, we expect
    715  * some sort of "i/o done" function to be called (at splbio(), interrupt
    716  * time).   this function should remove the aiodesc from the pending list
    717  * and place it on the "done" list and wakeup the daemon.   the daemon
    718  * will run at normal spl() and will remove all items from the "done"
    719  * list and call the "aiodone" hook for each done request (see uvm_pager.c).
    720  * [in the old vm code, this was done by calling the "put" routine with
    721  * null arguments which made the code harder to read and understand because
    722  * you had one function ("put") doing two things.]
    723  *
    724  * so the current pager needs:
    725  *   int uvn_aiodone(struct uvm_aiodesc *)
    726  *
    727  * => return KERN_SUCCESS (aio finished, free it).  otherwise requeue for
    728  *	later collection.
    729  * => called with pageq's locked by the daemon.
    730  *
    731  * general outline:
    732  * - "try" to lock object.   if fail, just return (will try again later)
    733  * - drop "u_nio" (this req is done!)
     734  * - if (object->iosync && u_nio == 0) { wakeup &uvn->u_nio }
    735  * - get "page" structures (atop?).
    736  * - handle "wanted" pages
    737  * - handle "released" pages [using pgo_releasepg]
    738  *   >>> pgo_releasepg may kill the object
     739  * don't forget to look at the "object" wanted flag in all cases.
    740  */
    741 
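/*
 * the following is a minimal, hedged sketch of the uvn_aiodone() routine
 * outlined above.  it is deliberately not compiled (there is no async i/o
 * yet), and the uvm_aiodesc field names it uses (aio_uobj, aio_npages,
 * aio_pages) are assumptions for illustration only, not a real structure
 * layout.  lock juggling around pgo_releasepg is also simplified.
 */
#if 0
static int uvn_aiodone(aio)

struct uvm_aiodesc *aio;

{
  struct uvm_vnode *uvn = (struct uvm_vnode *) aio->aio_uobj;	/* assumed */
  struct vm_page *ptmp;
  int lcv;

  /* "try" to lock the object.  if we can't get it, requeue for later. */
  if (simple_lock_try(&uvn->u_obj.vmobjlock) == 0)
    return(KERN_FAILURE);

  /* this request is done: drop u_nio, wake an iosync sleeper if needed */
  uvn->u_nio--;
  if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0)
    wakeup(&uvn->u_nio);

  /* handle the pages of this request */
  for (lcv = 0 ; lcv < aio->aio_npages ; lcv++) {	/* assumed fields */
    ptmp = aio->aio_pages[lcv];
    if (ptmp->flags & PG_WANTED)
      wakeup(ptmp);			/* still holding object lock */
    ptmp->flags &= ~(PG_WANTED|PG_BUSY);
    UVM_PAGE_OWN(ptmp, NULL);
    if (ptmp->flags & PG_RELEASED) {
      /* pgo_releasepg may kill the object; if so it drops the object lock */
      if (!uvn_releasepg(ptmp, NULL))
	return(KERN_SUCCESS);
    }
  }

  simple_unlock(&uvn->u_obj.vmobjlock);
  return(KERN_SUCCESS);
}
#endif	/* 0: uvn_aiodone sketch */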
    742 
    743 /*
    744  * uvn_flush: flush pages out of a uvm object.
    745  *
    746  * => object should be locked by caller.   we may _unlock_ the object
    747  *	if (and only if) we need to clean a page (PGO_CLEANIT).
    748  *	we return with the object locked.
    749  * => if PGO_CLEANIT is set, we may block (due to I/O).   thus, a caller
    750  *	might want to unlock higher level resources (e.g. vm_map)
    751  *	before calling flush.
    752  * => if PGO_CLEANIT is not set, then we will neither unlock the object
     753  *	nor block.
     754  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
    755  *	for flushing.
    756  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
    757  *	that new pages are inserted on the tail end of the list.   thus,
    758  *	we can make a complete pass through the object in one go by starting
    759  *	at the head and working towards the tail (new pages are put in
    760  *	front of us).
    761  * => NOTE: we are allowed to lock the page queues, so the caller
    762  *	must not be holding the lock on them [e.g. pagedaemon had
    763  *	better not call us with the queues locked]
    764  * => we return TRUE unless we encountered some sort of I/O error
    765  *
    766  * comment on "cleaning" object and PG_BUSY pages:
    767  *	this routine is holding the lock on the object.   the only time
    768  *	that it can run into a PG_BUSY page that it does not own is if
    769  *	some other process has started I/O on the page (e.g. either
    770  *	a pagein, or a pageout).    if the PG_BUSY page is being paged
    771  *	in, then it can not be dirty (!PG_CLEAN) because no one has
    772  *	had a chance to modify it yet.    if the PG_BUSY page is being
    773  *	paged out then it means that someone else has already started
    774  *	cleaning the page for us (how nice!).    in this case, if we
    775  *	have syncio specified, then after we make our pass through the
    776  *	object we need to wait for the other PG_BUSY pages to clear
    777  *	off (i.e. we need to do an iosync).   also note that once a
    778  *	page is PG_BUSY it must stay in its object until it is un-busyed.
    779  *
    780  * note on page traversal:
    781  *	we can traverse the pages in an object either by going down the
    782  *	linked list in "uobj->memq", or we can go over the address range
    783  *	by page doing hash table lookups for each address.    depending
    784  *	on how many pages are in the object it may be cheaper to do one
    785  *	or the other.   we set "by_list" to true if we are using memq.
    786  *	if the cost of a hash lookup was equal to the cost of the list
    787  *	traversal we could compare the number of pages in the start->stop
    788  *	range to the total number of pages in the object.   however, it
    789  *	seems that a hash table lookup is more expensive than the linked
    790  *	list traversal, so we multiply the number of pages in the
    791  *	start->stop range by a penalty which we define below.
    792  */
    793 
    794 #define UVN_HASH_PENALTY 4	/* a guess */
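
/*
 * worked example (illustrative): with UVN_HASH_PENALTY at 4, flushing a
 * 16-page start->stop range goes by the memq list only while the object
 * holds at most 16 * 4 = 64 pages; larger objects fall back to per-page
 * hash lookups.
 */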
    795 
    796 static boolean_t uvn_flush(uobj, start, stop, flags)
    797 
    798 struct uvm_object *uobj;
    799 vm_offset_t start, stop;
    800 int flags;
    801 
    802 {
    803   struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
    804   struct vm_page *pp, *ppnext, *ptmp;
    805   struct vm_page *pps[MAXBSIZE/PAGE_SIZE], **ppsp;
    806   int npages, result, lcv;
    807   boolean_t retval, need_iosync, by_list, needs_clean;
    808   vm_offset_t curoff;
    809   u_short pp_version;
    810   UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
    811 
    812   curoff = 0;	/* XXX: shut up gcc */
    813   /*
    814    * get init vals and determine how we are going to traverse object
    815    */
    816 
    817   need_iosync = FALSE;
    818   retval = TRUE;		/* return value */
    819   if (flags & PGO_ALLPAGES) {
    820     start = 0;
    821     stop = round_page(uvn->u_size);
    822     by_list = TRUE;		/* always go by the list */
    823   } else {
    824     start = trunc_page(start);
    825     stop = round_page(stop);
    826     if (stop > round_page(uvn->u_size))
    827       printf("uvn_flush: strange, got an out of range flush (fixed)\n");
    828 
    829     by_list = (uobj->uo_npages <=
    830 	       ((stop - start) / PAGE_SIZE) * UVN_HASH_PENALTY);
    831   }
    832 
    833   UVMHIST_LOG(maphist," flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x",
    834 	start, stop, by_list, flags);
    835 
    836   /*
    837    * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as
    838    * a _hint_ as to how up to date the PG_CLEAN bit is.   if the hint
    839    * is wrong it will only prevent us from clustering... it won't break
    840    * anything.   we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster
    841    * will set them as it syncs PG_CLEAN.   This is only an issue if we
    842    * are looking at non-inactive pages (because inactive page's PG_CLEAN
    843    * bit is always up to date since there are no mappings).
    844    * [borrowed PG_CLEANCHK idea from FreeBSD VM]
    845    */
    846 
    847   if ((flags & PGO_CLEANIT) != 0 && uobj->pgops->pgo_mk_pcluster != NULL) {
    848 
    849     if (by_list) {
    850       for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = pp->listq.tqe_next) {
    851 	if (pp->offset < start || pp->offset >= stop)
    852 	  continue;
    853 	pp->flags &= ~PG_CLEANCHK;
    854       }
    855 
    856     } else {   /* by hash */
    857       for (curoff = start ; curoff < stop ; curoff += PAGE_SIZE) {
    858 	pp = uvm_pagelookup(uobj, curoff);
    859 	if (pp)
    860 	  pp->flags &= ~PG_CLEANCHK;
    861       }
    862     }
    863 
    864   }
    865 
    866   /*
    867    * now do it.   note: we must update ppnext in body of loop or we
    868    * will get stuck.  we need to use ppnext because we may free "pp"
    869    * before doing the next loop.
    870    */
    871 
    872   if (by_list) {
    873     pp = uobj->memq.tqh_first;
    874   } else {
    875     curoff = start;
    876     pp = uvm_pagelookup(uobj, curoff);
    877   }
    878 
    879   ppnext = NULL;	/* XXX: shut up gcc */
    880   ppsp = NULL;		/* XXX: shut up gcc */
    881   uvm_lock_pageq();	/* page queues locked */
    882 
    883   /* locked: both page queues and uobj */
    884   for ( ; (by_list && pp != NULL) ||
    885 	  (!by_list && curoff < stop) ; pp = ppnext) {
    886 
    887     if (by_list) {
    888 
    889       /*
    890        * range check
    891        */
    892 
    893       if (pp->offset < start || pp->offset >= stop) {
    894 	ppnext = pp->listq.tqe_next;
    895 	continue;
    896       }
    897 
    898     } else {
    899 
    900       /*
    901        * null check
    902        */
    903 
    904       curoff += PAGE_SIZE;
    905       if (pp == NULL) {
    906 	if (curoff < stop)
    907 	  ppnext = uvm_pagelookup(uobj, curoff);
    908 	continue;
    909       }
    910 
    911     }
    912 
    913     /*
    914      * handle case where we do not need to clean page (either because
     915      * we are not cleaning or because the page is not dirty or is busy):
    916      *
    917      * NOTE: we are allowed to deactivate a non-wired active PG_BUSY page,
    918      * but once a PG_BUSY page is on the inactive queue it must
    919      * stay put until it is !PG_BUSY (so as not to confuse pagedaemon).
    920      */
    921 
    922     if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
    923       needs_clean = FALSE;
    924       if ((pp->flags & PG_BUSY) != 0 &&
    925 	  (flags & (PGO_CLEANIT|PGO_SYNCIO)) == (PGO_CLEANIT|PGO_SYNCIO))
    926 	need_iosync = TRUE;
    927     } else {
    928       /* freeing: nuke all mappings so we can sync PG_CLEAN bit with no race */
    929       if ((pp->flags & PG_CLEAN) != 0 &&
    930 	  (flags & PGO_FREE) != 0 && (pp->pqflags & PQ_ACTIVE) != 0)
    931 	pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
    932       if ((pp->flags & PG_CLEAN) != 0 && pmap_is_modified(PMAP_PGARG(pp)))
    933 	pp->flags &= ~(PG_CLEAN);
    934       pp->flags |= PG_CLEANCHK;		/* update "hint" */
    935 
    936       needs_clean = ((pp->flags & PG_CLEAN) == 0);
    937     }
    938 
    939     /*
    940      * if we don't need a clean... load ppnext and dispose of pp
    941      */
    942     if (!needs_clean) {
    943       /* load ppnext */
    944       if (by_list)
    945         ppnext = pp->listq.tqe_next;
    946       else {
    947         if (curoff < stop)
    948 	  ppnext = uvm_pagelookup(uobj, curoff);
    949       }
    950 
    951       /* now dispose of pp */
    952       if (flags & PGO_DEACTIVATE) {
    953 	if ((pp->pqflags & PQ_INACTIVE) == 0 && pp->wire_count == 0) {
    954 	  pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
    955 	  uvm_pagedeactivate(pp);
    956 	}
    957 
    958       } else if (flags & PGO_FREE) {
    959 	if (pp->flags & PG_BUSY) {
    960 	  pp->flags |= PG_RELEASED;	/* release busy pages */
    961 	} else {
    962 	  pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
    963 	  uvm_pagefree(pp);		/* removed page from object */
    964 	}
    965 
    966       }
    967       /* ppnext is valid so we can continue... */
    968       continue;
    969     }
    970 
    971     /*
    972      * pp points to a page in the locked object that we are working on.
     973      * if it is !PG_CLEAN and !PG_BUSY and we asked for cleaning
     974      * (PGO_CLEANIT), we clean it now.
     975      *
     976      * let uvm_pager_put attempt a clustered pageout.
    977      * note: locked: uobj and page queues.
    978      */
    979 
    980     pp->flags |= PG_BUSY;	/* we 'own' page now */
    981     UVM_PAGE_OWN(pp, "uvn_flush");
    982     pmap_page_protect(PMAP_PGARG(pp), VM_PROT_READ);
    983     pp_version = pp->version;
    984 ReTry:
    985     ppsp = pps;
    986     npages = sizeof(pps) / sizeof(struct vm_page *);
    987 
    988     /* locked: page queues, uobj */
    989     result = uvm_pager_put(uobj, pp, &ppsp, &npages,
    990 			   flags | PGO_DOACTCLUST, start, stop);
    991     /* unlocked: page queues, uobj */
    992 
    993     /*
    994      * at this point nothing is locked.   if we did an async I/O
    995      * it is remotely possible for the async i/o to complete and
    996      * the page "pp" be freed or what not before we get a chance
    997      * to relock the object.   in order to detect this, we have
    998      * saved the version number of the page in "pp_version".
    999      */
   1000 
   1001     /* relock! */
   1002     simple_lock(&uobj->vmobjlock);
   1003     uvm_lock_pageq();
   1004 
   1005     /*
   1006      * VM_PAGER_AGAIN: given the structure of this pager, this
   1007      * can only happen when  we are doing async I/O and can't
   1008      * map the pages into kernel memory (pager_map) due to lack
   1009      * of vm space.   if this happens we drop back to sync I/O.
   1010      */
   1011 
   1012     if (result == VM_PAGER_AGAIN) {
   1013       /*
   1014        * it is unlikely, but page could have been released while we
   1015        * had the object lock dropped.   we ignore this now and retry
   1016        * the I/O.  we will detect and handle the released page after
   1017        * the syncio I/O completes.
   1018        */
   1019 #ifdef DIAGNOSTIC
   1020       if (flags & PGO_SYNCIO)
   1021 	panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)");
   1022 #endif
   1023       flags |= PGO_SYNCIO;
   1024       goto ReTry;
   1025     }
   1026 
   1027     /*
   1028      * the cleaning operation is now done.   finish up.  note that
   1029      * on error (!OK, !PEND) uvm_pager_put drops the cluster for us.
   1030      * if success (OK, PEND) then uvm_pager_put returns the cluster
   1031      * to us in ppsp/npages.
   1032      */
   1033 
   1034     /*
   1035      * for pending async i/o if we are not deactivating/freeing we can
   1036      * move on to the next page.
   1037      */
   1038 
   1039     if (result == VM_PAGER_PEND) {
   1040 
   1041       if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
   1042 	/* no per-page ops: refresh ppnext and continue */
   1043 	if (by_list) {
   1044 	  if (pp->version == pp_version)
   1045 	    ppnext = pp->listq.tqe_next;
   1046 	  else
   1047 	    ppnext = uobj->memq.tqh_first;	/* reset */
   1048 	} else {
   1049 	  if (curoff < stop)
   1050 	    ppnext = uvm_pagelookup(uobj, curoff);
   1051 	}
   1052 	continue;
   1053       }
   1054 
   1055       /* need to do anything here? */
   1056     }
   1057 
   1058     /*
   1059      * need to look at each page of the I/O operation.  we defer
   1060      * processing "pp" until the last trip through this "for" loop
   1061      * so that we can load "ppnext" for the main loop after we
   1062      * play with the cluster pages [thus the "npages + 1" in the
   1063      * loop below].
   1064      */
   1065 
   1066     for (lcv = 0 ; lcv < npages + 1 ; lcv++) {
   1067 
   1068       /*
   1069        * handle ppnext for outside loop, and saving pp until the end.
   1070        */
   1071       if (lcv < npages) {
   1072 	if (ppsp[lcv] == pp)
   1073 	  continue;			/* skip pp until the end */
   1074 	ptmp = ppsp[lcv];
   1075       } else {
   1076 	ptmp = pp;
   1077 
   1078 	/* set up next page for outer loop */
   1079 	if (by_list) {
   1080 	  if (pp->version == pp_version)
   1081 	    ppnext = pp->listq.tqe_next;
   1082 	  else
   1083 	    ppnext = uobj->memq.tqh_first;	/* reset */
   1084 	} else {
   1085 	  if (curoff < stop)
   1086 	    ppnext = uvm_pagelookup(uobj, curoff);
   1087 	}
   1088       }
   1089 
   1090       /*
   1091        * verify the page didn't get moved while obj was unlocked
   1092        */
   1093       if (result == VM_PAGER_PEND && ptmp->uobject != uobj)
   1094 	continue;
   1095 
   1096       /*
   1097        * unbusy the page if I/O is done.   note that for pending
   1098        * I/O it is possible that the I/O op finished before we
   1099        * relocked the object (in which case the page is no longer
   1100        * busy).
   1101        */
   1102 
   1103       if (result != VM_PAGER_PEND) {
   1104 	if (ptmp->flags & PG_WANTED)
   1105 	  thread_wakeup(ptmp);		/* still holding object lock */
   1106 	ptmp->flags &= ~(PG_WANTED|PG_BUSY);
   1107         UVM_PAGE_OWN(ptmp, NULL);
   1108 	if (ptmp->flags & PG_RELEASED) {
   1109 
   1110 	  uvm_unlock_pageq();	/* pgo_releasepg wants this */
   1111           if (!uvn_releasepg(ptmp, NULL)) {
   1112             return(TRUE);
   1113           }
   1114 	  uvm_lock_pageq();	/* relock */
   1115 	  continue;		/* next page */
   1116 
   1117 	} else {
   1118 	  ptmp->flags |= (PG_CLEAN|PG_CLEANCHK);
   1119 	  if ((flags & PGO_FREE) == 0)
   1120 	    pmap_clear_modify(PMAP_PGARG(ptmp));
   1121 	}
   1122       }
   1123 
   1124       /*
   1125        * dispose of page
   1126        */
   1127 
   1128       if (flags & PGO_DEACTIVATE) {
    1129 	if ((ptmp->pqflags & PQ_INACTIVE) == 0 && ptmp->wire_count == 0) {
   1130 	  pmap_page_protect(PMAP_PGARG(ptmp), VM_PROT_NONE);
   1131 	  uvm_pagedeactivate(ptmp);
   1132 	}
   1133 
   1134       } else if (flags & PGO_FREE) {
   1135 	if (result == VM_PAGER_PEND) {
   1136 	  if ((ptmp->flags & PG_BUSY) != 0)
   1137 	    ptmp->flags |= PG_RELEASED;		/* signal for i/o done */
   1138 	} else {
   1139 	  if (result != VM_PAGER_OK) {
   1140 	    printf("uvn_flush: obj=%p, offset=0x%lx.  error during pageout.\n",
   1141 		   pp->uobject, pp->offset);
   1142 	    printf("uvn_flush: WARNING: changes to page may be lost!\n");
   1143 	    retval = FALSE;
   1144 	  }
   1145 	  pmap_page_protect(PMAP_PGARG(ptmp), VM_PROT_NONE);
   1146 	  uvm_pagefree(ptmp);
   1147 	}
   1148       }
   1149 
   1150     }		/* end of "lcv" for loop */
   1151 
   1152   }		/* end of "pp" for loop */
   1153 
   1154   /*
   1155    * done with pagequeues: unlock
   1156    */
   1157   uvm_unlock_pageq();
   1158 
   1159   /*
   1160    * now wait for all I/O if required.
   1161    */
   1162   if (need_iosync) {
   1163 
   1164     UVMHIST_LOG(maphist,"  <<DOING IOSYNC>>",0,0,0,0);
   1165     while (uvn->u_nio != 0) {
   1166       uvn->u_flags |= UVM_VNODE_IOSYNC;
   1167       UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock,
   1168 			  FALSE, "uvn_flush",0);
   1169       simple_lock(&uvn->u_obj.vmobjlock);
   1170     }
   1171     if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
   1172       wakeup(&uvn->u_flags);
   1173     uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
   1174   }
   1175 
   1176   /* return, with object locked! */
   1177   UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0);
   1178   return(retval);
   1179 }
   1180 
   1181 /*
   1182  * uvn_cluster
   1183  *
   1184  * we are about to do I/O in an object at offset.   this function is called
   1185  * to establish a range of offsets around "offset" in which we can cluster
   1186  * I/O.
   1187  *
   1188  * - currently doesn't matter if obj locked or not.
   1189  */
   1190 
   1191 static void uvn_cluster(uobj, offset, loffset, hoffset)
   1192 
   1193 struct uvm_object *uobj;
   1194 vm_offset_t offset;
   1195 vm_offset_t *loffset, *hoffset; /* OUT */
   1196 
   1197 {
   1198   struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
   1199   *loffset = offset;
   1200 
   1201   if (*loffset >= uvn->u_size)
   1202     panic("uvn_cluster: offset out of range");
   1203 
   1204   /*
   1205    * XXX: old pager claims we could use VOP_BMAP to get maxcontig value.
   1206    */
   1207   *hoffset = *loffset + MAXBSIZE;
   1208   if (*hoffset > round_page(uvn->u_size))	/* past end? */
   1209     *hoffset = round_page(uvn->u_size);
   1210 
   1211   return;
   1212 }
   1213 
   1214 /*
   1215  * uvn_put: flush page data to backing store.
   1216  *
   1217  * => prefer map unlocked (not required)
   1218  * => object must be locked!   we will _unlock_ it before starting I/O.
   1219  * => flags: PGO_SYNCIO -- use sync. I/O
   1220  * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
   1221  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
   1222  *	[thus we never do async i/o!  see iodone comment]
   1223  */
   1224 
   1225 static int uvn_put(uobj, pps, npages, flags)
   1226 
   1227 struct uvm_object *uobj;
   1228 struct vm_page **pps;
   1229 int npages, flags;
   1230 
   1231 {
   1232   int retval;
   1233 
   1234   /* note: object locked */
   1235   retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
   1236   /* note: object unlocked */
   1237 
   1238   return(retval);
   1239 }
   1240 
   1241 
   1242 /*
   1243  * uvn_get: get pages (synchronously) from backing store
   1244  *
   1245  * => prefer map unlocked (not required)
   1246  * => object must be locked!  we will _unlock_ it before starting any I/O.
   1247  * => flags: PGO_ALLPAGES: get all of the pages
   1248  *           PGO_LOCKED: fault data structures are locked
   1249  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
   1250  * => NOTE: caller must check for released pages!!
   1251  */
   1252 
   1253 static int uvn_get(uobj, offset, pps, npagesp, centeridx,
   1254 			access_type, advice, flags)
   1255 
   1256 struct uvm_object *uobj;
   1257 vm_offset_t offset;
   1258 struct vm_page **pps;		/* IN/OUT */
   1259 int *npagesp;			/* IN (OUT if PGO_LOCKED) */
   1260 int centeridx, advice, flags;
   1261 vm_prot_t access_type;
   1262 
   1263 {
   1264   vm_offset_t current_offset;
   1265   struct vm_page *ptmp;
   1266   int lcv, result, gotpages;
   1267   boolean_t done;
   1268   UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist);
   1269   UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0);
   1270 
   1271   /*
    1272    * step 1: handle the case where the fault data structures are locked.
   1273    */
   1274 
   1275   if (flags & PGO_LOCKED) {
   1276 
   1277     /*
   1278      * gotpages is the current number of pages we've gotten (which
    1279      * we pass back up to the caller via *npagesp).
   1280      */
   1281 
   1282     gotpages = 0;
   1283 
   1284     /*
   1285      * step 1a: get pages that are already resident.   only do this
   1286      * if the data structures are locked (i.e. the first time through).
   1287      */
   1288 
   1289     done = TRUE;	/* be optimistic */
   1290 
   1291     for (lcv = 0, current_offset = offset ;
   1292 	 lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
   1293 
   1294       /* do we care about this page?  if not, skip it */
   1295       if (pps[lcv] == PGO_DONTCARE)
   1296 	continue;
   1297 
   1298       /* lookup page */
   1299       ptmp = uvm_pagelookup(uobj, current_offset);
   1300 
   1301       /* to be useful must get a non-busy, non-released page */
   1302       if (ptmp == NULL || (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
   1303 	if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0)
   1304 	  done = FALSE;		/* need to do a wait or I/O! */
   1305 	continue;
   1306       }
   1307 
   1308       /* useful page: busy/lock it and plug it in our result array */
   1309       ptmp->flags |= PG_BUSY;		/* loan up to caller */
   1310       UVM_PAGE_OWN(ptmp, "uvn_get1");
   1311       pps[lcv] = ptmp;
   1312       gotpages++;
   1313 
   1314     }	/* "for" lcv loop */
   1315 
   1316     /*
   1317      * XXX: given the "advice", should we consider async read-ahead?
    1318      * XXX: the fault code currently deactivates the pages behind us.
    1319      * is this good (other callers might not)?
   1320      */
   1321     /*
   1322      * XXX: read-ahead currently handled by buffer cache (bread) level.
   1323      * XXX: no async i/o available.
   1324      * XXX: so we don't do anything now.
   1325      */
   1326 
   1327     /*
    1328      * step 1c: now we've either done everything needed or we need to unlock
   1329      * and do some waiting or I/O.
   1330      */
   1331 
   1332     *npagesp = gotpages;		/* let caller know */
   1333     if (done)
   1334       return(VM_PAGER_OK);		/* bingo! */
   1335     else
   1336       return(VM_PAGER_UNLOCK);		/* EEK!   Need to unlock and I/O */
   1337   }
   1338 
   1339   /*
   1340    * step 2: get non-resident or busy pages.
   1341    * object is locked.   data structures are unlocked.
   1342    *
   1343    * XXX: because we can't do async I/O at this level we get things
   1344    * page at a time (otherwise we'd chunk).   the VOP_READ() will do
   1345    * async-read-ahead for us at a lower level.
   1346    */
   1347 
   1348   for (lcv = 0, current_offset = offset ;
   1349        lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
   1350 
   1351     /* skip over pages we've already gotten or don't want */
   1352     /* skip over pages we don't _have_ to get */
   1353     if (pps[lcv] != NULL ||
   1354 	(lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
   1355       continue;
   1356 
   1357     /*
   1358      * we have yet to locate the current page (pps[lcv]).   we first
   1359      * look for a page that is already at the current offset.   if we
   1360      * fine a page, we check to see if it is busy or released.  if that
    1361      * find a page, we check to see if it is busy or released.  if that
   1362      * or released and repeat the lookup.    if the page we found is
   1363      * neither busy nor released, then we busy it (so we own it) and
   1364      * plug it into pps[lcv].   this breaks the following while loop
   1365      * and indicates we are ready to move on to the next page in the
   1366      * "lcv" loop above.
   1367      *
   1368      * if we exit the while loop with pps[lcv] still set to NULL, then
   1369      * it means that we allocated a new busy/fake/clean page ptmp in the
   1370      * object and we need to do I/O to fill in the data.
   1371      */
   1372 
   1373     while (pps[lcv] == NULL) {		/* top of "pps" while loop */
   1374 
   1375       /* look for a current page */
   1376       ptmp = uvm_pagelookup(uobj, current_offset);
   1377 
   1378       /* nope?   allocate one now (if we can) */
   1379       if (ptmp == NULL) {
   1380 
   1381 	ptmp = uvm_pagealloc(uobj, current_offset, NULL);	/* alloc */
   1382 
   1383 	/* out of RAM? */
   1384 	if (ptmp == NULL) {
   1385 	  simple_unlock(&uobj->vmobjlock);
   1386 	  uvm_wait("uvn_getpage");
   1387 	  simple_lock(&uobj->vmobjlock);
   1388 	  continue;		/* goto top of pps while loop */
   1389 	}
   1390 
   1391 	/*
   1392 	 * got new page ready for I/O.  break pps while loop.  pps[lcv] is
   1393 	 * still NULL.
   1394 	 */
   1395 	break;
   1396       }
   1397 
   1398       /* page is there, see if we need to wait on it */
   1399       if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
   1400 	ptmp->flags |= PG_WANTED;
   1401 	UVM_UNLOCK_AND_WAIT(ptmp,&uobj->vmobjlock,0,"uvn_get",0);
   1402 	simple_lock(&uobj->vmobjlock);
   1403 	continue;		/* goto top of pps while loop */
   1404       }
   1405 
   1406       /*
   1407        * if we get here then the page has become resident and unbusy
   1408        * between steps 1 and 2.  we busy it now (so we own it) and set
   1409        * pps[lcv] (so that we exit the while loop).
   1410        */
   1411       ptmp->flags |= PG_BUSY;
   1412       UVM_PAGE_OWN(ptmp, "uvn_get2");
   1413       pps[lcv] = ptmp;
   1414     }
   1415 
   1416     /*
    1417      * if we own a valid page at the correct offset, pps[lcv] will
   1418      * point to it.   nothing more to do except go to the next page.
   1419      */
   1420 
   1421     if (pps[lcv])
   1422       continue;			/* next lcv */
   1423 
   1424     /*
   1425      * we have a "fake/busy/clean" page that we just allocated.  do
   1426      * I/O to fill it with valid data.  note that object must be
   1427      * locked going into uvn_io, but will be unlocked afterwards.
   1428      */
   1429 
   1430     result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1, PGO_SYNCIO, UIO_READ);
   1431 
   1432     /*
   1433      * I/O done.   object is unlocked (by uvn_io).   because we used
   1434      * syncio the result can not be PEND or AGAIN.   we must relock
   1435      * and check for errors.
   1436      */
   1437 
   1438     /* lock object.   check for errors.   */
   1439     simple_lock(&uobj->vmobjlock);
   1440     if (result != VM_PAGER_OK) {
   1441       if (ptmp->flags & PG_WANTED)
   1442 	thread_wakeup(ptmp);		/* object lock still held */
   1443       ptmp->flags &= ~(PG_WANTED|PG_BUSY);
   1444       UVM_PAGE_OWN(ptmp, NULL);
   1445       uvm_lock_pageq();
   1446       uvm_pagefree(ptmp);
   1447       uvm_unlock_pageq();
   1448       simple_unlock(&uobj->vmobjlock);
   1449       return(result);
   1450     }
   1451 
   1452     /*
   1453      * we got the page!   clear the fake flag (indicates valid data now
   1454      * in page) and plug into our result array.   note that page is still
   1455      * busy.
   1456      *
    1457      * it is the caller's job to:
   1458      * => check if the page is released
   1459      * => unbusy the page
   1460      * => activate the page
   1461      */
   1462 
   1463     ptmp->flags &= ~PG_FAKE;			/* data is valid ... */
   1464     pmap_clear_modify(PMAP_PGARG(ptmp));	/* ... and clean */
   1465     pps[lcv] = ptmp;
   1466 
   1467   }	/* lcv loop */
   1468 
   1469   /*
   1470    * finally, unlock object and return.
   1471    */
   1472 
   1473   simple_unlock(&uobj->vmobjlock);
   1474   return(VM_PAGER_OK);
   1475 }
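
/*
 * note: the pages returned in pps[] by the get routine above are handed
 * back to the caller still PG_BUSY.   a rough sketch of the cleanup that
 * the routine leaves to its caller (see the "caller's job" note in the
 * loop above).   the PG_RELEASED handling and the use of uvm_pageactivate
 * here are an illustration only, not copied from a real caller; "uobj",
 * "pps" and "npages" are the caller's own copies of what it passed in:
 *
 *	simple_lock(&uobj->vmobjlock);
 *	for (lcv = 0 ; lcv < npages ; lcv++) {
 *		if (pps[lcv] == NULL)
 *			continue;
 *		if (pps[lcv]->flags & PG_RELEASED)
 *			continue;	[released pages need special care]
 *		uvm_lock_pageq();
 *		uvm_pageactivate(pps[lcv]);
 *		uvm_unlock_pageq();
 *		if (pps[lcv]->flags & PG_WANTED)
 *			wakeup(pps[lcv]);
 *		pps[lcv]->flags &= ~(PG_WANTED|PG_BUSY);
 *		UVM_PAGE_OWN(pps[lcv], NULL);
 *	}
 *	simple_unlock(&uobj->vmobjlock);
 */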
   1476 
   1477 /*
   1478  * uvn_asyncget: start async I/O to bring pages into ram
   1479  *
    1480  * => caller must lock object (???XXX: see if this is best)
   1481  * => could be called from uvn_get or a madvise() fault-ahead.
   1482  * => if it fails, it doesn't matter.
   1483  */
   1484 
   1485 static int uvn_asyncget(uobj, offset, npages)
   1486 
   1487 struct uvm_object *uobj;
   1488 vm_offset_t offset;
   1489 int npages;
   1490 
   1491 {
   1492   /*
   1493    * XXXCDC: we can't do async I/O yet
   1494    */
   1495   printf("uvn_asyncget called\n");
   1496   return(KERN_SUCCESS);
   1497 }
   1498 
   1499 /*
   1500  * uvn_io: do I/O to a vnode
   1501  *
   1502  * => prefer map unlocked (not required)
   1503  * => object must be locked!   we will _unlock_ it before starting I/O.
   1504  * => flags: PGO_SYNCIO -- use sync. I/O
   1505  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
   1506  *	[thus we never do async i/o!  see iodone comment]
   1507  */
   1508 
   1509 static int uvn_io(uvn, pps, npages, flags, rw)
   1510 
   1511 struct uvm_vnode *uvn;
   1512 vm_page_t *pps;
   1513 int npages, flags, rw;
   1514 
   1515 {
   1516   struct vnode *vn;
   1517   struct uio uio;
   1518   struct iovec iov;
   1519   vm_offset_t kva, file_offset;
   1520   int waitf, result, got, wanted;
   1521   UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist);
   1522 
   1523   UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0);
   1524 
   1525   /*
   1526    * init values
   1527    */
   1528 
   1529   waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
   1530   vn = (struct vnode *) uvn;
   1531   file_offset = pps[0]->offset;
   1532 
   1533   /*
   1534    * check for sync'ing I/O.
   1535    */
   1536 
   1537   while (uvn->u_flags & UVM_VNODE_IOSYNC) {
   1538     if (waitf == M_NOWAIT) {
   1539       simple_unlock(&uvn->u_obj.vmobjlock);
   1540       UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0);
   1541       return(VM_PAGER_AGAIN);
   1542     }
   1543     uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
   1544     UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock,
   1545 			FALSE, "uvn_iosync",0);
   1546     simple_lock(&uvn->u_obj.vmobjlock);
   1547   }
   1548 
   1549   /*
   1550    * check size
   1551    */
   1552 
   1553   if (file_offset >= uvn->u_size) {
   1554       simple_unlock(&uvn->u_obj.vmobjlock);
   1555       UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0);
   1556 #ifdef DIAGNOSTIC
   1557       printf("uvn_io: note: size check fired\n");
   1558 #endif
   1559       return(VM_PAGER_BAD);
   1560   }
   1561 
   1562   /*
   1563    * first try and map the pages in (without waiting)
   1564    */
   1565 
   1566   kva = uvm_pagermapin(pps, npages, NULL, M_NOWAIT);
   1567   if (kva == NULL && waitf == M_NOWAIT) {
   1568     simple_unlock(&uvn->u_obj.vmobjlock);
   1569     UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
   1570     return(VM_PAGER_AGAIN);
   1571   }
   1572 
   1573   /*
   1574    * ok, now bump u_nio up.   at this point we are done with uvn
   1575    * and can unlock it.   if we still don't have a kva, try again
   1576    * (this time with sleep ok).
   1577    */
   1578 
   1579   uvn->u_nio++;			/* we have an I/O in progress! */
   1580   simple_unlock(&uvn->u_obj.vmobjlock);
   1581   /* NOTE: object now unlocked */
   1582   if (kva == NULL) {
   1583     kva = uvm_pagermapin(pps, npages, NULL, M_WAITOK);
   1584   }
   1585 
   1586   /*
   1587    * ok, mapped in.  our pages are PG_BUSY so they are not going to
   1588    * get touched (so we can look at "offset" without having to lock
   1589    * the object).  set up for I/O.
   1590    */
   1591 
   1592   /*
   1593    * fill out uio/iov
   1594    */
   1595 
   1596   iov.iov_base = (caddr_t) kva;
   1597   wanted = npages * PAGE_SIZE;
   1598   if (file_offset + wanted > uvn->u_size)
   1599     wanted = uvn->u_size - file_offset;	/* XXX: needed? */
   1600   iov.iov_len = wanted;
   1601   uio.uio_iov = &iov;
   1602   uio.uio_iovcnt = 1;
   1603   uio.uio_offset = file_offset;
   1604   uio.uio_segflg = UIO_SYSSPACE;
   1605   uio.uio_rw = rw;
   1606   uio.uio_resid = wanted;
   1607   uio.uio_procp = NULL;
   1608 
   1609   /*
   1610    * do the I/O!  (XXX: curproc?)
   1611    */
   1612 
   1613   UVMHIST_LOG(maphist, "calling VOP",0,0,0,0);
   1614 
   1615   if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
   1616     VOP_LOCK(vn);
   1617   /* NOTE: vnode now locked! */
   1618 
   1619   if (rw == UIO_READ)
   1620     result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
   1621   else
   1622     result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred);
   1623 
   1624   if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
   1625     VOP_UNLOCK(vn);
   1626   /* NOTE: vnode now unlocked (unless vnislocked) */
   1627 
   1628   UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0);
   1629 
   1630   /*
   1631    * result == unix style errno (0 == OK!)
   1632    *
   1633    * zero out rest of buffer (if needed)
   1634    */
   1635 
   1636   if (result == 0) {
   1637     got = wanted - uio.uio_resid;
   1638 
   1639     if (wanted && got == 0) {
   1640       result = EIO;		/* XXX: error? */
   1641     } else if (got < PAGE_SIZE * npages && rw == UIO_READ) {
   1642       bzero((void *) (kva + got), (PAGE_SIZE * npages) - got);
   1643     }
   1644   }
   1645 
   1646   /*
   1647    * now remove pager mapping
   1648    */
   1649   uvm_pagermapout(kva, npages);
   1650 
   1651   /*
   1652    * now clean up the object (i.e. drop I/O count)
   1653    */
   1654 
   1655   simple_lock(&uvn->u_obj.vmobjlock);
   1656   /* NOTE: object now locked! */
   1657 
   1658   uvn->u_nio--;			/* I/O DONE! */
   1659   if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
   1660     wakeup(&uvn->u_nio);
   1661   }
   1662   simple_unlock(&uvn->u_obj.vmobjlock);
   1663   /* NOTE: object now unlocked! */
   1664 
   1665   /*
   1666    * done!
   1667    */
   1668 
   1669   UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0);
   1670   if (result == 0)
   1671     return(VM_PAGER_OK);
   1672   else
   1673     return(VM_PAGER_ERROR);
   1674 }
   1675 
   1676 /*
   1677  * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference
   1678  * is gone we will kill the object (flushing dirty pages back to the vnode
   1679  * if needed).
   1680  *
   1681  * => returns TRUE if there was no uvm_object attached or if there was
   1682  *	one and we killed it [i.e. if there is no active uvn]
   1683  * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if
   1684  *	needed]
   1685  *
   1686  * => XXX: given that we now kill uvn's when a vnode is recycled (without
   1687  *	having to hold a reference on the vnode) and given a working
    1688  *	uvm_vnp_sync(), how does that affect the need for this function?
   1689  *      [XXXCDC: seems like it can die?]
   1690  *
   1691  * => XXX: this function should DIE once we merge the VM and buffer
   1692  *	cache.
   1693  *
   1694  * research shows that this is called in the following places:
   1695  * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode
   1696  *	changes sizes
   1697  * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we
   1698  *	are written to
    1699  * ext2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit
   1700  *	is off
   1701  * ffs_realloccg: when we can't extend the current block and have
   1702  *	to allocate a new one we call this [XXX: why?]
   1703  * nfsrv_rename, rename_files: called when the target filename is there
   1704  *	and we want to remove it
   1705  * nfsrv_remove, sys_unlink: called on file we are removing
   1706  * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache
   1707  *	then return "text busy"
   1708  * nfs_open: seems to uncache any file opened with nfs
   1709  * vn_writechk: if VTEXT vnode and can't uncache return "text busy"
   1710  */
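
/*
 * as a concrete example of the vn_writechk case listed above, the check
 * boils down to something like the following (the field and error names
 * are shown for illustration; the real caller may differ in detail):
 *
 *	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
 *		return(ETXTBSY);
 *
 * i.e. if the vnode backs an executable text image and we can not kill
 * off its cached uvn, the write is refused with "text file busy".
 */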
   1711 
   1712 boolean_t uvm_vnp_uncache(vp)
   1713 
   1714 struct vnode *vp;
   1715 
   1716 {
   1717   struct uvm_vnode *uvn = &vp->v_uvm;
   1718 
   1719   /*
   1720    * lock uvn part of the vnode and check to see if we need to do anything
   1721    */
   1722 
   1723   simple_lock(&uvn->u_obj.vmobjlock);
   1724   if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
   1725       (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
   1726     simple_unlock(&uvn->u_obj.vmobjlock);
   1727     return(TRUE);
   1728   }
   1729 
   1730   /*
   1731    * we have a valid, non-blocked uvn.   clear persist flag.
   1732    * if uvn is currently active we can return now.
   1733    */
   1734 
   1735   uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
   1736   if (uvn->u_obj.uo_refs) {
   1737     simple_unlock(&uvn->u_obj.vmobjlock);
   1738     return(FALSE);
   1739   }
   1740 
   1741   /*
   1742    * uvn is currently persisting!   we have to gain a reference to
   1743    * it so that we can call uvn_detach to kill the uvn.
   1744    */
   1745 
   1746   VREF(vp);			/* seems ok, even with VOP_LOCK */
   1747   uvn->u_obj.uo_refs++;		/* value is now 1 */
   1748   simple_unlock(&uvn->u_obj.vmobjlock);
   1749 
   1750 
   1751 #ifdef DEBUG
   1752   /*
   1753    * carry over sanity check from old vnode pager: the vnode should
   1754    * be VOP_LOCK'd, and we confirm it here.
   1755    */
   1756   if (!VOP_ISLOCKED(vp)) {
   1757     boolean_t is_ok_anyway = FALSE;
   1758 #ifdef NFS
   1759     extern int (**nfsv2_vnodeop_p) __P((void *));
   1760     extern int (**spec_nfsv2nodeop_p) __P((void *));
   1761 #ifdef FIFO
   1762     extern int (**fifo_nfsv2nodeop_p) __P((void *));
   1763 #endif	/* FIFO */
   1764 
   1765     /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */
   1766     if (vp->v_op == nfsv2_vnodeop_p || vp->v_op == spec_nfsv2nodeop_p) {
   1767       is_ok_anyway = TRUE;
   1768     }
   1769 #ifdef FIFO
   1770     if (vp->v_op == fifo_nfsv2nodeop_p) {
   1771       is_ok_anyway = TRUE;
   1772     }
   1773 #endif	/* FIFO */
   1774 #endif	/* NFS */
   1775     if (!is_ok_anyway)
   1776       panic("uvm_vnp_uncache: vnode not locked!");
   1777   }
   1778 #endif	/* DEBUG */
   1779 
   1780   /*
   1781    * now drop our reference to the vnode.   if we have the sole
   1782    * reference to the vnode then this will cause it to die [as we
   1783    * just cleared the persist flag].   we have to unlock the vnode
   1784    * while we are doing this as it may trigger I/O.
   1785    *
   1786    * XXX: it might be possible for uvn to get reclaimed while we are
   1787    * unlocked causing us to return TRUE when we should not.   we ignore
   1788    * this as a false-positive return value doesn't hurt us.
   1789    */
   1790   VOP_UNLOCK(vp);
   1791   uvn_detach(&uvn->u_obj);
   1792   VOP_LOCK(vp);
   1793 
   1794   /*
   1795    * and return...
   1796    */
   1797 
   1798   return(TRUE);
   1799 }
   1800 
   1801 /*
   1802  * uvm_vnp_setsize: grow or shrink a vnode uvn
   1803  *
   1804  * grow   => just update size value
   1805  * shrink => toss un-needed pages
   1806  *
   1807  * => we assume that the caller has a reference of some sort to the
   1808  *	vnode in question so that it will not be yanked out from under
   1809  *	us.
   1810  *
   1811  * called from:
   1812  *  => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos])
   1813  *  => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write)
   1814  *  => ffs_balloc [XXX: why? doesn't WRITE handle?]
   1815  *  => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr
   1816  *  => union fs: union_newsize
   1817  */
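
/*
 * the typical calling pattern from a filesystem looks roughly like the
 * following (the inode field name is illustrative only):
 *
 *	ip->i_size = new_length;		[fs updates its own size]
 *	uvm_vnp_setsize(vp, new_length);	[keep the uvn in sync]
 *
 * if new_length is smaller than the old size, the call tosses the
 * now-stale pages past the new end of file, as described above; if it
 * is larger, only the size value is updated.
 */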
   1818 
   1819 void uvm_vnp_setsize(vp, newsize)
   1820 
   1821 struct vnode *vp;
   1822 u_quad_t newsize;
   1823 
   1824 {
   1825   struct uvm_vnode *uvn = &vp->v_uvm;
   1826 
   1827   /*
   1828    * lock uvn and check for valid object, and if valid: do it!
   1829    */
   1830   simple_lock(&uvn->u_obj.vmobjlock);
   1831   if (uvn->u_flags & UVM_VNODE_VALID) {
   1832 
   1833     /*
   1834      * make sure that the newsize fits within a vm_offset_t
   1835      * XXX: need to revise addressing data types
   1836      */
   1837 
   1838     if (newsize > (vm_offset_t) -PAGE_SIZE) {
   1839 #ifdef DEBUG
   1840       printf("uvm_vnp_setsize: vn %p size truncated %qx->%lx\n", vp, newsize,
   1841 	     (vm_offset_t) -PAGE_SIZE);
   1842 #endif
   1843       newsize = (vm_offset_t) -PAGE_SIZE;
   1844     }
   1845 
   1846     /*
   1847      * now check if the size has changed: if we shrink we had better
   1848      * toss some pages...
   1849      */
   1850 
   1851     if (uvn->u_size > newsize) {
   1852       (void) uvn_flush(&uvn->u_obj, (vm_offset_t) newsize,
   1853 		       uvn->u_size, PGO_FREE);
   1854     }
   1855     uvn->u_size = (vm_offset_t)newsize;
   1856   }
   1857   simple_unlock(&uvn->u_obj.vmobjlock);
   1858 
   1859   /*
   1860    * done
   1861    */
   1862   return;
   1863 }
   1864 
   1865 /*
   1866  * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes.
   1867  *
   1868  * => called from sys_sync with no VM structures locked
   1869  * => only one process can do a sync at a time (because the uvn
   1870  *    structure only has one queue for sync'ing).  we ensure this
   1871  *    by holding the uvn_sync_lock while the sync is in progress.
   1872  *    other processes attempting a sync will sleep on this lock
   1873  *    until we are done.
   1874  */
   1875 
   1876 void uvm_vnp_sync(mp)
   1877 
   1878 struct mount *mp;
   1879 
   1880 {
   1881   struct uvm_vnode *uvn;
   1882   struct vnode *vp;
   1883   boolean_t got_lock;
   1884 
   1885   /*
    1886    * step 1: ensure we are the only ones using the uvn_sync_q by locking
   1887    * our lock...
   1888    */
   1889   lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, (void *)0, curproc);
   1890 
   1891   /*
   1892    * step 2: build up a simpleq of uvns of interest based on the
   1893    * write list.   we gain a reference to uvns of interest.  must
   1894    * be careful about locking uvn's since we will be holding uvn_wl_lock
   1895    * in the body of the loop.
   1896    */
   1897   SIMPLEQ_INIT(&uvn_sync_q);
   1898   simple_lock(&uvn_wl_lock);
   1899   for (uvn = uvn_wlist.lh_first ; uvn != NULL ; uvn = uvn->u_wlist.le_next) {
   1900 
   1901     vp = (struct vnode *) uvn;
   1902     if (mp && vp->v_mount != mp)
   1903       continue;
   1904 
   1905     /* attempt to gain reference */
   1906     while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) == FALSE &&
   1907 	   (uvn->u_flags & UVM_VNODE_BLOCKED) == 0)
   1908       /*spin*/;
   1909 
   1910     /*
   1911      * we will exit the loop if we were unable to get the lock and we
   1912      * detected that the vnode was "blocked" ... if it is blocked then
   1913      * it must be a dying vnode.   since dying vnodes are in the process
   1914      * of being flushed out we can safely skip it.
   1915      *
   1916      * note that uvn must already be valid because we found it on the
   1917      * wlist (this also means it can't be ALOCK'd).
   1918      */
   1919     if (!got_lock)
   1920       continue;
   1921 
   1922     /*
   1923      * gain reference.   watch out for persisting uvns (need to regain
   1924      * vnode REF).
   1925      */
   1926     if (uvn->u_obj.uo_refs == 0)
   1927       VREF(vp);
   1928     uvn->u_obj.uo_refs++;
   1929     simple_unlock(&uvn->u_obj.vmobjlock);
   1930 
   1931     /*
   1932      * got it!
   1933      */
   1934     SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
   1935   }
   1936   simple_unlock(&uvn_wl_lock);
   1937 
   1938   /*
   1939    * step 3: we now have a list of uvn's that may need cleaning.
   1940    * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock
   1941    * (so we can now safely lock uvn's again).
   1942    */
   1943 
   1944   for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) {
   1945     simple_lock(&uvn->u_obj.vmobjlock);
   1946 #ifdef DIAGNOSTIC
   1947     if (uvn->u_flags & UVM_VNODE_DYING) {
   1948       printf("uvm_vnp_sync: dying vnode on sync list\n");
   1949     }
   1950 #endif
   1951     uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST);
   1952 
   1953     /*
   1954      * if we have the only reference and we just cleaned the uvn, then
   1955      * we can pull it out of the UVM_VNODE_WRITEABLE state thus allowing
   1956      * us to avoid thinking about flushing it again on later sync ops.
   1957      */
   1958     if (uvn->u_obj.uo_refs == 1 && (uvn->u_flags & UVM_VNODE_WRITEABLE)) {
   1959       LIST_REMOVE(uvn, u_wlist);
   1960       uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
   1961     }
   1962 
   1963     simple_unlock(&uvn->u_obj.vmobjlock);
   1964 
   1965     /* now drop our reference to the uvn */
   1966     uvn_detach(&uvn->u_obj);
   1967   }
   1968 
   1969   /*
   1970    * done!  release sync lock
   1971    */
   1972   lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc);
   1973 }
   1974