/*	$NetBSD: uvm_aobj.c,v 1.3 1998/02/07 02:32:37 chs Exp $	*/

/* copyright here */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 * uvm_aobj.c: anonymous-memory backed uvm_object
 */

/*
 * an aobj manages anonymous-memory backed uvm_objects.   in addition
 * to keeping the list of resident pages, it also keeps a list of
 * allocated swap blocks.  depending on the size of the aobj this list
 * of allocated swap blocks is either stored in an array (small objects)
 * or in a hash table (large objects).
 */

/*
 * local structures
 */

/*
 * for hash tables, we break the address space of the aobj into blocks
 * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
 * be a power of two.
 */

#define UAO_SWHASH_CLUSTER_SHIFT 4
#define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)

/* get the "tag" for this page index */
#define UAO_SWHASH_ELT_TAG(PAGEIDX) \
	((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)

/* given an ELT and a page index, find the swap slot */
#define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
	((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)])

/* given an ELT, return its pageidx base */
#define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
	((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/*
 * the swhash hash function
 */
#define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
	(&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
			    & (AOBJ)->u_swhashmask)])

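/*
 * worked example (illustrative only, not used by the code): with
 * UAO_SWHASH_CLUSTER_SHIFT == 4 a cluster covers 16 pages, so for
 * pageidx 0x123:
 *
 *	UAO_SWHASH_ELT_TAG(0x123)	== 0x123 >> 4	== 0x12
 *	slot index within the elt	== 0x123 & 0xf	== 0x3
 *	UAO_SWHASH_ELT_PAGEIDX_BASE	== 0x12 << 4	== 0x120
 *
 * thus pages 0x120-0x12f all share the elt tagged 0x12, and
 * UAO_SWHASH_HASH picks the bucket with (0x12 & u_swhashmask).
 */
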
/*
 * the swhash threshold determines if we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */

#define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
#define UAO_USES_SWHASH(AOBJ) \
	((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)	/* use hash? */

/*
 * the number of buckets in a swhash, with an upper bound
 */
#define UAO_SWHASH_MAXBUCKETS 256
#define UAO_SWHASH_BUCKETS(AOBJ) \
	(min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
	     UAO_SWHASH_MAXBUCKETS))
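
/*
 * sizing example (illustrative only): the threshold works out to
 * 16 * 4 == 64 pages (256KB with 4KB pages), so an aobj of 65 pages
 * or more uses the hash.  e.g. a 2048 page (8MB) aobj asks for
 * min(2048 >> 4, 256) == 128 buckets (hashinit may round the count
 * to a power of two).
 */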


/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */

struct uao_swhash_elt {
  LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
  vm_offset_t tag;			/* our 'tag' */
  int count;				/* our number of active slots */
  int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};

/*
 * uao_swhash: the swap hash table structure
 */

LIST_HEAD(uao_swhash, uao_swhash_elt);


/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *   (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */

struct uvm_aobj {
  struct uvm_object u_obj;	/* has: lock, pgops, memq, #pages, #refs */
  vm_size_t u_pages;		/* number of pages in entire object */
  int u_flags;			/* the flags (see uvm_aobj.h) */
  int *u_swslots;		/* array of offset->swapslot mappings */
  struct uao_swhash *u_swhash;	/* hashtable of offset->swapslot mappings */
  				/*  (u_swhash is an array of bucket heads) */
  u_long u_swhashmask;		/* mask for hashtable */
  LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
};
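
/*
 * footprint note (illustrative): a small aobj at the 64 page threshold
 * pays for the whole u_swslots array up front (64 * 4 bytes with 4-byte
 * ints), while a large but sparsely-swapped aobj only pays for the
 * uao_swhash_elt structures of clusters that actually have swap slots
 * allocated.
 */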

/*
 * local functions
 */

static void			 uao_init __P((void));
static struct uao_swhash_elt	*uao_find_swhash_elt __P((struct uvm_aobj *,
							  int, boolean_t));
static int			 uao_find_swslot __P((struct uvm_aobj *,
						      vm_offset_t));
static boolean_t		 uao_flush __P((struct uvm_object *,
						vm_offset_t, vm_offset_t,
						int));
static void			 uao_free __P((struct uvm_aobj *));
static int			 uao_get __P((struct uvm_object *, vm_offset_t,
					      vm_page_t *, int *, int,
					      vm_prot_t, int, int));
static boolean_t		 uao_releasepg __P((struct vm_page *,
						    struct vm_page **));



/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */

struct uvm_pagerops aobj_pager = {
  uao_init,		/* init */
  NULL,			/* attach */
  uao_reference,	/* reference */
  uao_detach,		/* detach */
  NULL,			/* fault */
  uao_flush,		/* flush */
  uao_get,		/* get */
  NULL,			/* asyncget */
  NULL,			/* put (done by pagedaemon) */
  NULL,			/* cluster */
  NULL,			/* mk_pcluster */
  uvm_shareprot,	/* shareprot */
  NULL,			/* aiodone */
  uao_releasepg		/* releasepg */
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 */

static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
#if NCPU > 1
static simple_lock_data_t uao_list_lock;
#endif


/*
 * functions
 */

/*
 * hash table/array related functions
 */

/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 *
 * => the object should be locked by the caller
 */

static struct uao_swhash_elt *
uao_find_swhash_elt(aobj, pageidx, create)
struct uvm_aobj *aobj;
int pageidx;
boolean_t create;
{
  struct uao_swhash *swhash;
  struct uao_swhash_elt *elt;
  int page_tag;

  swhash = UAO_SWHASH_HASH(aobj, pageidx);	/* first hash to get bucket */
  page_tag = UAO_SWHASH_ELT_TAG(pageidx);	/* tag to search for */

  /*
   * now search the bucket for the requested tag
   */
  for (elt = swhash->lh_first; elt != NULL; elt = elt->list.le_next) {
    if (elt->tag == page_tag)
      return(elt);
  }

  /* fail now if we are not allowed to create a new entry in the bucket */
  if (!create)
    return NULL;

  /*
   * malloc a new entry for the bucket and init/insert it in
   */
  MALLOC(elt, struct uao_swhash_elt *, sizeof(*elt), M_UVMAOBJ, M_WAITOK);
  LIST_INSERT_HEAD(swhash, elt, list);
  elt->tag = page_tag;
  elt->count = 0;
  bzero(elt->slots, sizeof(elt->slots));

  return(elt);
}
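
/*
 * usage sketch (illustrative, mirrors uao_set_swslot() below): with the
 * object locked, look up the elt for a page and update its slot:
 *
 *	struct uao_swhash_elt *elt;
 *
 *	elt = uao_find_swhash_elt(aobj, pageidx, TRUE);
 *	UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = newslot;
 *
 * with create == TRUE the M_WAITOK malloc means the call cannot fail,
 * so the result need not be checked for NULL.
 */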

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 *
 * => object must be locked by caller
 */

__inline static int
uao_find_swslot(aobj, pageidx)
struct uvm_aobj *aobj;
vm_offset_t pageidx;
{
  /*
   * if noswap flag is set, then we never return a slot
   */

  if (aobj->u_flags & UAO_FLAG_NOSWAP)
    return(0);

  /*
   * if hashing, look in hash table.
   */

  if (UAO_USES_SWHASH(aobj)) {
    struct uao_swhash_elt *elt = uao_find_swhash_elt(aobj, pageidx, FALSE);

    if (elt)
      return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
    else
      return(0);
  }

  /*
   * otherwise, look in the array
   */
  return(aobj->u_swslots[pageidx]);
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 */

int
uao_set_swslot(uobj, pageidx, slot)
struct uvm_object *uobj;
int pageidx, slot;
{
  struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  int oldslot;
  UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
  UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d", aobj, pageidx, slot, 0);

  /*
   * if noswap flag is set, then we can't set a slot
   */

  if (aobj->u_flags & UAO_FLAG_NOSWAP) {

    if (slot == 0)
      return(0);		/* a clear is ok */

    /* but a set is not */
    printf("uao_set_swslot: uobj = %p\n", uobj);
    panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
  }

  /*
   * are we using a hash table?  if so, add it in the hash.
   */

  if (UAO_USES_SWHASH(aobj)) {
    struct uao_swhash_elt *elt = uao_find_swhash_elt(aobj, pageidx, TRUE);

    oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
    UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

    /*
     * now adjust the elt's reference counter and free it if we've dropped
     * it to zero.
     */

    if (slot) {		/* an allocation? */

      if (oldslot == 0)
	elt->count++;

    } else {		/* freeing slot ... */

      if (oldslot)	/* to be safe (who would replace zero with zero?) */
	elt->count--;

      if (elt->count == 0) {
	LIST_REMOVE(elt, list);
	FREE(elt, M_UVMAOBJ);
      }
    }

  } else {

    /* we are using an array */
    oldslot = aobj->u_swslots[pageidx];
    aobj->u_swslots[pageidx] = slot;

  }

  return(oldslot);
}
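
/*
 * usage sketch (illustrative): this is how uao_detach() and
 * uao_releasepg() below release the swap space attached to a page:
 *
 *	slot = uao_set_swslot(&aobj->u_obj, pg->offset / PAGE_SIZE, 0);
 *	if (slot)
 *		uvm_swap_free(slot, 1);
 *
 * storing zero clears the mapping and hands back the old slot number
 * so the caller can free it.
 */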

/*
 * end of hash/array functions
 */

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */

static void
uao_free(aobj)
struct uvm_aobj *aobj;
{

  if (UAO_USES_SWHASH(aobj)) {
    int i, hashbuckets = aobj->u_swhashmask + 1;

    /*
     * free the swslots from each hash bucket,
     * then the hash bucket, and finally the hash table itself.
     */
    for (i = 0; i < hashbuckets; i++) {
      struct uao_swhash_elt *elt, *next;

      for (elt = aobj->u_swhash[i].lh_first; elt != NULL; elt = next) {
	int j;

	for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
	  int slot = elt->slots[j];

	  if (slot)
	    uvm_swap_free(slot, 1);
	}

	next = elt->list.le_next;
	FREE(elt, M_UVMAOBJ);
      }
    }
    FREE(aobj->u_swhash, M_UVMAOBJ);
  } else {
    int i;

    /*
     * free the array
     */

    for (i = 0; i < aobj->u_pages; i++) {
      int slot = aobj->u_swslots[i];

      if (slot)
	uvm_swap_free(slot, 1);
    }

    FREE(aobj->u_swslots, M_UVMAOBJ);
  }

  /*
   * finally free the aobj itself
   */
  FREE(aobj, M_UVMAOBJ);
}


/*
 * pager functions
 */

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are always zero
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 */

struct uvm_object *
uao_create(size, flags)
vm_size_t size;
int flags;
{
  static struct uvm_aobj kernel_object_store;	/* home of kernel_object */
  static int kobj_alloced = 0;			/* not allocated yet */
  int pages = round_page(size) / PAGE_SIZE;
  struct uvm_aobj *aobj;

  /*
   * malloc a new aobj unless we are asked for the kernel object
   */
  if (flags & UAO_FLAG_KERNOBJ) {		/* want kernel object? */
    if (kobj_alloced)
      panic("uao_create: kernel object already allocated");

    aobj = &kernel_object_store;
    aobj->u_pages = pages;
    aobj->u_flags = UAO_FLAG_NOSWAP;	/* no swap to start */
    aobj->u_obj.uo_refs = UVM_OBJ_KERN; /* we are special, we never die */
    kobj_alloced = UAO_FLAG_KERNOBJ;

  } else if (flags & UAO_FLAG_KERNSWAP) {

    aobj = &kernel_object_store;
    if (kobj_alloced != UAO_FLAG_KERNOBJ)
      panic("uao_create: asked to enable swap on kernel object");
    kobj_alloced = UAO_FLAG_KERNSWAP;

  } else {	/* normal object */

    MALLOC(aobj, struct uvm_aobj *, sizeof(*aobj), M_UVMAOBJ, M_WAITOK);
    aobj->u_pages = pages;
    aobj->u_flags = 0;		/* normal object */
    aobj->u_obj.uo_refs = 1;	/* start with 1 reference */

  }

  /*
   * allocate hash/array if necessary
   *
   * note: in the KERNSWAP case there is no need to worry about locking,
   * since we are still booting and should be the only thread around.
   */

  if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {

    int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ? M_NOWAIT : M_WAITOK;

    /* allocate hash table or array depending on object size */
    if (UAO_USES_SWHASH(aobj)) {
      aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj), M_UVMAOBJ, mflags,
					&aobj->u_swhashmask);
      if (aobj->u_swhash == NULL)
	panic("uao_create: hashinit swhash failed");
    } else {
      MALLOC(aobj->u_swslots, int *, pages * sizeof(int), M_UVMAOBJ, mflags);
      if (aobj->u_swslots == NULL)
	panic("uao_create: malloc swslots failed");
      bzero(aobj->u_swslots, pages * sizeof(int));
    }

    if (flags) {
      aobj->u_flags &= ~UAO_FLAG_NOSWAP;	/* clear noswap */
      return(&aobj->u_obj);
      /* done! */
    }
  }

  /*
   * init aobj fields
   */
  simple_lock_init(&aobj->u_obj.vmobjlock);
  aobj->u_obj.pgops = &aobj_pager;
  TAILQ_INIT(&aobj->u_obj.memq);
  aobj->u_obj.uo_npages = 0;

  /*
   * now that aobj is ready, add it to the global list
   * XXXCHS: uao_init hasn't been called in the KERNOBJ case, do we really
   * need the kernel object on this list anyway?
   */
  simple_lock(&uao_list_lock);
  LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  simple_unlock(&uao_list_lock);

  /*
   * done!
   */
  return(&aobj->u_obj);
}
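
/*
 * usage sketch (illustrative): a normal (non-kernel) anonymous object
 * is created with flags of zero and a single reference, and goes away
 * when that reference is dropped:
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(1024 * 1024, 0);
 *	...
 *	uao_detach(uobj);
 */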


/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */

static void
uao_init()
{
  LIST_INIT(&uao_list);
  simple_lock_init(&uao_list_lock);
}

/*
 * uao_reference: add a ref to an aobj
 *
 * => aobj must be unlocked (we will lock it)
 */

void
uao_reference(uobj)
struct uvm_object *uobj;
{
  UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);

  /*
   * kernel_object already has plenty of references, leave it alone.
   */

  if (uobj->uo_refs == UVM_OBJ_KERN) {
    return;
  }

  simple_lock(&uobj->vmobjlock);
  uobj->uo_refs++;		/* bump! */
  UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
	uobj, uobj->uo_refs,0,0);
  simple_unlock(&uobj->vmobjlock);
}

/*
 * uao_detach: drop a reference to an aobj
 *
 * => aobj must be unlocked, we will lock it
 */

void
uao_detach(uobj)
struct uvm_object *uobj;
{
  struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  struct vm_page *pg;
  boolean_t busybody;
  UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);

  /*
   * detaching from kernel_object is a noop.
   */

  if (uobj->uo_refs == UVM_OBJ_KERN) {
    return;
  }

  simple_lock(&uobj->vmobjlock);

  UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
  uobj->uo_refs--;				/* drop ref! */
  if (uobj->uo_refs) {				/* still more refs? */
    simple_unlock(&uobj->vmobjlock);
    UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
    return;
  }

  /*
   * remove the aobj from the global list.
   */
  simple_lock(&uao_list_lock);
  LIST_REMOVE(aobj, u_list);
  simple_unlock(&uao_list_lock);

  /*
   * free all the pages that aren't PG_BUSY, mark for release any that are.
   */

  busybody = FALSE;
  for (pg = uobj->memq.tqh_first ; pg != NULL ; pg = pg->listq.tqe_next) {
    int swslot;

    if (pg->flags & PG_BUSY) {
      pg->flags |= PG_RELEASED;
      busybody = TRUE;
      continue;
    }

    /* zap the mappings, free the swap slot, free the page */
    pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);

    swslot = uao_set_swslot(&aobj->u_obj, pg->offset / PAGE_SIZE, 0);
    if (swslot) {
      uvm_swap_free(swslot, 1);
    }

    uvm_lock_pageq();
    uvm_pagefree(pg);
    uvm_unlock_pageq();
  }

  /*
   * if we found any busy pages, we're done for now.
   * mark the aobj for death, releasepg will finish up for us.
   */
  if (busybody) {
    aobj->u_flags |= UAO_FLAG_KILLME;
    simple_unlock(&aobj->u_obj.vmobjlock);
    return;
  }

  /*
   * finally, free the rest.
   */
  uao_free(aobj);
}


/*
 * uao_flush: uh, yea, sure it's flushed.  really!
 */
static boolean_t
uao_flush(uobj, start, end, flags)
struct uvm_object *uobj;
vm_offset_t start, end;
int flags;
{
  /*
   * anonymous memory doesn't "flush"
   */
  /*
   * XXX
   * deal with PGO_DEACTIVATE (for madvise(MADV_SEQUENTIAL))
   * and PGO_FREE (for msync(MS_INVALIDATE))
   */
  return TRUE;
}

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
 * then we will need to return VM_PAGER_UNLOCK.
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_ALLPAGES: get all of the pages
 *           PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */
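
/*
 * calling-convention sketch (illustrative, not from this file): a fault
 * handler going through pgops->pgo_get would first try the locked,
 * non-sleeping path and retry unlocked if case 3 is hit:
 *
 *	npages = 1;
 *	rv = uobj->pgops->pgo_get(uobj, offset, pps, &npages, 0,
 *	    VM_PROT_READ, UVM_ADV_NORMAL, PGO_LOCKED);
 *	if (rv == VM_PAGER_UNLOCK) {
 *		(unlock the fault data structures, then ...)
 *		rv = uobj->pgops->pgo_get(uobj, offset, pps, &npages, 0,
 *		    VM_PROT_READ, UVM_ADV_NORMAL, 0);
 *	}
 */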

static int
uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
struct uvm_object *uobj;
vm_offset_t offset;
struct vm_page **pps;
int *npagesp;
int centeridx, advice, flags;
vm_prot_t access_type;
{
  struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  vm_offset_t current_offset;
  vm_page_t ptmp;
  int lcv, gotpages, maxpages, swslot, rv;
  boolean_t done;
  UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);

  UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d", aobj, offset, flags,0);

  /*
   * get number of pages
   */

  maxpages = *npagesp;

  /*
   * step 1: handle the case where fault data structures are locked.
   */

  if (flags & PGO_LOCKED) {

    /*
     * step 1a: get pages that are already resident.   only do this
     * if the data structures are locked (i.e. the first time through).
     */

    done = TRUE;	/* be optimistic */
    gotpages = 0;	/* # of pages we got so far */

    for (lcv = 0, current_offset = offset ;
	 lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) {

      /* do we care about this page?  if not, skip it */
      if (pps[lcv] == PGO_DONTCARE)
	continue;

      ptmp = uvm_pagelookup(uobj, current_offset);

      /*
       * if page is new, attempt to allocate the page, then zero-fill it.
       */
      if (ptmp == NULL &&
	  uao_find_swslot(aobj, current_offset / PAGE_SIZE) == 0) {

	ptmp = uvm_pagealloc(uobj, current_offset, NULL);
	if (ptmp) {
	  ptmp->flags &= ~(PG_BUSY|PG_FAKE);	/* new page */
	  ptmp->pqflags |= PQ_AOBJ;
	  UVM_PAGE_OWN(ptmp, NULL);
	  uvm_pagezero(ptmp);
	}
      }

      /* to be useful must get a non-busy, non-released page */
      if (ptmp == NULL || (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
	if (lcv == centeridx || (flags & PGO_ALLPAGES) != 0)
	  done = FALSE;		/* need to do a wait or I/O! */
	continue;
      }

      /* useful page: busy/lock it and plug it in our result array */
      ptmp->flags |= PG_BUSY;		/* caller must un-busy this page */
      UVM_PAGE_OWN(ptmp, "uao_get1");
      pps[lcv] = ptmp;
      gotpages++;

    }	/* "for" lcv loop */

    /*
     * step 1b: now we've either done everything needed or we need to
     * unlock and do some waiting or I/O.
     */

    UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);

    *npagesp = gotpages;
    if (done)
      return(VM_PAGER_OK);		/* bingo! */
    else
      return(VM_PAGER_UNLOCK);		/* EEK!   Need to unlock and I/O */
  }

  /*
   * step 2: get non-resident or busy pages.
   * object is locked.   data structures are unlocked.
   */

  for (lcv = 0, current_offset = offset ;
       lcv < maxpages ;
       lcv++, current_offset += PAGE_SIZE) {

    /* skip over pages we've already gotten or don't want */
    /* skip over pages we don't _have_ to get */
    if (pps[lcv] != NULL ||
	(lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
      continue;

    /*
     * we have yet to locate the current page (pps[lcv]).   we first
     * look for a page that is already at the current offset.   if we
     * find a page, we check to see if it is busy or released.  if that
     * is the case, then we sleep on the page until it is no longer busy
     * or released and repeat the lookup.    if the page we found is
     * neither busy nor released, then we busy it (so we own it) and
     * plug it into pps[lcv].   this 'break's the following while loop
     * and indicates we are ready to move on to the next page in the
     * "lcv" loop above.
     *
     * if we exit the while loop with pps[lcv] still set to NULL, then
     * it means that we allocated a new busy/fake/clean page ptmp in the
     * object and we need to do I/O to fill in the data.
     */

    while (pps[lcv] == NULL) {		/* top of "pps" while loop */

      /* look for a resident page */
      ptmp = uvm_pagelookup(uobj, current_offset);

      /* not resident?   allocate one now (if we can) */
      if (ptmp == NULL) {

	ptmp = uvm_pagealloc(uobj, current_offset, NULL);	/* alloc */

	/* out of RAM? */
	if (ptmp == NULL) {
	  simple_unlock(&uobj->vmobjlock);
	  UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL\n",0,0,0,0);
	  uvm_wait("uao_getpage");
	  simple_lock(&uobj->vmobjlock);
	  continue;		/* goto top of pps while loop */
	}

	/* safe with PQ's unlocked: because we just alloc'd the page */
	ptmp->pqflags |= PQ_AOBJ;

	/*
	 * got new page ready for I/O.  break pps while loop.  pps[lcv] is
	 * still NULL.
	 */
	break;
      }

      /* page is there, see if we need to wait on it */
      if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
	ptmp->flags |= PG_WANTED;
	UVMHIST_LOG(pdhist, "sleeping, ptmp->flags 0x%x\n",ptmp->flags,0,0,0);
	UVM_UNLOCK_AND_WAIT(ptmp,&uobj->vmobjlock,0,"uao_get",0);
	simple_lock(&uobj->vmobjlock);
	continue;		/* goto top of pps while loop */
      }

      /*
       * if we get here then the page has become resident and unbusy
       * between steps 1 and 2.  we busy it now (so we own it) and set
       * pps[lcv] (so that we exit the while loop).
       */
      ptmp->flags |= PG_BUSY;	/* we own it, caller must un-busy */
      UVM_PAGE_OWN(ptmp, "uao_get2");
      pps[lcv] = ptmp;
    }

    /*
     * if we own the valid page at the correct offset, pps[lcv] will
     * point to it.   nothing more to do except go to the next page.
     */

    if (pps[lcv])
      continue;			/* next lcv */

    /*
     * we have a "fake/busy/clean" page that we just allocated.
     * do the needed "i/o", either reading from swap or zeroing.
     */

    swslot = uao_find_swslot(aobj, current_offset / PAGE_SIZE);

    /*
     * just zero the page if there's nothing in swap.
     */
    if (swslot == 0) {
      /*
       * page hasn't existed before, just zero it.
       */
      uvm_pagezero(ptmp);
    } else {
      UVMHIST_LOG(pdhist, "pagein from swslot %d", swslot, 0,0,0);

      /*
       * page in the swapped-out page.
       * unlock object for i/o, relock when done.
       */
      simple_unlock(&uobj->vmobjlock);
      rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
      simple_lock(&uobj->vmobjlock);

      /*
       * I/O done.  check for errors.
       */
      if (rv != VM_PAGER_OK) {
	UVMHIST_LOG(pdhist, "<- done (error=%d)",rv,0,0,0);
	if (ptmp->flags & PG_WANTED)
	  thread_wakeup(ptmp);		/* object lock still held */
	ptmp->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(ptmp, NULL);
	uvm_lock_pageq();
	uvm_pagefree(ptmp);
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	return rv;
      }
    }

    /*
     * we got the page!   clear the fake flag (indicates valid data now
     * in page) and plug into our result array.   note that page is still
     * busy.
     *
     * it is the caller's job to:
     * => check if the page is released
     * => unbusy the page
     * => activate the page
     */

    ptmp->flags &= ~PG_FAKE;			/* data is valid ... */
    pmap_clear_modify(PMAP_PGARG(ptmp));	/* ... and clean */
    pps[lcv] = ptmp;

  }	/* lcv loop */

  /*
   * finally, unlock object and return.
   */

  simple_unlock(&uobj->vmobjlock);
  UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
  return(VM_PAGER_OK);
}

/*
 * uao_releasepg: handle released page in an aobj
 *
 * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
 *      to dispose of.
 * => caller must handle PG_WANTED case
 * => called with page's object locked, pageq's unlocked
 * => returns TRUE if page's object is still alive, FALSE if we
 *      killed the page's object.    if we return TRUE, then we
 *      return with the object locked.
 * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
 *                              with the page queues locked [for pagedaemon]
 * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
 * => we kill the aobj if it is not referenced and we are supposed to
 *      kill it ("KILLME").
 */

static boolean_t
uao_releasepg(pg, nextpgp)
struct vm_page *pg;
struct vm_page **nextpgp;	/* OUT */
{
  struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject;
  int slot;

#ifdef DIAGNOSTIC
  if ((pg->flags & PG_RELEASED) == 0)
    panic("uao_releasepg: page not released!");
#endif

  /*
   * dispose of the page [caller handles PG_WANTED] and swap slot.
   */
  pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
  slot = uao_set_swslot(&aobj->u_obj, pg->offset / PAGE_SIZE, 0);
  if (slot)
    uvm_swap_free(slot, 1);
  uvm_lock_pageq();
  if (nextpgp)
    *nextpgp = pg->pageq.tqe_next;	/* next page for daemon */
  uvm_pagefree(pg);
  if (!nextpgp)
    uvm_unlock_pageq();			/* keep locked for daemon */

  /*
   * if we're not killing the object, we're done.
   */
  if ((aobj->u_flags & UAO_FLAG_KILLME) == 0)
    return TRUE;

#ifdef DIAGNOSTIC
  if (aobj->u_obj.uo_refs)
    panic("uao_releasepg: kill flag set on referenced object!");
#endif

  /*
   * if there are still pages in the object, we're done for now.
   */
  if (aobj->u_obj.uo_npages != 0)
    return TRUE;

#ifdef DIAGNOSTIC
  if (aobj->u_obj.memq.tqh_first)
    panic("uao_releasepg: pages in object with npages == 0");
#endif

  /*
   * finally, free the rest.
   */
  uao_free(aobj);

  return FALSE;
}