Home | History | Annotate | Line # | Download | only in uvm
uvm_aobj.c revision 1.32
      1 /*	$NetBSD: uvm_aobj.c,v 1.32 2000/06/26 14:21:17 mrg Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
      5  *                    Washington University.
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *      This product includes software developed by Charles D. Cranor and
     19  *      Washington University.
     20  * 4. The name of the author may not be used to endorse or promote products
     21  *    derived from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33  *
     34  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
     35  */
     36 /*
     37  * uvm_aobj.c: anonymous memory uvm_object pager
     38  *
     39  * author: Chuck Silvers <chuq (at) chuq.com>
     40  * started: Jan-1998
     41  *
     42  * - design mostly from Chuck Cranor
     43  */
     44 
     45 
     46 
     47 #include "opt_uvmhist.h"
     48 
     49 #include <sys/param.h>
     50 #include <sys/systm.h>
     51 #include <sys/proc.h>
     52 #include <sys/malloc.h>
     53 #include <sys/pool.h>
     54 #include <sys/kernel.h>
     55 
     56 #include <vm/vm.h>
     57 
     58 #include <uvm/uvm.h>
     59 
     60 /*
     61  * an aobj manages anonymous-memory backed uvm_objects.   in addition
     62  * to keeping the list of resident pages, it also keeps a list of
     63  * allocated swap blocks.  depending on the size of the aobj this list
     64  * of allocated swap blocks is either stored in an array (small objects)
     65  * or in a hash table (large objects).
     66  */
     67 
     68 /*
     69  * local structures
     70  */
     71 
     72 /*
     73  * for hash tables, we break the address space of the aobj into blocks
     74  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
     75  * be a power of two.
     76  */
     77 
#define UAO_SWHASH_CLUSTER_SHIFT 4
#define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)

/* "tag" of a page index: the number of the cluster the page falls in */
#define UAO_SWHASH_ELT_TAG(PAGEIDX) \
	((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)

/* the swap slot (usable as an lvalue) for a page index within an ELT */
#define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
	((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)])

/* the first page index covered by an ELT */
#define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
	((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/*
 * the swhash hash function: the bucket is chosen by masking the
 * page's tag with the table's hash mask.
 */
#define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
	(&(AOBJ)->u_swhash[(UAO_SWHASH_ELT_TAG(PAGEIDX) \
			    & (AOBJ)->u_swhashmask)])

/*
 * the swhash threshold decides whether the list of allocated swap
 * blocks lives in an array (at or below it) or a hash table (above it).
 */

#define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
#define UAO_USES_SWHASH(AOBJ) \
	((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)	/* use hash? */

/*
 * the number of buckets in a swhash: one per cluster, capped at
 * UAO_SWHASH_MAXBUCKETS
 */
#define UAO_SWHASH_MAXBUCKETS 256
#define UAO_SWHASH_BUCKETS(AOBJ) \
	(min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
	     UAO_SWHASH_MAXBUCKETS))

    116 
    117 
    118 /*
    119  * uao_swhash_elt: when a hash table is being used, this structure defines
    120  * the format of an entry in the bucket list.
    121  */
    122 
    123 struct uao_swhash_elt {
    124 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
    125 	voff_t tag;				/* our 'tag' */
    126 	int count;				/* our number of active slots */
    127 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
    128 };
    129 
    130 /*
    131  * uao_swhash: the swap hash table structure
    132  */
    133 
    134 LIST_HEAD(uao_swhash, uao_swhash_elt);
    135 
    136 /*
    137  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
    138  */
    139 
    140 struct pool uao_swhash_elt_pool;
    141 
    142 /*
    143  * uvm_aobj: the actual anon-backed uvm_object
    144  *
    145  * => the uvm_object is at the top of the structure, this allows
    146  *   (struct uvm_aobj *) == (struct uvm_object *)
    147  * => only one of u_swslots and u_swhash is used in any given aobj
    148  */
    149 
    150 struct uvm_aobj {
    151 	struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
    152 	int u_pages;		 /* number of pages in entire object */
    153 	int u_flags;		 /* the flags (see uvm_aobj.h) */
    154 	int *u_swslots;		 /* array of offset->swapslot mappings */
    155 				 /*
    156 				  * hashtable of offset->swapslot mappings
    157 				  * (u_swhash is an array of bucket heads)
    158 				  */
    159 	struct uao_swhash *u_swhash;
    160 	u_long u_swhashmask;		/* mask for hashtable */
    161 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
    162 };
    163 
    164 /*
    165  * uvm_aobj_pool: pool of uvm_aobj structures
    166  */
    167 
    168 struct pool uvm_aobj_pool;
    169 
    170 /*
    171  * local functions
    172  */
    173 
    174 static struct uao_swhash_elt	*uao_find_swhash_elt __P((struct uvm_aobj *,
    175 							  int, boolean_t));
    176 static int			 uao_find_swslot __P((struct uvm_aobj *, int));
    177 static boolean_t		 uao_flush __P((struct uvm_object *,
    178 						voff_t, voff_t, int));
    179 static void			 uao_free __P((struct uvm_aobj *));
    180 static int			 uao_get __P((struct uvm_object *, voff_t,
    181 					      vm_page_t *, int *, int,
    182 					      vm_prot_t, int, int));
    183 static boolean_t		 uao_releasepg __P((struct vm_page *,
    184 						    struct vm_page **));
    185 static boolean_t		 uao_pagein __P((struct uvm_aobj *, int, int));
    186 static boolean_t		 uao_pagein_page __P((struct uvm_aobj *, int));
    187 
    188 
    189 
    190 /*
    191  * aobj_pager
    192  *
    193  * note that some functions (e.g. put) are handled elsewhere
    194  */
    195 
    196 struct uvm_pagerops aobj_pager = {
    197 	NULL,			/* init */
    198 	uao_reference,		/* reference */
    199 	uao_detach,		/* detach */
    200 	NULL,			/* fault */
    201 	uao_flush,		/* flush */
    202 	uao_get,		/* get */
    203 	NULL,			/* asyncget */
    204 	NULL,			/* put (done by pagedaemon) */
    205 	NULL,			/* cluster */
    206 	NULL,			/* mk_pcluster */
    207 	NULL,			/* aiodone */
    208 	uao_releasepg		/* releasepg */
    209 };
    210 
    211 /*
    212  * uao_list: global list of active aobjs, locked by uao_list_lock
    213  */
    214 
    215 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
    216 static simple_lock_data_t uao_list_lock;
    217 
    218 
    219 /*
    220  * functions
    221  */
    222 
    223 /*
    224  * hash table/array related functions
    225  */
    226 
    227 /*
    228  * uao_find_swhash_elt: find (or create) a hash table entry for a page
    229  * offset.
    230  *
    231  * => the object should be locked by the caller
    232  */
    233 
    234 static struct uao_swhash_elt *
    235 uao_find_swhash_elt(aobj, pageidx, create)
    236 	struct uvm_aobj *aobj;
    237 	int pageidx;
    238 	boolean_t create;
    239 {
    240 	struct uao_swhash *swhash;
    241 	struct uao_swhash_elt *elt;
    242 	voff_t page_tag;
    243 
    244 	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
    245 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	/* tag to search for */
    246 
    247 	/*
    248 	 * now search the bucket for the requested tag
    249 	 */
    250 	for (elt = swhash->lh_first; elt != NULL; elt = elt->list.le_next) {
    251 		if (elt->tag == page_tag)
    252 			return(elt);
    253 	}
    254 
    255 	/* fail now if we are not allowed to create a new entry in the bucket */
    256 	if (!create)
    257 		return NULL;
    258 
    259 
    260 	/*
    261 	 * allocate a new entry for the bucket and init/insert it in
    262 	 */
    263 	elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK);
    264 	LIST_INSERT_HEAD(swhash, elt, list);
    265 	elt->tag = page_tag;
    266 	elt->count = 0;
    267 	memset(elt->slots, 0, sizeof(elt->slots));
    268 
    269 	return(elt);
    270 }
    271 
    272 /*
    273  * uao_find_swslot: find the swap slot number for an aobj/pageidx
    274  *
    275  * => object must be locked by caller
    276  */
    277 __inline static int
    278 uao_find_swslot(aobj, pageidx)
    279 	struct uvm_aobj *aobj;
    280 	int pageidx;
    281 {
    282 
    283 	/*
    284 	 * if noswap flag is set, then we never return a slot
    285 	 */
    286 
    287 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
    288 		return(0);
    289 
    290 	/*
    291 	 * if hashing, look in hash table.
    292 	 */
    293 
    294 	if (UAO_USES_SWHASH(aobj)) {
    295 		struct uao_swhash_elt *elt =
    296 		    uao_find_swhash_elt(aobj, pageidx, FALSE);
    297 
    298 		if (elt)
    299 			return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
    300 		else
    301 			return(0);
    302 	}
    303 
    304 	/*
    305 	 * otherwise, look in the array
    306 	 */
    307 	return(aobj->u_swslots[pageidx]);
    308 }
    309 
    310 /*
    311  * uao_set_swslot: set the swap slot for a page in an aobj.
    312  *
    313  * => setting a slot to zero frees the slot
    314  * => object must be locked by caller
    315  */
    316 int
    317 uao_set_swslot(uobj, pageidx, slot)
    318 	struct uvm_object *uobj;
    319 	int pageidx, slot;
    320 {
    321 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
    322 	int oldslot;
    323 	UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
    324 	UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
    325 	    aobj, pageidx, slot, 0);
    326 
    327 	/*
    328 	 * if noswap flag is set, then we can't set a slot
    329 	 */
    330 
    331 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
    332 
    333 		if (slot == 0)
    334 			return(0);		/* a clear is ok */
    335 
    336 		/* but a set is not */
    337 		printf("uao_set_swslot: uobj = %p\n", uobj);
    338 	    panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
    339 	}
    340 
    341 	/*
    342 	 * are we using a hash table?  if so, add it in the hash.
    343 	 */
    344 
    345 	if (UAO_USES_SWHASH(aobj)) {
    346 		/*
    347 		 * Avoid allocating an entry just to free it again if
    348 		 * the page had not swap slot in the first place, and
    349 		 * we are freeing.
    350 		 */
    351 		struct uao_swhash_elt *elt =
    352 		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
    353 		if (elt == NULL) {
    354 #ifdef DIAGNOSTIC
    355 			if (slot)
    356 				panic("uao_set_swslot: didn't create elt");
    357 #endif
    358 			return (0);
    359 		}
    360 
    361 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
    362 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
    363 
    364 		/*
    365 		 * now adjust the elt's reference counter and free it if we've
    366 		 * dropped it to zero.
    367 		 */
    368 
    369 		/* an allocation? */
    370 		if (slot) {
    371 			if (oldslot == 0)
    372 				elt->count++;
    373 		} else {		/* freeing slot ... */
    374 			if (oldslot)	/* to be safe */
    375 				elt->count--;
    376 
    377 			if (elt->count == 0) {
    378 				LIST_REMOVE(elt, list);
    379 				pool_put(&uao_swhash_elt_pool, elt);
    380 			}
    381 		}
    382 
    383 	} else {
    384 		/* we are using an array */
    385 		oldslot = aobj->u_swslots[pageidx];
    386 		aobj->u_swslots[pageidx] = slot;
    387 	}
    388 	return (oldslot);
    389 }
    390 
    391 /*
    392  * end of hash/array functions
    393  */
    394 
    395 /*
    396  * uao_free: free all resources held by an aobj, and then free the aobj
    397  *
    398  * => the aobj should be dead
    399  */
    400 static void
    401 uao_free(aobj)
    402 	struct uvm_aobj *aobj;
    403 {
    404 
    405 	simple_unlock(&aobj->u_obj.vmobjlock);
    406 
    407 	if (UAO_USES_SWHASH(aobj)) {
    408 		int i, hashbuckets = aobj->u_swhashmask + 1;
    409 
    410 		/*
    411 		 * free the swslots from each hash bucket,
    412 		 * then the hash bucket, and finally the hash table itself.
    413 		 */
    414 		for (i = 0; i < hashbuckets; i++) {
    415 			struct uao_swhash_elt *elt, *next;
    416 
    417 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
    418 			     elt != NULL;
    419 			     elt = next) {
    420 				int j;
    421 
    422 				for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
    423 					int slot = elt->slots[j];
    424 
    425 					if (slot) {
    426 						uvm_swap_free(slot, 1);
    427 
    428 						/*
    429 						 * this page is no longer
    430 						 * only in swap.
    431 						 */
    432 						simple_lock(&uvm.swap_data_lock);
    433 						uvmexp.swpgonly--;
    434 						simple_unlock(&uvm.swap_data_lock);
    435 					}
    436 				}
    437 
    438 				next = LIST_NEXT(elt, list);
    439 				pool_put(&uao_swhash_elt_pool, elt);
    440 			}
    441 		}
    442 		FREE(aobj->u_swhash, M_UVMAOBJ);
    443 	} else {
    444 		int i;
    445 
    446 		/*
    447 		 * free the array
    448 		 */
    449 
    450 		for (i = 0; i < aobj->u_pages; i++) {
    451 			int slot = aobj->u_swslots[i];
    452 
    453 			if (slot) {
    454 				uvm_swap_free(slot, 1);
    455 
    456 				/* this page is no longer only in swap. */
    457 				simple_lock(&uvm.swap_data_lock);
    458 				uvmexp.swpgonly--;
    459 				simple_unlock(&uvm.swap_data_lock);
    460 			}
    461 		}
    462 		FREE(aobj->u_swslots, M_UVMAOBJ);
    463 	}
    464 
    465 	/*
    466 	 * finally free the aobj itself
    467 	 */
    468 	pool_put(&uvm_aobj_pool, aobj);
    469 }
    470 
    471 /*
    472  * pager functions
    473  */
    474 
    475 /*
    476  * uao_create: create an aobj of the given size and return its uvm_object.
    477  *
    478  * => for normal use, flags are always zero
    479  * => for the kernel object, the flags are:
    480  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
    481  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
    482  */
    483 struct uvm_object *
    484 uao_create(size, flags)
    485 	vsize_t size;
    486 	int flags;
    487 {
    488 	static struct uvm_aobj kernel_object_store; /* home of kernel_object */
    489 	static int kobj_alloced = 0;			/* not allocated yet */
    490 	int pages = round_page(size) >> PAGE_SHIFT;
    491 	struct uvm_aobj *aobj;
    492 
    493 	/*
    494 	 * malloc a new aobj unless we are asked for the kernel object
    495 	 */
    496 	if (flags & UAO_FLAG_KERNOBJ) {		/* want kernel object? */
    497 		if (kobj_alloced)
    498 			panic("uao_create: kernel object already allocated");
    499 
    500 		aobj = &kernel_object_store;
    501 		aobj->u_pages = pages;
    502 		aobj->u_flags = UAO_FLAG_NOSWAP;	/* no swap to start */
    503 		/* we are special, we never die */
    504 		aobj->u_obj.uo_refs = UVM_OBJ_KERN;
    505 		kobj_alloced = UAO_FLAG_KERNOBJ;
    506 	} else if (flags & UAO_FLAG_KERNSWAP) {
    507 		aobj = &kernel_object_store;
    508 		if (kobj_alloced != UAO_FLAG_KERNOBJ)
    509 		    panic("uao_create: asked to enable swap on kernel object");
    510 		kobj_alloced = UAO_FLAG_KERNSWAP;
    511 	} else {	/* normal object */
    512 		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
    513 		aobj->u_pages = pages;
    514 		aobj->u_flags = 0;		/* normal object */
    515 		aobj->u_obj.uo_refs = 1;	/* start with 1 reference */
    516 	}
    517 
    518 	/*
    519  	 * allocate hash/array if necessary
    520  	 *
    521  	 * note: in the KERNSWAP case no need to worry about locking since
    522  	 * we are still booting we should be the only thread around.
    523  	 */
    524 	if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
    525 		int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ?
    526 		    M_NOWAIT : M_WAITOK;
    527 
    528 		/* allocate hash table or array depending on object size */
    529 		if (UAO_USES_SWHASH(aobj)) {
    530 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
    531 			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
    532 			if (aobj->u_swhash == NULL)
    533 				panic("uao_create: hashinit swhash failed");
    534 		} else {
    535 			MALLOC(aobj->u_swslots, int *, pages * sizeof(int),
    536 			    M_UVMAOBJ, mflags);
    537 			if (aobj->u_swslots == NULL)
    538 				panic("uao_create: malloc swslots failed");
    539 			memset(aobj->u_swslots, 0, pages * sizeof(int));
    540 		}
    541 
    542 		if (flags) {
    543 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
    544 			return(&aobj->u_obj);
    545 			/* done! */
    546 		}
    547 	}
    548 
    549 	/*
    550  	 * init aobj fields
    551  	 */
    552 	simple_lock_init(&aobj->u_obj.vmobjlock);
    553 	aobj->u_obj.pgops = &aobj_pager;
    554 	TAILQ_INIT(&aobj->u_obj.memq);
    555 	aobj->u_obj.uo_npages = 0;
    556 
    557 	/*
    558  	 * now that aobj is ready, add it to the global list
    559  	 */
    560 	simple_lock(&uao_list_lock);
    561 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
    562 	simple_unlock(&uao_list_lock);
    563 
    564 	/*
    565  	 * done!
    566  	 */
    567 	return(&aobj->u_obj);
    568 }
    569 
    570 
    571 
    572 /*
    573  * uao_init: set up aobj pager subsystem
    574  *
    575  * => called at boot time from uvm_pager_init()
    576  */
    577 void
    578 uao_init()
    579 {
    580 	static int uao_initialized;
    581 
    582 	if (uao_initialized)
    583 		return;
    584 	uao_initialized = TRUE;
    585 
    586 	LIST_INIT(&uao_list);
    587 	simple_lock_init(&uao_list_lock);
    588 
    589 	/*
    590 	 * NOTE: Pages fror this pool must not come from a pageable
    591 	 * kernel map!
    592 	 */
    593 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
    594 	    0, 0, 0, "uaoeltpl", 0, NULL, NULL, M_UVMAOBJ);
    595 
    596 	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0,
    597 	    "aobjpl", 0,
    598 	    pool_page_alloc_nointr, pool_page_free_nointr, M_UVMAOBJ);
    599 }
    600 
    601 /*
    602  * uao_reference: add a ref to an aobj
    603  *
    604  * => aobj must be unlocked
    605  * => just lock it and call the locked version
    606  */
    607 void
    608 uao_reference(uobj)
    609 	struct uvm_object *uobj;
    610 {
    611 	simple_lock(&uobj->vmobjlock);
    612 	uao_reference_locked(uobj);
    613 	simple_unlock(&uobj->vmobjlock);
    614 }
    615 
    616 /*
    617  * uao_reference_locked: add a ref to an aobj that is already locked
    618  *
    619  * => aobj must be locked
    620  * this needs to be separate from the normal routine
    621  * since sometimes we need to add a reference to an aobj when
    622  * it's already locked.
    623  */
    624 void
    625 uao_reference_locked(uobj)
    626 	struct uvm_object *uobj;
    627 {
    628 	UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
    629 
    630 	/*
    631  	 * kernel_object already has plenty of references, leave it alone.
    632  	 */
    633 
    634 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
    635 		return;
    636 
    637 	uobj->uo_refs++;		/* bump! */
    638 	UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
    639 		    uobj, uobj->uo_refs,0,0);
    640 }
    641 
    642 
    643 /*
    644  * uao_detach: drop a reference to an aobj
    645  *
    646  * => aobj must be unlocked
    647  * => just lock it and call the locked version
    648  */
    649 void
    650 uao_detach(uobj)
    651 	struct uvm_object *uobj;
    652 {
    653 	simple_lock(&uobj->vmobjlock);
    654 	uao_detach_locked(uobj);
    655 }
    656 
    657 
    658 /*
    659  * uao_detach_locked: drop a reference to an aobj
    660  *
    661  * => aobj must be locked, and is unlocked (or freed) upon return.
    662  * this needs to be separate from the normal routine
    663  * since sometimes we need to detach from an aobj when
    664  * it's already locked.
    665  */
    666 void
    667 uao_detach_locked(uobj)
    668 	struct uvm_object *uobj;
    669 {
    670 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
    671 	struct vm_page *pg;
    672 	boolean_t busybody;
    673 	UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
    674 
    675 	/*
    676  	 * detaching from kernel_object is a noop.
    677  	 */
    678 	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
    679 		simple_unlock(&uobj->vmobjlock);
    680 		return;
    681 	}
    682 
    683 	UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
    684 	uobj->uo_refs--;				/* drop ref! */
    685 	if (uobj->uo_refs) {				/* still more refs? */
    686 		simple_unlock(&uobj->vmobjlock);
    687 		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
    688 		return;
    689 	}
    690 
    691 	/*
    692  	 * remove the aobj from the global list.
    693  	 */
    694 	simple_lock(&uao_list_lock);
    695 	LIST_REMOVE(aobj, u_list);
    696 	simple_unlock(&uao_list_lock);
    697 
    698 	/*
    699  	 * free all the pages that aren't PG_BUSY,
    700 	 * mark for release any that are.
    701  	 */
    702 	busybody = FALSE;
    703 	for (pg = TAILQ_FIRST(&uobj->memq);
    704 	     pg != NULL;
    705 	     pg = TAILQ_NEXT(pg, listq)) {
    706 		if (pg->flags & PG_BUSY) {
    707 			pg->flags |= PG_RELEASED;
    708 			busybody = TRUE;
    709 			continue;
    710 		}
    711 
    712 		/* zap the mappings, free the swap slot, free the page */
    713 		pmap_page_protect(pg, VM_PROT_NONE);
    714 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
    715 		uvm_lock_pageq();
    716 		uvm_pagefree(pg);
    717 		uvm_unlock_pageq();
    718 	}
    719 
    720 	/*
    721  	 * if we found any busy pages, we're done for now.
    722  	 * mark the aobj for death, releasepg will finish up for us.
    723  	 */
    724 	if (busybody) {
    725 		aobj->u_flags |= UAO_FLAG_KILLME;
    726 		simple_unlock(&aobj->u_obj.vmobjlock);
    727 		return;
    728 	}
    729 
    730 	/*
    731  	 * finally, free the rest.
    732  	 */
    733 	uao_free(aobj);
    734 }
    735 
    736 /*
    737  * uao_flush: "flush" pages out of a uvm object
    738  *
    739  * => object should be locked by caller.  we may _unlock_ the object
    740  *	if (and only if) we need to clean a page (PGO_CLEANIT).
    741  *	XXXJRT Currently, however, we don't.  In the case of cleaning
    742  *	XXXJRT a page, we simply just deactivate it.  Should probably
    743  *	XXXJRT handle this better, in the future (although "flushing"
    744  *	XXXJRT anonymous memory isn't terribly important).
    745  * => if PGO_CLEANIT is not set, then we will neither unlock the object
    746  *	nor block.
    747  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
    748  *	for flushing.
    749  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
    750  *	that new pages are inserted on the tail end of the list.  thus,
    751  *	we can make a complete pass through the object in one go by starting
    752  *	at the head and working towards the tail (new pages are put in
    753  *	front of us).
    754  * => NOTE: we are allowed to lock the page queues, so the caller
    755  *	must not be holding the lock on them [e.g. pagedaemon had
    756  *	better not call us with the queues locked]
    757  * => we return TRUE unless we encountered some sort of I/O error
    758  *	XXXJRT currently never happens, as we never directly initiate
    759  *	XXXJRT I/O
    760  *
    761  * comment on "cleaning" object and PG_BUSY pages:
    762  *	this routine is holding the lock on the object.  the only time
    763  *	that it can run into a PG_BUSY page that it does not own is if
    764  *	some other process has started I/O on the page (e.g. either
    765  *	a pagein or a pageout).  if the PG_BUSY page is being paged
    766  *	in, then it can not be dirty (!PG_CLEAN) because no one has
    767  *	had a chance to modify it yet.  if the PG_BUSY page is being
    768  *	paged out then it means that someone else has already started
    769  *	cleaning the page for us (how nice!).  in this case, if we
    770  *	have syncio specified, then after we make our pass through the
    771  *	object we need to wait for the other PG_BUSY pages to clear
    772  *	off (i.e. we need to do an iosync).  also note that once a
    773  *	page is PG_BUSY it must stay in its object until it is un-busyed.
    774  *	XXXJRT We never actually do this, as we are "flushing" anonymous
    775  *	XXXJRT memory, which doesn't have persistent backing store.
    776  *
    777  * note on page traversal:
    778  *	we can traverse the pages in an object either by going down the
    779  *	linked list in "uobj->memq", or we can go over the address range
    780  *	by page doing hash table lookups for each address.  depending
    781  *	on how many pages are in the object it may be cheaper to do one
    782  *	or the other.  we set "by_list" to true if we are using memq.
    783  *	if the cost of a hash lookup was equal to the cost of the list
    784  *	traversal we could compare the number of pages in the start->stop
    785  *	range to the total number of pages in the object.  however, it
    786  *	seems that a hash table lookup is more expensive than the linked
    787  *	list traversal, so we multiply the number of pages in the
    788  *	start->stop range by a penalty which we define below.
    789  */
    790 
    791 #define	UAO_HASH_PENALTY 4	/* XXX: a guess */
    792 
    793 boolean_t
    794 uao_flush(uobj, start, stop, flags)
    795 	struct uvm_object *uobj;
    796 	voff_t start, stop;
    797 	int flags;
    798 {
    799 	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
    800 	struct vm_page *pp, *ppnext;
    801 	boolean_t retval, by_list;
    802 	voff_t curoff;
    803 	UVMHIST_FUNC("uao_flush"); UVMHIST_CALLED(maphist);
    804 
    805 	curoff = 0;	/* XXX: shut up gcc */
    806 
    807 	retval = TRUE;	/* default to success */
    808 
    809 	if (flags & PGO_ALLPAGES) {
    810 		start = 0;
    811 		stop = aobj->u_pages << PAGE_SHIFT;
    812 		by_list = TRUE;		/* always go by the list */
    813 	} else {
    814 		start = trunc_page(start);
    815 		stop = round_page(stop);
    816 		if (stop > (aobj->u_pages << PAGE_SHIFT)) {
    817 			printf("uao_flush: strange, got an out of range "
    818 			    "flush (fixed)\n");
    819 			stop = aobj->u_pages << PAGE_SHIFT;
    820 		}
    821 		by_list = (uobj->uo_npages <=
    822 		    ((stop - start) >> PAGE_SHIFT) * UAO_HASH_PENALTY);
    823 	}
    824 
    825 	UVMHIST_LOG(maphist,
    826 	    " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
    827 	    start, stop, by_list, flags);
    828 
    829 	/*
    830 	 * Don't need to do any work here if we're not freeing
    831 	 * or deactivating pages.
    832 	 */
    833 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
    834 		UVMHIST_LOG(maphist,
    835 		    "<- done (no work to do)",0,0,0,0);
    836 		return (retval);
    837 	}
    838 
    839 	/*
    840 	 * now do it.  note: we must update ppnext in the body of loop or we
    841 	 * will get stuck.  we need to use ppnext because we may free "pp"
    842 	 * before doing the next loop.
    843 	 */
    844 
    845 	if (by_list) {
    846 		pp = uobj->memq.tqh_first;
    847 	} else {
    848 		curoff = start;
    849 		pp = uvm_pagelookup(uobj, curoff);
    850 	}
    851 
    852 	ppnext = NULL;	/* XXX: shut up gcc */
    853 	uvm_lock_pageq();	/* page queues locked */
    854 
    855 	/* locked: both page queues and uobj */
    856 	for ( ; (by_list && pp != NULL) ||
    857 	    (!by_list && curoff < stop) ; pp = ppnext) {
    858 		if (by_list) {
    859 			ppnext = pp->listq.tqe_next;
    860 
    861 			/* range check */
    862 			if (pp->offset < start || pp->offset >= stop)
    863 				continue;
    864 		} else {
    865 			curoff += PAGE_SIZE;
    866 			if (curoff < stop)
    867 				ppnext = uvm_pagelookup(uobj, curoff);
    868 
    869 			/* null check */
    870 			if (pp == NULL)
    871 				continue;
    872 		}
    873 
    874 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
    875 		/*
    876 		 * XXX In these first 3 cases, we always just
    877 		 * XXX deactivate the page.  We may want to
    878 		 * XXX handle the different cases more specifically
    879 		 * XXX in the future.
    880 		 */
    881 		case PGO_CLEANIT|PGO_FREE:
    882 		case PGO_CLEANIT|PGO_DEACTIVATE:
    883 		case PGO_DEACTIVATE:
    884  deactivate_it:
    885 			/* skip the page if it's loaned or wired */
    886 			if (pp->loan_count != 0 ||
    887 			    pp->wire_count != 0)
    888 				continue;
    889 
    890 			/* zap all mappings for the page. */
    891 			pmap_page_protect(pp, VM_PROT_NONE);
    892 
    893 			/* ...and deactivate the page. */
    894 			uvm_pagedeactivate(pp);
    895 
    896 			continue;
    897 
    898 		case PGO_FREE:
    899 			/*
    900 			 * If there are multiple references to
    901 			 * the object, just deactivate the page.
    902 			 */
    903 			if (uobj->uo_refs > 1)
    904 				goto deactivate_it;
    905 
    906 			/* XXX skip the page if it's loaned or wired */
    907 			if (pp->loan_count != 0 ||
    908 			    pp->wire_count != 0)
    909 				continue;
    910 
    911 			/*
    912 			 * mark the page as released if its busy.
    913 			 */
    914 			if (pp->flags & PG_BUSY) {
    915 				pp->flags |= PG_RELEASED;
    916 				continue;
    917 			}
    918 
    919 			/* zap all mappings for the page. */
    920 			pmap_page_protect(pp, VM_PROT_NONE);
    921 
    922 			uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
    923 			uvm_pagefree(pp);
    924 
    925 			continue;
    926 
    927 		default:
    928 			panic("uao_flush: weird flags");
    929 		}
    930 #ifdef DIAGNOSTIC
    931 		panic("uao_flush: unreachable code");
    932 #endif
    933 	}
    934 
    935 	uvm_unlock_pageq();
    936 
    937 	UVMHIST_LOG(maphist,
    938 	    "<- done, rv=%d",retval,0,0,0);
    939 	return (retval);
    940 }
    941 
    942 /*
    943  * uao_get: fetch me a page
    944  *
    945  * we have three cases:
    946  * 1: page is resident     -> just return the page.
    947  * 2: page is zero-fill    -> allocate a new page and zero it.
    948  * 3: page is swapped out  -> fetch the page from swap.
    949  *
    950  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
    951  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
    952  * then we will need to return VM_PAGER_UNLOCK.
    953  *
    954  * => prefer map unlocked (not required)
    955  * => object must be locked!  we will _unlock_ it before starting any I/O.
    956  * => flags: PGO_ALLPAGES: get all of the pages
    957  *           PGO_LOCKED: fault data structures are locked
    958  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
    959  * => NOTE: caller must check for released pages!!
         * => returns VM_PAGER_OK on success.  with PGO_LOCKED it returns
         *    VM_PAGER_UNLOCK instead when a required page (the center page,
         *    or any page with PGO_ALLPAGES) could not be had without waiting
         *    or I/O; in both PGO_LOCKED cases *npagesp is set to the number
         *    of pages plugged into pps[].  without PGO_LOCKED a failing
         *    uvm_swap_get() result is passed back as-is and *npagesp is
         *    not written.
         * => without PGO_LOCKED the object is unlocked on every return path;
         *    the PGO_LOCKED path does not unlock it.
         * => every page returned in pps[] is PG_BUSY; the caller must un-busy it.
         * => access_type and advice are not used by this implementation.
    960  */
    961 static int
    962 uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
    963 	struct uvm_object *uobj;
    964 	voff_t offset;
    965 	struct vm_page **pps;
    966 	int *npagesp;
    967 	int centeridx, advice, flags;
    968 	vm_prot_t access_type;
    969 {
    970 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
    971 	voff_t current_offset;
    972 	vm_page_t ptmp;
    973 	int lcv, gotpages, maxpages, swslot, rv, pageidx;
    974 	boolean_t done;
    975 	UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
    976 
    977 	UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
    978 		    aobj, offset, flags,0);
    979 
    980 	/*
    981  	 * get number of pages
    982  	 */
    983 	maxpages = *npagesp;
    984 
    985 	/*
    986  	 * step 1: handle the case where fault data structures are locked.
    987  	 */
    988 
    989 	if (flags & PGO_LOCKED) {
    990 		/*
    991  		 * step 1a: get pages that are already resident.   only do
    992 		 * this if the data structures are locked (i.e. the first
    993 		 * time through).
    994  		 */
    995 
    996 		done = TRUE;	/* be optimistic */
    997 		gotpages = 0;	/* # of pages we got so far */
    998 
    999 		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
   1000 		    lcv++, current_offset += PAGE_SIZE) {
   1001 			/* do we care about this page?  if not, skip it */
   1002 			if (pps[lcv] == PGO_DONTCARE)
   1003 				continue;
   1004 
   1005 			ptmp = uvm_pagelookup(uobj, current_offset);
   1006 
   1007 			/*
   1008  			 * if page is new (not resident and no swap slot
   1009 			 * assigned), attempt to allocate the page, zero-fill'd.
   1010  			 */
   1011 			if (ptmp == NULL && uao_find_swslot(aobj,
   1012 			    current_offset >> PAGE_SHIFT) == 0) {
   1013 				ptmp = uvm_pagealloc(uobj, current_offset,
   1014 				    NULL, UVM_PGA_ZERO);
   1015 				if (ptmp) {
   1016 					/* new page: drop busy/fake, tag as aobj page */
   1017 					ptmp->flags &= ~(PG_BUSY|PG_FAKE);
   1018 					ptmp->pqflags |= PQ_AOBJ;
   1019 					UVM_PAGE_OWN(ptmp, NULL);
   1020 				}
   1021 			}
   1022 
   1023 			/*
   1024 			 * to be useful must get a non-busy, non-released page
   1025 			 */
   1026 			if (ptmp == NULL ||
   1027 			    (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
   1028 				if (lcv == centeridx ||
   1029 				    (flags & PGO_ALLPAGES) != 0)
   1030 					/* need to do a wait or I/O! */
   1031 					done = FALSE;
   1032 					continue; /* NB: outer if; always skips the page */
   1033 			}
   1034 
   1035 			/*
   1036 			 * useful page: busy/lock it and plug it in our
   1037 			 * result array
   1038 			 */
   1039 			/* caller must un-busy this page */
   1040 			ptmp->flags |= PG_BUSY;
   1041 			UVM_PAGE_OWN(ptmp, "uao_get1");
   1042 			pps[lcv] = ptmp;
   1043 			gotpages++;
   1044 
   1045 		}	/* "for" lcv loop */
   1046 
   1047 		/*
   1048  		 * step 1b: now we've either done everything needed or we
   1049 		 * need to unlock and do some waiting or I/O.
   1050  		 */
   1051 
   1052 		UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
   1053 
   1054 		*npagesp = gotpages;
   1055 		if (done)
   1056 			/* bingo! */
   1057 			return(VM_PAGER_OK);
   1058 		else
   1059 			/* EEK!   Need to unlock and I/O */
   1060 			return(VM_PAGER_UNLOCK);
   1061 	}
   1062 
   1063 	/*
   1064  	 * step 2: get non-resident or busy pages.
   1065  	 * object is locked.   data structures are unlocked.
   1066  	 */
   1067 
   1068 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
   1069 	    lcv++, current_offset += PAGE_SIZE) {
   1070 
   1071 		/*
   1072 		 * - skip over pages we've already gotten or don't want
   1073 		 * - skip over pages we don't _have_ to get
   1074 		 */
   1075 
   1076 		if (pps[lcv] != NULL ||
   1077 		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
   1078 			continue;
   1079 
   1080 		pageidx = current_offset >> PAGE_SHIFT;
   1081 
   1082 		/*
   1083  		 * we have yet to locate the current page (pps[lcv]).   we
   1084 		 * first look for a page that is already at the current offset.
   1085 		 * if we find a page, we check to see if it is busy or
   1086 		 * released.  if that is the case, then we sleep on the page
   1087 		 * until it is no longer busy or released and repeat the lookup.
   1088 		 * if the page we found is neither busy nor released, then we
   1089 		 * busy it (so we own it) and plug it into pps[lcv].   this
   1090 		 * 'break's the following while loop and indicates we are
   1091 		 * ready to move on to the next page in the "lcv" loop above.
   1092  		 *
   1093  		 * if we exit the while loop with pps[lcv] still set to NULL,
   1094 		 * then it means that we allocated a new busy/fake/clean page
   1095 		 * ptmp in the object and we need to do I/O to fill in the data.
   1096  		 */
   1097 
   1098 		/* top of "pps" while loop */
   1099 		while (pps[lcv] == NULL) {
   1100 			/* look for a resident page */
   1101 			ptmp = uvm_pagelookup(uobj, current_offset);
   1102 
   1103 			/* not resident?   allocate one now (if we can) */
   1104 			if (ptmp == NULL) {
   1105 
   1106 				ptmp = uvm_pagealloc(uobj, current_offset,
   1107 				    NULL, 0);
   1108 
   1109 				/*
        				 * out of RAM?  drop the object lock while we
        				 * wait, then redo the lookup from scratch.
        				 */
   1110 				if (ptmp == NULL) {
   1111 					simple_unlock(&uobj->vmobjlock);
   1112 					UVMHIST_LOG(pdhist,
   1113 					    "sleeping, ptmp == NULL\n",0,0,0,0);
   1114 					uvm_wait("uao_getpage");
   1115 					simple_lock(&uobj->vmobjlock);
   1116 					/* goto top of pps while loop */
   1117 					continue;
   1118 				}
   1119 
   1120 				/*
   1121 				 * safe with PQ's unlocked: because we just
   1122 				 * alloc'd the page
   1123 				 */
   1124 				ptmp->pqflags |= PQ_AOBJ;
   1125 
   1126 				/*
   1127 				 * got new page ready for I/O.  break pps while
   1128 				 * loop.  pps[lcv] is still NULL.
   1129 				 */
   1130 				break;
   1131 			}
   1132 
   1133 			/* page is there, see if we need to wait on it */
   1134 			if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
   1135 				ptmp->flags |= PG_WANTED;
   1136 				UVMHIST_LOG(pdhist,
   1137 				    "sleeping, ptmp->flags 0x%x\n",
   1138 				    ptmp->flags,0,0,0);
   1139 				UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
   1140 				    FALSE, "uao_get", 0);
   1141 				simple_lock(&uobj->vmobjlock);
   1142 				continue;	/* goto top of pps while loop */
   1143 			}
   1144 
   1145 			/*
   1146  			 * if we get here then the page has become resident and
   1147 			 * unbusy between steps 1 and 2.  we busy it now (so we
   1148 			 * own it) and set pps[lcv] (so that we exit the while
   1149 			 * loop).
   1150  			 */
   1151 			/* we own it, caller must un-busy */
   1152 			ptmp->flags |= PG_BUSY;
   1153 			UVM_PAGE_OWN(ptmp, "uao_get2");
   1154 			pps[lcv] = ptmp;
   1155 		}
   1156 
   1157 		/*
   1158  		 * if we own the valid page at the correct offset, pps[lcv] will
   1159  		 * point to it.   nothing more to do except go to the next page.
   1160  		 */
   1161 		if (pps[lcv])
   1162 			continue;			/* next lcv */
   1163 
   1164 		/*
   1165  		 * we have a "fake/busy/clean" page that we just allocated.
   1166  		 * do the needed "i/o", either reading from swap or zeroing.
   1167  		 */
   1168 		swslot = uao_find_swslot(aobj, pageidx);
   1169 
   1170 		/*
   1171  		 * just zero the page if there's nothing in swap.
   1172  		 */
   1173 		if (swslot == 0)
   1174 		{
   1175 			/*
   1176 			 * page hasn't existed before, just zero it.
   1177 			 */
   1178 			uvm_pagezero(ptmp);
   1179 		} else {
   1180 			UVMHIST_LOG(pdhist, "pagein from swslot %d",
   1181 			     swslot, 0,0,0);
   1182 
   1183 			/*
   1184 			 * page in the swapped-out page.
   1185 			 * unlock object for i/o, relock when done.
   1186 			 */
   1187 			simple_unlock(&uobj->vmobjlock);
   1188 			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
   1189 			simple_lock(&uobj->vmobjlock);
   1190 
   1191 			/*
   1192 			 * I/O done.  check for errors.
   1193 			 */
   1194 			if (rv != VM_PAGER_OK)
   1195 			{
   1196 				UVMHIST_LOG(pdhist, "<- done (error=%d)",
   1197 				    rv,0,0,0);
   1198 				if (ptmp->flags & PG_WANTED)
   1199 					wakeup(ptmp);
   1200 
   1201 				/*
   1202 				 * remove the swap slot from the aobj
   1203 				 * and mark the aobj as having no real slot.
   1204 				 * don't free the swap slot, thus preventing
   1205 				 * it from being used again.
   1206 				 */
   1207 				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
   1208 							SWSLOT_BAD);
   1209 				uvm_swap_markbad(swslot, 1);
   1210 
        				/* throw away the page we allocated for the I/O */
   1211 				ptmp->flags &= ~(PG_WANTED|PG_BUSY);
   1212 				UVM_PAGE_OWN(ptmp, NULL);
   1213 				uvm_lock_pageq();
   1214 				uvm_pagefree(ptmp);
   1215 				uvm_unlock_pageq();
   1216 
        				/* error return: object goes back unlocked */
   1217 				simple_unlock(&uobj->vmobjlock);
   1218 				return (rv);
   1219 			}
   1220 		}
   1221 
   1222 		/*
   1223  		 * we got the page!   clear the fake flag (indicates valid
   1224 		 * data now in page) and plug into our result array.   note
   1225 		 * that page is still busy.
   1226  		 *
   1227  		 * it is the callers job to:
   1228  		 * => check if the page is released
   1229  		 * => unbusy the page
   1230  		 * => activate the page
   1231  		 */
   1232 
   1233 		ptmp->flags &= ~PG_FAKE;		/* data is valid ... */
   1234 		pmap_clear_modify(ptmp);		/* ... and clean */
   1235 		pps[lcv] = ptmp;
   1236 
   1237 	}	/* lcv loop */
   1238 
   1239 	/*
   1240  	 * finally, unlock object and return.
   1241  	 */
   1242 
   1243 	simple_unlock(&uobj->vmobjlock);
   1244 	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
   1245 	return(VM_PAGER_OK);
   1246 }
   1247 
   1248 /*
   1249  * uao_releasepg: handle released page in an aobj
   1250  *
   1251  * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
   1252  *      to dispose of.
   1253  * => caller must handle PG_WANTED case
   1254  * => called with page's object locked, pageq's unlocked
   1255  * => returns TRUE if page's object is still alive, FALSE if we
   1256  *      killed the page's object.    if we return TRUE, then we
   1257  *      return with the object locked.
   1258  * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
   1259  *                              with the page queues locked [for pagedaemon]
   1260  * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
   1261  * => we kill the aobj if it is not referenced and we are suppose to
   1262  *      kill it ("KILLME").
   1263  */
   1264 static boolean_t
   1265 uao_releasepg(pg, nextpgp)
   1266 	struct vm_page *pg;
   1267 	struct vm_page **nextpgp;	/* OUT */
   1268 {
   1269 	struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject;
   1270 
   1271 #ifdef DIAGNOSTIC
   1272 	if ((pg->flags & PG_RELEASED) == 0)
   1273 		panic("uao_releasepg: page not released!");
   1274 #endif
   1275 
   1276 	/*
   1277  	 * dispose of the page [caller handles PG_WANTED] and swap slot.
   1278  	 */
   1279 	pmap_page_protect(pg, VM_PROT_NONE);
   1280 	uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
   1281 	uvm_lock_pageq();
   1282 	if (nextpgp)
   1283 		*nextpgp = pg->pageq.tqe_next;	/* next page for daemon */
   1284 	uvm_pagefree(pg);
   1285 	if (!nextpgp)
   1286 		uvm_unlock_pageq();		/* keep locked for daemon */
   1287 
   1288 	/*
   1289  	 * if we're not killing the object, we're done.
   1290  	 */
   1291 	if ((aobj->u_flags & UAO_FLAG_KILLME) == 0)
   1292 		return TRUE;
   1293 
   1294 #ifdef DIAGNOSTIC
   1295 	if (aobj->u_obj.uo_refs)
   1296 		panic("uvm_km_releasepg: kill flag set on referenced object!");
   1297 #endif
   1298 
   1299 	/*
   1300  	 * if there are still pages in the object, we're done for now.
   1301  	 */
   1302 	if (aobj->u_obj.uo_npages != 0)
   1303 		return TRUE;
   1304 
   1305 #ifdef DIAGNOSTIC
   1306 	if (TAILQ_FIRST(&aobj->u_obj.memq))
   1307 		panic("uvn_releasepg: pages in object with npages == 0");
   1308 #endif
   1309 
   1310 	/*
   1311  	 * finally, free the rest.
   1312  	 */
   1313 	uao_free(aobj);
   1314 
   1315 	return FALSE;
   1316 }
   1317 
   1318 
   1319 /*
   1320  * uao_dropswap:  release any swap resources from this aobj page.
   1321  *
   1322  * => aobj must be locked or have a reference count of 0.
   1323  */
   1324 
   1325 void
   1326 uao_dropswap(uobj, pageidx)
   1327 	struct uvm_object *uobj;
   1328 	int pageidx;
   1329 {
   1330 	int slot;
   1331 
   1332 	slot = uao_set_swslot(uobj, pageidx, 0);
   1333 	if (slot) {
   1334 		uvm_swap_free(slot, 1);
   1335 	}
   1336 }
   1337 
   1338 
   1339 /*
   1340  * uao_swap_off: page in every page in every aobj that is paged-out to a
        * range of swslots.
        *
        * => the range is [startslot, endslot): see the checks in uao_pagein().
   1341  *
   1342  * => nothing should be locked.
   1343  * => returns TRUE if pagein was aborted due to lack of memory.
   1344  */
   1345 boolean_t
   1346 uao_swap_off(startslot, endslot)
   1347 	int startslot, endslot;
   1348 {
   1349 	struct uvm_aobj *aobj, *nextaobj;
   1350 
   1351 	/*
   1352 	 * walk the list of all aobjs.
   1353 	 */
   1354 
   1355 restart:
   1356 	simple_lock(&uao_list_lock);
   1357 
   1358 	for (aobj = LIST_FIRST(&uao_list);
   1359 	     aobj != NULL;
   1360 	     aobj = nextaobj) {
   1361 		boolean_t rv;
   1362 
   1363 		/*
   1364 		 * try to get the object lock,
   1365 		 * start all over if we fail.
   1366 		 * most of the time we'll get the aobj lock,
   1367 		 * so this should be a rare case.
   1368 		 */
   1369 		if (!simple_lock_try(&aobj->u_obj.vmobjlock)) {
   1370 			simple_unlock(&uao_list_lock);
   1371 			goto restart;
   1372 		}
   1373 
   1374 		/*
   1375 		 * add a ref to the aobj so it doesn't disappear
   1376 		 * while we're working.
   1377 		 */
   1378 		uao_reference_locked(&aobj->u_obj);
   1379 
   1380 		/*
   1381 		 * now it's safe to unlock the uao list.
   1382 		 */
   1383 		simple_unlock(&uao_list_lock);
   1384 
   1385 		/*
   1386 		 * page in any pages in the swslot range.
   1387 		 * if there's an error, abort and return the error.
   1388 		 * (the list lock is not held on this return path.)
   1389 		 */
   1390 		rv = uao_pagein(aobj, startslot, endslot);
   1391 		if (rv) {
   1392 			uao_detach_locked(&aobj->u_obj);
   1393 			return rv;
   1394 		}
   1395 
   1396 		/*
   1397 		 * we're done with this aobj.
   1398 		 * relock the list and drop our ref on the aobj.
   1399 		 * the successor is read before the ref is dropped.
        		 *
        		 * NOTE(review): uao_detach_locked() runs with uao_list_lock
        		 * held here but without it on the error path above --
        		 * confirm it never takes uao_list_lock itself.
   1400 		 */
   1401 		simple_lock(&uao_list_lock);
   1402 		nextaobj = LIST_NEXT(aobj, u_list);
   1403 		uao_detach_locked(&aobj->u_obj);
   1404 	}
   1405 
   1406 	/*
   1407 	 * done with traversal, unlock the list
   1408 	 */
   1409 	simple_unlock(&uao_list_lock);
   1410 	return FALSE;
   1411 }
   1410 
   1411 
   1412 /*
   1413  * uao_pagein: page in any pages from aobj whose swap slot lies in the
        * range [startslot, endslot).
   1414  *
   1415  * => aobj must be locked and is returned locked.
   1416  * => returns TRUE if pagein was aborted due to lack of memory.
        * => stops at, and passes back, the first non-zero result from
        *    uao_pagein_page().
   1417  */
   1418 static boolean_t
   1419 uao_pagein(aobj, startslot, endslot)
   1420 	struct uvm_aobj *aobj;
   1421 	int startslot, endslot;
   1422 {
   1423 	boolean_t rv;
   1424 
   1425 	if (UAO_USES_SWHASH(aobj)) {
   1426 		struct uao_swhash_elt *elt;
   1427 		int bucket;
   1428 
   1429 restart:
        		/* scan buckets from u_swhashmask down to 0 */
   1430 		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
   1431 			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
   1432 			     elt != NULL;
   1433 			     elt = LIST_NEXT(elt, list)) {
   1434 				int i;
   1435 
   1436 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
   1437 					int slot = elt->slots[i];
   1438 
   1439 					/*
   1440 					 * if the slot isn't in range, skip it.
   1441 					 */
   1442 					if (slot < startslot ||
   1443 					    slot >= endslot) {
   1444 						continue;
   1445 					}
   1446 
   1447 					/*
   1448 					 * process the page,
   1449 					 * then start over on this object
   1450 					 * since the swhash elt
   1451 					 * may have been freed.
   1452 					 */
   1453 					rv = uao_pagein_page(aobj,
   1454 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
   1455 					if (rv) {
   1456 						return rv;
   1457 					}
   1458 					goto restart;
   1459 				}
   1460 			}
   1461 		}
   1462 	} else {
   1463 		int i;
   1464 
        		/*
        		 * array case: unlike the hash case, this loop simply
        		 * carries on with the next index after each page.
        		 */
   1465 		for (i = 0; i < aobj->u_pages; i++) {
   1466 			int slot = aobj->u_swslots[i];
   1467 
   1468 			/*
   1469 			 * if the slot isn't in range, skip it
   1470 			 */
   1471 			if (slot < startslot || slot >= endslot) {
   1472 				continue;
   1473 			}
   1474 
   1475 			/*
   1476 			 * process the page.
   1477 			 */
   1478 			rv = uao_pagein_page(aobj, i);
   1479 			if (rv) {
   1480 				return rv;
   1481 			}
   1482 		}
   1483 	}
   1484 
   1485 	return FALSE;
   1486 }
   1487 
   1488 /*
   1489  * page in a page from an aobj.  used for swap_off.
   1490  * returns TRUE if pagein was aborted due to lack of memory.
   1491  *
   1492  * => aobj must be locked and is returned locked.
   1493  */
   1494 static boolean_t
   1495 uao_pagein_page(aobj, pageidx)
   1496 	struct uvm_aobj *aobj;
   1497 	int pageidx;
   1498 {
   1499 	struct vm_page *pg;
   1500 	int rv, slot, npages;
   1501 	UVMHIST_FUNC("uao_pagein_page");  UVMHIST_CALLED(pdhist);
   1502 
   1503 	pg = NULL;
   1504 	npages = 1;
   1505 	/* locked: aobj */
   1506 	rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
   1507 		     &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, 0);
   1508 	/* unlocked: aobj */
   1509 
   1510 	/*
   1511 	 * relock and finish up.
   1512 	 */
   1513 	simple_lock(&aobj->u_obj.vmobjlock);
   1514 
   1515 	switch (rv) {
   1516 	case VM_PAGER_OK:
   1517 		break;
   1518 
   1519 	case VM_PAGER_ERROR:
   1520 	case VM_PAGER_REFAULT:
   1521 		/*
   1522 		 * nothing more to do on errors.
   1523 		 * VM_PAGER_REFAULT can only mean that the anon was freed,
   1524 		 * so again there's nothing to do.
   1525 		 */
   1526 		return FALSE;
   1527 
   1528 #ifdef DIAGNOSTIC
   1529 	default:
   1530 		panic("uao_pagein_page: uao_get -> %d\n", rv);
   1531 #endif
   1532 	}
   1533 
   1534 #ifdef DIAGNOSTIC
   1535 	/*
   1536 	 * this should never happen, since we have a reference on the aobj.
   1537 	 */
   1538 	if (pg->flags & PG_RELEASED) {
   1539 		panic("uao_pagein_page: found PG_RELEASED page?\n");
   1540 	}
   1541 #endif
   1542 
   1543 	/*
   1544 	 * ok, we've got the page now.
   1545 	 * mark it as dirty, clear its swslot and un-busy it.
   1546 	 */
   1547 	slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
   1548 	uvm_swap_free(slot, 1);
   1549 	pg->flags &= ~(PG_BUSY|PG_CLEAN|PG_FAKE);
   1550 	UVM_PAGE_OWN(pg, NULL);
   1551 
   1552 	/*
   1553 	 * deactivate the page (to put it on a page queue).
   1554 	 */
   1555 	pmap_clear_reference(pg);
   1556 	pmap_page_protect(pg, VM_PROT_NONE);
   1557 	uvm_lock_pageq();
   1558 	uvm_pagedeactivate(pg);
   1559 	uvm_unlock_pageq();
   1560 
   1561 	return FALSE;
   1562 }
   1563