Home | History | Annotate | Line # | Download | only in uvm
uvm_page.c revision 1.164
      1 /*	$NetBSD: uvm_page.c,v 1.164 2010/11/14 15:06:34 uebayasi Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Copyright (c) 1997 Charles D. Cranor and Washington University.
     31  * Copyright (c) 1991, 1993, The Regents of the University of California.
     32  *
     33  * All rights reserved.
     34  *
     35  * This code is derived from software contributed to Berkeley by
     36  * The Mach Operating System project at Carnegie-Mellon University.
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions
     40  * are met:
     41  * 1. Redistributions of source code must retain the above copyright
     42  *    notice, this list of conditions and the following disclaimer.
     43  * 2. Redistributions in binary form must reproduce the above copyright
     44  *    notice, this list of conditions and the following disclaimer in the
     45  *    documentation and/or other materials provided with the distribution.
     46  * 3. All advertising materials mentioning features or use of this software
     47  *    must display the following acknowledgement:
     48  *	This product includes software developed by Charles D. Cranor,
     49  *      Washington University, the University of California, Berkeley and
     50  *      its contributors.
     51  * 4. Neither the name of the University nor the names of its contributors
     52  *    may be used to endorse or promote products derived from this software
     53  *    without specific prior written permission.
     54  *
     55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     65  * SUCH DAMAGE.
     66  *
     67  *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
     68  * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
     69  *
     70  *
     71  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
     72  * All rights reserved.
     73  *
     74  * Permission to use, copy, modify and distribute this software and
     75  * its documentation is hereby granted, provided that both the copyright
     76  * notice and this permission notice appear in all copies of the
     77  * software, derivative works or modified versions, and any portions
     78  * thereof, and that both notices appear in supporting documentation.
     79  *
     80  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     81  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     82  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     83  *
     84  * Carnegie Mellon requests users of this software to return to
     85  *
     86  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     87  *  School of Computer Science
     88  *  Carnegie Mellon University
     89  *  Pittsburgh PA 15213-3890
     90  *
     91  * any improvements or extensions that they make and grant Carnegie the
     92  * rights to redistribute these changes.
     93  */
     94 
     95 /*
     96  * uvm_page.c: page ops.
     97  */
     98 
     99 #include <sys/cdefs.h>
    100 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.164 2010/11/14 15:06:34 uebayasi Exp $");
    101 
    102 #include "opt_ddb.h"
    103 #include "opt_uvmhist.h"
    104 #include "opt_readahead.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/sched.h>
    110 #include <sys/kernel.h>
    111 #include <sys/vnode.h>
    112 #include <sys/proc.h>
    113 #include <sys/atomic.h>
    114 #include <sys/cpu.h>
    115 
    116 #include <uvm/uvm.h>
    117 #include <uvm/uvm_ddb.h>
    118 #include <uvm/uvm_pdpolicy.h>
    119 
    120 /*
    121  * global vars... XXXCDC: move to uvm. structure.
    122  */
    123 
    124 /*
    125  * physical memory config is stored in vm_physmem.
    126  */
    127 
    128 SIMPLEQ_HEAD(vm_physseg_freelist, vm_physseg);
    129 
    130 struct vm_physseg *vm_physmem_ptrs[VM_PHYSSEG_MAX];
    131 int vm_nphysmem = 0;
    132 static struct vm_physseg vm_physmem_store[VM_PHYSSEG_MAX];
    133 static struct vm_physseg_freelist vm_physmem_freelist =
    134     SIMPLEQ_HEAD_INITIALIZER(vm_physmem_freelist);
    135 
    136 /*
    137  * Some supported CPUs in a given architecture don't support all
    138  * of the things necessary to do idle page zero'ing efficiently.
    139  * We therefore provide a way to enable it from machdep code here.
    140  */
    141 bool vm_page_zero_enable = false;
    142 
    143 /*
    144  * number of pages per-CPU to reserve for the kernel.
    145  */
    146 int vm_page_reserve_kernel = 5;
    147 
    148 /*
    149  * physical memory size;
    150  */
    151 int physmem;
    152 
    153 /*
    154  * local variables
    155  */
    156 
    157 /*
    158  * these variables record the values returned by vm_page_bootstrap,
    159  * for debugging purposes.  The implementation of uvm_pageboot_alloc
    160  * and pmap_startup here also uses them internally.
    161  */
    162 
    163 static vaddr_t      virtual_space_start;
    164 static vaddr_t      virtual_space_end;
    165 
    166 /*
    167  * we allocate an initial number of page colors in uvm_page_init(),
    168  * and remember them.  We may re-color pages as cache sizes are
    169  * discovered during the autoconfiguration phase.  But we can never
    170  * free the initial set of buckets, since they are allocated using
    171  * uvm_pageboot_alloc().
    172  */
    173 
    174 static bool have_recolored_pages /* = false */;
    175 
    176 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
    177 
    178 #ifdef DEBUG
    179 vaddr_t uvm_zerocheckkva;
    180 #endif /* DEBUG */
    181 
    182 /*
    183  * local prototypes
    184  */
    185 
    186 static void uvm_pageinsert(struct uvm_object *, struct vm_page *);
    187 static void uvm_pageremove(struct uvm_object *, struct vm_page *);
    188 static struct vm_physseg *uvm_physseg_alloc(
    189     struct vm_physseg_freelist * const, struct vm_physseg **, int,
    190     const paddr_t, const paddr_t);
    191 static void uvm_physseg_free(struct vm_physseg_freelist *,
    192     struct vm_physseg **, struct vm_physseg *);
    193 static void uvm_physseg_init(void);
    194 static void uvm_physseg_insert(struct vm_physseg *,
    195     struct vm_physseg **, int);
    196 static void uvm_physseg_remove(struct vm_physseg **, struct vm_physseg *);
    197 
    198 /*
    199  * per-object tree of pages
    200  */
    201 
    202 static signed int
    203 uvm_page_compare_nodes(void *ctx, const void *n1, const void *n2)
    204 {
    205 	const struct vm_page *pg1 = n1;
    206 	const struct vm_page *pg2 = n2;
    207 	const voff_t a = pg1->offset;
    208 	const voff_t b = pg2->offset;
    209 
    210 	if (a < b)
    211 		return -1;
    212 	if (a > b)
    213 		return 1;
    214 	return 0;
    215 }
    216 
    217 static signed int
    218 uvm_page_compare_key(void *ctx, const void *n, const void *key)
    219 {
    220 	const struct vm_page *pg = n;
    221 	const voff_t a = pg->offset;
    222 	const voff_t b = *(const voff_t *)key;
    223 
    224 	if (a < b)
    225 		return -1;
    226 	if (a > b)
    227 		return 1;
    228 	return 0;
    229 }
    230 
    231 const rb_tree_ops_t uvm_page_tree_ops = {
    232 	.rbto_compare_nodes = uvm_page_compare_nodes,
    233 	.rbto_compare_key = uvm_page_compare_key,
    234 	.rbto_node_offset = offsetof(struct vm_page, rb_node),
    235 	.rbto_context = NULL
    236 };
    237 
    238 /*
    239  * inline functions
    240  */
    241 
    242 /*
    243  * uvm_pageinsert: insert a page in the object.
    244  *
    245  * => caller must lock object
    246  * => caller must lock page queues
    247  * => call should have already set pg's object and offset pointers
    248  *    and bumped the version counter
    249  */
    250 
    251 static inline void
    252 uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
    253     struct vm_page *where)
    254 {
    255 
    256 	KASSERT(uobj == pg->uobject);
    257 	KASSERT(mutex_owned(&uobj->vmobjlock));
    258 	KASSERT((pg->flags & PG_TABLED) == 0);
    259 	KASSERT(where == NULL || (where->flags & PG_TABLED));
    260 	KASSERT(where == NULL || (where->uobject == uobj));
    261 
    262 	if (UVM_OBJ_IS_VNODE(uobj)) {
    263 		if (uobj->uo_npages == 0) {
    264 			struct vnode *vp = (struct vnode *)uobj;
    265 
    266 			vholdl(vp);
    267 		}
    268 		if (UVM_OBJ_IS_VTEXT(uobj)) {
    269 			atomic_inc_uint(&uvmexp.execpages);
    270 		} else {
    271 			atomic_inc_uint(&uvmexp.filepages);
    272 		}
    273 	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
    274 		atomic_inc_uint(&uvmexp.anonpages);
    275 	}
    276 
    277 	if (where)
    278 		TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
    279 	else
    280 		TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
    281 	pg->flags |= PG_TABLED;
    282 	uobj->uo_npages++;
    283 }
    284 
    285 
    286 static inline void
    287 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
    288 {
    289 	struct vm_page *ret;
    290 
    291 	KASSERT(uobj == pg->uobject);
    292 	ret = rb_tree_insert_node(&uobj->rb_tree, pg);
    293 	KASSERT(ret == pg);
    294 }
    295 
/*
 * uvm_pageinsert: make pg a page of uobj.
 *
 * => enters pg in the object's tree first, then appends it to the
 *    object's page list
 */
static inline void
uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg)
{

	KDASSERT(uobj != NULL);

	/* index by offset, then link at the tail of the list */
	uvm_pageinsert_tree(uobj, pg);
	uvm_pageinsert_list(uobj, pg, NULL);
}
    304 
    305 /*
    306  * uvm_page_remove: remove page from object.
    307  *
    308  * => caller must lock object
    309  * => caller must lock page queues
    310  */
    311 
    312 static inline void
    313 uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
    314 {
    315 
    316 	KASSERT(uobj == pg->uobject);
    317 	KASSERT(mutex_owned(&uobj->vmobjlock));
    318 	KASSERT(pg->flags & PG_TABLED);
    319 
    320 	if (UVM_OBJ_IS_VNODE(uobj)) {
    321 		if (uobj->uo_npages == 1) {
    322 			struct vnode *vp = (struct vnode *)uobj;
    323 
    324 			holdrelel(vp);
    325 		}
    326 		if (UVM_OBJ_IS_VTEXT(uobj)) {
    327 			atomic_dec_uint(&uvmexp.execpages);
    328 		} else {
    329 			atomic_dec_uint(&uvmexp.filepages);
    330 		}
    331 	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
    332 		atomic_dec_uint(&uvmexp.anonpages);
    333 	}
    334 
    335 	/* object should be locked */
    336 	uobj->uo_npages--;
    337 	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
    338 	pg->flags &= ~PG_TABLED;
    339 	pg->uobject = NULL;
    340 }
    341 
    342 static inline void
    343 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
    344 {
    345 
    346 	KASSERT(uobj == pg->uobject);
    347 	rb_tree_remove_node(&uobj->rb_tree, pg);
    348 }
    349 
/*
 * uvm_pageremove: detach pg from uobj.
 *
 * => the tree entry goes first: the list step clears pg->uobject,
 *    which the tree step asserts on
 */
static inline void
uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg)
{

	KDASSERT(uobj != NULL);

	uvm_pageremove_tree(uobj, pg);
	uvm_pageremove_list(uobj, pg);
}
    358 
    359 static void
    360 uvm_page_init_buckets(struct pgfreelist *pgfl)
    361 {
    362 	int color, i;
    363 
    364 	for (color = 0; color < uvmexp.ncolors; color++) {
    365 		for (i = 0; i < PGFL_NQUEUES; i++) {
    366 			LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
    367 		}
    368 	}
    369 }
    370 
    371 /*
    372  * uvm_page_init: init the page system.   called from uvm_init().
    373  *
    374  * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
    375  */
    376 
    377 void
    378 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
    379 {
    380 	static struct uvm_cpu boot_cpu;
    381 	psize_t freepages, pagecount, bucketcount, n;
    382 	struct pgflbucket *bucketarray, *cpuarray;
    383 	struct vm_physseg *seg;
    384 	struct vm_page *pagearray;
    385 	int lcv;
    386 	u_int i;
    387 	paddr_t paddr;
    388 
    389 	KASSERT(ncpu <= 1);
    390 	CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));
    391 
    392 	/*
    393 	 * init the page queues and page queue locks, except the free
    394 	 * list; we allocate that later (with the initial vm_page
    395 	 * structures).
    396 	 */
    397 
    398 	uvm.cpus[0] = &boot_cpu;
    399 	curcpu()->ci_data.cpu_uvm = &boot_cpu;
    400 	uvm_reclaim_init();
    401 	uvmpdpol_init();
    402 	mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
    403 	mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
    404 
    405 	/*
    406 	 * allocate vm_page structures.
    407 	 */
    408 
    409 	/*
    410 	 * sanity check:
    411 	 * before calling this function the MD code is expected to register
    412 	 * some free RAM with the uvm_page_physload() function.   our job
    413 	 * now is to allocate vm_page structures for this memory.
    414 	 */
    415 
    416 	if (vm_nphysmem == 0)
    417 		panic("uvm_page_bootstrap: no memory pre-allocated");
    418 
    419 	/*
    420 	 * first calculate the number of free pages...
    421 	 *
    422 	 * note that we use start/end rather than avail_start/avail_end.
    423 	 * this allows us to allocate extra vm_page structures in case we
    424 	 * want to return some memory to the pool after booting.
    425 	 */
    426 
    427 	freepages = 0;
    428 	for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) {
    429 		seg = VM_PHYSMEM_PTR(lcv);
    430 		freepages += (seg->end - seg->start);
    431 	}
    432 
    433 	/*
    434 	 * Let MD code initialize the number of colors, or default
    435 	 * to 1 color if MD code doesn't care.
    436 	 */
    437 	if (uvmexp.ncolors == 0)
    438 		uvmexp.ncolors = 1;
    439 	uvmexp.colormask = uvmexp.ncolors - 1;
    440 
    441 	/*
    442 	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
    443 	 * use.   for each page of memory we use we need a vm_page structure.
    444 	 * thus, the total number of pages we can use is the total size of
    445 	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
    446 	 * structure.   we add one to freepages as a fudge factor to avoid
    447 	 * truncation errors (since we can only allocate in terms of whole
    448 	 * pages).
    449 	 */
    450 
    451 	bucketcount = uvmexp.ncolors * VM_NFREELIST;
    452 	pagecount = ((freepages + 1) << PAGE_SHIFT) /
    453 	    (PAGE_SIZE + sizeof(struct vm_page));
    454 
    455 	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
    456 	    sizeof(struct pgflbucket) * 2) + (pagecount *
    457 	    sizeof(struct vm_page)));
    458 	cpuarray = bucketarray + bucketcount;
    459 	pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);
    460 
    461 	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
    462 		uvm.page_free[lcv].pgfl_buckets =
    463 		    (bucketarray + (lcv * uvmexp.ncolors));
    464 		uvm_page_init_buckets(&uvm.page_free[lcv]);
    465 		uvm.cpus[0]->page_free[lcv].pgfl_buckets =
    466 		    (cpuarray + (lcv * uvmexp.ncolors));
    467 		uvm_page_init_buckets(&uvm.cpus[0]->page_free[lcv]);
    468 	}
    469 	memset(pagearray, 0, pagecount * sizeof(struct vm_page));
    470 
    471 	/*
    472 	 * init the vm_page structures and put them in the correct place.
    473 	 */
    474 
    475 	for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) {
    476 		seg = VM_PHYSMEM_PTR(lcv);
    477 		n = seg->end - seg->start;
    478 
    479 		/* set up page array pointers */
    480 		seg->pgs = pagearray;
    481 		pagearray += n;
    482 		pagecount -= n;
    483 		seg->lastpg = seg->pgs + n;
    484 
    485 		/* init and free vm_pages (we've already zeroed them) */
    486 		paddr = ctob(seg->start);
    487 		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
    488 			seg->pgs[i].phys_addr = paddr;
    489 #ifdef __HAVE_VM_PAGE_MD
    490 			VM_MDPAGE_INIT(&seg->pgs[i]);
    491 #endif
    492 			if (atop(paddr) >= seg->avail_start &&
    493 			    atop(paddr) <= seg->avail_end) {
    494 				uvmexp.npages++;
    495 				/* add page to free pool */
    496 				uvm_pagefree(&seg->pgs[i]);
    497 			}
    498 		}
    499 	}
    500 
    501 	/*
    502 	 * pass up the values of virtual_space_start and
    503 	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
    504 	 * layers of the VM.
    505 	 */
    506 
    507 	*kvm_startp = round_page(virtual_space_start);
    508 	*kvm_endp = trunc_page(virtual_space_end);
    509 #ifdef DEBUG
    510 	/*
    511 	 * steal kva for uvm_pagezerocheck().
    512 	 */
    513 	uvm_zerocheckkva = *kvm_startp;
    514 	*kvm_startp += PAGE_SIZE;
    515 #endif /* DEBUG */
    516 
    517 	/*
    518 	 * init various thresholds.
    519 	 */
    520 
    521 	uvmexp.reserve_pagedaemon = 1;
    522 	uvmexp.reserve_kernel = vm_page_reserve_kernel;
    523 
    524 	/*
    525 	 * determine if we should zero pages in the idle loop.
    526 	 */
    527 
    528 	uvm.cpus[0]->page_idle_zero = vm_page_zero_enable;
    529 
    530 	/*
    531 	 * done!
    532 	 */
    533 
    534 	uvm.page_init_done = true;
    535 }
    536 
    537 /*
    538  * uvm_setpagesize: set the page size
    539  *
    540  * => sets page_shift and page_mask from uvmexp.pagesize.
    541  */
    542 
    543 void
    544 uvm_setpagesize(void)
    545 {
    546 
    547 	/*
    548 	 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
    549 	 * to be a constant (indicated by being a non-zero value).
    550 	 */
    551 	if (uvmexp.pagesize == 0) {
    552 		if (PAGE_SIZE == 0)
    553 			panic("uvm_setpagesize: uvmexp.pagesize not set");
    554 		uvmexp.pagesize = PAGE_SIZE;
    555 	}
    556 	uvmexp.pagemask = uvmexp.pagesize - 1;
    557 	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
    558 		panic("uvm_setpagesize: page size not a power of two");
    559 	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
    560 		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
    561 			break;
    562 }
    563 
    564 /*
    565  * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
    566  */
    567 
    568 vaddr_t
    569 uvm_pageboot_alloc(vsize_t size)
    570 {
    571 	static bool initialized = false;
    572 	vaddr_t addr;
    573 #if !defined(PMAP_STEAL_MEMORY)
    574 	vaddr_t vaddr;
    575 	paddr_t paddr;
    576 #endif
    577 
    578 	/*
    579 	 * on first call to this function, initialize ourselves.
    580 	 */
    581 	if (initialized == false) {
    582 		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
    583 
    584 		/* round it the way we like it */
    585 		virtual_space_start = round_page(virtual_space_start);
    586 		virtual_space_end = trunc_page(virtual_space_end);
    587 
    588 		initialized = true;
    589 	}
    590 
    591 	/* round to page size */
    592 	size = round_page(size);
    593 
    594 #if defined(PMAP_STEAL_MEMORY)
    595 
    596 	/*
    597 	 * defer bootstrap allocation to MD code (it may want to allocate
    598 	 * from a direct-mapped segment).  pmap_steal_memory should adjust
    599 	 * virtual_space_start/virtual_space_end if necessary.
    600 	 */
    601 
    602 	addr = pmap_steal_memory(size, &virtual_space_start,
    603 	    &virtual_space_end);
    604 
    605 	return(addr);
    606 
    607 #else /* !PMAP_STEAL_MEMORY */
    608 
    609 	/*
    610 	 * allocate virtual memory for this request
    611 	 */
    612 	if (virtual_space_start == virtual_space_end ||
    613 	    (virtual_space_end - virtual_space_start) < size)
    614 		panic("uvm_pageboot_alloc: out of virtual space");
    615 
    616 	addr = virtual_space_start;
    617 
    618 #ifdef PMAP_GROWKERNEL
    619 	/*
    620 	 * If the kernel pmap can't map the requested space,
    621 	 * then allocate more resources for it.
    622 	 */
    623 	if (uvm_maxkaddr < (addr + size)) {
    624 		uvm_maxkaddr = pmap_growkernel(addr + size);
    625 		if (uvm_maxkaddr < (addr + size))
    626 			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
    627 	}
    628 #endif
    629 
    630 	virtual_space_start += size;
    631 
    632 	/*
    633 	 * allocate and mapin physical pages to back new virtual pages
    634 	 */
    635 
    636 	for (vaddr = round_page(addr) ; vaddr < addr + size ;
    637 	    vaddr += PAGE_SIZE) {
    638 
    639 		if (!uvm_page_physget(&paddr))
    640 			panic("uvm_pageboot_alloc: out of memory");
    641 
    642 		/*
    643 		 * Note this memory is no longer managed, so using
    644 		 * pmap_kenter is safe.
    645 		 */
    646 		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
    647 	}
    648 	pmap_update(pmap_kernel());
    649 	return(addr);
    650 #endif	/* PMAP_STEAL_MEMORY */
    651 }
    652 
    653 #if !defined(PMAP_STEAL_MEMORY)
    654 /*
    655  * uvm_page_physget: "steal" one page from the vm_physmem structure.
    656  *
    657  * => attempt to allocate it off the end of a segment in which the "avail"
    658  *    values match the start/end values.   if we can't do that, then we
    659  *    will advance both values (making them equal, and removing some
    660  *    vm_page structures from the non-avail area).
    661  * => return false if out of memory.
    662  */
    663 
    664 /* subroutine: try to allocate from memory chunks on the specified freelist */
    665 static bool uvm_page_physget_freelist(paddr_t *, int);
    666 
    667 static bool
    668 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
    669 {
    670 	struct vm_physseg *seg;
    671 	int lcv, x;
    672 
    673 	/* pass 1: try allocating from a matching end */
    674 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
    675 	for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--)
    676 #else
    677 	for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
    678 #endif
    679 	{
    680 		seg = VM_PHYSMEM_PTR(lcv);
    681 
    682 		if (uvm.page_init_done == true)
    683 			panic("uvm_page_physget: called _after_ bootstrap");
    684 
    685 		if (seg->free_list != freelist)
    686 			continue;
    687 
    688 		/* try from front */
    689 		if (seg->avail_start == seg->start &&
    690 		    seg->avail_start < seg->avail_end) {
    691 			*paddrp = ctob(seg->avail_start);
    692 			seg->avail_start++;
    693 			seg->start++;
    694 			/* nothing left?   nuke it */
    695 			if (seg->avail_start == seg->end) {
    696 				if (vm_nphysmem == 1)
    697 				    panic("uvm_page_physget: out of memory!");
    698 				vm_nphysmem--;
    699 				for (x = lcv ; x < vm_nphysmem ; x++)
    700 					VM_PHYSMEM_PTR_SWAP(x, x + 1);
    701 			}
    702 			return (true);
    703 		}
    704 
    705 		/* try from rear */
    706 		if (seg->avail_end == seg->end &&
    707 		    seg->avail_start < seg->avail_end) {
    708 			*paddrp = ctob(seg->avail_end - 1);
    709 			seg->avail_end--;
    710 			seg->end--;
    711 			/* nothing left?   nuke it */
    712 			if (seg->avail_end == seg->start) {
    713 				if (vm_nphysmem == 1)
    714 				    panic("uvm_page_physget: out of memory!");
    715 				vm_nphysmem--;
    716 				for (x = lcv ; x < vm_nphysmem ; x++)
    717 					VM_PHYSMEM_PTR_SWAP(x, x + 1);
    718 			}
    719 			return (true);
    720 		}
    721 	}
    722 
    723 	/* pass2: forget about matching ends, just allocate something */
    724 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
    725 	for (lcv = vm_nphysmem - 1 ; lcv >= 0 ; lcv--)
    726 #else
    727 	for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
    728 #endif
    729 	{
    730 		seg = VM_PHYSMEM_PTR(lcv);
    731 
    732 		/* any room in this bank? */
    733 		if (seg->avail_start >= seg->avail_end)
    734 			continue;  /* nope */
    735 
    736 		*paddrp = ctob(seg->avail_start);
    737 		seg->avail_start++;
    738 		/* truncate! */
    739 		seg->start = seg->avail_start;
    740 
    741 		/* nothing left?   nuke it */
    742 		if (seg->avail_start == seg->end) {
    743 			if (vm_nphysmem == 1)
    744 				panic("uvm_page_physget: out of memory!");
    745 			vm_nphysmem--;
    746 			for (x = lcv ; x < vm_nphysmem ; x++)
    747 				VM_PHYSMEM_PTR_SWAP(x, x + 1);
    748 		}
    749 		return (true);
    750 	}
    751 
    752 	return (false);        /* whoops! */
    753 }
    754 
    755 bool
    756 uvm_page_physget(paddr_t *paddrp)
    757 {
    758 	int i;
    759 
    760 	/* try in the order of freelist preference */
    761 	for (i = 0; i < VM_NFREELIST; i++)
    762 		if (uvm_page_physget_freelist(paddrp, i) == true)
    763 			return (true);
    764 	return (false);
    765 }
    766 #endif /* PMAP_STEAL_MEMORY */
    767 
    768 /*
    769  * uvm_page_physload: load physical memory into VM system
    770  *
    771  * => all args are PFs
    772  * => all pages in start/end get vm_page structures
    773  * => areas marked by avail_start/avail_end get added to the free page pool
    774  * => we are limited to VM_PHYSSEG_MAX physical memory segments
    775  */
    776 
    777 void
    778 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
    779     paddr_t avail_end, int free_list)
    780 {
    781 	struct vm_physseg *seg;
    782 	int lcv;
    783 
    784 	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
    785 		panic("uvm_page_physload: bad free list %d", free_list);
    786 
    787 	seg = uvm_physseg_alloc(&vm_physmem_freelist, vm_physmem_ptrs,
    788 	    vm_nphysmem, start, end);
    789 	KASSERT(seg != NULL);
    790 
    791 	seg->avail_start = avail_start;
    792 	seg->avail_end = avail_end;
    793 	/*
    794 	 * check to see if this is a "preload" (i.e. uvm_page_init hasn't been
    795 	 * called yet, so malloc is not available).
    796 	 */
    797 
    798 	for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) {
    799 		if (VM_PHYSMEM_PTR(lcv)->pgs)
    800 			break;
    801 	}
    802 	if (lcv == vm_nphysmem) {
    803 		seg->pgs = NULL;
    804 		seg->lastpg = NULL;
    805 		seg->free_list = free_list;
    806 	} else {
    807 		panic("uvm_page_physload: "
    808 		    "tried to add RAM after uvm_page_init");
    809 	}
    810 	vm_nphysmem++;
    811 }
    812 
#if 0
/*
 * uvm_page_physunload: give a loaded segment back (not compiled in).
 *
 * => cookie is the vm_physseg to unload
 * => unloading is not supported yet, so this panics straight away;
 *    the statements after the panic only sketch the intended steps
 */
void
uvm_page_physunload(void *cookie)
{
	struct vm_physseg *seg;

	panic("memory unload is not supported yet");

	seg = cookie;
	uvm_physseg_free(&vm_physmem_freelist, vm_physmem_ptrs, seg);
	vm_nphysmem--;
}
#endif
    825 
    826 int uvm_physseg_inited;
    827 
    828 static struct vm_physseg *
    829 uvm_physseg_alloc(struct vm_physseg_freelist *freelist,
    830     struct vm_physseg **segs, int nsegs,
    831     const paddr_t start, const paddr_t end)
    832 {
    833 	struct vm_physseg *ps;
    834 
    835 	if (uvmexp.pagesize == 0)
    836 		panic("uvm_page_physload: page size not set!");
    837 	if (start >= end)
    838 		panic("uvm_page_physload: start >= end");
    839 	if (nsegs == VM_PHYSSEG_MAX)
    840 		panic("uvm_page_physload: unable to load physical memory "
    841 		    "segment\n"
    842 		    "\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n"
    843 		    "\tincrease VM_PHYSSEG_MAX\n",
    844 		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
    845 
    846 	if (uvm_physseg_inited == 0) {
    847 		uvm_physseg_inited = 1;
    848 		uvm_physseg_init();
    849 	}
    850 
    851 	ps = SIMPLEQ_FIRST(freelist);
    852 	KASSERT(ps != NULL);
    853 	SIMPLEQ_REMOVE_HEAD(freelist, list);
    854 
    855 	ps->start = start;
    856 	ps->end = end;
    857 	uvm_physseg_insert(ps, segs, nsegs);
    858 	return ps;
    859 }
    860 
    861 void
    862 uvm_physseg_free(struct vm_physseg_freelist *freelist,
    863     struct vm_physseg **segs, struct vm_physseg *seg)
    864 {
    865 
    866 	uvm_physseg_remove(segs, seg);
    867 	SIMPLEQ_INSERT_TAIL(freelist, seg, list);
    868 }
    869 
    870 static void
    871 uvm_physseg_init(void)
    872 {
    873 	int lcv;
    874 
    875 	for (lcv = 0; lcv < VM_PHYSSEG_MAX; lcv++) {
    876 		SIMPLEQ_INSERT_TAIL(&vm_physmem_freelist,
    877 		    &vm_physmem_store[lcv], list);
    878 	}
    879 }
    880 
/*
 * uvm_physseg_insert: enter ps into the table segs, which holds nsegs
 * entries, at the position the configured VM_PHYSSEG_STRAT asks for.
 *
 * => RANDOM:   appended
 * => BSEARCH:  kept sorted by ascending start
 * => BIGFIRST: kept sorted by descending size
 * => segs must have room for one more entry
 */
static void
uvm_physseg_insert(struct vm_physseg *ps,
    struct vm_physseg **segs, int nsegs)
{

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
	/* no ordering at all: the new segment simply goes last */
	segs[nsegs] = ps;
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	{
		int pos = 0;
		int tail;

		/* skip every segment that does not start above us */
		while (pos < nsegs && ps->start >= segs[pos]->start)
			pos++;
		/* open a slot at pos by shifting the rest down one */
		for (tail = nsegs ; tail > pos ; tail--)
			segs[tail] = segs[tail - 1];
		segs[pos] = ps;
	}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	{
		int pos = 0;
		int tail;

		/* skip every segment that is at least as large as us */
		while (pos < nsegs && (ps->end - ps->start) <=
		    (segs[pos]->end - segs[pos]->start))
			pos++;
		/* open a slot at pos by shifting the rest down one */
		for (tail = nsegs ; tail > pos ; tail--)
			segs[tail] = segs[tail - 1];
		segs[pos] = ps;
	}
#else
	panic("uvm_page_physload: unknown physseg strategy selected!");
#endif
}
    920 
    921 static void
    922 uvm_physseg_remove(struct vm_physseg **segs, struct vm_physseg *seg)
    923 {
    924 	struct vm_physseg **segp;
    925 
    926 	for (segp = segs; segp < segs + VM_PHYSSEG_MAX; segp++)
    927 		if (*segp == seg)
    928 			break;
    929 	if (segp == segs + VM_PHYSSEG_MAX)
    930 		panic("unknown segment: %p", seg);
    931 	while (segp + 1 < segs + VM_PHYSSEG_MAX) {
    932 		*segp = *(segp + 1);
    933 		segp++;
    934 	}
    935 	*segp = NULL;
    936 }
    937 
/*
 * vm_physseg_find: find vm_physseg structure that belongs to a PA
 *
 * The search routines below are shared between two kinds of lookup,
 * selected by an "op" argument:
 *   VM_PHYSSEG_OP_PF - match a page frame number against a segment's
 *                      [start, end) range
 *   VM_PHYSSEG_OP_PG - match a vm_page pointer against a segment's
 *                      [pgs, lastpg) range
 */

#define	VM_PHYSSEG_OP_PF	1
#define	VM_PHYSSEG_OP_PG	2

/*
 * Pick the search routine at compile time: a single-segment
 * configuration needs only one range check, a BSEARCH table is sorted
 * and can be bisected, everything else is scanned linearly.
 */
#if VM_PHYSSEG_MAX == 1
#define	VM_PHYSSEG_FIND	vm_physseg_find_contig
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
#define	VM_PHYSSEG_FIND	vm_physseg_find_bsearch
#else
#define	VM_PHYSSEG_FIND	vm_physseg_find_linear
#endif

/*
 * Forward declarations.  Arguments of the predicates are:
 * segment, op, page frame, vm_page pointer, optional offset result.
 */
static inline int VM_PHYSSEG_FIND(struct vm_physseg **, int, int,
    paddr_t, const struct vm_page *, int *);
static inline bool vm_physseg_within_p(struct vm_physseg *, int, paddr_t,
    const struct vm_page *, int *);
static inline bool vm_physseg_ge_p(struct vm_physseg *, int, paddr_t,
    const struct vm_page *, int *);
static inline bool vm_physseg_lt_p(struct vm_physseg *, int, paddr_t,
    const struct vm_page *, int *);
    961 
    962 int
    963 vm_physseg_find(paddr_t pframe, int *offp)
    964 {
    965 
    966 	return VM_PHYSSEG_FIND(vm_physmem_ptrs, vm_nphysmem, VM_PHYSSEG_OP_PF,
    967 	    pframe, NULL, offp);
    968 }
    969 
    970 #if VM_PHYSSEG_MAX == 1
    971 static inline int
    972 vm_physseg_find_contig(struct vm_physseg **segs, int nsegs, int op,
    973     paddr_t pframe, const struct vm_page *pg, int *offp)
    974 {
    975 
    976 	/* 'contig' case */
    977 	if (nsegs == 0)
    978 		return(-1);
    979 	if (vm_physseg_within_p(segs[0], op, pframe, pg, offp)) {
    980 		return(0);
    981 	}
    982 	return(-1);
    983 }
    984 
    985 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
    986 
    987 static inline int
    988 vm_physseg_find_bsearch(struct vm_physseg **segs, int nsegs, int op,
    989     paddr_t pframe, const struct vm_page *pg, int *offp)
    990 {
    991 	/* binary search for it */
    992 	u_int	start, len, try;
    993 
    994 	/*
    995 	 * if try is too large (thus target is less than try) we reduce
    996 	 * the length to trunc(len/2) [i.e. everything smaller than "try"]
    997 	 *
    998 	 * if the try is too small (thus target is greater than try) then
    999 	 * we set the new start to be (try + 1).   this means we need to
   1000 	 * reduce the length to (round(len/2) - 1).
   1001 	 *
   1002 	 * note "adjust" below which takes advantage of the fact that
   1003 	 *  (round(len/2) - 1) == trunc((len - 1) / 2)
   1004 	 * for any value of len we may have
   1005 	 */
   1006 
   1007 	for (start = 0, len = nsegs ; len != 0 ; len = len / 2) {
   1008 		try = start + (len / 2);	/* try in the middle */
   1009 
   1010 		/* start past our try? */
   1011 		if (vm_physseg_ge_p(segs[try], op, pframe, pg, offp)) {
   1012 			/* was try correct? */
   1013 			if (vm_physseg_lt_p(segs[try], op, pframe, pg, offp)) {
   1014 				return(try);            /* got it */
   1015 			}
   1016 			start = try + 1;	/* next time, start here */
   1017 			len--;			/* "adjust" */
   1018 		} else {
   1019 			/*
   1020 			 * pframe before try, just reduce length of
   1021 			 * region, done in "for" loop
   1022 			 */
   1023 		}
   1024 	}
   1025 	return(-1);
   1026 }
   1027 
   1028 #else
   1029 
   1030 static inline int
   1031 vm_physseg_find_linear(struct vm_physseg **segs, int nsegs, int op,
   1032     paddr_t pframe, const struct vm_page *pg, int *offp)
   1033 {
   1034 	/* linear search for it */
   1035 	int	lcv;
   1036 
   1037 	for (lcv = 0; lcv < nsegs; lcv++) {
   1038 		if (vm_physseg_within_p(segs[lcv], op, pframe, pg, offp)) {
   1039 			return(lcv);		   /* got it */
   1040 		}
   1041 	}
   1042 	return(-1);
   1043 }
   1044 #endif
   1045 
   1046 static inline bool
   1047 vm_physseg_within_p(struct vm_physseg *seg, int op, paddr_t pframe,
   1048     const struct vm_page *pg, int *offp)
   1049 {
   1050 
   1051 	return vm_physseg_ge_p(seg, op, pframe, pg, offp) &&
   1052 	    vm_physseg_lt_p(seg, op, pframe, pg, offp);
   1053 }
   1054 
   1055 static inline bool
   1056 vm_physseg_ge_p(struct vm_physseg *seg, int op, paddr_t pframe,
   1057     const struct vm_page *pg, int *offp)
   1058 {
   1059 
   1060 	switch (op) {
   1061 	case VM_PHYSSEG_OP_PF:
   1062 		if (offp)
   1063 			*offp = pframe - seg->start;
   1064 		return pframe >= seg->start;
   1065 	case VM_PHYSSEG_OP_PG:
   1066 		if (offp)
   1067 			*offp = pg - seg->pgs;
   1068 		return pg >= seg->pgs;
   1069 	default:
   1070 		return false;
   1071 	}
   1072 }
   1073 
   1074 static inline bool
   1075 vm_physseg_lt_p(struct vm_physseg *seg, int op, paddr_t pframe,
   1076     const struct vm_page *pg, int *offp)
   1077 {
   1078 
   1079 	switch (op) {
   1080 	case VM_PHYSSEG_OP_PF:
   1081 		return pframe < seg->end;
   1082 	case VM_PHYSSEG_OP_PG:
   1083 		return pg < seg->lastpg;
   1084 	default:
   1085 		return false;
   1086 	}
   1087 }
   1088 
   1089 /*
   1090  * PHYS_TO_VM_PAGE: find vm_page for a PA.   used by MI code to get vm_pages
   1091  * back from an I/O mapping (ugh!).   used in some MD code as well.
   1092  */
   1093 struct vm_page *
   1094 uvm_phys_to_vm_page(paddr_t pa)
   1095 {
   1096 	paddr_t pf = atop(pa);
   1097 	int	off;
   1098 	int	psi;
   1099 
   1100 	psi = vm_physseg_find(pf, &off);
   1101 	if (psi != -1)
   1102 		return(&VM_PHYSMEM_PTR(psi)->pgs[off]);
   1103 	return(NULL);
   1104 }
   1105 
   1106 paddr_t
   1107 uvm_vm_page_to_phys(const struct vm_page *pg)
   1108 {
   1109 
   1110 	return pg->phys_addr;
   1111 }
   1112 
   1113 /*
   1114  * uvm_page_recolor: Recolor the pages if the new bucket count is
   1115  * larger than the old one.
   1116  */
   1117 
   1118 void
   1119 uvm_page_recolor(int newncolors)
   1120 {
   1121 	struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
   1122 	struct pgfreelist gpgfl, pgfl;
   1123 	struct vm_page *pg;
   1124 	vsize_t bucketcount;
   1125 	int lcv, color, i, ocolors;
   1126 	struct uvm_cpu *ucpu;
   1127 
   1128 	if (newncolors <= uvmexp.ncolors)
   1129 		return;
   1130 
   1131 	if (uvm.page_init_done == false) {
   1132 		uvmexp.ncolors = newncolors;
   1133 		return;
   1134 	}
   1135 
   1136 	bucketcount = newncolors * VM_NFREELIST;
   1137 	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
   1138 	    M_VMPAGE, M_NOWAIT);
   1139 	cpuarray = bucketarray + bucketcount;
   1140 	if (bucketarray == NULL) {
   1141 		printf("WARNING: unable to allocate %ld page color buckets\n",
   1142 		    (long) bucketcount);
   1143 		return;
   1144 	}
   1145 
   1146 	mutex_spin_enter(&uvm_fpageqlock);
   1147 
   1148 	/* Make sure we should still do this. */
   1149 	if (newncolors <= uvmexp.ncolors) {
   1150 		mutex_spin_exit(&uvm_fpageqlock);
   1151 		free(bucketarray, M_VMPAGE);
   1152 		return;
   1153 	}
   1154 
   1155 	oldbucketarray = uvm.page_free[0].pgfl_buckets;
   1156 	ocolors = uvmexp.ncolors;
   1157 
   1158 	uvmexp.ncolors = newncolors;
   1159 	uvmexp.colormask = uvmexp.ncolors - 1;
   1160 
   1161 	ucpu = curcpu()->ci_data.cpu_uvm;
   1162 	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
   1163 		gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
   1164 		pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
   1165 		uvm_page_init_buckets(&gpgfl);
   1166 		uvm_page_init_buckets(&pgfl);
   1167 		for (color = 0; color < ocolors; color++) {
   1168 			for (i = 0; i < PGFL_NQUEUES; i++) {
   1169 				while ((pg = LIST_FIRST(&uvm.page_free[
   1170 				    lcv].pgfl_buckets[color].pgfl_queues[i]))
   1171 				    != NULL) {
   1172 					LIST_REMOVE(pg, pageq.list); /* global */
   1173 					LIST_REMOVE(pg, listq.list); /* cpu */
   1174 					LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
   1175 					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
   1176 					    i], pg, pageq.list);
   1177 					LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
   1178 					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
   1179 					    i], pg, listq.list);
   1180 				}
   1181 			}
   1182 		}
   1183 		uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
   1184 		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
   1185 	}
   1186 
   1187 	if (have_recolored_pages) {
   1188 		mutex_spin_exit(&uvm_fpageqlock);
   1189 		free(oldbucketarray, M_VMPAGE);
   1190 		return;
   1191 	}
   1192 
   1193 	have_recolored_pages = true;
   1194 	mutex_spin_exit(&uvm_fpageqlock);
   1195 }
   1196 
   1197 /*
   1198  * uvm_cpu_attach: initialize per-CPU data structures.
   1199  */
   1200 
   1201 void
   1202 uvm_cpu_attach(struct cpu_info *ci)
   1203 {
   1204 	struct pgflbucket *bucketarray;
   1205 	struct pgfreelist pgfl;
   1206 	struct uvm_cpu *ucpu;
   1207 	vsize_t bucketcount;
   1208 	int lcv;
   1209 
   1210 	if (CPU_IS_PRIMARY(ci)) {
   1211 		/* Already done in uvm_page_init(). */
   1212 		return;
   1213 	}
   1214 
   1215 	/* Add more reserve pages for this CPU. */
   1216 	uvmexp.reserve_kernel += vm_page_reserve_kernel;
   1217 
   1218 	/* Configure this CPU's free lists. */
   1219 	bucketcount = uvmexp.ncolors * VM_NFREELIST;
   1220 	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
   1221 	    M_VMPAGE, M_WAITOK);
   1222 	ucpu = kmem_zalloc(sizeof(*ucpu), KM_SLEEP);
   1223 	uvm.cpus[cpu_index(ci)] = ucpu;
   1224 	ci->ci_data.cpu_uvm = ucpu;
   1225 	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
   1226 		pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
   1227 		uvm_page_init_buckets(&pgfl);
   1228 		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
   1229 	}
   1230 }
   1231 
   1232 /*
   1233  * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
   1234  */
   1235 
   1236 static struct vm_page *
   1237 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
   1238     int *trycolorp)
   1239 {
   1240 	struct pgflist *freeq;
   1241 	struct vm_page *pg;
   1242 	int color, trycolor = *trycolorp;
   1243 	struct pgfreelist *gpgfl, *pgfl;
   1244 
   1245 	KASSERT(mutex_owned(&uvm_fpageqlock));
   1246 
   1247 	color = trycolor;
   1248 	pgfl = &ucpu->page_free[flist];
   1249 	gpgfl = &uvm.page_free[flist];
   1250 	do {
   1251 		/* cpu, try1 */
   1252 		if ((pg = LIST_FIRST((freeq =
   1253 		    &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
   1254 			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
   1255 		    	uvmexp.cpuhit++;
   1256 			goto gotit;
   1257 		}
   1258 		/* global, try1 */
   1259 		if ((pg = LIST_FIRST((freeq =
   1260 		    &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
   1261 			VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
   1262 		    	uvmexp.cpumiss++;
   1263 			goto gotit;
   1264 		}
   1265 		/* cpu, try2 */
   1266 		if ((pg = LIST_FIRST((freeq =
   1267 		    &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
   1268 			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
   1269 		    	uvmexp.cpuhit++;
   1270 			goto gotit;
   1271 		}
   1272 		/* global, try2 */
   1273 		if ((pg = LIST_FIRST((freeq =
   1274 		    &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
   1275 			VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
   1276 		    	uvmexp.cpumiss++;
   1277 			goto gotit;
   1278 		}
   1279 		color = (color + 1) & uvmexp.colormask;
   1280 	} while (color != trycolor);
   1281 
   1282 	return (NULL);
   1283 
   1284  gotit:
   1285 	LIST_REMOVE(pg, pageq.list);	/* global list */
   1286 	LIST_REMOVE(pg, listq.list);	/* per-cpu list */
   1287 	uvmexp.free--;
   1288 
   1289 	/* update zero'd page count */
   1290 	if (pg->flags & PG_ZERO)
   1291 		uvmexp.zeropages--;
   1292 
   1293 	if (color == trycolor)
   1294 		uvmexp.colorhit++;
   1295 	else {
   1296 		uvmexp.colormiss++;
   1297 		*trycolorp = color;
   1298 	}
   1299 
   1300 	return (pg);
   1301 }
   1302 
/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in obj's tree)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => UVM_PGA_STRAT_FALLBACK tries free_list first and then retries as
 *	UVM_PGA_STRAT_NORMAL; UVM_PGA_STRAT_ONLY fails instead.
 * => the page is returned with PG_BUSY and PG_FAKE set.  PG_CLEAN is
 *	set as well unless UVM_PGA_ZERO was requested.
 * => with UVM_PGA_ZERO the page is zeroed here if it was not already
 *	marked PG_ZERO.
 * => policy decision: it is more important to pull a page off of the
 *	appropriate priority free list than it is to get a zero'd or
 *	unknown contents page.  This is because we live with the
 *	consequences of a bad free list decision for the entire
 *	lifetime of the page, e.g. if the page comes from memory that
 *	is slower to access.
 */

struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)
{
	int lcv, try1, try2, zeroit = 0, color;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	lwp_t *l;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(anon == NULL || off == 0);
	KASSERT(off == trunc_page(off));
	KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
	KASSERT(anon == NULL || mutex_owned(&anon->an_lock));

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * This implements a global round-robin page coloring
	 * algorithm.
	 *
	 * XXXJRT: What about virtually-indexed caches?
	 */

	ucpu = curcpu()->ci_data.cpu_uvm;
	color = ucpu->page_free_nextcolor;

	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        reserved pages have not been requested.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 * we make kernel reserve pages available if called by a
	 * kernel thread or a realtime thread.
	 */
	l = curlwp;
	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
		flags |= UVM_PGA_USERESERVE;
	}
	if ((uvmexp.free <= uvmexp.reserve_kernel &&
	    (flags & UVM_PGA_USERESERVE) == 0) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	     curlwp != uvm.pagedaemon_lwp))
		goto fail;

	/* the try1/try2 scheme below assumes exactly two queues */
#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

	/* re-entered once with strat == NORMAL when FALLBACK misses */
 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check freelists: descending priority (ascending id) order */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pg = uvm_pagealloc_pgfl(ucpu, lcv,
			    try1, try2, &color);
			if (pg != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pg = uvm_pagealloc_pgfl(ucpu, free_list,
		    try1, try2, &color);
		if (pg != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	/*
	 * We now know which color we actually allocated from; set
	 * the next color accordingly.
	 */

	ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */

	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			uvmexp.pga_zerohit++;
			zeroit = 0;
		} else {
			uvmexp.pga_zeromiss++;
			zeroit = 1;
		}
		/* fewer zeroed than unknown pages left: re-arm idle zeroing */
		if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
			ucpu->page_idle_zero = vm_page_zero_enable;
		}
	}
	KASSERT(pg->pqflags == PQ_FREE);

	/* give the page its new identity while still holding the lock */
	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	if (anon) {
		anon->an_page = pg;
		pg->pqflags = PQ_ANON;
		atomic_inc_uint(&uvmexp.anonpages);
	} else {
		if (obj) {
			uvm_pageinsert(obj, pg);
		}
		pg->pqflags = 0;
	}
	mutex_spin_exit(&uvm_fpageqlock);

#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	/* any zeroing is done here, after the free page lock was dropped */
	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

 fail:
	mutex_spin_exit(&uvm_fpageqlock);
	return (NULL);
}
   1492 
   1493 /*
   1494  * uvm_pagereplace: replace a page with another
   1495  *
   1496  * => object must be locked
   1497  */
   1498 
   1499 void
   1500 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
   1501 {
   1502 	struct uvm_object *uobj = oldpg->uobject;
   1503 
   1504 	KASSERT((oldpg->flags & PG_TABLED) != 0);
   1505 	KASSERT(uobj != NULL);
   1506 	KASSERT((newpg->flags & PG_TABLED) == 0);
   1507 	KASSERT(newpg->uobject == NULL);
   1508 	KASSERT(mutex_owned(&uobj->vmobjlock));
   1509 
   1510 	newpg->uobject = uobj;
   1511 	newpg->offset = oldpg->offset;
   1512 
   1513 	uvm_pageremove_tree(uobj, oldpg);
   1514 	uvm_pageinsert_tree(uobj, newpg);
   1515 	uvm_pageinsert_list(uobj, newpg, oldpg);
   1516 	uvm_pageremove_list(uobj, oldpg);
   1517 }
   1518 
   1519 /*
   1520  * uvm_pagerealloc: reallocate a page from one object to another
   1521  *
   1522  * => both objects must be locked
   1523  */
   1524 
   1525 void
   1526 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
   1527 {
   1528 	/*
   1529 	 * remove it from the old object
   1530 	 */
   1531 
   1532 	if (pg->uobject) {
   1533 		uvm_pageremove(pg->uobject, pg);
   1534 	}
   1535 
   1536 	/*
   1537 	 * put it in the new object
   1538 	 */
   1539 
   1540 	if (newobj) {
   1541 		pg->uobject = newobj;
   1542 		pg->offset = newoff;
   1543 		uvm_pageinsert(newobj, pg);
   1544 	}
   1545 }
   1546 
   1547 #ifdef DEBUG
   1548 /*
   1549  * check if page is zero-filled
   1550  *
   1551  *  - called with free page queue lock held.
   1552  */
   1553 void
   1554 uvm_pagezerocheck(struct vm_page *pg)
   1555 {
   1556 	int *p, *ep;
   1557 
   1558 	KASSERT(uvm_zerocheckkva != 0);
   1559 	KASSERT(mutex_owned(&uvm_fpageqlock));
   1560 
   1561 	/*
   1562 	 * XXX assuming pmap_kenter_pa and pmap_kremove never call
   1563 	 * uvm page allocator.
   1564 	 *
   1565 	 * it might be better to have "CPU-local temporary map" pmap interface.
   1566 	 */
   1567 	pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
   1568 	p = (int *)uvm_zerocheckkva;
   1569 	ep = (int *)((char *)p + PAGE_SIZE);
   1570 	pmap_update(pmap_kernel());
   1571 	while (p < ep) {
   1572 		if (*p != 0)
   1573 			panic("PG_ZERO page isn't zero-filled");
   1574 		p++;
   1575 	}
   1576 	pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
   1577 	/*
   1578 	 * pmap_update() is not necessary here because no one except us
   1579 	 * uses this VA.
   1580 	 */
   1581 }
   1582 #endif /* DEBUG */
   1583 
/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 * => a page that is still loaned out afterwards is not put on the
 *    free list; only its ownership is dropped
 * => takes uvm_fpageqlock internally for the free list insertion
 */

void
uvm_pagefree(struct vm_page *pg)
{
	struct pgflist *pgfl;
	struct uvm_cpu *ucpu;
	int index, color, queue;
	bool iszero;

#ifdef DEBUG
	/* both owner pointers carry the poison written further down */
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p", pg);
	}
#endif /* DEBUG */

	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	KASSERT(!(pg->pqflags & PQ_FREE));
	KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
	KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
	KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
		mutex_owned(&pg->uanon->an_lock));

	/*
	 * if the page is loaned, resolve the loan instead of freeing.
	 */

	if (pg->loan_count) {
		KASSERT(pg->wire_count == 0);

		/*
		 * if the page is owned by an anon then we just want to
		 * drop anon ownership.  the kernel will free the page when
		 * it is done with it.  if the page is owned by an object,
		 * remove it from the object and mark it dirty for the benefit
		 * of possible anon owners.
		 *
		 * regardless of previous ownership, wakeup any waiters,
		 * unbusy the page, and we're done.
		 */

		if (pg->uobject != NULL) {
			uvm_pageremove(pg->uobject, pg);
			pg->flags &= ~PG_CLEAN;
		} else if (pg->uanon != NULL) {
			/*
			 * without PQ_ANON the anon's reference counts as
			 * one of the loans; with it, the anon is the owner.
			 */
			if ((pg->pqflags & PQ_ANON) == 0) {
				pg->loan_count--;
			} else {
				pg->pqflags &= ~PQ_ANON;
				atomic_dec_uint(&uvmexp.anonpages);
			}
			pg->uanon->an_page = NULL;
			pg->uanon = NULL;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
		pg->owner_tag = NULL;
#endif
		/*
		 * loans are still outstanding: the page stays allocated.
		 * take it off the paging queues if no anon holds it.
		 * if the count dropped to zero above, fall through and
		 * free the page for real.
		 */
		if (pg->loan_count) {
			KASSERT(pg->uobject == NULL);
			if (pg->uanon == NULL) {
				uvm_pagedequeue(pg);
			}
			return;
		}
	}

	/*
	 * remove page from its object or anon.
	 */

	if (pg->uobject != NULL) {
		uvm_pageremove(pg->uobject, pg);
	} else if (pg->uanon != NULL) {
		pg->uanon->an_page = NULL;
		atomic_dec_uint(&uvmexp.anonpages);
	}

	/*
	 * now remove the page from the queues.
	 */

	uvm_pagedequeue(pg);

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	/* pick free list, color bucket and queue before taking the lock */
	iszero = (pg->flags & PG_ZERO);
	index = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);

#ifdef DEBUG
	/* poison the owner pointers; checked on entry to catch a double free */
	pg->uobject = (void *)0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif

	mutex_spin_enter(&uvm_fpageqlock);
	pg->pqflags = PQ_FREE;

#ifdef DEBUG
	if (iszero)
		uvm_pagezerocheck(pg);
#endif /* DEBUG */


	/* global list */
	pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, pageq.list);
	uvmexp.free++;
	if (iszero) {
		uvmexp.zeropages++;
	}

	/* per-cpu list */
	ucpu = curcpu()->ci_data.cpu_uvm;
	/*
	 * the offset field is reused to record the freeing CPU's uvm_cpu
	 * (presumably what VM_FREE_PAGE_TO_CPU() reads back - confirm).
	 */
	pg->offset = (uintptr_t)ucpu;
	pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, listq.list);
	ucpu->pages[queue]++;
	/* fewer zeroed than unknown pages on this CPU: re-arm idle zeroing */
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
		ucpu->page_idle_zero = vm_page_zero_enable;
	}

	mutex_spin_exit(&uvm_fpageqlock);
}
   1732 
   1733 /*
   1734  * uvm_page_unbusy: unbusy an array of pages.
   1735  *
   1736  * => pages must either all belong to the same object, or all belong to anons.
   1737  * => if pages are object-owned, object must be locked.
   1738  * => if pages are anon-owned, anons must be locked.
   1739  * => caller must lock page queues if pages may be released.
   1740  * => caller must make sure that anon-owned pages are not PG_RELEASED.
   1741  */
   1742 
   1743 void
   1744 uvm_page_unbusy(struct vm_page **pgs, int npgs)
   1745 {
   1746 	struct vm_page *pg;
   1747 	int i;
   1748 	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
   1749 
   1750 	for (i = 0; i < npgs; i++) {
   1751 		pg = pgs[i];
   1752 		if (pg == NULL || pg == PGO_DONTCARE) {
   1753 			continue;
   1754 		}
   1755 
   1756 		KASSERT(pg->uobject == NULL ||
   1757 		    mutex_owned(&pg->uobject->vmobjlock));
   1758 		KASSERT(pg->uobject != NULL ||
   1759 		    (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));
   1760 
   1761 		KASSERT(pg->flags & PG_BUSY);
   1762 		KASSERT((pg->flags & PG_PAGEOUT) == 0);
   1763 		if (pg->flags & PG_WANTED) {
   1764 			wakeup(pg);
   1765 		}
   1766 		if (pg->flags & PG_RELEASED) {
   1767 			UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
   1768 			KASSERT(pg->uobject != NULL ||
   1769 			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
   1770 			pg->flags &= ~PG_RELEASED;
   1771 			uvm_pagefree(pg);
   1772 		} else {
   1773 			UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
   1774 			KASSERT((pg->flags & PG_FAKE) == 0);
   1775 			pg->flags &= ~(PG_WANTED|PG_BUSY);
   1776 			UVM_PAGE_OWN(pg, NULL);
   1777 		}
   1778 	}
   1779 }
   1780 
   1781 #if defined(UVM_PAGE_TRKOWN)
   1782 /*
   1783  * uvm_page_own: set or release page ownership
   1784  *
   1785  * => this is a debugging function that keeps track of who sets PG_BUSY
   1786  *	and where they do it.   it can be used to track down problems
   1787  *	such a process setting "PG_BUSY" and never releasing it.
   1788  * => page's object [if any] must be locked
   1789  * => if "tag" is NULL then we are releasing page ownership
   1790  */
   1791 void
   1792 uvm_page_own(struct vm_page *pg, const char *tag)
   1793 {
   1794 	struct uvm_object *uobj;
   1795 	struct vm_anon *anon;
   1796 
   1797 	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
   1798 
   1799 	uobj = pg->uobject;
   1800 	anon = pg->uanon;
   1801 	if (uobj != NULL) {
   1802 		KASSERT(mutex_owned(&uobj->vmobjlock));
   1803 	} else if (anon != NULL) {
   1804 		KASSERT(mutex_owned(&anon->an_lock));
   1805 	}
   1806 
   1807 	KASSERT((pg->flags & PG_WANTED) == 0);
   1808 
   1809 	/* gain ownership? */
   1810 	if (tag) {
   1811 		KASSERT((pg->flags & PG_BUSY) != 0);
   1812 		if (pg->owner_tag) {
   1813 			printf("uvm_page_own: page %p already owned "
   1814 			    "by proc %d [%s]\n", pg,
   1815 			    pg->owner, pg->owner_tag);
   1816 			panic("uvm_page_own");
   1817 		}
   1818 		pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
   1819 		pg->lowner = (curlwp) ? curlwp->l_lid :  (lwpid_t) -1;
   1820 		pg->owner_tag = tag;
   1821 		return;
   1822 	}
   1823 
   1824 	/* drop ownership */
   1825 	KASSERT((pg->flags & PG_BUSY) == 0);
   1826 	if (pg->owner_tag == NULL) {
   1827 		printf("uvm_page_own: dropping ownership of an non-owned "
   1828 		    "page (%p)\n", pg);
   1829 		panic("uvm_page_own");
   1830 	}
   1831 	if (!uvmpdpol_pageisqueued_p(pg)) {
   1832 		KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
   1833 		    pg->wire_count > 0);
   1834 	} else {
   1835 		KASSERT(pg->wire_count == 0);
   1836 	}
   1837 	pg->owner_tag = NULL;
   1838 }
   1839 #endif
   1840 
   1841 /*
   1842  * uvm_pageidlezero: zero free pages while the system is idle.
   1843  *
   1844  * => try to complete one color bucket at a time, to reduce our impact
   1845  *	on the CPU cache.
   1846  * => we loop until we either reach the target or there is a lwp ready
   1847  *      to run, or MD code detects a reason to break early.
   1848  */
   1849 void
   1850 uvm_pageidlezero(void)
   1851 {
   1852 	struct vm_page *pg;
   1853 	struct pgfreelist *pgfl, *gpgfl;
   1854 	struct uvm_cpu *ucpu;
   1855 	int free_list, firstbucket, nextbucket;
   1856 
   1857 	ucpu = curcpu()->ci_data.cpu_uvm;
   1858 	if (!ucpu->page_idle_zero ||
   1859 	    ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
   1860 	    	ucpu->page_idle_zero = false;
   1861 		return;
   1862 	}
   1863 	mutex_enter(&uvm_fpageqlock);
   1864 	firstbucket = ucpu->page_free_nextcolor;
   1865 	nextbucket = firstbucket;
   1866 	do {
   1867 		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
   1868 			if (sched_curcpu_runnable_p()) {
   1869 				goto quit;
   1870 			}
   1871 			pgfl = &ucpu->page_free[free_list];
   1872 			gpgfl = &uvm.page_free[free_list];
   1873 			while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
   1874 			    nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
   1875 				if (sched_curcpu_runnable_p()) {
   1876 					goto quit;
   1877 				}
   1878 				LIST_REMOVE(pg, pageq.list); /* global list */
   1879 				LIST_REMOVE(pg, listq.list); /* per-cpu list */
   1880 				ucpu->pages[PGFL_UNKNOWN]--;
   1881 				uvmexp.free--;
   1882 				KASSERT(pg->pqflags == PQ_FREE);
   1883 				pg->pqflags = 0;
   1884 				mutex_spin_exit(&uvm_fpageqlock);
   1885 #ifdef PMAP_PAGEIDLEZERO
   1886 				if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
   1887 
   1888 					/*
   1889 					 * The machine-dependent code detected
   1890 					 * some reason for us to abort zeroing
   1891 					 * pages, probably because there is a
   1892 					 * process now ready to run.
   1893 					 */
   1894 
   1895 					mutex_spin_enter(&uvm_fpageqlock);
   1896 					pg->pqflags = PQ_FREE;
   1897 					LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
   1898 					    nextbucket].pgfl_queues[
   1899 					    PGFL_UNKNOWN], pg, pageq.list);
   1900 					LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
   1901 					    nextbucket].pgfl_queues[
   1902 					    PGFL_UNKNOWN], pg, listq.list);
   1903 					ucpu->pages[PGFL_UNKNOWN]++;
   1904 					uvmexp.free++;
   1905 					uvmexp.zeroaborts++;
   1906 					goto quit;
   1907 				}
   1908 #else
   1909 				pmap_zero_page(VM_PAGE_TO_PHYS(pg));
   1910 #endif /* PMAP_PAGEIDLEZERO */
   1911 				pg->flags |= PG_ZERO;
   1912 
   1913 				mutex_spin_enter(&uvm_fpageqlock);
   1914 				pg->pqflags = PQ_FREE;
   1915 				LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
   1916 				    nextbucket].pgfl_queues[PGFL_ZEROS],
   1917 				    pg, pageq.list);
   1918 				LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
   1919 				    nextbucket].pgfl_queues[PGFL_ZEROS],
   1920 				    pg, listq.list);
   1921 				ucpu->pages[PGFL_ZEROS]++;
   1922 				uvmexp.free++;
   1923 				uvmexp.zeropages++;
   1924 			}
   1925 		}
   1926 		if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
   1927 			break;
   1928 		}
   1929 		nextbucket = (nextbucket + 1) & uvmexp.colormask;
   1930 	} while (nextbucket != firstbucket);
   1931 	ucpu->page_idle_zero = false;
   1932  quit:
   1933 	mutex_spin_exit(&uvm_fpageqlock);
   1934 }
   1935 
   1936 /*
   1937  * uvm_pagelookup: look up a page
   1938  *
   1939  * => caller should lock object to keep someone from pulling the page
   1940  *	out from under it
   1941  */
   1942 
   1943 struct vm_page *
   1944 uvm_pagelookup(struct uvm_object *obj, voff_t off)
   1945 {
   1946 	struct vm_page *pg;
   1947 
   1948 	KASSERT(mutex_owned(&obj->vmobjlock));
   1949 
   1950 	pg = rb_tree_find_node(&obj->rb_tree, &off);
   1951 
   1952 	KASSERT(pg == NULL || obj->uo_npages != 0);
   1953 	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
   1954 		(pg->flags & PG_BUSY) != 0);
   1955 	return pg;
   1956 }
   1957 
   1958 /*
   1959  * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
   1960  *
   1961  * => caller must lock page queues
   1962  */
   1963 
   1964 void
   1965 uvm_pagewire(struct vm_page *pg)
   1966 {
   1967 	KASSERT(mutex_owned(&uvm_pageqlock));
   1968 #if defined(READAHEAD_STATS)
   1969 	if ((pg->pqflags & PQ_READAHEAD) != 0) {
   1970 		uvm_ra_hit.ev_count++;
   1971 		pg->pqflags &= ~PQ_READAHEAD;
   1972 	}
   1973 #endif /* defined(READAHEAD_STATS) */
   1974 	if (pg->wire_count == 0) {
   1975 		uvm_pagedequeue(pg);
   1976 		uvmexp.wired++;
   1977 	}
   1978 	pg->wire_count++;
   1979 }
   1980 
   1981 /*
   1982  * uvm_pageunwire: unwire the page.
   1983  *
   1984  * => activate if wire count goes to zero.
   1985  * => caller must lock page queues
   1986  */
   1987 
   1988 void
   1989 uvm_pageunwire(struct vm_page *pg)
   1990 {
   1991 	KASSERT(mutex_owned(&uvm_pageqlock));
   1992 	pg->wire_count--;
   1993 	if (pg->wire_count == 0) {
   1994 		uvm_pageactivate(pg);
   1995 		uvmexp.wired--;
   1996 	}
   1997 }
   1998 
   1999 /*
   2000  * uvm_pagedeactivate: deactivate page
   2001  *
   2002  * => caller must lock page queues
   2003  * => caller must check to make sure page is not wired
   2004  * => object that page belongs to must be locked (so we can adjust pg->flags)
   2005  * => caller must clear the reference on the page before calling
   2006  */
   2007 
   2008 void
   2009 uvm_pagedeactivate(struct vm_page *pg)
   2010 {
   2011 
   2012 	KASSERT(mutex_owned(&uvm_pageqlock));
   2013 	KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
   2014 	uvmpdpol_pagedeactivate(pg);
   2015 }
   2016 
   2017 /*
   2018  * uvm_pageactivate: activate page
   2019  *
   2020  * => caller must lock page queues
   2021  */
   2022 
   2023 void
   2024 uvm_pageactivate(struct vm_page *pg)
   2025 {
   2026 
   2027 	KASSERT(mutex_owned(&uvm_pageqlock));
   2028 #if defined(READAHEAD_STATS)
   2029 	if ((pg->pqflags & PQ_READAHEAD) != 0) {
   2030 		uvm_ra_hit.ev_count++;
   2031 		pg->pqflags &= ~PQ_READAHEAD;
   2032 	}
   2033 #endif /* defined(READAHEAD_STATS) */
   2034 	if (pg->wire_count != 0) {
   2035 		return;
   2036 	}
   2037 	uvmpdpol_pageactivate(pg);
   2038 }
   2039 
   2040 /*
   2041  * uvm_pagedequeue: remove a page from any paging queue
   2042  */
   2043 
   2044 void
   2045 uvm_pagedequeue(struct vm_page *pg)
   2046 {
   2047 
   2048 	if (uvmpdpol_pageisqueued_p(pg)) {
   2049 		KASSERT(mutex_owned(&uvm_pageqlock));
   2050 	}
   2051 
   2052 	uvmpdpol_pagedequeue(pg);
   2053 }
   2054 
   2055 /*
   2056  * uvm_pageenqueue: add a page to a paging queue without activating.
   2057  * used where a page is not really demanded (yet).  eg. read-ahead
   2058  */
   2059 
   2060 void
   2061 uvm_pageenqueue(struct vm_page *pg)
   2062 {
   2063 
   2064 	KASSERT(mutex_owned(&uvm_pageqlock));
   2065 	if (pg->wire_count != 0) {
   2066 		return;
   2067 	}
   2068 	uvmpdpol_pageenqueue(pg);
   2069 }
   2070 
   2071 /*
   2072  * uvm_pagezero: zero fill a page
   2073  *
   2074  * => if page is part of an object then the object should be locked
   2075  *	to protect pg->flags.
   2076  */
   2077 
   2078 void
   2079 uvm_pagezero(struct vm_page *pg)
   2080 {
   2081 	pg->flags &= ~PG_CLEAN;
   2082 	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
   2083 }
   2084 
   2085 /*
   2086  * uvm_pagecopy: copy a page
   2087  *
   2088  * => if page is part of an object then the object should be locked
   2089  *	to protect pg->flags.
   2090  */
   2091 
   2092 void
   2093 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
   2094 {
   2095 
   2096 	dst->flags &= ~PG_CLEAN;
   2097 	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
   2098 }
   2099 
   2100 /*
   2101  * uvm_pageismanaged: test it see that a page (specified by PA) is managed.
   2102  */
   2103 
   2104 bool
   2105 uvm_pageismanaged(paddr_t pa)
   2106 {
   2107 
   2108 	return (vm_physseg_find(atop(pa), NULL) != -1);
   2109 }
   2110 
   2111 /*
   2112  * uvm_page_lookup_freelist: look up the free list for the specified page
   2113  */
   2114 
   2115 int
   2116 uvm_page_lookup_freelist(struct vm_page *pg)
   2117 {
   2118 	int lcv;
   2119 
   2120 	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
   2121 	KASSERT(lcv != -1);
   2122 	return (VM_PHYSMEM_PTR(lcv)->free_list);
   2123 }
   2124 
   2125 #if defined(DDB) || defined(DEBUGPRINT)
   2126 
   2127 /*
   2128  * uvm_page_printit: actually print the page
   2129  */
   2130 
   2131 static const char page_flagbits[] = UVM_PGFLAGBITS;
   2132 static const char page_pqflagbits[] = UVM_PQFLAGBITS;
   2133 
   2134 void
   2135 uvm_page_printit(struct vm_page *pg, bool full,
   2136     void (*pr)(const char *, ...))
   2137 {
   2138 	struct vm_page *tpg;
   2139 	struct uvm_object *uobj;
   2140 	struct pgflist *pgl;
   2141 	char pgbuf[128];
   2142 	char pqbuf[128];
   2143 
   2144 	(*pr)("PAGE %p:\n", pg);
   2145 	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
   2146 	snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags);
   2147 	(*pr)("  flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
   2148 	    pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
   2149 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
   2150 	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
   2151 #if defined(UVM_PAGE_TRKOWN)
   2152 	if (pg->flags & PG_BUSY)
   2153 		(*pr)("  owning process = %d, tag=%s\n",
   2154 		    pg->owner, pg->owner_tag);
   2155 	else
   2156 		(*pr)("  page not busy, no owner\n");
   2157 #else
   2158 	(*pr)("  [page ownership tracking disabled]\n");
   2159 #endif
   2160 
   2161 	if (!full)
   2162 		return;
   2163 
   2164 	/* cross-verify object/anon */
   2165 	if ((pg->pqflags & PQ_FREE) == 0) {
   2166 		if (pg->pqflags & PQ_ANON) {
   2167 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
   2168 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
   2169 				(pg->uanon) ? pg->uanon->an_page : NULL);
   2170 			else
   2171 				(*pr)("  anon backpointer is OK\n");
   2172 		} else {
   2173 			uobj = pg->uobject;
   2174 			if (uobj) {
   2175 				(*pr)("  checking object list\n");
   2176 				TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) {
   2177 					if (tpg == pg) {
   2178 						break;
   2179 					}
   2180 				}
   2181 				if (tpg)
   2182 					(*pr)("  page found on object list\n");
   2183 				else
   2184 			(*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
   2185 			}
   2186 		}
   2187 	}
   2188 
   2189 	/* cross-verify page queue */
   2190 	if (pg->pqflags & PQ_FREE) {
   2191 		int fl = uvm_page_lookup_freelist(pg);
   2192 		int color = VM_PGCOLOR_BUCKET(pg);
   2193 		pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
   2194 		    ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
   2195 	} else {
   2196 		pgl = NULL;
   2197 	}
   2198 
   2199 	if (pgl) {
   2200 		(*pr)("  checking pageq list\n");
   2201 		LIST_FOREACH(tpg, pgl, pageq.list) {
   2202 			if (tpg == pg) {
   2203 				break;
   2204 			}
   2205 		}
   2206 		if (tpg)
   2207 			(*pr)("  page found on pageq list\n");
   2208 		else
   2209 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
   2210 	}
   2211 }
   2212 
   2213 /*
   2214  * uvm_pages_printthem - print a summary of all managed pages
   2215  */
   2216 
   2217 void
   2218 uvm_page_printall(void (*pr)(const char *, ...))
   2219 {
   2220 	unsigned i;
   2221 	struct vm_page *pg;
   2222 
   2223 	(*pr)("%18s %4s %4s %18s %18s"
   2224 #ifdef UVM_PAGE_TRKOWN
   2225 	    " OWNER"
   2226 #endif
   2227 	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
   2228 	for (i = 0; i < vm_nphysmem; i++) {
   2229 		for (pg = VM_PHYSMEM_PTR(i)->pgs; pg < VM_PHYSMEM_PTR(i)->lastpg; pg++) {
   2230 			(*pr)("%18p %04x %04x %18p %18p",
   2231 			    pg, pg->flags, pg->pqflags, pg->uobject,
   2232 			    pg->uanon);
   2233 #ifdef UVM_PAGE_TRKOWN
   2234 			if (pg->flags & PG_BUSY)
   2235 				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
   2236 #endif
   2237 			(*pr)("\n");
   2238 		}
   2239 	}
   2240 }
   2241 
   2242 #endif /* DDB || DEBUGPRINT */
   2243