      1 /*	$NetBSD: uvm_pdaemon.c,v 1.93.4.2.4.12 2012/04/14 00:49:35 matt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      5  * Copyright (c) 1991, 1993, The Regents of the University of California.
      6  *
      7  * All rights reserved.
      8  *
      9  * This code is derived from software contributed to Berkeley by
     10  * The Mach Operating System project at Carnegie-Mellon University.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  *	This product includes software developed by Charles D. Cranor,
     23  *      Washington University, the University of California, Berkeley and
     24  *      its contributors.
     25  * 4. Neither the name of the University nor the names of its contributors
     26  *    may be used to endorse or promote products derived from this software
     27  *    without specific prior written permission.
     28  *
     29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39  * SUCH DAMAGE.
     40  *
     41  *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
     42  * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
     43  *
     44  *
     45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
     46  * All rights reserved.
     47  *
     48  * Permission to use, copy, modify and distribute this software and
     49  * its documentation is hereby granted, provided that both the copyright
     50  * notice and this permission notice appear in all copies of the
     51  * software, derivative works or modified versions, and any portions
     52  * thereof, and that both notices appear in supporting documentation.
     53  *
     54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     57  *
     58  * Carnegie Mellon requests users of this software to return to
     59  *
      60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
     61  *  School of Computer Science
     62  *  Carnegie Mellon University
     63  *  Pittsburgh PA 15213-3890
     64  *
     65  * any improvements or extensions that they make and grant Carnegie the
     66  * rights to redistribute these changes.
     67  */
     68 
     69 /*
     70  * uvm_pdaemon.c: the page daemon
     71  */
     72 
     73 #include <sys/cdefs.h>
     74 __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.93.4.2.4.12 2012/04/14 00:49:35 matt Exp $");
     75 
     76 #include "opt_uvmhist.h"
     77 #include "opt_readahead.h"
     78 
     79 #include <sys/param.h>
     80 #include <sys/proc.h>
     81 #include <sys/systm.h>
     82 #include <sys/kernel.h>
     83 #include <sys/pool.h>
     84 #include <sys/buf.h>
     85 #include <sys/atomic.h>
     86 
     87 #include <uvm/uvm.h>
     88 #include <uvm/uvm_pdpolicy.h>
     89 
     90 /*
     91  * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
     92  * in a pass thru the inactive list when swap is full.  the value should be
     93  * "small"... if it's too large we'll cycle the active pages thru the inactive
      94  * queue too quickly for them to be referenced and avoid being freed.
     95  */
     96 
     97 #define	UVMPD_NUMDIRTYREACTS	16
     98 
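/*
 * UVMPD_NUMTRYLOCKOWNER is how many consecutive failed trylocks of page
 * owners the queue scan tolerates before briefly pausing to let the lock
 * holder run.
 */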
     99 #define	UVMPD_NUMTRYLOCKOWNER	16
    100 
    101 /*
    102  * local prototypes
    103  */
    104 
    105 static bool	uvmpd_scan(struct uvm_pggroup *);
    106 static void	uvmpd_scan_queue(struct uvm_pggroup *);
    107 static void	uvmpd_tune(void);
    108 
    109 static void	uvmpd_checkgroup(const struct uvm_pggroup *);
    110 
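/*
 * Page daemon private state: pd_waiters counts LWPs sleeping in uvm_wait(),
 * pd_pagingq holds page groups with pageouts in flight, pd_pendingq holds
 * groups awaiting a scan (kept sorted by free page count), and pd_stalled
 * is set while the daemon sleeps after failing to make progress.
 */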
    111 static struct uvm_pdinfo {
    112 	unsigned int pd_waiters;
    113 	unsigned int pd_scans_neededs;
    114 	struct uvm_pggrouplist pd_pagingq;
    115 	struct uvm_pggrouplist pd_pendingq;
    116 	bool pd_stalled;
    117 } uvm_pdinfo =  {
    118 	.pd_pagingq = TAILQ_HEAD_INITIALIZER(uvm_pdinfo.pd_pagingq),
    119 	.pd_pendingq = TAILQ_HEAD_INITIALIZER(uvm_pdinfo.pd_pendingq),
    120 };
    121 
    122 /*
    123  * XXX hack to avoid hangs when large processes fork.
    124  */
    125 u_int uvm_extrapages;
    126 
    127 /*
    128  * uvm_wait: wait (sleep) for the page daemon to free some pages
    129  *
    130  * => should be called with all locks released
    131  * => should _not_ be called by the page daemon (to avoid deadlock)
    132  */
    133 
    134 void
    135 uvm_wait(const char *wmsg)
    136 {
    137 	int timo = 0;
    138 
    139 	mutex_spin_enter(&uvm_fpageqlock);
    140 
    141 	/*
    142 	 * check for page daemon going to sleep (waiting for itself)
    143 	 */
    144 
    145 	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
    146 		/*
    147 		 * now we have a problem: the pagedaemon wants to go to
    148 		 * sleep until it frees more memory.   but how can it
    149 		 * free more memory if it is asleep?  that is a deadlock.
    150 		 * we have two options:
    151 		 *  [1] panic now
    152 		 *  [2] put a timeout on the sleep, thus causing the
    153 		 *      pagedaemon to only pause (rather than sleep forever)
    154 		 *
    155 		 * note that option [2] will only help us if we get lucky
    156 		 * and some other process on the system breaks the deadlock
    157 		 * by exiting or freeing memory (thus allowing the pagedaemon
    158 		 * to continue).  for now we panic if DEBUG is defined,
    159 		 * otherwise we hope for the best with option [2] (better
    160 		 * yet, this should never happen in the first place!).
    161 		 */
    162 
    163 		printf("pagedaemon: deadlock detected!\n");
    164 		timo = hz >> 3;		/* set timeout */
    165 #if defined(DEBUG)
    166 		/* DEBUG: panic so we can debug it */
    167 		panic("pagedaemon deadlock");
    168 #endif
    169 	}
    170 
    171 	uvm_pdinfo.pd_waiters++;
    172 	if (!uvm_pdinfo.pd_stalled)
    173 		wakeup(&uvm.pagedaemon);		/* wake the daemon! */
    174 	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
    175 	uvm_pdinfo.pd_waiters--;
    176 }
    177 
    178 
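/*
 * uvmpd_checkgroup: DEBUG-only sanity check that a group's membership on
 * the paging and pending queues matches its pgrp_paging and
 * pgrp_scan_needed state.
 */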
    179 static void
    180 uvmpd_checkgroup(const struct uvm_pggroup *grp)
    181 {
    182 #ifdef DEBUG
    183 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    184 	bool in_pendingq = false;
    185 	bool in_pagingq = false;
    186 	const struct uvm_pggroup *tstgrp;
    187 
    188 	TAILQ_FOREACH(tstgrp, &pdinfo->pd_pendingq, pgrp_pending_link) {
    189 		if (tstgrp == grp) {
    190 			in_pendingq = true;
    191 			break;
    192 		}
    193 	}
    194 
    195 	TAILQ_FOREACH(tstgrp, &pdinfo->pd_pagingq, pgrp_paging_link) {
    196 		if (tstgrp == grp) {
    197 			in_pagingq = true;
    198 			break;
    199 		}
    200 	}
    201 
    202 	if (grp->pgrp_paging > 0) {
    203 		KASSERT(in_pagingq);
    204 		KASSERT(!in_pendingq);
    205 	} else {
    206 		KASSERT(!in_pagingq);
    207 		KASSERT(in_pendingq == grp->pgrp_scan_needed);
    208 	}
    209 #endif
    210 }
    211 
    212 /*
    213  * uvm_kick_pdaemon: perform checks to determine if we need to
    214  * give the pagedaemon a nudge, and do so if necessary.
    215  *
    216  * => called with uvm_fpageqlock held.
    217  */
    218 
    219 void
    220 uvm_kick_pdaemon(void)
    221 {
    222 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    223 	bool need_wakeup = false;
    224 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);
    225 
    226 	KASSERT(mutex_owned(&uvm_fpageqlock));
    227 
    228 	struct uvm_pggroup *grp;
    229 	STAILQ_FOREACH(grp, &uvm.page_groups, pgrp_uvm_link) {
    230 		const bool prev_scan_needed = grp->pgrp_scan_needed;
    231 
    232 		KASSERT(grp->pgrp_npages > 0);
    233 		uvmpd_checkgroup(grp);
    234 
    235 		grp->pgrp_scan_needed =
    236 		    grp->pgrp_free + grp->pgrp_paging < grp->pgrp_freemin
    237 		    || (grp->pgrp_free + grp->pgrp_paging < grp->pgrp_freetarg
    238 			&& uvmpdpol_needsscan_p(grp));
    239 
    240 		if (prev_scan_needed != grp->pgrp_scan_needed) {
    241 			UVMHIST_LOG(pdhist, " [%zd] %d->%d (scan=%d)",
    242 			    grp - uvm.pggroups, prev_scan_needed,
    243 			    grp->pgrp_scan_needed, uvmpdpol_needsscan_p(grp));
    244 			UVMHIST_LOG(pdhist, " [%zd] %d < min(%d,%d)",
    245 			    grp - uvm.pggroups,
    246 			    grp->pgrp_free + grp->pgrp_paging,
    247 			    grp->pgrp_freemin, grp->pgrp_freetarg);
    248 		}
    249 
    250 		if (prev_scan_needed != grp->pgrp_scan_needed) {
    251 			if (grp->pgrp_scan_needed) {
    252 				struct uvm_pggroup *prev;
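				/*
				 * keep the pending queue sorted by free page
				 * count so the neediest group is scanned first.
				 */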
    253 				TAILQ_FOREACH(prev, &pdinfo->pd_pendingq,
    254 				    pgrp_pending_link) {
    255 					if (grp->pgrp_free < prev->pgrp_free)
    256 						break;
    257 				}
    258 				if (prev == NULL) {
    259 					TAILQ_INSERT_TAIL(&pdinfo->pd_pendingq,
    260 					    grp, pgrp_pending_link);
    261 				} else {
    262 					TAILQ_INSERT_BEFORE(prev, grp,
    263 					    pgrp_pending_link);
    264 				}
    265 				need_wakeup = true;
    266 			} else {
    267 				TAILQ_REMOVE(&pdinfo->pd_pendingq,
    268 				    grp, pgrp_pending_link);
    269 			}
    270 			uvmpd_checkgroup(grp);
    271 		}
    272 	}
    273 
    274 	const bool stalled = pdinfo->pd_stalled;
    275 	if (need_wakeup && !stalled)
    276 		wakeup(&uvm.pagedaemon);
    277 
    278 	UVMHIST_LOG(pdhist, " <- done: wakeup=%d stalled=%d!",
    279 	    need_wakeup, stalled, 0, 0);
    280 }
    281 
    282 /*
    283  * uvmpd_tune: tune paging parameters
    284  *
     285  * => called whenever memory is added to (or removed from?) the system
    286  * => caller must call with page queues locked
    287  */
    288 
    289 static void
    290 uvmpd_tune(void)
    291 {
    292 	u_int extrapages = atomic_swap_uint(&uvm_extrapages, 0) / uvmexp.ncolors;
    293 	u_int freemin = 0;
    294 	u_int freetarg = 0;
    295 	u_int wiredmax = 0;
    296 
    297 	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
    298 
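	/* round up so the extra pages divide evenly among the page groups */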
    299 	extrapages = roundup(extrapages, uvmexp.npggroups);
    300 
    301 	struct uvm_pggroup *grp;
    302 	STAILQ_FOREACH(grp, &uvm.page_groups, pgrp_uvm_link) {
    303 		KASSERT(grp->pgrp_npages > 0);
    304 
    305 		/*
    306 		 * try to keep 0.5% of available RAM free, but limit
    307 		 * to between 128k and 1024k per-CPU.
    308 		 * XXX: what are these values good for?
    309 		 */
    310 		u_int val = grp->pgrp_npages / 200;
    311 		val = MAX(val, (128*1024) >> PAGE_SHIFT);
    312 		val = MIN(val, (1024*1024) >> PAGE_SHIFT);
    313 		val *= ncpu;
    314 
    315 		/* Make sure there's always a user page free. */
    316 		if (val * uvmexp.npggroups <= uvmexp.reserve_kernel)
    317 			val = uvmexp.reserve_kernel / uvmexp.npggroups + 1;
    318 
    319 		grp->pgrp_freemin = val;
    320 
    321 		/* Calculate freetarg. */
    322 		val = (grp->pgrp_freemin * 4) / 3;
    323 		if (val <= grp->pgrp_freemin)
    324 			val = grp->pgrp_freemin + 1;
    325 #ifdef VM_FREELIST_NORMALOK_P
    326 		if (!VM_FREELIST_NORMALOK_P(grp->pgrp_free_list))
    327 			val *= 4;
    328 #endif
    329 		grp->pgrp_freetarg = val + extrapages / uvmexp.npggroups;
    330 		if (grp->pgrp_freetarg > grp->pgrp_npages / 2)
    331 			grp->pgrp_freetarg = grp->pgrp_npages / 2;
    332 
    333 		grp->pgrp_wiredmax = grp->pgrp_npages / 3;
    334 		UVMHIST_LOG(pdhist,
    335 		    "[%zd]: freemin=%d, freetarg=%d, wiredmax=%d",
    336 		    grp - uvm.pggroups, grp->pgrp_freemin, grp->pgrp_freetarg,
    337 		    grp->pgrp_wiredmax);
    338 
    339 		freemin += grp->pgrp_freemin;
    340 		freetarg += grp->pgrp_freetarg;
    341 		wiredmax += grp->pgrp_wiredmax;
    342 	}
    343 
    344 	uvmexp.freemin = freemin;
    345 	uvmexp.freetarg = freetarg;
    346 	uvmexp.wiredmax = wiredmax;
    347 
    348 	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
    349 	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
    350 }
    351 
    352 /*
    353  * uvm_pageout: the main loop for the pagedaemon
    354  */
    355 
    356 void
    357 uvm_pageout(void *arg)
    358 {
    359 	u_int npages = 0;
    360 	u_int extrapages = 0;
    361 	u_int npggroups = 0;
    362 	struct pool *pp;
    363 	uint64_t where;
    364 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    365 	bool progress = true;
    366 	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
    367 
    368 	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
    369 
    370 	/*
    371 	 * ensure correct priority and set paging parameters...
    372 	 */
    373 
    374 	uvm.pagedaemon_lwp = curlwp;
    375 	mutex_enter(&uvm_pageqlock);
    376 	npages = uvmexp.npages;
    377 	uvmpd_tune();
    378 	mutex_exit(&uvm_pageqlock);
    379 
    380 	/*
    381 	 * main loop
    382 	 */
    383 
    384 	for (;;) {
    385 		struct uvm_pggroup *grp;
    386 		bool need_free = false;
    387 		u_int bufcnt = 0;
    388 
    389 		mutex_spin_enter(&uvm_fpageqlock);
    390 		/*
     391 		 * If we made no progress last pass, or no one is waiting
     392 		 * and no page group is pending a scan, then sleep.
    393 		 */
    394 		if (progress == false
    395 		    || (pdinfo->pd_waiters == 0
    396 		        && TAILQ_FIRST(&pdinfo->pd_pendingq) == NULL)) {
    397 			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
    398 			pdinfo->pd_stalled = !progress
    399 			    && pdinfo->pd_waiters > 0;
    400 			int timo = (pdinfo->pd_stalled ? 2 * hz : 0);
    401 			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
    402 			    &uvm_fpageqlock, false, "pgdaemon", timo);
    403 			uvmexp.pdwoke++;
    404 			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
    405 			pdinfo->pd_stalled = false;
    406 			progress = false;
    407 		} else if (TAILQ_FIRST(&pdinfo->pd_pendingq) == NULL) {
    408 			/*
     409 			 * Someone is waiting but no groups are pending.
    410 			 * Let's kick ourselves to find groups that need work.
    411 			 */
    412 			uvm_kick_pdaemon();
    413 			mutex_spin_exit(&uvm_fpageqlock);
    414 		} else {
    415 			mutex_spin_exit(&uvm_fpageqlock);
    416 		}
    417 
    418 		/*
    419 		 * now lock page queues and recompute inactive count
    420 		 */
    421 
    422 		mutex_enter(&uvm_pageqlock);
    423 		mutex_spin_enter(&uvm_fpageqlock);
    424 
    425 		if (npages != uvmexp.npages
    426 		    || extrapages != uvm_extrapages
    427 		    || npggroups != uvmexp.npggroups) {
    428 			npages = uvmexp.npages;
    429 			extrapages = uvm_extrapages;
    430 			npggroups = uvmexp.npggroups;
    431 			uvmpd_tune();
    432 		}
    433 
    434 		/*
     435 		 * Estimate how much buffer memory to release.  Note that bufmem
     436 		 * is returned to the system only when an entire pool page is empty.
    437 		 */
    438 		bool need_wakeup = false;
    439 		while ((grp = TAILQ_FIRST(&pdinfo->pd_pendingq)) != NULL) {
    440 			KASSERT(grp->pgrp_npages > 0);
    441 
    442 			uvmpdpol_tune(grp);
    443 
    444 			/*
    445 			 * While we are locked, remove this from the pendingq.
    446 			 */
    447 			uvmpd_checkgroup(grp);
    448 			KASSERT(grp->pgrp_scan_needed);
    449 			TAILQ_REMOVE(&pdinfo->pd_pendingq, grp,
    450 			    pgrp_pending_link);
    451 			grp->pgrp_scan_needed = false;
    452 			uvmpd_checkgroup(grp);
    453 
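			/*
			 * note how far this group is below its free target;
			 * the running total decides how much buffer cache to
			 * drain below.
			 */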
    454 			int diff = grp->pgrp_freetarg - grp->pgrp_free;
    455 			if (diff < 0)
    456 				diff = 0;
    457 
    458 			bufcnt += diff;
    459 
    460 			UVMHIST_LOG(pdhist," [%zu]: "
    461 			    "free/ftarg/fmin=%u/%u/%u",
    462 			    grp - uvm.pggroups, grp->pgrp_free,
    463 			    grp->pgrp_freetarg, grp->pgrp_freemin);
    464 
    465 
    466 			if (grp->pgrp_paging < diff)
    467 				need_free = true;
    468 
    469 			/*
    470 			 * scan if needed
    471 			 */
    472 			bool local_progress = false;
    473 			if (grp->pgrp_paging < diff
    474 			    || uvmpdpol_needsscan_p(grp)) {
    475 				mutex_spin_exit(&uvm_fpageqlock);
    476 				if (uvmpd_scan(grp)) {
    477 					progress = true;
    478 					local_progress = true;
    479 				}
    480 				mutex_spin_enter(&uvm_fpageqlock);
    481 			} else {
    482 				UVMHIST_LOG(pdhist,
    483 				    " [%zu]: diff/paging=%u/%u: "
    484 				    "scan skipped",
    485 				    grp - uvm.pggroups, diff,
    486 				    grp->pgrp_paging, 0);
    487 			}
    488 
    489 			/*
     490 		 * if this group has free memory beyond the kernel reserve, or
     491 		 * we made progress and it has no pageouts still outstanding,
     492 		 * wake up any waiters.
    493 			 */
    494 			if (grp->pgrp_free * uvmexp.npggroups > uvmexp.reserve_kernel
    495 			    || (local_progress && grp->pgrp_paging == 0)) {
    496 				need_wakeup = true;
    497 			}
    498 
    499 		}
    500 		if (need_wakeup) {
    501 			wakeup(&uvmexp.free);
    502 		}
    503 		KASSERT(!need_free || need_wakeup);
    504 		mutex_spin_exit(&uvm_fpageqlock);
    505 
    506 		/*
    507 		 * scan done.  unlock page queues (the only lock
    508 		 * we are holding)
    509 		 */
    510 		mutex_exit(&uvm_pageqlock);
    511 
    512 		/*
    513 		 * if we don't need free memory, we're done.
    514 		 */
    515 
    516 		if (!need_free)
    517 			continue;
    518 
    519 		/*
    520 		 * start draining pool resources now that we're not
    521 		 * holding any locks.
    522 		 */
    523 		pool_drain_start(&pp, &where);
    524 
    525 		/*
    526 		 * kill unused metadata buffers.
    527 		 */
    528 		if (bufcnt > 0) {
    529 			mutex_enter(&bufcache_lock);
    530 			buf_drain(bufcnt << PAGE_SHIFT);
    531 			mutex_exit(&bufcache_lock);
    532 		}
    533 
    534 		/*
    535 		 * complete draining the pools.
    536 		 */
    537 		pool_drain_end(pp, where);
    538 	}
    539 	/*NOTREACHED*/
    540 }
    541 
    542 
    543 /*
    544  * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
    545  */
    546 
    547 void
    548 uvm_aiodone_worker(struct work *wk, void *dummy)
    549 {
    550 	struct buf *bp = (void *)wk;
    551 
    552 	KASSERT(&bp->b_work == wk);
    553 
    554 	/*
    555 	 * process an i/o that's done.
    556 	 */
    557 
    558 	(*bp->b_iodone)(bp);
    559 }
    560 
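/*
 * uvm_pageout_start: note that a pageout of npages pages has started for
 * this page group.  on its first outstanding pageout the group is taken
 * off the pending queue (if queued) and put on the paging queue.
 */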
    561 void
    562 uvm_pageout_start(struct uvm_pggroup *grp, u_int npages)
    563 {
    564 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    565 
    566 	mutex_spin_enter(&uvm_fpageqlock);
    567 
    568 	uvmpd_checkgroup(grp);
    569 	uvmexp.paging += npages;
    570 	if (grp->pgrp_paging == 0) {
    571 		/*
     572 		 * The group is joining the paging queue, so it must not also be
     573 		 * on the pending queue; remove it from there if it is.
    574 		 */
    575 		if (grp->pgrp_scan_needed) {
    576 			TAILQ_REMOVE(&pdinfo->pd_pendingq, grp,
    577 			    pgrp_pending_link);
    578 			grp->pgrp_scan_needed = false;
    579 		}
    580 		TAILQ_INSERT_TAIL(&pdinfo->pd_pagingq, grp, pgrp_paging_link);
    581 	}
    582 	grp->pgrp_paging += npages;
    583 	uvmpd_checkgroup(grp);
    584 	mutex_spin_exit(&uvm_fpageqlock);
    585 }
    586 
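/*
 * uvm_pageout_done: a single page's pageout has completed.  update the
 * paging counts, drop the group from the paging queue when its last
 * pageout finishes, and wake the pagedaemon or any memory waiters.
 */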
    587 void
    588 uvm_pageout_done(struct vm_page *pg, bool freed)
    589 {
    590 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    591 
    592 	KASSERT(pg->flags & PG_PAGEOUT);
    593 
    594 	mutex_spin_enter(&uvm_fpageqlock);
    595 	struct uvm_pggroup * const grp = uvm_page_to_pggroup(pg);
    596 
    597 	KASSERT(grp->pgrp_paging > 0);
    598 	uvmpd_checkgroup(grp);
    599 	if (--grp->pgrp_paging == 0) {
    600 		TAILQ_REMOVE(&pdinfo->pd_pagingq, grp, pgrp_paging_link);
    601 		uvmpd_checkgroup(grp);
    602 	}
    603 	KASSERT(uvmexp.paging > 0);
    604 	uvmexp.paging--;
    605 	grp->pgrp_pdfreed += freed;
    606 
    607 	/*
    608 	 * Page is no longer being paged out.
    609 	 */
    610 	pg->flags &= ~PG_PAGEOUT;
    611 
    612 	/*
     613 	 * wake up either the pagedaemon or the LWPs waiting for free pages.
    614 	 */
    615 	if (grp->pgrp_free * uvmexp.npggroups <= uvmexp.reserve_kernel) {
    616 		wakeup(&uvm.pagedaemon);
    617 	} else {
    618 		wakeup(&uvmexp.free);
    619 	}
    620 
    621 	mutex_spin_exit(&uvm_fpageqlock);
    622 }
    623 
    624 /*
    625  * uvmpd_trylockowner: trylock the page's owner.
    626  *
    627  * => called with pageq locked.
    628  * => resolve orphaned O->A loaned page.
    629  * => return the locked mutex on success.  otherwise, return NULL.
    630  */
    631 
    632 kmutex_t *
    633 uvmpd_trylockowner(struct vm_page *pg)
    634 {
    635 	struct uvm_object *uobj = pg->uobject;
    636 	kmutex_t *slock;
    637 
    638 	KASSERT(mutex_owned(&uvm_pageqlock));
    639 
    640 	if (uobj != NULL) {
    641 		slock = &uobj->vmobjlock;
    642 	} else {
    643 		struct vm_anon *anon = pg->uanon;
    644 
    645 		KASSERT(anon != NULL);
    646 		slock = &anon->an_lock;
    647 	}
    648 
    649 	if (!mutex_tryenter(slock)) {
    650 		return NULL;
    651 	}
    652 
    653 	if (uobj == NULL) {
    654 
    655 		/*
    656 		 * set PQ_ANON if it isn't set already.
    657 		 */
    658 
    659 		if ((pg->pqflags & PQ_ANON) == 0) {
    660 			KASSERT(pg->loan_count > 0);
    661 			pg->loan_count--;
    662 			pg->pqflags |= PQ_ANON;
    663 			/* anon now owns it */
    664 		}
    665 	}
    666 
    667 	return slock;
    668 }
    669 
    670 #if defined(VMSWAP)
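/*
 * a swapcluster batches dirty swap-backed pages into one contiguous run
 * of swap slots so they can be written with a single uvm_swap_put().
 */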
    671 struct swapcluster {
    672 	int swc_slot;
    673 	int swc_nallocated;
    674 	int swc_nused;
    675 	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
    676 };
    677 
    678 static void
    679 swapcluster_init(struct swapcluster *swc)
    680 {
    681 
    682 	swc->swc_slot = 0;
    683 	swc->swc_nused = 0;
    684 }
    685 
    686 static int
    687 swapcluster_allocslots(struct swapcluster *swc)
    688 {
    689 	int slot;
    690 	int npages;
    691 
    692 	if (swc->swc_slot != 0) {
    693 		return 0;
    694 	}
    695 
    696 	/* Even with strange MAXPHYS, the shift
    697 	   implicitly rounds down to a page. */
    698 	npages = MAXPHYS >> PAGE_SHIFT;
    699 	slot = uvm_swap_alloc(&npages, true);
    700 	if (slot == 0) {
    701 		return ENOMEM;
    702 	}
    703 	swc->swc_slot = slot;
    704 	swc->swc_nallocated = npages;
    705 	swc->swc_nused = 0;
    706 
    707 	return 0;
    708 }
    709 
    710 static int
    711 swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
    712 {
    713 	int slot;
    714 	struct uvm_object *uobj;
    715 
    716 	KASSERT(swc->swc_slot != 0);
    717 	KASSERT(swc->swc_nused < swc->swc_nallocated);
    718 	KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);
    719 
    720 	slot = swc->swc_slot + swc->swc_nused;
    721 	uobj = pg->uobject;
    722 	if (uobj == NULL) {
    723 		KASSERT(mutex_owned(&pg->uanon->an_lock));
    724 		pg->uanon->an_swslot = slot;
    725 	} else {
    726 		int result;
    727 
    728 		KASSERT(mutex_owned(&uobj->vmobjlock));
    729 		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
    730 		if (result == -1) {
    731 			return ENOMEM;
    732 		}
    733 	}
    734 	swc->swc_pages[swc->swc_nused] = pg;
    735 	swc->swc_nused++;
    736 
    737 	return 0;
    738 }
    739 
    740 static void
    741 swapcluster_flush(struct uvm_pggroup *grp, struct swapcluster *swc, bool now)
    742 {
    743 	int slot;
    744 	u_int nused;
    745 	int nallocated;
    746 	int error;
    747 
    748 	if (swc->swc_slot == 0) {
    749 		return;
    750 	}
    751 	KASSERT(swc->swc_nused <= swc->swc_nallocated);
    752 
    753 	slot = swc->swc_slot;
    754 	nused = swc->swc_nused;
    755 	nallocated = swc->swc_nallocated;
    756 
    757 	/*
    758 	 * if this is the final pageout we could have a few
    759 	 * unused swap blocks.  if so, free them now.
    760 	 */
    761 
    762 	if (nused < nallocated) {
    763 		if (!now) {
    764 			return;
    765 		}
    766 		uvm_swap_free(slot + nused, nallocated - nused);
    767 	}
    768 
    769 	/*
    770 	 * now start the pageout.
    771 	 */
    772 
    773 	if (nused > 0) {
    774 		grp->pgrp_pdpageouts++;
    775 		uvmexp.pdpageouts++;	/* procfs */
    776 		uvm_pageout_start(grp, nused);
    777 		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
    778 		KASSERT(error == 0 || error == ENOMEM);
    779 	}
    780 
    781 	/*
    782 	 * zero swslot to indicate that we are
    783 	 * no longer building a swap-backed cluster.
    784 	 */
    785 
    786 	swc->swc_slot = 0;
    787 	swc->swc_nused = 0;
    788 }
    789 
    790 static int
    791 swapcluster_nused(struct swapcluster *swc)
    792 {
    793 
    794 	return swc->swc_nused;
    795 }
    796 
    797 /*
    798  * uvmpd_dropswap: free any swap allocated to this page.
    799  *
    800  * => called with owner locked.
    801  * => return true if a page had an associated slot.
    802  */
    803 
    804 static bool
    805 uvmpd_dropswap(struct vm_page *pg)
    806 {
    807 	bool result = false;
    808 	struct vm_anon *anon = pg->uanon;
    809 
    810 	if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
    811 		uvm_swap_free(anon->an_swslot, 1);
    812 		anon->an_swslot = 0;
    813 		pg->flags &= ~PG_CLEAN;
    814 		result = true;
    815 	} else if (pg->pqflags & PQ_AOBJ) {
    816 		int slot = uao_set_swslot(pg->uobject,
    817 		    pg->offset >> PAGE_SHIFT, 0);
    818 		if (slot) {
    819 			uvm_swap_free(slot, 1);
    820 			pg->flags &= ~PG_CLEAN;
    821 			result = true;
    822 		}
    823 	}
    824 
    825 	return result;
    826 }
    827 
    828 /*
    829  * uvmpd_trydropswap: try to free any swap allocated to this page.
    830  *
    831  * => return true if a slot is successfully freed.
    832  */
    833 
    834 bool
    835 uvmpd_trydropswap(struct vm_page *pg)
    836 {
    837 	kmutex_t *slock;
    838 	bool result;
    839 
    840 	if ((pg->flags & PG_BUSY) != 0) {
    841 		return false;
    842 	}
    843 
    844 	/*
    845 	 * lock the page's owner.
    846 	 */
    847 
    848 	slock = uvmpd_trylockowner(pg);
    849 	if (slock == NULL) {
    850 		return false;
    851 	}
    852 
    853 	/*
    854 	 * skip this page if it's busy.
    855 	 */
    856 
    857 	if ((pg->flags & PG_BUSY) != 0) {
    858 		mutex_exit(slock);
    859 		return false;
    860 	}
    861 
    862 	result = uvmpd_dropswap(pg);
    863 
    864 	mutex_exit(slock);
    865 
    866 	return result;
    867 }
    868 
    869 #endif /* defined(VMSWAP) */
    870 
    871 /*
     872  * uvmpd_scan_queue: scan the replacement-candidate list for pages
    873  * to clean or free.
    874  *
    875  * => called with page queues locked
    876  * => we work on meeting our free target by converting inactive pages
    877  *    into free pages.
    878  * => we handle the building of swap-backed clusters
    879  */
    880 
    881 static void
    882 uvmpd_scan_queue(struct uvm_pggroup *grp)
    883 {
    884 	struct vm_page *pg;
    885 	struct uvm_object *uobj;
    886 	struct vm_anon *anon;
    887 #if defined(VMSWAP)
    888 	struct swapcluster swc;
    889 #endif /* defined(VMSWAP) */
    890 	u_int dirtyreacts;
    891 	u_int lockownerfail;
    892 	u_int victims;
    893 	u_int freed;
    894 	u_int busy;
    895 	kmutex_t *slock;
    896 	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);
    897 
    898 	/*
     899 	 * swc.swc_slot is non-zero while we are building a swap cluster.  we want
    900 	 * to stay in the loop while we have a page to scan or we have
    901 	 * a swap-cluster to build.
    902 	 */
    903 
    904 #if defined(VMSWAP)
    905 	swapcluster_init(&swc);
    906 #endif /* defined(VMSWAP) */
    907 
    908 	dirtyreacts = 0;
    909 	lockownerfail = 0;
    910 	victims = 0;
    911 	freed = 0;
    912 	busy = 0;
    913 	uvmpdpol_scaninit(grp);
    914 
    915 	UVMHIST_LOG(pdhist,"  [%zd]: want free target (%u)",
    916 	    grp - uvm.pggroups, grp->pgrp_freetarg << 2, 0, 0);
    917 	while (/* CONSTCOND */ 1) {
    918 
    919 		/*
    920 		 * see if we've met the free target.
    921 		 */
    922 
    923 		if (grp->pgrp_free + grp->pgrp_paging
    924 #if defined(VMSWAP)
    925 		    + swapcluster_nused(&swc)
    926 #endif /* defined(VMSWAP) */
    927 		    >= grp->pgrp_freetarg << 2 ||
    928 		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
    929 			UVMHIST_LOG(pdhist,"  [%zd]: met free target (%u + %u)"
    930 			    ", dirty reacts %u",
    931 			    grp - uvm.pggroups, grp->pgrp_free,
    932 			    grp->pgrp_paging, dirtyreacts);
    933 			break;
    934 		}
    935 
    936 		pg = uvmpdpol_selectvictim(grp);
    937 		if (pg == NULL) {
    938 			UVMHIST_LOG(pdhist,"  [%zd]: selectvictim didn't",
    939 			    grp - uvm.pggroups, 0, 0, 0);
    940 			break;
    941 		}
    942 		victims++;
    943 		KASSERT(uvmpdpol_pageisqueued_p(pg));
    944 		KASSERT(pg->wire_count == 0);
    945 
    946 		/*
    947 		 * we are below target and have a new page to consider.
    948 		 */
    949 
    950 		anon = pg->uanon;
    951 		uobj = pg->uobject;
    952 
    953 		/*
    954 		 * first we attempt to lock the object that this page
    955 		 * belongs to.  if our attempt fails we skip on to
    956 		 * the next page (no harm done).  it is important to
    957 		 * "try" locking the object as we are locking in the
    958 		 * wrong order (pageq -> object) and we don't want to
    959 		 * deadlock.
    960 		 *
    961 		 * the only time we expect to see an ownerless page
    962 		 * (i.e. a page with no uobject and !PQ_ANON) is if an
    963 		 * anon has loaned a page from a uvm_object and the
    964 		 * uvm_object has dropped the ownership.  in that
    965 		 * case, the anon can "take over" the loaned page
    966 		 * and make it its own.
    967 		 */
    968 
    969 		slock = uvmpd_trylockowner(pg);
    970 		if (slock == NULL) {
    971 			/*
     972 			 * yield the cpu to give the LWP holding the lock a
     973 			 * chance to run.  otherwise we can busy-loop too long
    974 			 * if the page queue is filled with a lot of pages
    975 			 * from few objects.
    976 			 */
    977 			lockownerfail++;
    978 			if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
    979 				mutex_exit(&uvm_pageqlock);
    980 				/* XXX Better than yielding but inadequate. */
    981 				kpause("livelock", false, 1, NULL);
    982 				mutex_enter(&uvm_pageqlock);
    983 				lockownerfail = 0;
    984 			}
    985 			continue;
    986 		}
    987 		if (pg->flags & PG_BUSY) {
    988 			mutex_exit(slock);
    989 			busy++;
    990 			continue;
    991 		}
    992 
    993 		/* does the page belong to an object? */
    994 		if (uobj != NULL) {
    995 			grp->pgrp_pdobscan++;
    996 		} else {
    997 #if defined(VMSWAP)
    998 			KASSERT(anon != NULL);
    999 			grp->pgrp_pdanscan++;
   1000 #else /* defined(VMSWAP) */
   1001 			panic("%s: anon", __func__);
   1002 #endif /* defined(VMSWAP) */
   1003 		}
   1004 
   1005 
   1006 		/*
   1007 		 * we now have the object and the page queues locked.
   1008 		 * if the page is not swap-backed, call the object's
   1009 		 * pager to flush and free the page.
   1010 		 */
   1011 
   1012 #if defined(READAHEAD_STATS)
   1013 		if ((pg->pqflags & PQ_READAHEAD) != 0) {
   1014 			pg->pqflags &= ~PQ_READAHEAD;
   1015 			uvm_ra_miss.ev_count++;
   1016 		}
   1017 #endif /* defined(READAHEAD_STATS) */
   1018 
   1019 		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
   1020 			KASSERT(uobj != NULL);
   1021 			mutex_exit(&uvm_pageqlock);
   1022 			(void) (uobj->pgops->pgo_put)(uobj, pg->offset,
   1023 			    pg->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
   1024 			grp->pgrp_pdputs++;
   1025 			mutex_enter(&uvm_pageqlock);
   1026 			continue;
   1027 		}
   1028 
   1029 		/*
   1030 		 * the page is swap-backed.  remove all the permissions
   1031 		 * from the page so we can sync the modified info
   1032 		 * without any race conditions.  if the page is clean
   1033 		 * we can free it now and continue.
   1034 		 */
   1035 
   1036 		pmap_page_protect(pg, VM_PROT_NONE);
   1037 		if ((pg->flags & PG_CLEAN) && pmap_clear_modify(pg)) {
   1038 			pg->flags &= ~(PG_CLEAN);
   1039 		}
   1040 		if (pg->flags & PG_CLEAN) {
   1041 			int slot;
   1042 			int pageidx;
   1043 
   1044 			pageidx = pg->offset >> PAGE_SHIFT;
   1045 			KASSERT(!uvmpdpol_pageisqueued_p(pg));
   1046 			uvm_pagefree(pg);
   1047 			freed++;
   1048 
   1049 			/*
   1050 			 * for anons, we need to remove the page
   1051 			 * from the anon ourselves.  for aobjs,
   1052 			 * pagefree did that for us.
   1053 			 */
   1054 
   1055 			if (anon) {
   1056 				KASSERT(anon->an_swslot != 0);
   1057 				anon->an_page = NULL;
   1058 				slot = anon->an_swslot;
   1059 			} else {
   1060 				slot = uao_find_swslot(uobj, pageidx);
   1061 			}
   1062 			mutex_exit(slock);
   1063 
   1064 			if (slot > 0) {
   1065 				/* this page is now only in swap. */
   1066 				mutex_enter(&uvm_swap_data_lock);
   1067 				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
   1068 				uvmexp.swpgonly++;
   1069 				mutex_exit(&uvm_swap_data_lock);
   1070 			}
   1071 			continue;
   1072 		}
   1073 
   1074 #if defined(VMSWAP)
   1075 		/*
   1076 		 * this page is dirty, skip it if we'll have met our
   1077 		 * free target when all the current pageouts complete.
   1078 		 */
   1079 
   1080 		if (grp->pgrp_free + grp->pgrp_paging > grp->pgrp_freetarg << 2) {
   1081 			mutex_exit(slock);
   1082 			continue;
   1083 		}
   1084 
   1085 		/*
   1086 		 * free any swap space allocated to the page since
   1087 		 * we'll have to write it again with its new data.
   1088 		 */
   1089 
   1090 		uvmpd_dropswap(pg);
   1091 
   1092 		/*
   1093 		 * start new swap pageout cluster (if necessary).
   1094 		 *
   1095 		 * if swap is full reactivate this page so that
   1096 		 * we eventually cycle all pages through the
   1097 		 * inactive queue.
   1098 		 */
   1099 
   1100 		if (swapcluster_allocslots(&swc)) {
   1101 			dirtyreacts++;
   1102 			uvm_pageactivate(pg);
   1103 			mutex_exit(slock);
   1104 			continue;
   1105 		}
   1106 
   1107 		/*
    1108 		 * at this point, we're definitely going to reuse this
   1109 		 * page.  mark the page busy and delayed-free.
   1110 		 * we should remove the page from the page queues
   1111 		 * so we don't ever look at it again.
   1112 		 * adjust counters and such.
   1113 		 */
   1114 
   1115 		pg->flags |= PG_BUSY;
   1116 		UVM_PAGE_OWN(pg, "scan_queue", NULL);
   1117 
   1118 		pg->flags |= PG_PAGEOUT;
   1119 		uvm_pagedequeue(pg);
   1120 
   1121 		grp->pgrp_pgswapout++;
   1122 		mutex_exit(&uvm_pageqlock);
   1123 
   1124 		/*
   1125 		 * add the new page to the cluster.
   1126 		 */
   1127 
   1128 		if (swapcluster_add(&swc, pg)) {
   1129 			pg->flags &= ~(PG_BUSY|PG_PAGEOUT);
   1130 			UVM_PAGE_OWN(pg, NULL, NULL);
   1131 			mutex_enter(&uvm_pageqlock);
   1132 			dirtyreacts++;
   1133 			uvm_pageactivate(pg);
   1134 			mutex_exit(slock);
   1135 			continue;
   1136 		}
   1137 		mutex_exit(slock);
   1138 
   1139 		swapcluster_flush(grp, &swc, false);
   1140 		mutex_enter(&uvm_pageqlock);
   1141 
   1142 		/*
   1143 		 * the pageout is in progress.  bump counters and set up
   1144 		 * for the next loop.
   1145 		 */
   1146 
   1147 		uvmexp.pdpending++;
   1148 #else /* defined(VMSWAP) */
   1149 		uvm_pageactivate(pg);
   1150 		mutex_exit(slock);
   1151 #endif /* defined(VMSWAP) */
   1152 	}
   1153 
   1154 	UVMHIST_LOG(pdhist,"  [%zd] <-- done: %u victims: %u freed, %u busy",
   1155 	    grp - uvm.pggroups, victims, freed, busy);
   1156 
   1157 	grp->pgrp_pdvictims += victims;
   1158 	grp->pgrp_pdnullscans += (victims == 0);
   1159 	grp->pgrp_pdfreed += freed;
   1160 	grp->pgrp_pdbusy += busy;
   1161 
   1162 #if defined(VMSWAP)
   1163 	mutex_exit(&uvm_pageqlock);
   1164 	swapcluster_flush(grp, &swc, true);
   1165 	mutex_enter(&uvm_pageqlock);
   1166 #endif /* defined(VMSWAP) */
   1167 }
   1168 
   1169 /*
   1170  * uvmpd_scan: scan the page queues and attempt to meet our targets.
   1171  *
   1172  * => called with pageq's locked
   1173  */
   1174 
   1175 static bool
   1176 uvmpd_scan(struct uvm_pggroup *grp)
   1177 {
   1178 	u_int swap_shortage, pages_freed;
   1179 	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
   1180 
   1181 	grp->pgrp_pdrevs++;
   1182 
   1183 	/*
   1184 	 * work on meeting our targets.   first we work on our free target
   1185 	 * by converting inactive pages into free pages.  then we work on
   1186 	 * meeting our inactive target by converting active pages to
   1187 	 * inactive ones.
   1188 	 */
   1189 
   1190 	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);
   1191 
   1192 	pages_freed = grp->pgrp_pdfreed;
   1193 	uvmpd_scan_queue(grp);
   1194 	pages_freed = grp->pgrp_pdfreed - pages_freed;
   1195 
   1196 	/*
   1197 	 * detect if we're not going to be able to page anything out
   1198 	 * until we free some swap resources from active pages.
   1199 	 */
   1200 
   1201 	swap_shortage = 0;
   1202 	if (pages_freed == 0
   1203 	    && grp->pgrp_free < grp->pgrp_freetarg
   1204 	    && uvmexp.swpginuse >= uvmexp.swpgavail
   1205 	    && !uvm_swapisfull()) {
   1206 		swap_shortage = grp->pgrp_freetarg - grp->pgrp_free;
   1207 	}
   1208 
   1209 	uvmpdpol_balancequeue(grp, swap_shortage);
   1210 
   1211 	/*
   1212 	 * swap out some processes if we are still below the minimum
   1213 	 * free target.  we need to unlock the page queues for this.
   1214 	 */
   1215 
   1216 #ifdef VMSWAP
   1217 	if (grp->pgrp_free < grp->pgrp_freemin
   1218 	    && uvmexp.nswapdev != 0 && uvm.swapout_enabled) {
   1219 		grp->pgrp_pdswout++;
   1220 		UVMHIST_LOG(pdhist,"  free %d < min %d: swapout",
    1221 		    grp->pgrp_free, grp->pgrp_freemin, 0, 0);
   1222 		mutex_exit(&uvm_pageqlock);
   1223 		uvm_swapout_threads();
   1224 		mutex_enter(&uvm_pageqlock);
   1225 
   1226 	}
   1227 #endif /* VMSWAP */
   1228 
   1229 	return pages_freed != 0;
   1230 }
   1231 
   1232 /*
   1233  * uvm_reclaimable: decide whether to wait for pagedaemon.
   1234  *
   1235  * => return true if it seems to be worth to do uvm_wait.
   1236  *
   1237  * XXX should be tunable.
   1238  * XXX should consider pools, etc?
   1239  */
   1240 
   1241 bool
   1242 uvm_reclaimable(u_int color, bool kmem_p)
   1243 {
   1244 	KASSERT(color < uvmexp.ncolors);
   1245 
   1246 	/*
   1247 	 * if swap is not full, no problem.
   1248 	 */
   1249 #ifdef VMSWAP
   1250 	if (!uvm_swapisfull()) {
   1251 		KASSERT(uvmexp.nswapdev > 0);
   1252 		return true;
   1253 	}
   1254 #endif
   1255 
   1256 	/*
   1257 	 * file-backed pages can be reclaimed even when swap is full.
    1258 	 * if they exceed min(1/16 of pageable memory, 1/25 of all pages), try to reclaim.
   1259 	 *
   1260 	 * XXX assume the worst case, ie. all wired pages are file-backed.
   1261 	 *
    1262 	 * XXX should consider other reclaimable memory.
   1263 	 * XXX ie. pools, traditional buffer cache.
   1264 	 */
   1265 	u_int active = 0;
   1266 	u_int inactive = 0;
   1267 	u_int filepages = 0;
   1268 	u_int npages = 0;
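	/* sum pageable and file-backed page counts over this color's groups */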
   1269 	for (u_int lcv = 0; lcv < VM_NFREELIST; lcv++) {
   1270 		struct uvm_pggroup * const grp =
   1271 		    uvm.page_free[color].pgfl_pggroups[lcv];
   1272 
   1273 #ifdef VM_FREELIST_NORMALOK_P
   1274 		/*
    1275 		 * If this is for kmem and it's a normal freelist, skip it.
   1276 		 */
   1277 		if (kmem_p && VM_FREELIST_NORMALOK_P(lcv))
   1278 			continue;
   1279 #endif
   1280 
   1281 		npages += grp->pgrp_npages;
   1282 		filepages += grp->pgrp_filepages + grp->pgrp_execpages;
   1283 		uvm_estimatepageable(grp, &active, &inactive);
   1284 	}
   1285 	filepages -= uvmexp.wired;
    1286 	/*
    1287 	 * are there enough file-backed pages to make reclaiming worthwhile?
    1288 	 */
   1289 	if (filepages >= MIN((active + inactive) >> 4, npages / 25)) {
   1290 		return true;
   1291 	}
   1292 
   1293 	/*
   1294 	 * kill the process, fail allocation, etc..
   1295 	 */
   1296 
   1297 	return false;
   1298 }
   1299 
   1300 void
   1301 uvm_estimatepageable(const struct uvm_pggroup *grp,
   1302 	u_int *activep, u_int *inactivep)
   1303 {
   1304 
   1305 	uvmpdpol_estimatepageable(grp, activep, inactivep);
   1306 }
   1307