uvm_pdaemon.c revision 1.93.4.2.4.10
      1  1.93.4.2.4.10      matt /*	$NetBSD: uvm_pdaemon.c,v 1.93.4.2.4.10 2012/04/12 19:41:57 matt Exp $	*/
      2            1.1       mrg 
      3           1.34       chs /*
      4            1.1       mrg  * Copyright (c) 1997 Charles D. Cranor and Washington University.
      5           1.34       chs  * Copyright (c) 1991, 1993, The Regents of the University of California.
      6            1.1       mrg  *
      7            1.1       mrg  * All rights reserved.
      8            1.1       mrg  *
      9            1.1       mrg  * This code is derived from software contributed to Berkeley by
     10            1.1       mrg  * The Mach Operating System project at Carnegie-Mellon University.
     11            1.1       mrg  *
     12            1.1       mrg  * Redistribution and use in source and binary forms, with or without
     13            1.1       mrg  * modification, are permitted provided that the following conditions
     14            1.1       mrg  * are met:
     15            1.1       mrg  * 1. Redistributions of source code must retain the above copyright
     16            1.1       mrg  *    notice, this list of conditions and the following disclaimer.
     17            1.1       mrg  * 2. Redistributions in binary form must reproduce the above copyright
     18            1.1       mrg  *    notice, this list of conditions and the following disclaimer in the
     19            1.1       mrg  *    documentation and/or other materials provided with the distribution.
     20            1.1       mrg  * 3. All advertising materials mentioning features or use of this software
     21            1.1       mrg  *    must display the following acknowledgement:
     22            1.1       mrg  *	This product includes software developed by Charles D. Cranor,
     23           1.34       chs  *      Washington University, the University of California, Berkeley and
     24            1.1       mrg  *      its contributors.
     25            1.1       mrg  * 4. Neither the name of the University nor the names of its contributors
     26            1.1       mrg  *    may be used to endorse or promote products derived from this software
     27            1.1       mrg  *    without specific prior written permission.
     28            1.1       mrg  *
     29            1.1       mrg  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     30            1.1       mrg  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     31            1.1       mrg  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     32            1.1       mrg  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     33            1.1       mrg  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     34            1.1       mrg  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     35            1.1       mrg  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     36            1.1       mrg  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     37            1.1       mrg  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     38            1.1       mrg  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     39            1.1       mrg  * SUCH DAMAGE.
     40            1.1       mrg  *
     41            1.1       mrg  *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
     42            1.4       mrg  * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
     43            1.1       mrg  *
     44            1.1       mrg  *
     45            1.1       mrg  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
     46            1.1       mrg  * All rights reserved.
     47           1.34       chs  *
     48            1.1       mrg  * Permission to use, copy, modify and distribute this software and
     49            1.1       mrg  * its documentation is hereby granted, provided that both the copyright
     50            1.1       mrg  * notice and this permission notice appear in all copies of the
     51            1.1       mrg  * software, derivative works or modified versions, and any portions
     52            1.1       mrg  * thereof, and that both notices appear in supporting documentation.
     53           1.34       chs  *
     54           1.34       chs  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     55           1.34       chs  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     56            1.1       mrg  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     57           1.34       chs  *
     58            1.1       mrg  * Carnegie Mellon requests users of this software to return to
     59            1.1       mrg  *
     60            1.1       mrg  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     61            1.1       mrg  *  School of Computer Science
     62            1.1       mrg  *  Carnegie Mellon University
     63            1.1       mrg  *  Pittsburgh PA 15213-3890
     64            1.1       mrg  *
     65            1.1       mrg  * any improvements or extensions that they make and grant Carnegie the
     66            1.1       mrg  * rights to redistribute these changes.
     67            1.1       mrg  */
     68            1.1       mrg 
     69            1.1       mrg /*
     70            1.1       mrg  * uvm_pdaemon.c: the page daemon
     71            1.1       mrg  */
     72           1.42     lukem 
     73           1.42     lukem #include <sys/cdefs.h>
     74  1.93.4.2.4.10      matt __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.93.4.2.4.10 2012/04/12 19:41:57 matt Exp $");
     75           1.42     lukem 
     76           1.42     lukem #include "opt_uvmhist.h"
     77           1.69      yamt #include "opt_readahead.h"
     78            1.1       mrg 
     79            1.1       mrg #include <sys/param.h>
     80            1.1       mrg #include <sys/proc.h>
     81            1.1       mrg #include <sys/systm.h>
     82            1.1       mrg #include <sys/kernel.h>
     83            1.9        pk #include <sys/pool.h>
     84           1.24       chs #include <sys/buf.h>
     85       1.93.4.2       snj #include <sys/atomic.h>
     86            1.1       mrg 
     87            1.1       mrg #include <uvm/uvm.h>
     88           1.77      yamt #include <uvm/uvm_pdpolicy.h>
     89            1.1       mrg 
     90            1.1       mrg /*
     91           1.45       wiz  * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
     92           1.14       chs  * in a pass thru the inactive list when swap is full.  the value should be
     93           1.14       chs  * "small"... if it's too large we'll cycle the active pages thru the inactive
      94           1.14       chs  * queue too quickly for them to be referenced and avoid being freed.
     95           1.14       chs  */
     96           1.14       chs 
     97           1.89        ad #define	UVMPD_NUMDIRTYREACTS	16
     98           1.14       chs 
     99           1.89        ad #define	UVMPD_NUMTRYLOCKOWNER	16
    100           1.14       chs 
    101           1.14       chs /*
    102            1.1       mrg  * local prototypes
    103            1.1       mrg  */
    104            1.1       mrg 
    105  1.93.4.2.4.10      matt static bool	uvmpd_scan(struct uvm_pggroup *);
    106   1.93.4.2.4.3      matt static void	uvmpd_scan_queue(struct uvm_pggroup *);
    107           1.65   thorpej static void	uvmpd_tune(void);
    108            1.1       mrg 
    109   1.93.4.2.4.4      matt static void	uvmpd_checkgroup(const struct uvm_pggroup *);
    110   1.93.4.2.4.4      matt 
    111   1.93.4.2.4.3      matt static struct uvm_pdinfo {
    112   1.93.4.2.4.3      matt 	unsigned int pd_waiters;
    113   1.93.4.2.4.3      matt 	unsigned int pd_scans_neededs;
    114   1.93.4.2.4.3      matt 	struct uvm_pggrouplist pd_pagingq;
    115   1.93.4.2.4.3      matt 	struct uvm_pggrouplist pd_pendingq;
    116   1.93.4.2.4.3      matt } uvm_pdinfo =  {
    117   1.93.4.2.4.3      matt 	.pd_pagingq = TAILQ_HEAD_INITIALIZER(uvm_pdinfo.pd_pagingq),
    118   1.93.4.2.4.3      matt 	.pd_pendingq = TAILQ_HEAD_INITIALIZER(uvm_pdinfo.pd_pendingq),
    119   1.93.4.2.4.3      matt };
    120           1.89        ad 
    121            1.1       mrg /*
    122           1.61       chs  * XXX hack to avoid hangs when large processes fork.
    123           1.61       chs  */
    124       1.93.4.2       snj u_int uvm_extrapages;
    125           1.61       chs 
    126           1.61       chs /*
    127            1.1       mrg  * uvm_wait: wait (sleep) for the page daemon to free some pages
    128            1.1       mrg  *
    129            1.1       mrg  * => should be called with all locks released
    130            1.1       mrg  * => should _not_ be called by the page daemon (to avoid deadlock)
    131            1.1       mrg  */
    132            1.1       mrg 
    133           1.19   thorpej void
    134           1.65   thorpej uvm_wait(const char *wmsg)
    135            1.8       mrg {
    136            1.8       mrg 	int timo = 0;
    137           1.89        ad 
    138           1.89        ad 	mutex_spin_enter(&uvm_fpageqlock);
    139            1.1       mrg 
    140            1.8       mrg 	/*
    141            1.8       mrg 	 * check for page daemon going to sleep (waiting for itself)
    142            1.8       mrg 	 */
    143            1.1       mrg 
    144           1.86        ad 	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
    145            1.8       mrg 		/*
    146            1.8       mrg 		 * now we have a problem: the pagedaemon wants to go to
    147            1.8       mrg 		 * sleep until it frees more memory.   but how can it
    148            1.8       mrg 		 * free more memory if it is asleep?  that is a deadlock.
    149            1.8       mrg 		 * we have two options:
    150            1.8       mrg 		 *  [1] panic now
    151            1.8       mrg 		 *  [2] put a timeout on the sleep, thus causing the
    152            1.8       mrg 		 *      pagedaemon to only pause (rather than sleep forever)
    153            1.8       mrg 		 *
    154            1.8       mrg 		 * note that option [2] will only help us if we get lucky
    155            1.8       mrg 		 * and some other process on the system breaks the deadlock
    156            1.8       mrg 		 * by exiting or freeing memory (thus allowing the pagedaemon
    157            1.8       mrg 		 * to continue).  for now we panic if DEBUG is defined,
    158            1.8       mrg 		 * otherwise we hope for the best with option [2] (better
    159            1.8       mrg 		 * yet, this should never happen in the first place!).
    160            1.8       mrg 		 */
    161            1.1       mrg 
    162            1.8       mrg 		printf("pagedaemon: deadlock detected!\n");
    163            1.8       mrg 		timo = hz >> 3;		/* set timeout */
    164            1.1       mrg #if defined(DEBUG)
    165            1.8       mrg 		/* DEBUG: panic so we can debug it */
    166            1.8       mrg 		panic("pagedaemon deadlock");
    167            1.1       mrg #endif
    168            1.8       mrg 	}
    169            1.1       mrg 
    170   1.93.4.2.4.3      matt 	uvm_pdinfo.pd_waiters++;
    171           1.17   thorpej 	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
    172           1.89        ad 	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
    173   1.93.4.2.4.7      matt 	uvm_pdinfo.pd_waiters--;
    174            1.1       mrg }
    175            1.1       mrg 
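/*
 * Illustrative caller pattern (not part of this file; the object,
 * offset, and wait-message names are only examples): an allocator
 * that cannot get a page drops its locks, asks the pagedaemon for
 * memory via uvm_wait(), and then retries.
 */
#if 0	/* example only, never compiled */
static struct vm_page *
example_alloc_page(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	mutex_enter(&uobj->vmobjlock);
	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL) {
		/* drop all locks before sleeping, per the contract above */
		mutex_exit(&uobj->vmobjlock);
		uvm_wait("pgwait");
		mutex_enter(&uobj->vmobjlock);
	}
	mutex_exit(&uobj->vmobjlock);
	return pg;
}
#endif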
    176   1.93.4.2.4.4      matt 
    177   1.93.4.2.4.4      matt static void
    178   1.93.4.2.4.4      matt uvmpd_checkgroup(const struct uvm_pggroup *grp)
    179   1.93.4.2.4.4      matt {
    180   1.93.4.2.4.4      matt #ifdef DEBUG
    181   1.93.4.2.4.4      matt 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    182   1.93.4.2.4.4      matt 	bool in_pendingq = false;
    183   1.93.4.2.4.4      matt 	bool in_pagingq = false;
    184   1.93.4.2.4.4      matt 	const struct uvm_pggroup *tstgrp;
    185   1.93.4.2.4.4      matt 
    186   1.93.4.2.4.4      matt 	TAILQ_FOREACH(tstgrp, &pdinfo->pd_pendingq, pgrp_pending_link) {
    187   1.93.4.2.4.4      matt 		if (tstgrp == grp) {
    188   1.93.4.2.4.4      matt 			in_pendingq = true;
    189   1.93.4.2.4.4      matt 			break;
    190   1.93.4.2.4.4      matt 		}
    191   1.93.4.2.4.4      matt 	}
    192   1.93.4.2.4.4      matt 
    193   1.93.4.2.4.4      matt 	TAILQ_FOREACH(tstgrp, &pdinfo->pd_pagingq, pgrp_paging_link) {
    194   1.93.4.2.4.4      matt 		if (tstgrp == grp) {
    195   1.93.4.2.4.4      matt 			in_pagingq = true;
    196   1.93.4.2.4.4      matt 			break;
    197   1.93.4.2.4.4      matt 		}
    198   1.93.4.2.4.4      matt 	}
    199   1.93.4.2.4.4      matt 
    200   1.93.4.2.4.4      matt 	if (grp->pgrp_paging > 0) {
    201   1.93.4.2.4.4      matt 		KASSERT(in_pagingq);
    202   1.93.4.2.4.4      matt 		KASSERT(!in_pendingq);
    203   1.93.4.2.4.4      matt 	} else {
    204   1.93.4.2.4.4      matt 		KASSERT(!in_pagingq);
    205   1.93.4.2.4.4      matt 		KASSERT(in_pendingq == grp->pgrp_scan_needed);
    206   1.93.4.2.4.4      matt 	}
    207   1.93.4.2.4.4      matt #endif
    208   1.93.4.2.4.4      matt }
    209   1.93.4.2.4.4      matt 
    210           1.77      yamt /*
    211           1.77      yamt  * uvm_kick_pdaemon: perform checks to determine if we need to
    212           1.77      yamt  * give the pagedaemon a nudge, and do so if necessary.
    213           1.89        ad  *
    214           1.89        ad  * => called with uvm_fpageqlock held.
    215           1.77      yamt  */
    216           1.77      yamt 
    217           1.77      yamt void
    218           1.77      yamt uvm_kick_pdaemon(void)
    219           1.77      yamt {
    220   1.93.4.2.4.3      matt 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    221   1.93.4.2.4.3      matt 	bool need_wakeup = false;
    222   1.93.4.2.4.3      matt 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);
    223           1.77      yamt 
    224           1.89        ad 	KASSERT(mutex_owned(&uvm_fpageqlock));
    225           1.89        ad 
    226   1.93.4.2.4.3      matt 	struct uvm_pggroup *grp;
    227   1.93.4.2.4.3      matt 	STAILQ_FOREACH(grp, &uvm.page_groups, pgrp_uvm_link) {
    228   1.93.4.2.4.3      matt 		const bool prev_scan_needed = grp->pgrp_scan_needed;
    229   1.93.4.2.4.3      matt 
    230   1.93.4.2.4.3      matt 		KASSERT(grp->pgrp_npages > 0);
    231   1.93.4.2.4.4      matt 		uvmpd_checkgroup(grp);
    232   1.93.4.2.4.3      matt 
    233   1.93.4.2.4.3      matt 		grp->pgrp_scan_needed =
    234   1.93.4.2.4.3      matt 		    grp->pgrp_free + grp->pgrp_paging < grp->pgrp_freemin
    235   1.93.4.2.4.3      matt 		    || (grp->pgrp_free + grp->pgrp_paging < grp->pgrp_freetarg
    236   1.93.4.2.4.3      matt 			&& uvmpdpol_needsscan_p(grp));
    237   1.93.4.2.4.3      matt 
    238   1.93.4.2.4.3      matt 		if (prev_scan_needed != grp->pgrp_scan_needed) {
    239   1.93.4.2.4.3      matt 			UVMHIST_LOG(pdhist, " [%zd] %d->%d (scan=%d)",
    240   1.93.4.2.4.3      matt 			    grp - uvm.pggroups, prev_scan_needed,
    241   1.93.4.2.4.3      matt 			    grp->pgrp_scan_needed, uvmpdpol_needsscan_p(grp));
    242   1.93.4.2.4.3      matt 			UVMHIST_LOG(pdhist, " [%zd] %d < min(%d,%d)",
    243   1.93.4.2.4.3      matt 			    grp - uvm.pggroups,
    244   1.93.4.2.4.3      matt 			    grp->pgrp_free + grp->pgrp_paging,
    245   1.93.4.2.4.3      matt 			    grp->pgrp_freemin, grp->pgrp_freetarg);
    246   1.93.4.2.4.3      matt 		}
    247   1.93.4.2.4.3      matt 
    248   1.93.4.2.4.4      matt 		if (prev_scan_needed != grp->pgrp_scan_needed) {
    249   1.93.4.2.4.3      matt 			if (grp->pgrp_scan_needed) {
    250   1.93.4.2.4.9      matt 				struct uvm_pggroup *prev;
    251   1.93.4.2.4.9      matt 				TAILQ_FOREACH(prev, &pdinfo->pd_pendingq,
    252   1.93.4.2.4.9      matt 				    pgrp_pending_link) {
    253   1.93.4.2.4.9      matt 					if (grp->pgrp_free < prev->pgrp_free)
    254   1.93.4.2.4.9      matt 						break;
    255   1.93.4.2.4.9      matt 				}
    256   1.93.4.2.4.9      matt 				if (prev == NULL) {
    257   1.93.4.2.4.9      matt 					TAILQ_INSERT_TAIL(&pdinfo->pd_pendingq,
    258   1.93.4.2.4.9      matt 					    grp, pgrp_pending_link);
    259   1.93.4.2.4.9      matt 				} else {
    260   1.93.4.2.4.9      matt 					TAILQ_INSERT_BEFORE(prev, grp,
    261   1.93.4.2.4.9      matt 					    pgrp_pending_link);
    262   1.93.4.2.4.9      matt 				}
    263   1.93.4.2.4.3      matt 				need_wakeup = true;
    264   1.93.4.2.4.3      matt 			} else {
    265   1.93.4.2.4.3      matt 				TAILQ_REMOVE(&pdinfo->pd_pendingq,
    266   1.93.4.2.4.4      matt 				    grp, pgrp_pending_link);
    267   1.93.4.2.4.3      matt 			}
    268   1.93.4.2.4.4      matt 			uvmpd_checkgroup(grp);
    269   1.93.4.2.4.3      matt 		}
    270           1.77      yamt 	}
    271   1.93.4.2.4.3      matt 
    272   1.93.4.2.4.3      matt 	if (need_wakeup)
    273   1.93.4.2.4.3      matt 		wakeup(&uvm.pagedaemon);
    274   1.93.4.2.4.3      matt 
    275   1.93.4.2.4.3      matt 	UVMHIST_LOG(pdhist, " <- done: wakeup=%d!",
    276   1.93.4.2.4.4      matt 	    need_wakeup, 0, 0, 0);
    277           1.77      yamt }
    278            1.1       mrg 
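/*
 * Illustrative call site (an assumption, not taken from this file):
 * an allocation path that notices a page group running low nudges
 * the pagedaemon while it still holds uvm_fpageqlock, as the
 * contract above requires.
 */
#if 0	/* example only, never compiled */
static void
example_maybe_kick(struct uvm_pggroup *grp)
{
	mutex_spin_enter(&uvm_fpageqlock);
	if (grp->pgrp_free < grp->pgrp_freetarg)
		uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);
}
#endif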
    279            1.1       mrg /*
    280            1.1       mrg  * uvmpd_tune: tune paging parameters
    281            1.1       mrg  *
     282            1.1       mrg  * => called whenever memory is added to (or removed from?) the system
    283            1.1       mrg  * => caller must call with page queues locked
    284            1.1       mrg  */
    285            1.1       mrg 
    286           1.65   thorpej static void
    287           1.37       chs uvmpd_tune(void)
    288            1.8       mrg {
    289   1.93.4.2.4.3      matt 	u_int extrapages = atomic_swap_uint(&uvm_extrapages, 0) / uvmexp.ncolors;
    290   1.93.4.2.4.3      matt 	u_int freemin = 0;
    291   1.93.4.2.4.3      matt 	u_int freetarg = 0;
    292   1.93.4.2.4.3      matt 	u_int wiredmax = 0;
    293       1.93.4.2       snj 
    294            1.8       mrg 	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
    295            1.1       mrg 
    296   1.93.4.2.4.3      matt 	extrapages = roundup(extrapages, uvmexp.npggroups);
    297   1.93.4.2.4.3      matt 
    298   1.93.4.2.4.3      matt 	struct uvm_pggroup *grp;
    299   1.93.4.2.4.3      matt 	STAILQ_FOREACH(grp, &uvm.page_groups, pgrp_uvm_link) {
    300   1.93.4.2.4.3      matt 		KASSERT(grp->pgrp_npages > 0);
    301   1.93.4.2.4.3      matt 
    302   1.93.4.2.4.3      matt 		/*
    303   1.93.4.2.4.3      matt 		 * try to keep 0.5% of available RAM free, but limit
    304   1.93.4.2.4.3      matt 		 * to between 128k and 1024k per-CPU.
    305   1.93.4.2.4.3      matt 		 * XXX: what are these values good for?
    306   1.93.4.2.4.3      matt 		 */
    307   1.93.4.2.4.3      matt 		u_int val = grp->pgrp_npages / 200;
    308   1.93.4.2.4.3      matt 		val = MAX(val, (128*1024) >> PAGE_SHIFT);
    309   1.93.4.2.4.3      matt 		val = MIN(val, (1024*1024) >> PAGE_SHIFT);
    310   1.93.4.2.4.3      matt 		val *= ncpu;
    311   1.93.4.2.4.3      matt 
    312   1.93.4.2.4.3      matt 		/* Make sure there's always a user page free. */
    313   1.93.4.2.4.3      matt 		if (val * uvmexp.npggroups <= uvmexp.reserve_kernel)
    314   1.93.4.2.4.3      matt 			val = uvmexp.reserve_kernel / uvmexp.npggroups + 1;
    315   1.93.4.2.4.3      matt 
    316   1.93.4.2.4.3      matt 		grp->pgrp_freemin = val;
    317   1.93.4.2.4.3      matt 
    318   1.93.4.2.4.3      matt 		/* Calculate freetarg. */
    319   1.93.4.2.4.3      matt 		val = (grp->pgrp_freemin * 4) / 3;
    320   1.93.4.2.4.3      matt 		if (val <= grp->pgrp_freemin)
    321   1.93.4.2.4.3      matt 			val = grp->pgrp_freemin + 1;
    322   1.93.4.2.4.8      matt #ifdef VM_FREELIST_NORMALOK_P
    323   1.93.4.2.4.8      matt 		if (!VM_FREELIST_NORMALOK_P(grp->pgrp_free_list))
    324   1.93.4.2.4.8      matt 			val *= 4;
    325   1.93.4.2.4.8      matt #endif
    326   1.93.4.2.4.3      matt 		grp->pgrp_freetarg = val + extrapages / uvmexp.npggroups;
    327   1.93.4.2.4.3      matt 		if (grp->pgrp_freetarg > grp->pgrp_npages / 2)
    328   1.93.4.2.4.3      matt 			grp->pgrp_freetarg = grp->pgrp_npages / 2;
    329   1.93.4.2.4.3      matt 
    330   1.93.4.2.4.3      matt 		grp->pgrp_wiredmax = grp->pgrp_npages / 3;
    331   1.93.4.2.4.3      matt 		UVMHIST_LOG(pdhist,
    332   1.93.4.2.4.3      matt 		    "[%zd]: freemin=%d, freetarg=%d, wiredmax=%d",
    333   1.93.4.2.4.3      matt 		    grp - uvm.pggroups, grp->pgrp_freemin, grp->pgrp_freetarg,
    334   1.93.4.2.4.3      matt 		    grp->pgrp_wiredmax);
    335   1.93.4.2.4.3      matt 
    336   1.93.4.2.4.3      matt 		freemin += grp->pgrp_freemin;
    337   1.93.4.2.4.3      matt 		freetarg += grp->pgrp_freetarg;
    338   1.93.4.2.4.3      matt 		wiredmax += grp->pgrp_wiredmax;
    339   1.93.4.2.4.3      matt 	}
    340   1.93.4.2.4.3      matt 
    341   1.93.4.2.4.3      matt 	uvmexp.freemin = freemin;
    342   1.93.4.2.4.3      matt 	uvmexp.freetarg = freetarg;
    343   1.93.4.2.4.3      matt 	uvmexp.wiredmax = wiredmax;
    344           1.61       chs 
    345            1.8       mrg 	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
    346   1.93.4.2.4.3      matt 	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
    347            1.1       mrg }
    348            1.1       mrg 
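/*
 * Worked example of the per-group computation above (numbers are
 * illustrative; assumes 4 KiB pages, no extra pages, and that the
 * reserve_kernel and VM_FREELIST_NORMALOK_P adjustments do not
 * apply): for a group of 262144 pages (1 GiB) on a 2-CPU machine,
 * npages / 200 = 1310 is clamped to the [32, 256] page range,
 * giving 256, then scaled by ncpu to a freemin of 512 pages.
 * freetarg is 4/3 of that, 682 pages, and wiredmax is a third of
 * the group, 87381 pages.
 */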
    349            1.1       mrg /*
    350            1.1       mrg  * uvm_pageout: the main loop for the pagedaemon
    351            1.1       mrg  */
    352            1.1       mrg 
    353            1.8       mrg void
    354           1.80      yamt uvm_pageout(void *arg)
    355            1.8       mrg {
    356   1.93.4.2.4.3      matt 	u_int npages = 0;
    357   1.93.4.2.4.3      matt 	u_int extrapages = 0;
    358   1.93.4.2.4.3      matt 	u_int npggroups = 0;
    359           1.88        ad 	struct pool *pp;
    360           1.88        ad 	uint64_t where;
    361   1.93.4.2.4.3      matt 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    362  1.93.4.2.4.10      matt 	bool progress = true;
    363            1.8       mrg 	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
    364           1.24       chs 
    365            1.8       mrg 	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
    366            1.8       mrg 
    367            1.8       mrg 	/*
    368            1.8       mrg 	 * ensure correct priority and set paging parameters...
    369            1.8       mrg 	 */
    370            1.8       mrg 
    371           1.86        ad 	uvm.pagedaemon_lwp = curlwp;
    372           1.89        ad 	mutex_enter(&uvm_pageqlock);
    373            1.8       mrg 	npages = uvmexp.npages;
    374            1.8       mrg 	uvmpd_tune();
    375           1.89        ad 	mutex_exit(&uvm_pageqlock);
    376            1.8       mrg 
    377            1.8       mrg 	/*
    378            1.8       mrg 	 * main loop
    379            1.8       mrg 	 */
    380           1.24       chs 
    381           1.24       chs 	for (;;) {
    382   1.93.4.2.4.3      matt 		struct uvm_pggroup *grp;
    383   1.93.4.2.4.3      matt 		bool need_free = false;
    384   1.93.4.2.4.3      matt 		u_int bufcnt = 0;
    385           1.24       chs 
    386           1.89        ad 		mutex_spin_enter(&uvm_fpageqlock);
    387   1.93.4.2.4.3      matt 		/*
     388   1.93.4.2.4.3      matt 		 * If we made no progress on the last pass, or no one is
     389   1.93.4.2.4.3      matt 		 * waiting and no page group is pending a scan, then sleep.
    390   1.93.4.2.4.3      matt 		 */
    391  1.93.4.2.4.10      matt 		if (progress == false
    392  1.93.4.2.4.10      matt 		    || (pdinfo->pd_waiters == 0
    393  1.93.4.2.4.10      matt 		        && TAILQ_FIRST(&pdinfo->pd_pendingq) == NULL)) {
    394           1.89        ad 			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
    395  1.93.4.2.4.10      matt 			int timo = 0;
    396  1.93.4.2.4.10      matt 			if (!progress && pdinfo->pd_waiters > 0)
    397  1.93.4.2.4.10      matt 				timo = 2 * hz;
    398           1.89        ad 			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
    399  1.93.4.2.4.10      matt 			    &uvm_fpageqlock, false, "pgdaemon", timo);
    400           1.89        ad 			uvmexp.pdwoke++;
    401           1.89        ad 			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
    402  1.93.4.2.4.10      matt 			progress = false;
    403   1.93.4.2.4.7      matt 		} else if (TAILQ_FIRST(&pdinfo->pd_pendingq) == NULL) {
    404   1.93.4.2.4.7      matt 			/*
     405   1.93.4.2.4.7      matt 			 * Someone is waiting but no groups are pending.
    406   1.93.4.2.4.7      matt 			 * Let's kick ourselves to find groups that need work.
    407   1.93.4.2.4.7      matt 			 */
    408   1.93.4.2.4.7      matt 			uvm_kick_pdaemon();
    409   1.93.4.2.4.7      matt 			mutex_spin_exit(&uvm_fpageqlock);
    410           1.89        ad 		} else {
    411           1.89        ad 			mutex_spin_exit(&uvm_fpageqlock);
    412           1.89        ad 		}
    413           1.24       chs 
    414            1.8       mrg 		/*
    415           1.24       chs 		 * now lock page queues and recompute inactive count
    416            1.8       mrg 		 */
    417            1.8       mrg 
    418           1.89        ad 		mutex_enter(&uvm_pageqlock);
    419   1.93.4.2.4.3      matt 		mutex_spin_enter(&uvm_fpageqlock);
    420   1.93.4.2.4.3      matt 
    421   1.93.4.2.4.3      matt 		if (npages != uvmexp.npages
    422   1.93.4.2.4.3      matt 		    || extrapages != uvm_extrapages
    423   1.93.4.2.4.3      matt 		    || npggroups != uvmexp.npggroups) {
    424           1.24       chs 			npages = uvmexp.npages;
    425           1.61       chs 			extrapages = uvm_extrapages;
    426   1.93.4.2.4.3      matt 			npggroups = uvmexp.npggroups;
    427           1.24       chs 			uvmpd_tune();
    428           1.24       chs 		}
    429           1.24       chs 
    430           1.60     enami 		/*
     431           1.60     enami 		 * Estimate a hint for buf_drain().  Note that bufmem is
     432           1.60     enami 		 * returned to the system only when an entire pool page is empty.
    433           1.60     enami 		 */
    434   1.93.4.2.4.3      matt 		bool need_wakeup = false;
    435   1.93.4.2.4.3      matt 		while ((grp = TAILQ_FIRST(&pdinfo->pd_pendingq)) != NULL) {
    436   1.93.4.2.4.3      matt 			KASSERT(grp->pgrp_npages > 0);
    437           1.60     enami 
    438   1.93.4.2.4.3      matt 			uvmpdpol_tune(grp);
    439            1.8       mrg 
    440   1.93.4.2.4.4      matt 			/*
    441   1.93.4.2.4.4      matt 			 * While we are locked, remove this from the pendingq.
    442   1.93.4.2.4.4      matt 			 */
    443   1.93.4.2.4.4      matt 			uvmpd_checkgroup(grp);
    444   1.93.4.2.4.4      matt 			KASSERT(grp->pgrp_scan_needed);
    445   1.93.4.2.4.4      matt 			TAILQ_REMOVE(&pdinfo->pd_pendingq, grp,
    446   1.93.4.2.4.4      matt 			    pgrp_pending_link);
    447   1.93.4.2.4.4      matt 			grp->pgrp_scan_needed = false;
    448   1.93.4.2.4.4      matt 			uvmpd_checkgroup(grp);
    449   1.93.4.2.4.4      matt 
    450   1.93.4.2.4.3      matt 			int diff = grp->pgrp_freetarg - grp->pgrp_free;
    451   1.93.4.2.4.3      matt 			if (diff < 0)
    452   1.93.4.2.4.3      matt 				diff = 0;
    453           1.89        ad 
    454   1.93.4.2.4.3      matt 			bufcnt += diff;
    455            1.8       mrg 
    456   1.93.4.2.4.3      matt 			UVMHIST_LOG(pdhist," [%zu]: "
    457   1.93.4.2.4.3      matt 			    "free/ftarg/fmin=%u/%u/%u",
    458   1.93.4.2.4.3      matt 			    grp - uvm.pggroups, grp->pgrp_free,
    459   1.93.4.2.4.3      matt 			    grp->pgrp_freetarg, grp->pgrp_freemin);
    460   1.93.4.2.4.3      matt 
    461   1.93.4.2.4.3      matt 
    462   1.93.4.2.4.3      matt 			if (grp->pgrp_paging < diff)
    463   1.93.4.2.4.3      matt 				need_free = true;
    464   1.93.4.2.4.3      matt 
    465   1.93.4.2.4.3      matt 			/*
    466   1.93.4.2.4.3      matt 			 * scan if needed
    467   1.93.4.2.4.3      matt 			 */
    468   1.93.4.2.4.3      matt 			if (grp->pgrp_paging < diff
    469   1.93.4.2.4.3      matt 			    || uvmpdpol_needsscan_p(grp)) {
    470   1.93.4.2.4.3      matt 				mutex_spin_exit(&uvm_fpageqlock);
    471  1.93.4.2.4.10      matt 				if (uvmpd_scan(grp))
    472  1.93.4.2.4.10      matt 					progress = true;
    473   1.93.4.2.4.3      matt 				mutex_spin_enter(&uvm_fpageqlock);
    474   1.93.4.2.4.3      matt 			} else {
    475   1.93.4.2.4.3      matt 				UVMHIST_LOG(pdhist,
    476   1.93.4.2.4.3      matt 				    " [%zu]: diff/paging=%u/%u: "
    477   1.93.4.2.4.3      matt 				    "scan skipped",
    478   1.93.4.2.4.3      matt 				    grp - uvm.pggroups, diff,
    479   1.93.4.2.4.3      matt 				    grp->pgrp_paging, 0);
    480   1.93.4.2.4.3      matt 			}
    481   1.93.4.2.4.3      matt 
    482   1.93.4.2.4.3      matt 			/*
    483   1.93.4.2.4.3      matt 			 * if there's any free memory to be had,
    484   1.93.4.2.4.3      matt 			 * wake up any waiters.
    485   1.93.4.2.4.3      matt 			 */
    486   1.93.4.2.4.3      matt 			if (grp->pgrp_free * uvmexp.npggroups > uvmexp.reserve_kernel
    487   1.93.4.2.4.3      matt 			    || grp->pgrp_paging == 0) {
    488   1.93.4.2.4.3      matt 				need_wakeup = true;
    489   1.93.4.2.4.3      matt 			}
    490   1.93.4.2.4.3      matt 
    491   1.93.4.2.4.3      matt 		}
    492   1.93.4.2.4.3      matt 		if (need_wakeup) {
    493           1.24       chs 			wakeup(&uvmexp.free);
    494            1.8       mrg 		}
     495   1.93.4.2.4.3      matt 		KASSERT(!need_free || need_wakeup);
    496           1.89        ad 		mutex_spin_exit(&uvm_fpageqlock);
    497            1.1       mrg 
    498            1.8       mrg 		/*
    499   1.93.4.2.4.3      matt 		 * scan done.  unlock page queues (the only lock
    500   1.93.4.2.4.3      matt 		 * we are holding)
    501            1.8       mrg 		 */
    502           1.89        ad 		mutex_exit(&uvm_pageqlock);
    503           1.38       chs 
    504           1.88        ad 		/*
    505           1.93        ad 		 * if we don't need free memory, we're done.
    506           1.93        ad 		 */
    507           1.93        ad 
    508   1.93.4.2.4.3      matt 		if (!need_free)
    509           1.93        ad 			continue;
    510           1.93        ad 
    511           1.93        ad 		/*
    512           1.88        ad 		 * start draining pool resources now that we're not
    513           1.88        ad 		 * holding any locks.
    514           1.88        ad 		 */
    515           1.88        ad 		pool_drain_start(&pp, &where);
    516           1.60     enami 
    517           1.38       chs 		/*
    518           1.88        ad 		 * kill unused metadata buffers.
    519           1.38       chs 		 */
    520   1.93.4.2.4.3      matt 		if (bufcnt > 0) {
    521   1.93.4.2.4.3      matt 			mutex_enter(&bufcache_lock);
    522   1.93.4.2.4.3      matt 			buf_drain(bufcnt << PAGE_SHIFT);
    523   1.93.4.2.4.3      matt 			mutex_exit(&bufcache_lock);
    524   1.93.4.2.4.3      matt 		}
    525           1.57  jdolecek 
    526           1.57  jdolecek 		/*
    527           1.88        ad 		 * complete draining the pools.
    528           1.88        ad 		 */
    529           1.88        ad 		pool_drain_end(pp, where);
    530           1.24       chs 	}
    531           1.24       chs 	/*NOTREACHED*/
    532           1.24       chs }
    533           1.24       chs 
    534            1.8       mrg 
    535           1.24       chs /*
    536           1.81      yamt  * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
    537           1.24       chs  */
    538            1.8       mrg 
    539           1.24       chs void
    540           1.81      yamt uvm_aiodone_worker(struct work *wk, void *dummy)
    541           1.24       chs {
    542           1.81      yamt 	struct buf *bp = (void *)wk;
    543            1.9        pk 
    544           1.81      yamt 	KASSERT(&bp->b_work == wk);
    545            1.8       mrg 
    546           1.81      yamt 	/*
    547           1.81      yamt 	 * process an i/o that's done.
    548           1.81      yamt 	 */
    549            1.8       mrg 
    550           1.81      yamt 	(*bp->b_iodone)(bp);
    551           1.89        ad }
    552           1.89        ad 
    553           1.89        ad void
    554   1.93.4.2.4.3      matt uvm_pageout_start(struct uvm_pggroup *grp, u_int npages)
    555           1.89        ad {
    556   1.93.4.2.4.3      matt 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    557           1.89        ad 
    558           1.89        ad 	mutex_spin_enter(&uvm_fpageqlock);
    559   1.93.4.2.4.3      matt 
    560   1.93.4.2.4.4      matt 	uvmpd_checkgroup(grp);
    561   1.93.4.2.4.5      matt 	uvmexp.paging += npages;
    562   1.93.4.2.4.3      matt 	if (grp->pgrp_paging == 0) {
    563   1.93.4.2.4.9      matt 		/*
     564   1.93.4.2.4.9      matt 		 * A group on the paging queue must not also be on the pending
     565   1.93.4.2.4.9      matt 		 * queue, so remove it from the pending queue if it is there.
    566   1.93.4.2.4.9      matt 		 */
    567   1.93.4.2.4.9      matt 		if (grp->pgrp_scan_needed) {
    568   1.93.4.2.4.9      matt 			TAILQ_REMOVE(&pdinfo->pd_pendingq, grp,
    569   1.93.4.2.4.9      matt 			    pgrp_pending_link);
    570   1.93.4.2.4.9      matt 			grp->pgrp_scan_needed = false;
    571   1.93.4.2.4.9      matt 		}
    572   1.93.4.2.4.4      matt 		TAILQ_INSERT_TAIL(&pdinfo->pd_pagingq, grp, pgrp_paging_link);
    573   1.93.4.2.4.3      matt 	}
    574   1.93.4.2.4.3      matt 	grp->pgrp_paging += npages;
    575   1.93.4.2.4.5      matt 	uvmpd_checkgroup(grp);
    576           1.89        ad 	mutex_spin_exit(&uvm_fpageqlock);
    577           1.89        ad }
    578           1.89        ad 
    579           1.89        ad void
    580   1.93.4.2.4.3      matt uvm_pageout_done(struct vm_page *pg, bool freed)
    581           1.89        ad {
    582   1.93.4.2.4.3      matt 	struct uvm_pdinfo * const pdinfo = &uvm_pdinfo;
    583   1.93.4.2.4.3      matt 
    584   1.93.4.2.4.3      matt 	KASSERT(pg->flags & PG_PAGEOUT);
    585           1.89        ad 
    586           1.89        ad 	mutex_spin_enter(&uvm_fpageqlock);
    587   1.93.4.2.4.3      matt 	struct uvm_pggroup * const grp = uvm_page_to_pggroup(pg);
    588   1.93.4.2.4.3      matt 
    589   1.93.4.2.4.3      matt 	KASSERT(grp->pgrp_paging > 0);
    590   1.93.4.2.4.4      matt 	uvmpd_checkgroup(grp);
    591   1.93.4.2.4.3      matt 	if (--grp->pgrp_paging == 0) {
    592   1.93.4.2.4.4      matt 		TAILQ_REMOVE(&pdinfo->pd_pagingq, grp, pgrp_paging_link);
    593   1.93.4.2.4.4      matt 		uvmpd_checkgroup(grp);
    594   1.93.4.2.4.3      matt 	}
    595   1.93.4.2.4.3      matt 	KASSERT(uvmexp.paging > 0);
    596   1.93.4.2.4.3      matt 	uvmexp.paging--;
    597   1.93.4.2.4.3      matt 	grp->pgrp_pdfreed += freed;
    598           1.89        ad 
    599           1.89        ad 	/*
    600   1.93.4.2.4.5      matt 	 * Page is no longer being paged out.
    601   1.93.4.2.4.5      matt 	 */
    602   1.93.4.2.4.5      matt 	pg->flags &= ~PG_PAGEOUT;
    603   1.93.4.2.4.5      matt 
    604   1.93.4.2.4.5      matt 	/*
     605           1.89        ad 	 * wake up either the pagedaemon or any LWPs waiting for free pages.
    606           1.89        ad 	 */
    607   1.93.4.2.4.3      matt 	if (grp->pgrp_free * uvmexp.npggroups <= uvmexp.reserve_kernel) {
    608           1.81      yamt 		wakeup(&uvm.pagedaemon);
    609           1.81      yamt 	} else {
    610           1.81      yamt 		wakeup(&uvmexp.free);
    611            1.8       mrg 	}
    612   1.93.4.2.4.3      matt 
    613           1.89        ad 	mutex_spin_exit(&uvm_fpageqlock);
    614            1.1       mrg }
    615            1.1       mrg 
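/*
 * Sketch of how the two routines above pair up (inferred from their
 * use in this file; the completion path shown is an assumption): a
 * pager accounts for the pages it is about to push out with
 * uvm_pageout_start(), and the i/o completion code later calls
 * uvm_pageout_done() once for each PG_PAGEOUT page, reporting
 * whether the page was freed.
 *
 *	uvm_pageout_start(grp, npages);
 *	error = uvm_swap_put(slot, pages, npages, 0);
 *	... then, from i/o completion, for each page ...
 *	uvm_pageout_done(pg, freed);
 */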
    616           1.76      yamt /*
    617           1.76      yamt  * uvmpd_trylockowner: trylock the page's owner.
    618           1.76      yamt  *
    619           1.76      yamt  * => called with pageq locked.
    620           1.76      yamt  * => resolve orphaned O->A loaned page.
    621           1.89        ad  * => return the locked mutex on success.  otherwise, return NULL.
    622           1.76      yamt  */
    623           1.76      yamt 
    624           1.89        ad kmutex_t *
    625           1.76      yamt uvmpd_trylockowner(struct vm_page *pg)
    626           1.76      yamt {
    627           1.76      yamt 	struct uvm_object *uobj = pg->uobject;
    628           1.89        ad 	kmutex_t *slock;
    629           1.89        ad 
    630           1.89        ad 	KASSERT(mutex_owned(&uvm_pageqlock));
    631           1.76      yamt 
    632           1.76      yamt 	if (uobj != NULL) {
    633           1.76      yamt 		slock = &uobj->vmobjlock;
    634           1.76      yamt 	} else {
    635           1.76      yamt 		struct vm_anon *anon = pg->uanon;
    636           1.76      yamt 
    637           1.76      yamt 		KASSERT(anon != NULL);
    638           1.76      yamt 		slock = &anon->an_lock;
    639           1.76      yamt 	}
    640           1.76      yamt 
    641           1.89        ad 	if (!mutex_tryenter(slock)) {
    642           1.76      yamt 		return NULL;
    643           1.76      yamt 	}
    644           1.76      yamt 
    645           1.76      yamt 	if (uobj == NULL) {
    646           1.76      yamt 
    647           1.76      yamt 		/*
    648           1.76      yamt 		 * set PQ_ANON if it isn't set already.
    649           1.76      yamt 		 */
    650           1.76      yamt 
    651           1.76      yamt 		if ((pg->pqflags & PQ_ANON) == 0) {
    652           1.76      yamt 			KASSERT(pg->loan_count > 0);
    653           1.76      yamt 			pg->loan_count--;
    654           1.76      yamt 			pg->pqflags |= PQ_ANON;
    655           1.76      yamt 			/* anon now owns it */
    656           1.76      yamt 		}
    657           1.76      yamt 	}
    658           1.76      yamt 
    659           1.76      yamt 	return slock;
    660           1.76      yamt }
    661           1.76      yamt 
    662           1.73      yamt #if defined(VMSWAP)
    663           1.73      yamt struct swapcluster {
    664           1.73      yamt 	int swc_slot;
    665           1.73      yamt 	int swc_nallocated;
    666           1.73      yamt 	int swc_nused;
    667           1.75      yamt 	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
    668           1.73      yamt };
    669           1.73      yamt 
    670           1.73      yamt static void
    671           1.73      yamt swapcluster_init(struct swapcluster *swc)
    672           1.73      yamt {
    673           1.73      yamt 
    674           1.73      yamt 	swc->swc_slot = 0;
    675           1.89        ad 	swc->swc_nused = 0;
    676           1.73      yamt }
    677           1.73      yamt 
    678           1.73      yamt static int
    679           1.73      yamt swapcluster_allocslots(struct swapcluster *swc)
    680           1.73      yamt {
    681           1.73      yamt 	int slot;
    682           1.73      yamt 	int npages;
    683           1.73      yamt 
    684           1.73      yamt 	if (swc->swc_slot != 0) {
    685           1.73      yamt 		return 0;
    686           1.73      yamt 	}
    687           1.73      yamt 
    688           1.73      yamt 	/* Even with strange MAXPHYS, the shift
    689           1.73      yamt 	   implicitly rounds down to a page. */
    690           1.73      yamt 	npages = MAXPHYS >> PAGE_SHIFT;
    691           1.84   thorpej 	slot = uvm_swap_alloc(&npages, true);
    692           1.73      yamt 	if (slot == 0) {
    693           1.73      yamt 		return ENOMEM;
    694           1.73      yamt 	}
    695           1.73      yamt 	swc->swc_slot = slot;
    696           1.73      yamt 	swc->swc_nallocated = npages;
    697           1.73      yamt 	swc->swc_nused = 0;
    698           1.73      yamt 
    699           1.73      yamt 	return 0;
    700           1.73      yamt }
    701           1.73      yamt 
    702           1.73      yamt static int
    703           1.73      yamt swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
    704           1.73      yamt {
    705           1.73      yamt 	int slot;
    706           1.73      yamt 	struct uvm_object *uobj;
    707           1.73      yamt 
    708           1.73      yamt 	KASSERT(swc->swc_slot != 0);
    709           1.73      yamt 	KASSERT(swc->swc_nused < swc->swc_nallocated);
    710           1.73      yamt 	KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);
    711           1.73      yamt 
    712           1.73      yamt 	slot = swc->swc_slot + swc->swc_nused;
    713           1.73      yamt 	uobj = pg->uobject;
    714           1.73      yamt 	if (uobj == NULL) {
    715           1.89        ad 		KASSERT(mutex_owned(&pg->uanon->an_lock));
    716           1.73      yamt 		pg->uanon->an_swslot = slot;
    717           1.73      yamt 	} else {
    718           1.73      yamt 		int result;
    719           1.73      yamt 
    720           1.89        ad 		KASSERT(mutex_owned(&uobj->vmobjlock));
    721           1.73      yamt 		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
    722           1.73      yamt 		if (result == -1) {
    723           1.73      yamt 			return ENOMEM;
    724           1.73      yamt 		}
    725           1.73      yamt 	}
    726           1.73      yamt 	swc->swc_pages[swc->swc_nused] = pg;
    727           1.73      yamt 	swc->swc_nused++;
    728           1.73      yamt 
    729           1.73      yamt 	return 0;
    730           1.73      yamt }
    731           1.73      yamt 
    732           1.73      yamt static void
    733   1.93.4.2.4.3      matt swapcluster_flush(struct uvm_pggroup *grp, struct swapcluster *swc, bool now)
    734           1.73      yamt {
    735           1.73      yamt 	int slot;
    736   1.93.4.2.4.3      matt 	u_int nused;
    737           1.73      yamt 	int nallocated;
    738           1.73      yamt 	int error;
    739           1.73      yamt 
    740           1.73      yamt 	if (swc->swc_slot == 0) {
    741           1.73      yamt 		return;
    742           1.73      yamt 	}
    743           1.73      yamt 	KASSERT(swc->swc_nused <= swc->swc_nallocated);
    744           1.73      yamt 
    745           1.73      yamt 	slot = swc->swc_slot;
    746           1.73      yamt 	nused = swc->swc_nused;
    747           1.73      yamt 	nallocated = swc->swc_nallocated;
    748           1.73      yamt 
    749           1.73      yamt 	/*
    750           1.73      yamt 	 * if this is the final pageout we could have a few
    751           1.73      yamt 	 * unused swap blocks.  if so, free them now.
    752           1.73      yamt 	 */
    753           1.73      yamt 
    754           1.73      yamt 	if (nused < nallocated) {
    755           1.73      yamt 		if (!now) {
    756           1.73      yamt 			return;
    757           1.73      yamt 		}
    758           1.73      yamt 		uvm_swap_free(slot + nused, nallocated - nused);
    759           1.73      yamt 	}
    760           1.73      yamt 
    761           1.73      yamt 	/*
    762           1.73      yamt 	 * now start the pageout.
    763           1.73      yamt 	 */
    764           1.73      yamt 
    765           1.91      yamt 	if (nused > 0) {
    766   1.93.4.2.4.3      matt 		grp->pgrp_pdpageouts++;
    767   1.93.4.2.4.3      matt 		uvmexp.pdpageouts++;	/* procfs */
    768   1.93.4.2.4.3      matt 		uvm_pageout_start(grp, nused);
    769           1.91      yamt 		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
    770           1.92      yamt 		KASSERT(error == 0 || error == ENOMEM);
    771           1.91      yamt 	}
    772           1.73      yamt 
    773           1.73      yamt 	/*
    774           1.73      yamt 	 * zero swslot to indicate that we are
    775           1.73      yamt 	 * no longer building a swap-backed cluster.
    776           1.73      yamt 	 */
    777           1.73      yamt 
    778           1.73      yamt 	swc->swc_slot = 0;
    779           1.89        ad 	swc->swc_nused = 0;
    780           1.89        ad }
    781           1.89        ad 
    782           1.89        ad static int
    783           1.89        ad swapcluster_nused(struct swapcluster *swc)
    784           1.89        ad {
    785           1.89        ad 
    786           1.89        ad 	return swc->swc_nused;
    787           1.73      yamt }
    788           1.77      yamt 
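/*
 * Sketch of the swapcluster life cycle (a summary inferred from the
 * routines above; the real driver loop in uvmpd_scan_queue() may
 * differ in detail): slots are allocated lazily, swap-backed pages
 * are accumulated, and the cluster is flushed either when it fills
 * or at the end of the scan.
 *
 *	struct swapcluster swc;
 *
 *	swapcluster_init(&swc);
 *	... for each swap-backed victim page pg, owner locked, PG_BUSY set:
 *		if (swapcluster_allocslots(&swc) != 0)
 *			... no swap space; reactivate the page instead ...
 *		else if (swapcluster_add(&swc, pg) != 0)
 *			... could not record the slot; skip this page ...
 *		swapcluster_flush(grp, &swc, false);  (issues i/o once full)
 *	swapcluster_flush(grp, &swc, true);   (final, possibly partial, flush)
 */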
    789           1.77      yamt /*
    790           1.77      yamt  * uvmpd_dropswap: free any swap allocated to this page.
    791           1.77      yamt  *
    792           1.77      yamt  * => called with owner locked.
    793           1.84   thorpej  * => return true if a page had an associated slot.
    794           1.77      yamt  */
    795           1.77      yamt 
    796           1.83   thorpej static bool
    797           1.77      yamt uvmpd_dropswap(struct vm_page *pg)
    798           1.77      yamt {
    799           1.84   thorpej 	bool result = false;
    800           1.77      yamt 	struct vm_anon *anon = pg->uanon;
    801           1.77      yamt 
    802           1.77      yamt 	if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
    803           1.77      yamt 		uvm_swap_free(anon->an_swslot, 1);
    804           1.77      yamt 		anon->an_swslot = 0;
    805           1.77      yamt 		pg->flags &= ~PG_CLEAN;
    806           1.84   thorpej 		result = true;
    807           1.77      yamt 	} else if (pg->pqflags & PQ_AOBJ) {
    808           1.77      yamt 		int slot = uao_set_swslot(pg->uobject,
    809           1.77      yamt 		    pg->offset >> PAGE_SHIFT, 0);
    810           1.77      yamt 		if (slot) {
    811           1.77      yamt 			uvm_swap_free(slot, 1);
    812           1.77      yamt 			pg->flags &= ~PG_CLEAN;
    813           1.84   thorpej 			result = true;
    814           1.77      yamt 		}
    815           1.77      yamt 	}
    816           1.77      yamt 
    817           1.77      yamt 	return result;
    818           1.77      yamt }
    819           1.77      yamt 
    820           1.77      yamt /*
    821           1.77      yamt  * uvmpd_trydropswap: try to free any swap allocated to this page.
    822           1.77      yamt  *
    823           1.84   thorpej  * => return true if a slot is successfully freed.
    824           1.77      yamt  */
    825           1.77      yamt 
    826           1.83   thorpej bool
    827           1.77      yamt uvmpd_trydropswap(struct vm_page *pg)
    828           1.77      yamt {
    829           1.89        ad 	kmutex_t *slock;
    830           1.83   thorpej 	bool result;
    831           1.77      yamt 
    832           1.77      yamt 	if ((pg->flags & PG_BUSY) != 0) {
    833           1.84   thorpej 		return false;
    834           1.77      yamt 	}
    835           1.77      yamt 
    836           1.77      yamt 	/*
    837           1.77      yamt 	 * lock the page's owner.
    838           1.77      yamt 	 */
    839           1.77      yamt 
    840           1.77      yamt 	slock = uvmpd_trylockowner(pg);
    841           1.77      yamt 	if (slock == NULL) {
    842           1.84   thorpej 		return false;
    843           1.77      yamt 	}
    844           1.77      yamt 
    845           1.77      yamt 	/*
    846           1.77      yamt 	 * skip this page if it's busy.
    847           1.77      yamt 	 */
    848           1.77      yamt 
    849           1.77      yamt 	if ((pg->flags & PG_BUSY) != 0) {
    850           1.89        ad 		mutex_exit(slock);
    851           1.84   thorpej 		return false;
    852           1.77      yamt 	}
    853           1.77      yamt 
    854           1.77      yamt 	result = uvmpd_dropswap(pg);
    855           1.77      yamt 
    856           1.89        ad 	mutex_exit(slock);
    857           1.77      yamt 
    858           1.77      yamt 	return result;
    859           1.77      yamt }
    860           1.77      yamt 
    861           1.73      yamt #endif /* defined(VMSWAP) */
    862           1.73      yamt 
    863            1.1       mrg /*
     864           1.77      yamt  * uvmpd_scan_queue: scan a list of replacement candidates for pages
    865           1.77      yamt  * to clean or free.
    866            1.1       mrg  *
    867            1.1       mrg  * => called with page queues locked
    868            1.1       mrg  * => we work on meeting our free target by converting inactive pages
    869            1.1       mrg  *    into free pages.
    870            1.1       mrg  * => we handle the building of swap-backed clusters
    871            1.1       mrg  */
    872            1.1       mrg 
    873           1.65   thorpej static void
    874   1.93.4.2.4.3      matt uvmpd_scan_queue(struct uvm_pggroup *grp)
    875            1.8       mrg {
    876   1.93.4.2.4.3      matt 	struct vm_page *pg;
    877            1.8       mrg 	struct uvm_object *uobj;
    878           1.37       chs 	struct vm_anon *anon;
    879           1.68      yamt #if defined(VMSWAP)
    880           1.73      yamt 	struct swapcluster swc;
    881           1.68      yamt #endif /* defined(VMSWAP) */
    882   1.93.4.2.4.6      matt 	u_int dirtyreacts;
    883   1.93.4.2.4.6      matt 	u_int lockownerfail;
    884   1.93.4.2.4.6      matt 	u_int victims;
    885   1.93.4.2.4.6      matt 	u_int freed;
    886   1.93.4.2.4.6      matt 	u_int busy;
    887           1.89        ad 	kmutex_t *slock;
    888           1.77      yamt 	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);
    889            1.1       mrg 
    890            1.8       mrg 	/*
     891            1.8       mrg 	 * swc.swc_slot is non-zero if we are building a swap cluster.  we want
    892           1.24       chs 	 * to stay in the loop while we have a page to scan or we have
    893            1.8       mrg 	 * a swap-cluster to build.
    894            1.8       mrg 	 */
    895           1.24       chs 
    896           1.73      yamt #if defined(VMSWAP)
    897           1.73      yamt 	swapcluster_init(&swc);
    898           1.73      yamt #endif /* defined(VMSWAP) */
    899           1.77      yamt 
    900           1.14       chs 	dirtyreacts = 0;
    901           1.89        ad 	lockownerfail = 0;
    902   1.93.4.2.4.6      matt 	victims = 0;
    903   1.93.4.2.4.6      matt 	freed = 0;
    904   1.93.4.2.4.6      matt 	busy = 0;
    905   1.93.4.2.4.3      matt 	uvmpdpol_scaninit(grp);
    906           1.43       chs 
    907   1.93.4.2.4.6      matt 	UVMHIST_LOG(pdhist,"  [%zd]: want free target (%u)",
    908   1.93.4.2.4.8      matt 	    grp - uvm.pggroups, grp->pgrp_freetarg << 2, 0, 0);
    909           1.77      yamt 	while (/* CONSTCOND */ 1) {
    910           1.24       chs 
    911           1.73      yamt 		/*
    912           1.73      yamt 		 * see if we've met the free target.
    913           1.73      yamt 		 */
    914           1.73      yamt 
    915   1.93.4.2.4.3      matt 		if (grp->pgrp_free + grp->pgrp_paging
    916           1.89        ad #if defined(VMSWAP)
    917           1.89        ad 		    + swapcluster_nused(&swc)
    918           1.89        ad #endif /* defined(VMSWAP) */
    919   1.93.4.2.4.3      matt 		    >= grp->pgrp_freetarg << 2 ||
    920           1.73      yamt 		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
    921   1.93.4.2.4.6      matt 			UVMHIST_LOG(pdhist,"  [%zd]: met free target (%u + %u)"
    922   1.93.4.2.4.6      matt 			    ", dirty reacts %u",
    923   1.93.4.2.4.6      matt 			    grp - uvm.pggroups, grp->pgrp_free,
    924   1.93.4.2.4.6      matt 			    grp->pgrp_paging, dirtyreacts);
    925           1.73      yamt 			break;
    926           1.73      yamt 		}
    927           1.24       chs 
    928   1.93.4.2.4.3      matt 		pg = uvmpdpol_selectvictim(grp);
    929   1.93.4.2.4.3      matt 		if (pg == NULL) {
    930   1.93.4.2.4.6      matt 			UVMHIST_LOG(pdhist,"  [%zd]: selectvictim didn't",
    931   1.93.4.2.4.6      matt 			    grp - uvm.pggroups, 0, 0, 0);
    932           1.77      yamt 			break;
    933           1.77      yamt 		}
    934   1.93.4.2.4.6      matt 		victims++;
    935   1.93.4.2.4.3      matt 		KASSERT(uvmpdpol_pageisqueued_p(pg));
    936   1.93.4.2.4.3      matt 		KASSERT(pg->wire_count == 0);
    937           1.77      yamt 
    938           1.73      yamt 		/*
    939           1.73      yamt 		 * we are below target and have a new page to consider.
    940           1.73      yamt 		 */
    941           1.30       chs 
    942   1.93.4.2.4.3      matt 		anon = pg->uanon;
    943   1.93.4.2.4.3      matt 		uobj = pg->uobject;
    944            1.8       mrg 
    945           1.73      yamt 		/*
    946           1.73      yamt 		 * first we attempt to lock the object that this page
    947           1.73      yamt 		 * belongs to.  if our attempt fails we skip on to
    948           1.73      yamt 		 * the next page (no harm done).  it is important to
    949           1.73      yamt 		 * "try" locking the object as we are locking in the
    950           1.73      yamt 		 * wrong order (pageq -> object) and we don't want to
    951           1.73      yamt 		 * deadlock.
    952           1.73      yamt 		 *
    953           1.73      yamt 		 * the only time we expect to see an ownerless page
    954           1.73      yamt 		 * (i.e. a page with no uobject and !PQ_ANON) is if an
    955           1.73      yamt 		 * anon has loaned a page from a uvm_object and the
    956           1.73      yamt 		 * uvm_object has dropped the ownership.  in that
    957           1.73      yamt 		 * case, the anon can "take over" the loaned page
    958           1.73      yamt 		 * and make it its own.
    959           1.73      yamt 		 */
    960           1.30       chs 
    961   1.93.4.2.4.3      matt 		slock = uvmpd_trylockowner(pg);
    962           1.76      yamt 		if (slock == NULL) {
    963           1.89        ad 			/*
     964           1.89        ad 			 * yield the cpu to give an LWP holding the lock a
     965           1.89        ad 			 * chance to run.  otherwise we can busy-loop too long
    966           1.89        ad 			 * if the page queue is filled with a lot of pages
    967           1.89        ad 			 * from few objects.
    968           1.89        ad 			 */
    969           1.89        ad 			lockownerfail++;
    970           1.89        ad 			if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
    971           1.89        ad 				mutex_exit(&uvm_pageqlock);
    972           1.89        ad 				/* XXX Better than yielding but inadequate. */
    973           1.89        ad 				kpause("livelock", false, 1, NULL);
    974           1.89        ad 				mutex_enter(&uvm_pageqlock);
    975           1.89        ad 				lockownerfail = 0;
    976           1.89        ad 			}
    977           1.76      yamt 			continue;
    978           1.76      yamt 		}
    979   1.93.4.2.4.3      matt 		if (pg->flags & PG_BUSY) {
    980           1.89        ad 			mutex_exit(slock);
    981   1.93.4.2.4.6      matt 			busy++;
    982           1.76      yamt 			continue;
    983           1.76      yamt 		}
    984           1.76      yamt 
    985           1.73      yamt 		/* does the page belong to an object? */
    986           1.73      yamt 		if (uobj != NULL) {
    987   1.93.4.2.4.3      matt 			grp->pgrp_pdobscan++;
    988           1.73      yamt 		} else {
    989           1.73      yamt #if defined(VMSWAP)
    990           1.73      yamt 			KASSERT(anon != NULL);
    991   1.93.4.2.4.3      matt 			grp->pgrp_pdanscan++;
    992           1.68      yamt #else /* defined(VMSWAP) */
    993           1.73      yamt 			panic("%s: anon", __func__);
    994           1.68      yamt #endif /* defined(VMSWAP) */
    995           1.73      yamt 		}
    996            1.8       mrg 
    998           1.73      yamt 		/*
    999           1.73      yamt 		 * we now have the object and the page queues locked.
   1000           1.73      yamt 		 * if the page is not swap-backed, call the object's
   1001           1.73      yamt 		 * pager to flush and free the page.
   1002           1.73      yamt 		 */
   1003           1.37       chs 
   1004           1.69      yamt #if defined(READAHEAD_STATS)
   1005   1.93.4.2.4.3      matt 		if ((pg->pqflags & PQ_READAHEAD) != 0) {
   1006   1.93.4.2.4.3      matt 			pg->pqflags &= ~PQ_READAHEAD;
   1007           1.73      yamt 			uvm_ra_miss.ev_count++;
   1008           1.73      yamt 		}
   1009           1.69      yamt #endif /* defined(READAHEAD_STATS) */
   1010           1.69      yamt 
   1011   1.93.4.2.4.3      matt 		if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
   1012           1.82       alc 			KASSERT(uobj != NULL);
   1013           1.89        ad 			mutex_exit(&uvm_pageqlock);
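                                  			/*
                                  			 * note: pgo_put is entered with the object locked and
                                  			 * consumes that lock, which is why slock is not
                                  			 * released explicitly on this path.
                                  			 */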
   1014   1.93.4.2.4.3      matt 			(void) (uobj->pgops->pgo_put)(uobj, pg->offset,
   1015   1.93.4.2.4.3      matt 			    pg->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
   1016   1.93.4.2.4.6      matt 			grp->pgrp_pdputs++;
   1017           1.89        ad 			mutex_enter(&uvm_pageqlock);
   1018           1.73      yamt 			continue;
   1019           1.73      yamt 		}
   1020           1.37       chs 
   1021           1.73      yamt 		/*
   1022           1.73      yamt 		 * the page is swap-backed.  remove all the permissions
   1023           1.73      yamt 		 * from the page so we can sync the modified info
   1024           1.73      yamt 		 * without any race conditions.  if the page is clean
   1025           1.73      yamt 		 * we can free it now and continue.
   1026           1.73      yamt 		 */
   1027            1.8       mrg 
   1028   1.93.4.2.4.3      matt 		pmap_page_protect(pg, VM_PROT_NONE);
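                                  		/*
                                  		 * pmap_clear_modify() also reports whether the modified bit
                                  		 * was set; if it was, PG_CLEAN is stale and is cleared so
                                  		 * the page is treated as dirty below.
                                  		 */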
   1029   1.93.4.2.4.3      matt 		if ((pg->flags & PG_CLEAN) && pmap_clear_modify(pg)) {
   1030   1.93.4.2.4.3      matt 			pg->flags &= ~(PG_CLEAN);
   1031           1.73      yamt 		}
   1032   1.93.4.2.4.3      matt 		if (pg->flags & PG_CLEAN) {
   1033           1.73      yamt 			int slot;
   1034           1.73      yamt 			int pageidx;
   1035           1.73      yamt 
   1036   1.93.4.2.4.3      matt 			pageidx = pg->offset >> PAGE_SHIFT;
   1037   1.93.4.2.4.3      matt 			KASSERT(!uvmpdpol_pageisqueued_p(pg));
   1038   1.93.4.2.4.3      matt 			uvm_pagefree(pg);
   1039   1.93.4.2.4.6      matt 			freed++;
   1040            1.8       mrg 
   1041            1.8       mrg 			/*
   1042           1.73      yamt 			 * for anons, we need to remove the page
   1043           1.73      yamt 			 * from the anon ourselves.  for aobjs,
   1044           1.73      yamt 			 * pagefree did that for us.
   1045            1.8       mrg 			 */
   1046           1.24       chs 
   1047           1.73      yamt 			if (anon) {
   1048           1.73      yamt 				KASSERT(anon->an_swslot != 0);
   1049           1.73      yamt 				anon->an_page = NULL;
   1050           1.73      yamt 				slot = anon->an_swslot;
   1051           1.73      yamt 			} else {
   1052           1.73      yamt 				slot = uao_find_swslot(uobj, pageidx);
   1053            1.8       mrg 			}
   1054           1.89        ad 			mutex_exit(slock);
   1055            1.8       mrg 
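                                  			/*
                                  			 * (swpgonly counts swap slots whose data now lives only
                                  			 * in swap, i.e. with no resident copy; uvm_swapisfull()
                                  			 * bases its decision on this counter.)
                                  			 */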
   1056           1.73      yamt 			if (slot > 0) {
   1057           1.73      yamt 				/* this page is now only in swap. */
   1058           1.87        ad 				mutex_enter(&uvm_swap_data_lock);
   1059           1.73      yamt 				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
   1060           1.73      yamt 				uvmexp.swpgonly++;
   1061           1.87        ad 				mutex_exit(&uvm_swap_data_lock);
   1062           1.37       chs 			}
   1063           1.73      yamt 			continue;
   1064           1.73      yamt 		}
   1065           1.37       chs 
   1066           1.77      yamt #if defined(VMSWAP)
   1067           1.73      yamt 		/*
    1068           1.73      yamt 		 * this page is dirty.  skip it if, once all the current pageouts
    1069           1.73      yamt 		 * complete, we'd already be at four times our free target.
   1070           1.73      yamt 		 */
   1071           1.24       chs 
   1072   1.93.4.2.4.3      matt 		if (grp->pgrp_free + grp->pgrp_paging > grp->pgrp_freetarg << 2) {
   1073           1.89        ad 			mutex_exit(slock);
   1074           1.73      yamt 			continue;
   1075           1.73      yamt 		}
   1076           1.14       chs 
   1077           1.73      yamt 		/*
   1078           1.73      yamt 		 * free any swap space allocated to the page since
   1079           1.73      yamt 		 * we'll have to write it again with its new data.
   1080           1.73      yamt 		 */
   1081           1.24       chs 
   1082   1.93.4.2.4.3      matt 		uvmpd_dropswap(pg);
   1083           1.14       chs 
   1084           1.73      yamt 		/*
   1085           1.73      yamt 		 * start new swap pageout cluster (if necessary).
   1086       1.93.4.1       snj 		 *
   1087       1.93.4.1       snj 		 * if swap is full reactivate this page so that
   1088       1.93.4.1       snj 		 * we eventually cycle all pages through the
   1089       1.93.4.1       snj 		 * inactive queue.
   1090            1.8       mrg 		 */
   1091           1.24       chs 
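                                  		/*
                                  		 * (illustrative sketch, not compiled: the swapcluster life
                                  		 * cycle as used by this function, assuming swapcluster_init()
                                  		 * was called on &swc before the scan loop.)
                                  		 */
                                  #if 0
                                  		if (swapcluster_allocslots(&swc) != 0) {
                                  			/* no swap slots left: reactivate the page instead */
                                  		} else if (swapcluster_add(&swc, pg) != 0) {
                                  			/* the cluster refused the page: reactivate it */
                                  		} else {
                                  			/* flushes only once the cluster is actually full */
                                  			swapcluster_flush(grp, &swc, false);
                                  		}
                                  		/* ... and once the whole scan is done: */
                                  		swapcluster_flush(grp, &swc, true);
                                  #endif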
   1092           1.73      yamt 		if (swapcluster_allocslots(&swc)) {
   1093       1.93.4.1       snj 			dirtyreacts++;
   1094   1.93.4.2.4.3      matt 			uvm_pageactivate(pg);
   1095           1.89        ad 			mutex_exit(slock);
   1096           1.73      yamt 			continue;
   1097            1.8       mrg 		}
   1098            1.8       mrg 
   1099            1.8       mrg 		/*
    1100           1.73      yamt 		 * at this point, we're definitely going to reuse this
   1101           1.73      yamt 		 * page.  mark the page busy and delayed-free.
   1102           1.73      yamt 		 * we should remove the page from the page queues
   1103           1.73      yamt 		 * so we don't ever look at it again.
   1104           1.73      yamt 		 * adjust counters and such.
   1105            1.8       mrg 		 */
   1106            1.8       mrg 
   1107   1.93.4.2.4.3      matt 		pg->flags |= PG_BUSY;
   1108   1.93.4.2.4.8      matt 		UVM_PAGE_OWN(pg, "scan_queue", NULL);
   1109           1.73      yamt 
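                                  		/*
                                  		 * PG_PAGEOUT tells the i/o completion path to free the page
                                  		 * once the write finishes instead of just unbusying it.
                                  		 */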
   1110   1.93.4.2.4.3      matt 		pg->flags |= PG_PAGEOUT;
   1111   1.93.4.2.4.3      matt 		uvm_pagedequeue(pg);
   1112           1.73      yamt 
   1113   1.93.4.2.4.3      matt 		grp->pgrp_pgswapout++;
   1114           1.89        ad 		mutex_exit(&uvm_pageqlock);
   1115            1.8       mrg 
   1116            1.8       mrg 		/*
   1117           1.73      yamt 		 * add the new page to the cluster.
   1118            1.8       mrg 		 */
   1119            1.8       mrg 
   1120   1.93.4.2.4.3      matt 		if (swapcluster_add(&swc, pg)) {
   1121   1.93.4.2.4.3      matt 			pg->flags &= ~(PG_BUSY|PG_PAGEOUT);
   1122   1.93.4.2.4.8      matt 			UVM_PAGE_OWN(pg, NULL, NULL);
   1123           1.89        ad 			mutex_enter(&uvm_pageqlock);
   1124           1.77      yamt 			dirtyreacts++;
   1125   1.93.4.2.4.3      matt 			uvm_pageactivate(pg);
   1126           1.89        ad 			mutex_exit(slock);
   1127           1.73      yamt 			continue;
   1128           1.73      yamt 		}
   1129           1.89        ad 		mutex_exit(slock);
   1130           1.73      yamt 
   1131   1.93.4.2.4.3      matt 		swapcluster_flush(grp, &swc, false);
   1132           1.89        ad 		mutex_enter(&uvm_pageqlock);
   1133           1.73      yamt 
   1134            1.8       mrg 		/*
   1135           1.31       chs 		 * the pageout is in progress.  bump counters and set up
   1136           1.31       chs 		 * for the next loop.
   1137            1.8       mrg 		 */
   1138            1.8       mrg 
   1139           1.31       chs 		uvmexp.pdpending++;
   1140           1.77      yamt #else /* defined(VMSWAP) */
   1141   1.93.4.2.4.3      matt 		uvm_pageactivate(pg);
   1142           1.89        ad 		mutex_exit(slock);
   1143           1.77      yamt #endif /* defined(VMSWAP) */
   1144           1.73      yamt 	}
   1145           1.73      yamt 
    1146   1.93.4.2.4.6      matt 	UVMHIST_LOG(pdhist,"  [%zd] <-- done: %u victims, %u freed, %u busy",
   1147   1.93.4.2.4.6      matt 	    grp - uvm.pggroups, victims, freed, busy);
   1148   1.93.4.2.4.6      matt 
   1149   1.93.4.2.4.6      matt 	grp->pgrp_pdvictims += victims;
   1150   1.93.4.2.4.6      matt 	grp->pgrp_pdnullscans += (victims == 0);
   1151   1.93.4.2.4.6      matt 	grp->pgrp_pdfreed += freed;
   1152   1.93.4.2.4.6      matt 	grp->pgrp_pdbusy += busy;
   1153   1.93.4.2.4.6      matt 
   1154           1.73      yamt #if defined(VMSWAP)
   1155           1.89        ad 	mutex_exit(&uvm_pageqlock);
   1156   1.93.4.2.4.3      matt 	swapcluster_flush(grp, &swc, true);
   1157           1.89        ad 	mutex_enter(&uvm_pageqlock);
   1158           1.68      yamt #endif /* defined(VMSWAP) */
   1159            1.1       mrg }
   1160            1.1       mrg 
   1161            1.1       mrg /*
   1162            1.1       mrg  * uvmpd_scan: scan the page queues and attempt to meet our targets.
   1163            1.1       mrg  *
    1164            1.1       mrg  * => called with pageq's locked
                                   * => return true if this pass freed any pages
    1165            1.1       mrg  */
   1166            1.1       mrg 
   1167  1.93.4.2.4.10      matt static bool
   1168   1.93.4.2.4.3      matt uvmpd_scan(struct uvm_pggroup *grp)
   1169            1.1       mrg {
   1170   1.93.4.2.4.3      matt 	u_int swap_shortage, pages_freed;
   1171            1.8       mrg 	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
   1172            1.1       mrg 
   1173   1.93.4.2.4.3      matt 	grp->pgrp_pdrevs++;
   1174            1.1       mrg 
   1175            1.8       mrg 	/*
   1176           1.93        ad 	 * work on meeting our targets.   first we work on our free target
   1177           1.93        ad 	 * by converting inactive pages into free pages.  then we work on
   1178           1.93        ad 	 * meeting our inactive target by converting active pages to
   1179           1.93        ad 	 * inactive ones.
   1180            1.8       mrg 	 */
   1181            1.8       mrg 
   1182            1.8       mrg 	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);
   1183            1.8       mrg 
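                                  	/*
                                  	 * pgrp_pdfreed is a running total, so the difference computed
                                  	 * below is the number of pages freed by this pass alone.
                                  	 */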
   1184   1.93.4.2.4.3      matt 	pages_freed = grp->pgrp_pdfreed;
   1185   1.93.4.2.4.3      matt 	uvmpd_scan_queue(grp);
   1186   1.93.4.2.4.3      matt 	pages_freed = grp->pgrp_pdfreed - pages_freed;
   1187            1.8       mrg 
   1188            1.8       mrg 	/*
   1189           1.14       chs 	 * detect if we're not going to be able to page anything out
   1190           1.14       chs 	 * until we free some swap resources from active pages.
   1191           1.14       chs 	 */
   1192           1.24       chs 
   1193           1.14       chs 	swap_shortage = 0;
   1194  1.93.4.2.4.10      matt 	if (pages_freed == 0
   1195  1.93.4.2.4.10      matt 	    && grp->pgrp_free < grp->pgrp_freetarg
   1196  1.93.4.2.4.10      matt 	    && uvmexp.swpginuse >= uvmexp.swpgavail
   1197  1.93.4.2.4.10      matt 	    && !uvm_swapisfull()) {
   1198   1.93.4.2.4.3      matt 		swap_shortage = grp->pgrp_freetarg - grp->pgrp_free;
   1199           1.14       chs 	}
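                                  	/*
                                  	 * (swap_shortage asks the pdpolicy code to recover roughly that
                                  	 * many swap slots from active pages while it rebalances the
                                  	 * queues below; a sketch of the expected contract.)
                                  	 */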
   1200           1.24       chs 
   1201   1.93.4.2.4.3      matt 	uvmpdpol_balancequeue(grp, swap_shortage);
   1202           1.93        ad 
   1203           1.93        ad 	/*
   1204           1.93        ad 	 * swap out some processes if we are still below the minimum
   1205           1.93        ad 	 * free target.  we need to unlock the page queues for this.
   1206           1.93        ad 	 */
   1207           1.93        ad 
   1208  1.93.4.2.4.10      matt #ifdef VMSWAP
   1209   1.93.4.2.4.3      matt 	if (grp->pgrp_free < grp->pgrp_freemin
   1210   1.93.4.2.4.3      matt 	    && uvmexp.nswapdev != 0 && uvm.swapout_enabled) {
   1211   1.93.4.2.4.3      matt 		grp->pgrp_pdswout++;
   1212           1.93        ad 		UVMHIST_LOG(pdhist,"  free %d < min %d: swapout",
    1213           1.93        ad 		    grp->pgrp_free, grp->pgrp_freemin, 0, 0);
   1214           1.93        ad 		mutex_exit(&uvm_pageqlock);
   1215           1.93        ad 		uvm_swapout_threads();
   1216           1.93        ad 		mutex_enter(&uvm_pageqlock);
   1217           1.93        ad 
   1218           1.93        ad 	}
   1219  1.93.4.2.4.10      matt #endif /* VMSWAP */
   1220  1.93.4.2.4.10      matt 
   1221  1.93.4.2.4.10      matt 	return pages_freed != 0;
   1222            1.1       mrg }
   1223           1.62      yamt 
   1224           1.62      yamt /*
   1225           1.62      yamt  * uvm_reclaimable: decide whether to wait for pagedaemon.
   1226           1.62      yamt  *
   1227           1.84   thorpej  * => return true if it seems worthwhile to do uvm_wait.
   1228           1.62      yamt  *
   1229           1.62      yamt  * XXX should be tunable.
   1230           1.62      yamt  * XXX should consider pools, etc?
   1231           1.62      yamt  */
   1232           1.62      yamt 
   1233           1.83   thorpej bool
   1234   1.93.4.2.4.9      matt uvm_reclaimable(u_int color, bool kmem_p)
   1235           1.62      yamt {
   1236   1.93.4.2.4.9      matt 	u_int filepages, npages;
   1237   1.93.4.2.4.9      matt 	u_int active, inactive;
   1238           1.62      yamt 
   1239           1.62      yamt 	/*
   1240           1.62      yamt 	 * if swap is not full, no problem.
   1241           1.62      yamt 	 */
   1242           1.62      yamt 
   1243   1.93.4.2.4.9      matt #ifdef VMSWAP
   1244           1.62      yamt 	if (!uvm_swapisfull()) {
   1245           1.84   thorpej 		return true;
   1246           1.62      yamt 	}
   1247   1.93.4.2.4.9      matt #endif
   1248           1.62      yamt 
   1249           1.62      yamt 	/*
   1250           1.62      yamt 	 * file-backed pages can be reclaimed even when swap is full.
    1251           1.62      yamt 	 * if we have more than MIN(1/16 of pageable memory, 1/25 of the pages), try to reclaim.
   1252           1.62      yamt 	 *
   1253           1.62      yamt 	 * XXX assume the worst case, ie. all wired pages are file-backed.
   1254           1.63      yamt 	 *
    1255           1.63      yamt 	 * XXX should consider other reclaimable memory,
    1256           1.63      yamt 	 * XXX ie. pools and the traditional buffer cache.
   1257           1.62      yamt 	 */
   1258   1.93.4.2.4.9      matt 	active = 0;
   1259   1.93.4.2.4.9      matt 	inactive = 0;
   1260   1.93.4.2.4.9      matt 	filepages = 0;
   1261   1.93.4.2.4.9      matt 	npages = 0;
   1262   1.93.4.2.4.9      matt 	for (u_int lcv = 0; lcv < VM_NFREELIST; lcv++) {
   1263   1.93.4.2.4.9      matt 		struct uvm_pggroup * const grp =
   1264   1.93.4.2.4.9      matt 		    uvm.page_free[color].pgfl_pggroups[lcv];
   1265           1.62      yamt 
   1266   1.93.4.2.4.9      matt #ifdef VM_FREELIST_NORMALOK_P
   1267   1.93.4.2.4.9      matt 		/*
   1268   1.93.4.2.4.9      matt 		 * If this is for kmem and it's a normal freelist, skip it.
   1269   1.93.4.2.4.9      matt 		 */
   1270   1.93.4.2.4.9      matt 		if (kmem_p && VM_FREELIST_NORMALOK_P(lcv))
   1271   1.93.4.2.4.9      matt 			continue;
   1272   1.93.4.2.4.9      matt #endif
   1273   1.93.4.2.4.9      matt 
   1274   1.93.4.2.4.9      matt 		npages += grp->pgrp_npages;
   1275   1.93.4.2.4.9      matt 		filepages += grp->pgrp_filepages + grp->pgrp_execpages;
   1276   1.93.4.2.4.9      matt 		uvm_estimatepageable(grp, &active, &inactive);
   1277   1.93.4.2.4.9      matt 	}
   1278   1.93.4.2.4.9      matt 	filepages -= uvmexp.wired;
   1279   1.93.4.2.4.9      matt 	/*
   1280   1.93.4.2.4.9      matt 	 * do the (worst-case) reclaimable file pages exceed the threshold?
   1281   1.93.4.2.4.9      matt 	 */
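                                  	/*
                                  	 * (worked example, purely illustrative: with npages = 50000 and
                                  	 * active + inactive = 40000, the threshold below is
                                  	 * MIN(40000 >> 4, 50000 / 25) = MIN(2500, 2000) = 2000 pages.)
                                  	 */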
   1282   1.93.4.2.4.9      matt 	if (filepages >= MIN((active + inactive) >> 4, npages / 25)) {
   1283           1.84   thorpej 		return true;
   1284           1.62      yamt 	}
   1285           1.62      yamt 
   1286           1.62      yamt 	/*
   1287           1.62      yamt 	 * kill the process, fail allocation, etc..
   1288           1.62      yamt 	 */
   1289           1.62      yamt 
   1290           1.84   thorpej 	return false;
   1291           1.62      yamt }
   1292           1.77      yamt 
   1293           1.77      yamt void
   1294   1.93.4.2.4.9      matt uvm_estimatepageable(const struct uvm_pggroup *grp,
   1295   1.93.4.2.4.9      matt 	u_int *active, u_int *inactive)
   1296           1.77      yamt {
   1297           1.77      yamt 
   1298   1.93.4.2.4.9      matt 	uvmpdpol_estimatepageable(grp, active, inactive);
   1299           1.77      yamt }
   1300