/*	$NetBSD: uvm_pdaemon.c,v 1.4 1998/02/07 11:09:33 mrg Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

UVMHIST_DECL(pdhist);

/*
 * local prototypes
 */

static void		uvmpd_scan __P((void));
static boolean_t	uvmpd_scan_inactive __P((struct pglist *));
static void		uvmpd_tune __P((void));


/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(wmsg)
	char *wmsg;
{
	int timo = 0;
	int s = splbio();

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curproc == uvm.pagedaemon_proc) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.   but how can it free
		 * more memory if it is asleep?   that is a deadlock.   we
		 * have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).   for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

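	/*
	 * ok, wake the daemon and sleep until it frees some pages.
	 * note: UVM_UNLOCK_AND_WAIT is assumed to drop pagedaemon_lock
	 * and put us to sleep in one atomic step (c.f. the "without
	 * losing wakeups" comment in uvm_pageout), so the daemon's
	 * wakeup on &uvmexp.free can not be missed.
	 */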
	simple_lock(&uvm.pagedaemon_lock);
	thread_wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE,
	    wmsg, timo);

	splx(s);
}


/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune()
{
	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	uvmexp.freemin = uvmexp.npages / 20;
	uvmexp.freemin = max(uvmexp.freemin, (16*1024)/PAGE_SIZE);  /* at least 16K */
	uvmexp.freemin = min(uvmexp.freemin, (256*1024)/PAGE_SIZE); /* at most 256K */

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;
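
	/*
	 * note: freetarg is kept above freemin so that the daemon starts
	 * reclaiming memory before the free page count falls to the hard
	 * minimum (the "+ 1" case covers small configurations where the
	 * 4/3 scaling rounds back down to freemin).
	 */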

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout()
{
	int npages = 0;
	int s;
	struct uvm_aiodesc *aio, *nextaio;
	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist, "<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	/*
	 * main loop
	 */
	while (TRUE) {

		/*
		 * carefully attempt to go to sleep (without losing
		 * "wakeups"!).   we need splbio because we want to make
		 * sure the aio_done list is totally empty before we go
		 * to sleep.
		 */

		s = splbio();
		simple_lock(&uvm.pagedaemon_lock);

		/*
		 * if we've got done aio's, then bypass the sleep
		 */

		if (uvm.aio_done.tqh_first == NULL) {
			UVMHIST_LOG(pdhist, "  <<SLEEPING>>", 0, 0, 0, 0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist, "  <<WOKE UP>>", 0, 0, 0, 0);

			/* relock pagedaemon_lock, still at splbio */
			simple_lock(&uvm.pagedaemon_lock);
		}

		/*
		 * check for done aio structures
		 */

		aio = uvm.aio_done.tqh_first;	/* save current list (if any) */
		if (aio) {
			TAILQ_INIT(&uvm.aio_done);	/* zero global list */
		}

		simple_unlock(&uvm.pagedaemon_lock);	/* unlock */
		splx(s);				/* drop splbio */

		/*
		 * first clear out any pending aios (to free space in case we
		 * want to pageout more stuff).
		 */

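		/*
		 * note: nextaio is saved before calling the aiodone hook
		 * because the hook is presumed free to dispose of the aio
		 * structure itself.
		 */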
		for (/*null*/; aio != NULL; aio = nextaio) {

			uvmexp.paging -= aio->npages;
			nextaio = aio->aioq.tqe_next;
			aio->aiodone(aio);

		}

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();

		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg)
			uvmexp.inactarg = uvmexp.freetarg + 1;

		UVMHIST_LOG(pdhist, "  free/ftarg=%d/%d, inact/itarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
		    uvmexp.inactarg);

		/*
		 * scan if needed
		 * [XXX: note we are reading uvmexp.free without locking]
		 */
		if (uvmexp.free < uvmexp.freetarg ||
		    uvmexp.inactive < uvmexp.inactarg)
			uvmpd_scan();

		/*
		 * done scan.   unlock page queues (the only lock we are
		 * holding).
		 */
		uvm_unlock_pageq();

		/*
		 * done!   wake up any processes (e.g. in uvm_wait) that are
		 * sleeping for free memory, then restart the loop.
		 */
		thread_wakeup(&uvmexp.free);
	}
	/*NOTREACHED*/
}

/*
 * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
 * its own function for ease of reading.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return TRUE if we are exiting because we met our target
 */

static boolean_t
uvmpd_scan_inactive(pglst)
	struct pglist *pglst;
{
	boolean_t retval = FALSE;	/* assume we haven't hit target */
	int s, free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[MAXBSIZE/PAGE_SIZE], **ppsp;
	int npages;
	struct vm_page *swpps[MAXBSIZE/PAGE_SIZE];	/* XXX: see below */
	int swnpages, swcpages;				/* XXX: see below */
	int swslot, oldslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vm_offset_t start;
	UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);

	/*
	 * note: we currently keep swap-backed pages on a separate inactive
	 * list from object-backed pages.   however, merging the two lists
	 * back together again hasn't been ruled out.   thus, we keep our
	 * swap cluster in "swpps" rather than in pps (allows us to mix
	 * clustering types in the event of a mixed inactive queue).
	 */

	/*
	 * swslot is non-zero if we are building a swap cluster.   we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	free = 0;
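
	/*
	 * note on the cluster state: swnpages is the size of the swap
	 * cluster we allocated and swcpages counts how many of those
	 * slots have been filled with pages so far.
	 */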

	for (p = pglst->tqh_first; p != NULL || swslot != 0; p = nextpg) {

		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered
		 * pageout.
		 */
		if (p) {
			/*
			 * update our copy of "free" and see if we've met
			 * our target
			 */
			s = splimp();
			uvm_lock_fpageq();
			free = uvmexp.free;
			uvm_unlock_fpageq();
			splx(s);

			if (free >= uvmexp.freetarg) {
				UVMHIST_LOG(pdhist, "  met free target: exit loop", 0, 0, 0, 0);
				retval = TRUE;	/* hit the target! */
				if (swslot == 0)
					break;	/* exit now if no swap-i/o pending */
				p = NULL;	/* set p to null to signal final swap i/o */
			}
		}

		uobj = NULL;	/* be safe and shut gcc up */
		anon = NULL;	/* be safe and shut gcc up */

		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = p->pageq.tqe_next;

			/*
			 * move referenced pages back to active queue and
			 * skip to next page (unlikely to happen since
			 * inactive pages shouldn't have any valid mappings
			 * and we cleared reference before deactivating).
			 */
			if (pmap_is_referenced(PMAP_PGARG(p))) {
				uvm_pageactivate(p);
				uvmexp.pdreact++;
				continue;
			}

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.   if our attempt fails we skip on to
			 * the next page (no harm done).   it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * get deadlocked.
			 *
			 * the only time we expect to see an ownerless page
			 * (i.e. a page with no uobject and !PQ_ANON) is if an
			 * anon has loaned a page from a uvm_object and the
			 * uvm_object has dropped the ownership.   in that
			 * case, the anon can "take over" the loaned page and
			 * make it its own.
			 */

			/* is page part of an anon or ownerless ? */
			if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {

				anon = p->uanon;

#ifdef DIAGNOSTIC
				/* to be on inactive q, page must be part of _something_ */
				if (anon == NULL)
					panic("pagedaemon: page with no anon or object detected - loop 1");
#endif

				if (!simple_lock_try(&anon->an_lock))
					continue;	/* lock failed, skip this page */

				/* if the page is ownerless, claim it in the name of "anon"! */
				if ((p->pqflags & PQ_ANON) == 0) {
#ifdef DIAGNOSTIC
					if (p->loan_count < 1)
						panic("pagedaemon: non-loaned ownerless page detected - loop 1");
#endif
					p->loan_count--;
					p->pqflags |= PQ_ANON;	/* anon now owns it */
				}

				if (p->flags & PG_BUSY) {
					simple_unlock(&anon->an_lock);
					uvmexp.pdbusy++;
					continue;	/* someone else owns page, skip it */
				}

				uvmexp.pdanscan++;

			} else {

				uobj = p->uobject;

				if (!simple_lock_try(&uobj->vmobjlock))
					continue;	/* lock failed, skip this page */

				if (p->flags & PG_BUSY) {
					simple_unlock(&uobj->vmobjlock);
					uvmexp.pdbusy++;
					continue;	/* someone else owns page, skip it */
				}

				uvmexp.pdobscan++;

			}

			/*
			 * we now have the object and the page queues locked.
			 * the page is not busy.   if the page is clean we
			 * can free it now and continue.
			 */

			if (p->flags & PG_CLEAN) {
				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {
#ifdef DIAGNOSTIC
					/*
					 * an anonymous page can only be clean
					 * if it has valid backing store.
					 */
					if (anon->an_swslot == 0)
						panic("pagedaemon: clean anon page without backing store?");
#endif
					anon->u.an_page = NULL;	/* remove from object */
					simple_unlock(&anon->an_lock);
				} else {
					/* pagefree has already removed the page from the object */
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (free + uvmexp.paging > uvmexp.freetarg) {
				if (anon) {
					simple_unlock(&anon->an_lock);
				} else {
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * the page we are looking at is dirty.   we must
			 * clean it before it can be freed.   to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.   we write protect all the
			 * mappings of the page so that no one can modify it
			 * while it is in I/O.
			 */

			swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
			uvmexp.pdpageouts++;
			p->flags |= PG_BUSY;		/* now we own it */
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);
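
			/*
			 * note: we protect with VM_PROT_READ rather than
			 * VM_PROT_NONE, so existing mappings can still read
			 * the page during the I/O; only new modifications
			 * are blocked.
			 */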

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {

				/*
				 * free old swap slot (if any)
				 */
				if (anon) {
					if (anon->an_swslot) {
						uvm_swap_free(anon->an_swslot, 1);
						anon->an_swslot = 0;
					}
				} else {
					/* remove */
					oldslot = uao_set_swslot(uobj,
					    p->offset/PAGE_SIZE, 0);
					if (oldslot)
						uvm_swap_free(oldslot, 1);	/* free */
				}

				/*
				 * start new cluster (if necessary)
				 */
				if (swslot == 0) {
					swnpages = MAXBSIZE/PAGE_SIZE;	/* want this much */
					swslot = uvm_swap_alloc(&swnpages, TRUE);

					if (swslot == 0) {
						/* no swap?  give up! */
						p->flags &= ~PG_BUSY;
						UVM_PAGE_OWN(p, NULL);
						if (anon)
							simple_unlock(&anon->an_lock);
						else
							simple_unlock(&uobj->vmobjlock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/*
				 * add block to cluster
				 */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj, p->offset/PAGE_SIZE,
					    swslot + swcpages);
				swcpages++;
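
				/*
				 * note: the cluster occupies consecutive
				 * swap slots, so a page's slot is simply
				 * the base slot (swslot) plus its index
				 * in the cluster.
				 */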

				/* done (swap-backed) */
			}

		/* end: if (p)  [end of "if we have new page to consider"] */
		} else {

			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;

		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {

			if (p) {	/* if we just added a page to cluster */
				if (anon)
					simple_unlock(&anon->an_lock);
				else
					simple_unlock(&uobj->vmobjlock);
				if (swcpages < swnpages)	/* cluster not full yet? */
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vm_offset_t) swslot;

			/*
			 * if this is final pageout we could have a few
			 * extra swap blocks
			 */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}

		} else {

			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			start = 0;	/* not looked at because PGO_ALLPAGES is set */

		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the
		 * object's "make put cluster" function to build a cluster
		 * on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to
		 * instruct it to free the cluster pages for us on a
		 * successful I/O (it always does this for un-successful
		 * I/O requests).   this allows us to do clustered pageout
		 * without having to deal with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: uobj (if !swap_backed), page queues
		 * OUT: locked: uobj (if !swap_backed && result != VM_PAGER_PEND)
		 *     !locked: page queues, uobj (if swap_backed || VM_PAGER_PEND)
		 *
		 * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
		 */

		/* locked: uobj (if !swap_backed), page queues */
		result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
		/* locked: uobj (if !swap_backed && result != PEND) */
		/* unlocked: page queues, object (if swap_backed || result == PEND) */

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;	/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.   in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o
		 * can finish _before_ we get here.   if that happens, our
		 * page "p" may no longer be on the inactive queue.   so we
		 * verify this when determining the next page (starting over
		 * at the head if we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();	/* relock page queues */
			uvmexp.pdpending++;
			if (p) {
				if (p->pqflags & PQ_INACTIVE)
					nextpg = p->pageq.tqe_next;	/* reload! */
				else
					nextpg = pglst->tqh_first;	/* reload! */
			} else {
				nextpg = NULL;	/* done list */
			}
			continue;
		}

		/*
		 * clean up "p" if we have one
		 */

		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.   all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we
			 *     "reactivate" the page to get it out of the way
			 *     (it will eventually drift back into the
			 *     inactive queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/*
			 * relock p's object: page queues are not locked
			 * yet, so no need for "try".   (in the !swap_backed
			 * case the object is already locked...)
			 */
			if (swap_backed) {
				if (anon)
					simple_lock(&anon->an_lock);
				else
					simple_lock(&uobj->vmobjlock);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->flags & PG_WANTED)
				thread_wakeup(p);	/* still holding object lock */
			p->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? */
			if (p->flags & PG_RELEASED) {
				if (anon) {
					/* remove page so we can get nextpg */
					anon->u.an_page = NULL;
					simple_unlock(&anon->an_lock);	/* XXX needed? */
					uvm_anfree(anon);	/* kills anon */
					pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
					anon = NULL;
					uvm_lock_pageq();
					nextpg = p->pageq.tqe_next;
					uvm_pagefree(p);	/* free released page */

				} else {

#ifdef DIAGNOSTIC
					if (uobj->pgops->pgo_releasepg == NULL)
						panic("pagedaemon: no pgo_releasepg function");
#endif

					/*
					 * pgo_releasepg nukes the page and
					 * gets "nextpg" for us.   it returns
					 * with the page queues locked (when
					 * given nextpg ptr).
					 */
					if (!uobj->pgops->pgo_releasepg(p, &nextpg))
						uobj = NULL;	/* uobj died after release */

					/*
					 * lock page queues here so that
					 * they're always locked at the end
					 * of the loop.
					 */
					uvm_lock_pageq();
				}

			} else {	/* page was not released during I/O */

				uvm_lock_pageq();
				nextpg = p->pageq.tqe_next;

				if (result != VM_PAGER_OK) {

					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(PMAP_PGARG(p));
					/* XXXCDC: if (swap_backed) FREE p's swap block? */

				} else {

					/* pageout was a success... */
					pmap_clear_reference(PMAP_PGARG(p));
					pmap_clear_modify(PMAP_PGARG(p));
					p->flags |= PG_CLEAN;
					/* XXX: could free page here, but old pagedaemon does not */

				}
			}

			/*
			 * drop object lock (if there is an object left).
			 * do a safety check of nextpg to make sure it is
			 * on the inactive queue (it should be since PG_BUSY
			 * pages on the inactive queue can't be re-queued
			 * [note: not true for active queue]).
			 */

			if (anon)
				simple_unlock(&anon->an_lock);
			else if (uobj)
				simple_unlock(&uobj->vmobjlock);

		} /* if (p) */ else {

			/*
			 * if p is null in this loop, make sure it stays
			 * null in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always
			 * locked at the end of the loop.
			 */
			uvm_lock_pageq();
		}

		if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
			printf("pagedaemon: invalid nextpg!  reverting to queue head\n");
			nextpg = pglst->tqh_first;	/* reload! */
		}

	}	/* end of "inactive" 'for' loop */
	return(retval);
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan()
{
	int s, free, pages_freed, page_shortage;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	boolean_t got_it;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;	/* counter */

#ifdef __GNUC__
	uobj = NULL;	/* XXX gcc */
#endif

	/*
	 * get current "free" page count
	 */
	s = splimp();
	uvm_lock_fpageq();
	free = uvmexp.free;
	uvm_unlock_fpageq();
	splx(s);

#ifndef __SWAP_BROKEN
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {

		uvmexp.pdswout++;
		UVMHIST_LOG(pdhist, "  free %d < target %d: swapout",
		    free, uvmexp.freetarg, 0, 0);
		uvm_unlock_pageq();
		uvm_swapout_threads();
		pmap_update();	/* update so we can scan inactive q */
		uvm_lock_pageq();

	}
#endif

	/*
	 * now we want to work on meeting our targets.   first we work on our
	 * free target by converting inactive pages into free pages.   then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop", 0, 0, 0, 0);
	pages_freed = uvmexp.pdfreed;	/* so far... */

	/*
	 * do loop #1!   alternate starting queue between swap and object based
	 * on the low bit of uvmexp.pdrevs (which we bump by one each call).
	 */

	got_it = FALSE;
	if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
	if (!got_it)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
	if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
		(void) uvmpd_scan_inactive(&uvm.page_inactive_swp);

	/*
	 * we have done the scan to get free pages.   now we work on meeting
	 * our inactive target.
	 */

	page_shortage = uvmexp.inactarg - uvmexp.inactive;
	pages_freed = uvmexp.pdfreed - pages_freed;	/* # pages freed in loop */
	if (page_shortage <= 0 && pages_freed == 0)
		page_shortage = 1;
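
	/*
	 * note: forcing page_shortage to 1 when loop #1 freed nothing
	 * guarantees forward progress -- we deactivate at least one
	 * active page so there is something for the next pass to work on.
	 */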

	UVMHIST_LOG(pdhist, "  second loop: page_shortage=%d",
	    page_shortage, 0, 0, 0);
	for (p = uvm.page_active.tqh_first;
	    p != NULL && page_shortage > 0; p = nextpg) {

		nextpg = p->pageq.tqe_next;
		if (p->flags & PG_BUSY)
			continue;	/* quick check before trying to lock */

		/*
		 * lock owner
		 */
		/* is page anon owned or ownerless? */
		if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {

#ifdef DIAGNOSTIC
			if (p->uanon == NULL)
				panic("pagedaemon: page with no anon or object detected - loop 2");
#endif

			if (!simple_lock_try(&p->uanon->an_lock))
				continue;

			/* take over the page? */
			if ((p->pqflags & PQ_ANON) == 0) {

#ifdef DIAGNOSTIC
				if (p->loan_count < 1)
					panic("pagedaemon: non-loaned ownerless page detected - loop 2");
#endif

				p->loan_count--;
				p->pqflags |= PQ_ANON;
			}

		} else {

			if (!simple_lock_try(&p->uobject->vmobjlock))
				continue;

		}

		if ((p->flags & PG_BUSY) == 0) {
			pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			page_shortage--;
		}

		if (p->pqflags & PQ_ANON)
			simple_unlock(&p->uanon->an_lock);
		else
			simple_unlock(&p->uobject->vmobjlock);
	}

	/*
	 * done scan
	 */
}