1 /* $NetBSD: uvm_pdaemon.c,v 1.17 1999/07/22 22:58:39 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
42 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 #include "opt_uvmhist.h"
70
71 /*
72 * uvm_pdaemon.c: the page daemon
73 */
74
75 #include <sys/param.h>
76 #include <sys/proc.h>
77 #include <sys/systm.h>
78 #include <sys/kernel.h>
79 #include <sys/pool.h>
80
81 #include <vm/vm.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_kern.h>
84
85 #include <uvm/uvm.h>
86
87 /*
88 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
89 * in a pass thru the inactive list when swap is full. the value should be
90 * "small"... if it's too large we'll cycle the active pages thru the inactive
91 * queue too quickly for them to be referenced and avoid being freed.
92 */
93
94 #define UVMPD_NUMDIRTYREACTS 16
95
96
97 /*
98 * local prototypes
99 */
100
101 static void uvmpd_scan __P((void));
102 static boolean_t uvmpd_scan_inactive __P((struct pglist *));
103 static void uvmpd_tune __P((void));
104
105
106 /*
107 * uvm_wait: wait (sleep) for the page daemon to free some pages
108 *
109 * => should be called with all locks released
110 * => should _not_ be called by the page daemon (to avoid deadlock)
111 */
112
113 void uvm_wait(wmsg)
114 char *wmsg;
115 {
116 int timo = 0;
117 int s = splbio();
118
119 /*
120 * check for page daemon going to sleep (waiting for itself)
121 */
122
123 if (curproc == uvm.pagedaemon_proc) {
124 /*
125 * now we have a problem: the pagedaemon wants to go to
126 * sleep until it frees more memory. but how can it
127 * free more memory if it is asleep? that is a deadlock.
128 * we have two options:
129 * [1] panic now
130 * [2] put a timeout on the sleep, thus causing the
131 * pagedaemon to only pause (rather than sleep forever)
132 *
133 * note that option [2] will only help us if we get lucky
134 * and some other process on the system breaks the deadlock
135 * by exiting or freeing memory (thus allowing the pagedaemon
136 * to continue). for now we panic if DEBUG is defined,
137 * otherwise we hope for the best with option [2] (better
138 * yet, this should never happen in the first place!).
139 */
140
141 printf("pagedaemon: deadlock detected!\n");
142 timo = hz >> 3; /* set timeout */
143 #if defined(DEBUG)
144 /* DEBUG: panic so we can debug it */
145 panic("pagedaemon deadlock");
146 #endif
147 }
148
149 simple_lock(&uvm.pagedaemon_lock);
150 wakeup(&uvm.pagedaemon); /* wake the daemon! */
151 UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
152 timo);
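/*
 * (we sleep on &uvmexp.free; the pagedaemon's main loop wakes that
 * channel once free pages exceed the kernel reserve or there is no
 * more paging in progress.)
 */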
153
154 splx(s);
155 }
156
157
158 /*
159 * uvmpd_tune: tune paging parameters
160 *
161 * => called whenever memory is added to (or removed from?) the system
162 * => caller must call with page queues locked
163 */
164
165 static void
166 uvmpd_tune()
167 {
168 UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
169
170 uvmexp.freemin = uvmexp.npages / 20;
171
172 /* between 16k and 256k */
173 /* XXX: what are these values good for? */
174 uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
175 uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT);
176
177 uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
178 if (uvmexp.freetarg <= uvmexp.freemin)
179 uvmexp.freetarg = uvmexp.freemin + 1;
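/*
 * for example, assuming 4KB pages (PAGE_SHIFT == 12): on a machine
 * with roughly 8192 pageable pages (32MB), npages/20 is 409, the
 * clamps above are 4 and 64 pages, so freemin ends up at 64 (256KB)
 * and freetarg at 85.
 */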
180
181 /* uvmexp.inactarg: computed in main daemon loop */
182
183 uvmexp.wiredmax = uvmexp.npages / 3;
184 UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
185 uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
186 }
187
188 /*
189 * uvm_pageout: the main loop for the pagedaemon
190 */
191
192 void
193 uvm_pageout()
194 {
195 int npages = 0;
196 int s;
197 struct uvm_aiodesc *aio, *nextaio;
198 UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
199
200 UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
201
202 /*
203 * ensure correct priority and set paging parameters...
204 */
205
206 uvm.pagedaemon_proc = curproc;
207 (void) spl0();
208 uvm_lock_pageq();
209 npages = uvmexp.npages;
210 uvmpd_tune();
211 uvm_unlock_pageq();
212
213 /*
214 * main loop
215 */
216 while (TRUE) {
217
218 /*
219 * carefully attempt to go to sleep (without losing "wakeups"!).
220 * we need splbio because we want to make sure the aio_done list
221 * is totally empty before we go to sleep.
222 */
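/*
 * (two things are meant to prevent a lost wakeup here: splbio keeps
 * interrupt-time aio completions from slipping onto aio_done while we
 * check it, and UVM_UNLOCK_AND_WAIT releases pagedaemon_lock and goes
 * to sleep as one step, so a wakeup done while holding that lock --
 * as uvm_wait() above does -- can't fall between our check and the
 * sleep.)
 */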
223
224 s = splbio();
225 simple_lock(&uvm.pagedaemon_lock);
226
227 /*
228 * if we've got done aio's, then bypass the sleep
229 */
230
231 if (uvm.aio_done.tqh_first == NULL) {
232 UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
233 UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
234 &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
235 uvmexp.pdwoke++;
236 UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
237
238 /* relock pagedaemon_lock, still at splbio */
239 simple_lock(&uvm.pagedaemon_lock);
240 }
241
242 /*
243 * check for done aio structures
244 */
245
246 aio = uvm.aio_done.tqh_first; /* save current list (if any)*/
247 if (aio) {
248 TAILQ_INIT(&uvm.aio_done); /* zero global list */
249 }
250
251 simple_unlock(&uvm.pagedaemon_lock); /* unlock */
252 splx(s); /* drop splbio */
253
254 /*
255 * first clear out any pending aios (to free space in case we
256 * want to pageout more stuff).
257 */
258
259 for (/*null*/; aio != NULL ; aio = nextaio) {
260
261 uvmexp.paging -= aio->npages;
262 nextaio = aio->aioq.tqe_next;
263 aio->aiodone(aio);
264
265 }
266
267 /* Next, drain pool resources */
268 pool_drain(0);
269
270 /*
271 * now lock page queues and recompute inactive count
272 */
273 uvm_lock_pageq();
274
275 if (npages != uvmexp.npages) { /* check for new pages? */
276 npages = uvmexp.npages;
277 uvmpd_tune();
278 }
279
280 uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
281 if (uvmexp.inactarg <= uvmexp.freetarg)
282 uvmexp.inactarg = uvmexp.freetarg + 1;
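/*
 * (i.e. aim to keep about a third of the pageable (active + inactive)
 * pages on the inactive queue, and always more than the free target.)
 */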
283
284 UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d",
285 uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
286 uvmexp.inactarg);
287
288 /*
289 * scan if needed
290 * [XXX: note we are reading uvmexp.free without locking]
291 */
292 if (uvmexp.free < uvmexp.freetarg ||
293 uvmexp.inactive < uvmexp.inactarg)
294 uvmpd_scan();
295
296 /*
297 * done scan. unlock page queues (the only lock we are holding)
298 */
299 uvm_unlock_pageq();
300
301 /*
302 * done! restart loop.
303 */
304 if (uvmexp.free > uvmexp.reserve_kernel ||
305 uvmexp.paging == 0)
306 wakeup(&uvmexp.free);
307 }
308 /*NOTREACHED*/
309 }
310
311 /*
312 * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
313 * its own function for ease of reading.
314 *
315 * => called with page queues locked
316 * => we work on meeting our free target by converting inactive pages
317 * into free pages.
318 * => we handle the building of swap-backed clusters
319 * => we return TRUE if we are exiting because we met our target
320 */
321
322 static boolean_t
323 uvmpd_scan_inactive(pglst)
324 struct pglist *pglst;
325 {
326 boolean_t retval = FALSE; /* assume we haven't hit target */
327 int s, free, result;
328 struct vm_page *p, *nextpg;
329 struct uvm_object *uobj;
330 struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
331 int npages;
332 struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */
333 int swnpages, swcpages; /* XXX: see below */
334 int swslot;
335 struct vm_anon *anon;
336 boolean_t swap_backed;
337 vaddr_t start;
338 int dirtyreacts;
339 UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
340
341 /*
342 * note: we currently keep swap-backed pages on a separate inactive
343 * list from object-backed pages. however, merging the two lists
344 * back together again hasn't been ruled out. thus, we keep our
345 * swap cluster in "swpps" rather than in pps (allows us to mix
346 * clustering types in the event of a mixed inactive queue).
347 */
348
349 /*
350 * swslot is non-zero if we are building a swap cluster. we want
351 * to stay in the loop while we have a page to scan or we have
352 * a swap-cluster to build.
353 */
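/*
 * (cluster bookkeeping, roughly: "swslot" is the base swap slot of
 * the cluster being built (zero means no cluster in progress),
 * "swnpages" is how many slots uvm_swap_alloc() handed us, and
 * "swcpages" counts the pages gathered into swpps[] so far.  the
 * cluster is pushed out once swcpages reaches swnpages or we run out
 * of pages to scan.)
 */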
354 swslot = 0;
355 swnpages = swcpages = 0;
356 free = 0;
357 dirtyreacts = 0;
358
359 for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {
360
361 /*
362 * note that p can be NULL iff we have traversed the whole
363 * list and need to do one final swap-backed clustered pageout.
364 */
365 if (p) {
366 /*
367 * update our copy of "free" and see if we've met
368 * our target
369 */
370 s = uvm_lock_fpageq();
371 free = uvmexp.free;
372 uvm_unlock_fpageq(s);
373
374 if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
375 dirtyreacts == UVMPD_NUMDIRTYREACTS) {
376 UVMHIST_LOG(pdhist," met free target: "
377 "exit loop", 0, 0, 0, 0);
378 retval = TRUE; /* hit the target! */
379
380 if (swslot == 0)
381 /* exit now if no swap-i/o pending */
382 break;
383
384 /* set p to null to signal final swap i/o */
385 p = NULL;
386 }
387 }
388
389 uobj = NULL; /* be safe and shut gcc up */
390 anon = NULL; /* be safe and shut gcc up */
391
392 if (p) { /* if (we have a new page to consider) */
393 /*
394 * we are below target and have a new page to consider.
395 */
396 uvmexp.pdscans++;
397 nextpg = p->pageq.tqe_next;
398
399 /*
400 * move referenced pages back to active queue and
401 * skip to next page (unlikely to happen since
402 * inactive pages shouldn't have any valid mappings
403 * and we cleared reference before deactivating).
404 */
405 if (pmap_is_referenced(PMAP_PGARG(p))) {
406 uvm_pageactivate(p);
407 uvmexp.pdreact++;
408 continue;
409 }
410
411 /*
412 * first we attempt to lock the object that this page
413 * belongs to. if our attempt fails we skip on to
414 * the next page (no harm done). it is important to
415 * "try" locking the object as we are locking in the
416 * wrong order (pageq -> object) and we don't want to
417 * get deadlocked.
418 *
419 * the only time we expect to see an ownerless page
420 * (i.e. a page with no uobject and !PQ_ANON) is if an
421 * anon has loaned a page from a uvm_object and the
422 * uvm_object has dropped the ownership. in that
423 * case, the anon can "take over" the loaned page
424 * and make it its own.
425 */
426
427 /* is page part of an anon or ownerless ? */
428 if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
429
430 anon = p->uanon;
431
432 #ifdef DIAGNOSTIC
433 /* to be on inactive q, page must be part
434 * of _something_ */
435 if (anon == NULL)
436 panic("pagedaemon: page with no anon "
437 "or object detected - loop 1");
438 #endif
439
440 if (!simple_lock_try(&anon->an_lock))
441 /* lock failed, skip this page */
442 continue;
443
444 /*
445 * if the page is ownerless, claim it in the
446 * name of "anon"!
447 */
448 if ((p->pqflags & PQ_ANON) == 0) {
449 #ifdef DIAGNOSTIC
450 if (p->loan_count < 1)
451 panic("pagedaemon: non-loaned "
452 "ownerless page detected -"
453 " loop 1");
454 #endif
455 p->loan_count--;
456 p->pqflags |= PQ_ANON; /* anon now owns it */
457 }
458
459 if (p->flags & PG_BUSY) {
460 simple_unlock(&anon->an_lock);
461 uvmexp.pdbusy++;
462 /* someone else owns page, skip it */
463 continue;
464 }
465
466 uvmexp.pdanscan++;
467
468 } else {
469
470 uobj = p->uobject;
471
472 if (!simple_lock_try(&uobj->vmobjlock))
473 /* lock failed, skip this page */
474 continue;
475
476 if (p->flags & PG_BUSY) {
477 simple_unlock(&uobj->vmobjlock);
478 uvmexp.pdbusy++;
479 /* someone else owns page, skip it */
480 continue;
481 }
482
483 uvmexp.pdobscan++;
484 }
485
486 /*
487 * we now have the object and the page queues locked.
488 * the page is not busy. if the page is clean we
489 * can free it now and continue.
490 */
491
492 if (p->flags & PG_CLEAN) {
493 if (p->pqflags & PQ_SWAPBACKED) {
494 /* this page now lives only in swap */
495 simple_lock(&uvm.swap_data_lock);
496 uvmexp.swpgonly++;
497 simple_unlock(&uvm.swap_data_lock);
498 }
499
500 /* zap all mappings with pmap_page_protect... */
501 pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
502 uvm_pagefree(p);
503 uvmexp.pdfreed++;
504
505 if (anon) {
506 #ifdef DIAGNOSTIC
507 /*
508 * an anonymous page can only be clean
509 * if it has valid backing store.
510 */
511 if (anon->an_swslot == 0)
512 panic("pagedaemon: clean anon "
513 "page without backing store?");
514 #endif
515 /* remove from object */
516 anon->u.an_page = NULL;
517 simple_unlock(&anon->an_lock);
518 } else {
519 /* pagefree has already removed the
520 * page from the object */
521 simple_unlock(&uobj->vmobjlock);
522 }
523 continue;
524 }
525
526 /*
527 * this page is dirty, skip it if we'll have met our
528 * free target when all the current pageouts complete.
529 */
530 if (free + uvmexp.paging > uvmexp.freetarg << 2) {
531 if (anon) {
532 simple_unlock(&anon->an_lock);
533 } else {
534 simple_unlock(&uobj->vmobjlock);
535 }
536 continue;
537 }
538
539 /*
540 * this page is dirty, but we can't page it out:
541 * swap is entirely full of swap-only pages, so no
542 * slot can be freed for it. reactivate it so that we
543 * eventually cycle all pages thru the inactive queue.
544 */
545 #ifdef DIAGNOSTIC
546 if (uvmexp.swpgonly > uvmexp.swpages) {
547 panic("uvmexp.swpgonly botch");
548 }
549 #endif
550 if ((p->pqflags & PQ_SWAPBACKED) &&
551 uvmexp.swpgonly == uvmexp.swpages) {
552 dirtyreacts++;
553 uvm_pageactivate(p);
554 if (anon) {
555 simple_unlock(&anon->an_lock);
556 } else {
557 simple_unlock(&uobj->vmobjlock);
558 }
559 continue;
560 }
561
562 /*
563 * if the page is swap-backed and dirty and swap space
564 * is full, free any swap allocated to the page
565 * so that other pages can be paged out.
566 */
567 #ifdef DIAGNOSTIC
568 if (uvmexp.swpginuse > uvmexp.swpages) {
569 panic("uvmexp.swpginuse botch");
570 }
571 #endif
572 if ((p->pqflags & PQ_SWAPBACKED) &&
573 uvmexp.swpginuse == uvmexp.swpages) {
574
575 if ((p->pqflags & PQ_ANON) &&
576 p->uanon->an_swslot) {
577 uvm_swap_free(p->uanon->an_swslot, 1);
578 p->uanon->an_swslot = 0;
579 }
580 if (p->pqflags & PQ_AOBJ) {
581 uao_dropswap(p->uobject,
582 p->offset >> PAGE_SHIFT);
583 }
584 }
585
586 /*
587 * the page we are looking at is dirty. we must
588 * clean it before it can be freed. to do this we
589 * first mark the page busy so that no one else will
590 * touch the page. we write protect all the mappings
591 * of the page so that no one touches it while it is
592 * in I/O.
593 */
594
595 swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
596 p->flags |= PG_BUSY; /* now we own it */
597 UVM_PAGE_OWN(p, "scan_inactive");
598 pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);
599 uvmexp.pgswapout++;
600
601 /*
602 * for swap-backed pages we need to (re)allocate
603 * swap space.
604 */
605 if (swap_backed) {
606
607 /*
608 * free old swap slot (if any)
609 */
610 if (anon) {
611 if (anon->an_swslot) {
612 uvm_swap_free(anon->an_swslot,
613 1);
614 anon->an_swslot = 0;
615 }
616 } else {
617 uao_dropswap(uobj,
618 p->offset >> PAGE_SHIFT);
619 }
620
621 /*
622 * start new cluster (if necessary)
623 */
624 if (swslot == 0) {
625 /* want this much */
626 swnpages = MAXBSIZE >> PAGE_SHIFT;
627
628 swslot = uvm_swap_alloc(&swnpages,
629 TRUE);
630
631 if (swslot == 0) {
632 /* no swap? give up! */
633 p->flags &= ~PG_BUSY;
634 UVM_PAGE_OWN(p, NULL);
635 if (anon)
636 simple_unlock(
637 &anon->an_lock);
638 else
639 simple_unlock(
640 &uobj->vmobjlock);
641 continue;
642 }
643 swcpages = 0; /* cluster is empty */
644 }
645
646 /*
647 * add block to cluster
648 */
649 swpps[swcpages] = p;
650 if (anon)
651 anon->an_swslot = swslot + swcpages;
652 else
653 uao_set_swslot(uobj,
654 p->offset >> PAGE_SHIFT,
655 swslot + swcpages);
656 swcpages++;
657
658 /* done (swap-backed) */
659 }
660
661 /* end: if (p) ["if we have new page to consider"] */
662 } else {
663
664 /* if p == NULL we must be doing a last swap i/o */
665 swap_backed = TRUE;
666 }
667
668 /*
669 * now consider doing the pageout.
670 *
671 * for swap-backed pages, we do the pageout if we have either
672 * filled the cluster (in which case swnpages == swcpages) or
673 * run out of pages (p == NULL).
674 *
675 * for object pages, we always do the pageout.
676 */
677 if (swap_backed) {
678
679 if (p) { /* if we just added a page to cluster */
680 if (anon)
681 simple_unlock(&anon->an_lock);
682 else
683 simple_unlock(&uobj->vmobjlock);
684
685 /* cluster not full yet? */
686 if (swcpages < swnpages)
687 continue;
688 }
689
690 /* starting I/O now... set up for it */
691 npages = swcpages;
692 ppsp = swpps;
693 /* for swap-backed pages only */
694 start = (vaddr_t) swslot;
695
696 /* if this is final pageout we could have a few
697 * extra swap blocks */
698 if (swcpages < swnpages) {
699 uvm_swap_free(swslot + swcpages,
700 (swnpages - swcpages));
701 }
702
703 } else {
704
705 /* normal object pageout */
706 ppsp = pps;
707 npages = sizeof(pps) / sizeof(struct vm_page *);
708 /* not looked at because PGO_ALLPAGES is set */
709 start = 0;
710
711 }
712
713 /*
714 * now do the pageout.
715 *
716 * for swap_backed pages we have already built the cluster.
717 * for !swap_backed pages, uvm_pager_put will call the object's
718 * "make put cluster" function to build a cluster on our behalf.
719 *
720 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
721 * it to free the cluster pages for us on a successful I/O (it
722 * always does this for un-successful I/O requests). this
723 * allows us to do clustered pageout without having to deal
724 * with cluster pages at this level.
725 *
726 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
727 * IN: locked: uobj (if !swap_backed), page queues
728 * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND)
729 * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND)
730 *
731 * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
732 */
733
734 /* locked: uobj (if !swap_backed), page queues */
735 uvmexp.pdpageouts++;
736 result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
737 &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
738 /* locked: uobj (if !swap_backed && result != PEND) */
739 /* unlocked: pageqs, object (if swap_backed ||result == PEND) */
740
741 /*
742 * if we did i/o to swap, zero swslot to indicate that we are
743 * no longer building a swap-backed cluster.
744 */
745
746 if (swap_backed)
747 swslot = 0; /* done with this cluster */
748
749 /*
750 * first, we check for VM_PAGER_PEND which means that the
751 * async I/O is in progress and the async I/O done routine
752 * will clean up after us. in this case we move on to the
753 * next page.
754 *
755 * there is a very remote chance that the pending async i/o can
756 * finish _before_ we get here. if that happens, our page "p"
757 * may no longer be on the inactive queue. so we verify this
758 * when determining the next page (starting over at the head if
759 * we've lost our inactive page).
760 */
761
762 if (result == VM_PAGER_PEND) {
763 uvmexp.paging += npages;
764 uvm_lock_pageq(); /* relock page queues */
765 uvmexp.pdpending++;
766 if (p) {
767 if (p->pqflags & PQ_INACTIVE)
768 /* reload! */
769 nextpg = p->pageq.tqe_next;
770 else
771 /* reload! */
772 nextpg = pglst->tqh_first;
773 } else {
774 nextpg = NULL; /* done list */
775 }
776 continue;
777 }
778
779 /*
780 * clean up "p" if we have one
781 */
782
783 if (p) {
784 /*
785 * the I/O request to "p" is done and uvm_pager_put
786 * has freed any cluster pages it may have allocated
787 * during I/O. all that is left for us to do is
788 * clean up page "p" (which is still PG_BUSY).
789 *
790 * our result could be one of the following:
791 * VM_PAGER_OK: successful pageout
792 *
793 * VM_PAGER_AGAIN: tmp resource shortage, we skip
794 * to next page
795 * VM_PAGER_{FAIL,ERROR,BAD}: an error. we
796 * "reactivate" page to get it out of the way (it
797 * will eventually drift back into the inactive
798 * queue for a retry).
799 * VM_PAGER_UNLOCK: should never see this as it is
800 * only valid for "get" operations
801 */
802
803 /* relock p's object: page queues not locked yet, so
804 * no need for "try" */
805
806 /* !swap_backed case: already locked... */
807 if (swap_backed) {
808 if (anon)
809 simple_lock(&anon->an_lock);
810 else
811 simple_lock(&uobj->vmobjlock);
812 }
813
814 #ifdef DIAGNOSTIC
815 if (result == VM_PAGER_UNLOCK)
816 panic("pagedaemon: pageout returned "
817 "invalid 'unlock' code");
818 #endif
819
820 /* handle PG_WANTED now */
821 if (p->flags & PG_WANTED)
822 /* still holding object lock */
823 wakeup(p);
824
825 p->flags &= ~(PG_BUSY|PG_WANTED);
826 UVM_PAGE_OWN(p, NULL);
827
828 /* released during I/O? */
829 if (p->flags & PG_RELEASED) {
830 if (anon) {
831 /* remove page so we can get nextpg */
832 anon->u.an_page = NULL;
833
834 simple_unlock(&anon->an_lock);
835 uvm_anfree(anon); /* kills anon */
836 pmap_page_protect(PMAP_PGARG(p),
837 VM_PROT_NONE);
838 anon = NULL;
839 uvm_lock_pageq();
840 nextpg = p->pageq.tqe_next;
841 /* free released page */
842 uvm_pagefree(p);
843
844 } else {
845
846 #ifdef DIAGNOSTIC
847 if (uobj->pgops->pgo_releasepg == NULL)
848 panic("pagedaemon: no "
849 "pgo_releasepg function");
850 #endif
851
852 /*
853 * pgo_releasepg nukes the page and
854 * gets "nextpg" for us. it returns
855 * with the page queues locked (when
856 * given nextpg ptr).
857 */
858 if (!uobj->pgops->pgo_releasepg(p,
859 &nextpg))
860 /* uobj died after release */
861 uobj = NULL;
862
863 /*
864 * lock page queues here so that they're
865 * always locked at the end of the loop.
866 */
867 uvm_lock_pageq();
868 }
869
870 } else { /* page was not released during I/O */
871
872 uvm_lock_pageq();
873 nextpg = p->pageq.tqe_next;
874
875 if (result != VM_PAGER_OK) {
876
877 /* pageout was a failure... */
878 if (result != VM_PAGER_AGAIN)
879 uvm_pageactivate(p);
880 pmap_clear_reference(PMAP_PGARG(p));
881 /* XXXCDC: if (swap_backed) FREE p's
882 * swap block? */
883
884 } else {
885
886 /* pageout was a success... */
887 pmap_clear_reference(PMAP_PGARG(p));
888 pmap_clear_modify(PMAP_PGARG(p));
889 p->flags |= PG_CLEAN;
890 /* XXX: could free page here, but old
891 * pagedaemon does not */
892
893 }
894 }
895
896 /*
897 * drop object lock (if there is an object left). do
898 * a safety check of nextpg to make sure it is on the
899 * inactive queue (it should be since PG_BUSY pages on
900 * the inactive queue can't be re-queued [note: not
901 * true for active queue]).
902 */
903
904 if (anon)
905 simple_unlock(&anon->an_lock);
906 else if (uobj)
907 simple_unlock(&uobj->vmobjlock);
908
909 } /* if (p) */ else {
910
911 /* if p is null in this loop, make sure it stays null
912 * in next loop */
913 nextpg = NULL;
914
915 /*
916 * lock page queues here just so they're always locked
917 * at the end of the loop.
918 */
919 uvm_lock_pageq();
920 }
921
922 if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
923 printf("pagedaemon: invalid nextpg! reverting to "
924 "queue head\n");
925 nextpg = pglst->tqh_first; /* reload! */
926 }
927
928 } /* end of "inactive" 'for' loop */
929 return (retval);
930 }
931
932 /*
933 * uvmpd_scan: scan the page queues and attempt to meet our targets.
934 *
935 * => called with pageq's locked
936 */
937
938 void
939 uvmpd_scan()
940 {
941 int s, free, inactive_shortage, swap_shortage, pages_freed;
942 struct vm_page *p, *nextpg;
943 struct uvm_object *uobj;
944 boolean_t got_it;
945 UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
946
947 uvmexp.pdrevs++; /* counter */
948
949 #ifdef __GNUC__
950 uobj = NULL; /* XXX gcc */
951 #endif
952 /*
953 * get current "free" page count
954 */
955 s = uvm_lock_fpageq();
956 free = uvmexp.free;
957 uvm_unlock_fpageq(s);
958
959 #ifndef __SWAP_BROKEN
960 /*
961 * swap out some processes if we are below our free target.
962 * we need to unlock the page queues for this.
963 */
964 if (free < uvmexp.freetarg) {
965
966 uvmexp.pdswout++;
967 UVMHIST_LOG(pdhist," free %d < target %d: swapout", free,
968 uvmexp.freetarg, 0, 0);
969 uvm_unlock_pageq();
970 uvm_swapout_threads();
971 pmap_update(); /* update so we can scan inactive q */
972 uvm_lock_pageq();
973
974 }
975 #endif
976
977 /*
978 * now we want to work on meeting our targets. first we work on our
979 * free target by converting inactive pages into free pages. then
980 * we work on meeting our inactive target by converting active pages
981 * to inactive ones.
982 */
983
984 UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
985
986 /*
987 * do loop #1! alternate starting queue between swap and object based
988 * on the low bit of uvmexp.pdrevs (which we bump by one each call).
989 */
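/*
 * (so on odd passes the swap-backed queue is tried first and the
 * object-backed queue second; on even passes the order is reversed.
 * the later calls only happen if the earlier scan didn't meet the
 * free target, and the swap queue is only scanned at all if swap
 * devices are configured.)
 */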
990
991 got_it = FALSE;
992 pages_freed = uvmexp.pdfreed;
993 if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
994 got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
995 if (!got_it)
996 got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
997 if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
998 (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
999 pages_freed = uvmexp.pdfreed - pages_freed;
1000
1001 /*
1002 * we have done the scan to get free pages. now we work on meeting
1003 * our inactive target.
1004 */
1005
1006 inactive_shortage = uvmexp.inactarg - uvmexp.inactive;
1007
1008 /*
1009 * detect if we're not going to be able to page anything out
1010 * until we free some swap resources from active pages.
1011 */
1012 swap_shortage = 0;
1013 if (uvmexp.free < uvmexp.freetarg &&
1014 uvmexp.swpginuse == uvmexp.swpages &&
1015 uvmexp.swpgonly < uvmexp.swpages &&
1016 pages_freed == 0) {
1017 swap_shortage = uvmexp.freetarg - uvmexp.free;
1018 }
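/*
 * (unpacking the test above: we are still short of free pages, every
 * swap slot is allocated, but not every slot backs a swap-only page,
 * so some slots still shadow in-core pages and can be reclaimed; and
 * the inactive scan just freed nothing.  swap_shortage then acts as a
 * budget, in loop 2 below, of swap slots to strip from active pages.)
 */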
1019
1020 UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%d swap_shortage=%d",
1021 inactive_shortage, swap_shortage,0,0);
1022 for (p = TAILQ_FIRST(&uvm.page_active);
1023 p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
1024 p = nextpg) {
1025 nextpg = p->pageq.tqe_next;
1026 if (p->flags & PG_BUSY)
1027 continue; /* quick check before trying to lock */
1028
1029 /*
1030 * lock the page's owner.
1031 */
1032 /* is page anon owned or ownerless? */
1033 if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
1034
1035 #ifdef DIAGNOSTIC
1036 if (p->uanon == NULL)
1037 panic("pagedaemon: page with no anon or "
1038 "object detected - loop 2");
1039 #endif
1040 if (!simple_lock_try(&p->uanon->an_lock))
1041 continue;
1042
1043 /* take over the page? */
1044 if ((p->pqflags & PQ_ANON) == 0) {
1045 #ifdef DIAGNOSTIC
1046 if (p->loan_count < 1)
1047 panic("pagedaemon: non-loaned "
1048 "ownerless page detected - loop 2");
1049 #endif
1050 p->loan_count--;
1051 p->pqflags |= PQ_ANON;
1052 }
1053 } else {
1054 if (!simple_lock_try(&p->uobject->vmobjlock))
1055 continue;
1056 }
1057 /*
1058 * skip this page if it's busy.
1059 */
1060 if ((p->flags & PG_BUSY) != 0) {
1061 if (p->pqflags & PQ_ANON)
1062 simple_unlock(&p->uanon->an_lock);
1063 else
1064 simple_unlock(&p->uobject->vmobjlock);
1065 continue;
1066 }
1067
1068 /*
1069 * if there's a shortage of swap, free any swap allocated
1070 * to this page so that other pages can be paged out.
1071 */
1072 if (swap_shortage > 0) {
1073 if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) {
1074 uvm_swap_free(p->uanon->an_swslot, 1);
1075 p->uanon->an_swslot = 0;
1076 p->flags &= ~PG_CLEAN;
1077 swap_shortage--;
1078 }
1079 if (p->pqflags & PQ_AOBJ) {
1080 int slot = uao_set_swslot(p->uobject,
1081 p->offset >> PAGE_SHIFT, 0);
1082 if (slot) {
1083 uvm_swap_free(slot, 1);
1084 p->flags &= ~PG_CLEAN;
1085 swap_shortage--;
1086 }
1087 }
1088 }
1089
1090 /*
1091 * deactivate this page if there's a shortage of
1092 * inactive pages.
1093 */
1094 if (inactive_shortage > 0) {
1095 pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
1096 /* no need to check wire_count as pg is "active" */
1097 uvm_pagedeactivate(p);
1098 uvmexp.pddeact++;
1099 inactive_shortage--;
1100 }
1101
1102 if (p->pqflags & PQ_ANON)
1103 simple_unlock(&p->uanon->an_lock);
1104 else
1105 simple_unlock(&p->uobject->vmobjlock);
1106 }
1107 }
1108