/*	$NetBSD: uvm_pdaemon.c,v 1.14 1999/03/26 17:33:30 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "opt_uvmhist.h"

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS 16


/*
 * local prototypes
 */

static void		uvmpd_scan __P((void));
static boolean_t	uvmpd_scan_inactive __P((struct pglist *));
static void		uvmpd_tune __P((void));


/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(wmsg)
	char *wmsg;
{
	int timo = 0;
	int s = splbio();

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curproc == uvm.pagedaemon_proc) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

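	/*
	 * note: we wake the daemon, then atomically release pagedaemon_lock
	 * and sleep on &uvmexp.free.  the daemon's main loop wakes sleepers
	 * on &uvmexp.free once it has freed enough memory (or has no
	 * pageouts pending), which is what ends this wait.
	 */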
	simple_lock(&uvm.pagedaemon_lock);
	thread_wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
	    timo);

	splx(s);
}


/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune()
{
	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	uvmexp.freemin = uvmexp.npages / 20;

	/* between 16k and 256k */
	/* XXX: what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
	uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT);

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;
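
	/*
	 * note: freemin is roughly the low-water mark of free pages and
	 * freetarg is the level the pagedaemon scans towards once it runs;
	 * freetarg must be strictly greater than freemin for the daemon to
	 * make progress, hence the adjustment above.
	 */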

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout()
{
	int npages = 0;
	int s;
	struct uvm_aiodesc *aio, *nextaio;
	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	/*
	 * main loop
	 */
	while (TRUE) {

		/*
		 * carefully attempt to go to sleep (without losing "wakeups"!).
		 * we need splbio because we want to make sure the aio_done list
		 * is totally empty before we go to sleep.
		 */

		s = splbio();
		simple_lock(&uvm.pagedaemon_lock);

		/*
		 * if we've got done aio's, then bypass the sleep
		 */

		if (uvm.aio_done.tqh_first == NULL) {
			UVMHIST_LOG(maphist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);

			/* relock pagedaemon_lock, still at splbio */
			simple_lock(&uvm.pagedaemon_lock);
		}

		/*
		 * check for done aio structures
		 */

		aio = uvm.aio_done.tqh_first;	/* save current list (if any) */
		if (aio) {
			TAILQ_INIT(&uvm.aio_done);	/* zero global list */
		}

		simple_unlock(&uvm.pagedaemon_lock);	/* unlock */
		splx(s);				/* drop splbio */

		/*
		 * first clear out any pending aios (to free space in case we
		 * want to pageout more stuff).
		 */

		for (/*null*/; aio != NULL ; aio = nextaio) {

			uvmexp.paging -= aio->npages;
			nextaio = aio->aioq.tqe_next;
			aio->aiodone(aio);

		}

		/* Next, drain pool resources */
		pool_drain(0);
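
		/*
		 * note: pool_drain() releases unused pages cached by the
		 * pool allocator back to the system, so do it before we
		 * re-check the free page counts below.
		 */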

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();

		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg)
			uvmexp.inactarg = uvmexp.freetarg + 1;

		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d, inact/itarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
		    uvmexp.inactarg);

		/*
		 * scan if needed
		 * [XXX: note we are reading uvmexp.free without locking]
		 */
		if (uvmexp.free < uvmexp.freetarg ||
		    uvmexp.inactive < uvmexp.inactarg)
			uvmpd_scan();

		/*
		 * done scan.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		/*
		 * done!  restart loop.
		 */
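		/*
		 * note: this wakes anyone sleeping in uvm_wait().  we only
		 * do it if free memory is at or above the kernel reserve, or
		 * if no pageouts are pending (presumably so that sleepers
		 * retry rather than wait forever for i/o that isn't coming).
		 */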
		if (uvmexp.free >= uvmexp.reserve_kernel ||
		    uvmexp.paging == 0)
			thread_wakeup(&uvmexp.free);
	}
	/*NOTREACHED*/
}


/*
 * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
 * its own function for ease of reading.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return TRUE if we are exiting because we met our target
 */

static boolean_t
uvmpd_scan_inactive(pglst)
	struct pglist *pglst;
{
	boolean_t retval = FALSE;	/* assume we haven't hit target */
	int s, free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
	int npages;
	struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; 	/* XXX: see below */
	int swnpages, swcpages;				/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;
	UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);

	/*
	 * note: we currently keep swap-backed pages on a separate inactive
	 * list from object-backed pages.  however, merging the two lists
	 * back together again hasn't been ruled out.  thus, we keep our
	 * swap cluster in "swpps" rather than in pps (allows us to mix
	 * clustering types in the event of a mixed inactive queue).
	 */

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	free = 0;
	dirtyreacts = 0;

	for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {

		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		if (p) {
			/*
			 * update our copy of "free" and see if we've met
			 * our target
			 */
			s = splimp();
			uvm_lock_fpageq();
			free = uvmexp.free;
			uvm_unlock_fpageq();
			splx(s);

			if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				UVMHIST_LOG(pdhist,"  met free target: "
				    "exit loop", 0, 0, 0, 0);
				retval = TRUE;		/* hit the target! */

				if (swslot == 0)
					/* exit now if no swap-i/o pending */
					break;

				/* set p to null to signal final swap i/o */
				p = NULL;
			}
		}

		uobj = NULL;	/* be safe and shut gcc up */
		anon = NULL;	/* be safe and shut gcc up */

		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = p->pageq.tqe_next;

			/*
			 * move referenced pages back to active queue and
			 * skip to next page (unlikely to happen since
			 * inactive pages shouldn't have any valid mappings
			 * and we cleared reference before deactivating).
			 */
			if (pmap_is_referenced(PMAP_PGARG(p))) {
				uvm_pageactivate(p);
				uvmexp.pdreact++;
				continue;
			}

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * get deadlocked.
			 *
			 * the only time we expect to see an ownerless page
			 * (i.e. a page with no uobject and !PQ_ANON) is if an
			 * anon has loaned a page from a uvm_object and the
			 * uvm_object has dropped the ownership.  in that
			 * case, the anon can "take over" the loaned page
			 * and make it its own.
			 */

			/* is page part of an anon or ownerless ? */
			if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {

				anon = p->uanon;

#ifdef DIAGNOSTIC
				/* to be on inactive q, page must be part
				 * of _something_ */
				if (anon == NULL)
					panic("pagedaemon: page with no anon "
					    "or object detected - loop 1");
#endif

				if (!simple_lock_try(&anon->an_lock))
					/* lock failed, skip this page */
					continue;

				/*
				 * if the page is ownerless, claim it in the
				 * name of "anon"!
				 */
				if ((p->pqflags & PQ_ANON) == 0) {
#ifdef DIAGNOSTIC
					if (p->loan_count < 1)
						panic("pagedaemon: non-loaned "
						    "ownerless page detected -"
						    " loop 1");
#endif
					p->loan_count--;
					p->pqflags |= PQ_ANON; /* anon now owns it */
				}

				if (p->flags & PG_BUSY) {
					simple_unlock(&anon->an_lock);
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}

				uvmexp.pdanscan++;

			} else {

				uobj = p->uobject;

				if (!simple_lock_try(&uobj->vmobjlock))
					/* lock failed, skip this page */
					continue;

				if (p->flags & PG_BUSY) {
					simple_unlock(&uobj->vmobjlock);
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}

				uvmexp.pdobscan++;
			}

			/*
			 * we now have the object and the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */

			if (p->flags & PG_CLEAN) {
				if (p->pqflags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					simple_lock(&uvm.swap_data_lock);
					uvmexp.swpgonly++;
					simple_unlock(&uvm.swap_data_lock);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {
#ifdef DIAGNOSTIC
					/*
					 * an anonymous page can only be clean
					 * if it has valid backing store.
					 */
					if (anon->an_swslot == 0)
						panic("pagedaemon: clean anon "
						    "page without backing store?");
#endif
					/* remove from object */
					anon->u.an_page = NULL;
					simple_unlock(&anon->an_lock);
				} else {
					/* pagefree has already removed the
					 * page from the object */
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (free + uvmexp.paging > uvmexp.freetarg << 2) {
				if (anon) {
					simple_unlock(&anon->an_lock);
				} else {
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out
			 * since swap is full of pages that live only in
			 * swap (so no swap space can be freed for it).
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
#ifdef DIAGNOSTIC
			if (uvmexp.swpgonly > uvmexp.swpages) {
				panic("uvmexp.swpgonly botch");
			}
#endif
			if ((p->pqflags & PQ_SWAPBACKED) &&
			    uvmexp.swpgonly == uvmexp.swpages) {
				dirtyreacts++;
				uvm_pageactivate(p);
				if (anon) {
					simple_unlock(&anon->an_lock);
				} else {
					simple_unlock(&uobj->vmobjlock);
				}
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
#ifdef DIAGNOSTIC
			if (uvmexp.swpginuse > uvmexp.swpages) {
				panic("uvmexp.swpginuse botch");
			}
#endif
			if ((p->pqflags & PQ_SWAPBACKED) &&
			    uvmexp.swpginuse == uvmexp.swpages) {

				if ((p->pqflags & PQ_ANON) &&
				    p->uanon->an_swslot) {
					uvm_swap_free(p->uanon->an_swslot, 1);
					p->uanon->an_swslot = 0;
				}
				if (p->pqflags & PQ_AOBJ) {
					uao_dropswap(p->uobject,
					    p->offset >> PAGE_SHIFT);
				}
			}

			/*
			 * the page we are looking at is dirty.   we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.   we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
			p->flags |= PG_BUSY;		/* now we own it */
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {

				/*
				 * free old swap slot (if any)
				 */
				if (anon) {
					if (anon->an_swslot) {
						uvm_swap_free(anon->an_swslot,
						    1);
						anon->an_swslot = 0;
					}
				} else {
					uao_dropswap(uobj,
					    p->offset >> PAGE_SHIFT);
				}

				/*
				 * start new cluster (if necessary)
				 */
				if (swslot == 0) {
					/* want this much */
					swnpages = MAXBSIZE >> PAGE_SHIFT;

					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);

					if (swslot == 0) {
						/* no swap?  give up! */
						p->flags &= ~PG_BUSY;
						UVM_PAGE_OWN(p, NULL);
						if (anon)
							simple_unlock(
							    &anon->an_lock);
						else
							simple_unlock(
							    &uobj->vmobjlock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/*
				 * add block to cluster
				 */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;

				/* done (swap-backed) */
			}
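
			/*
			 * note: cluster slots are handed out consecutively,
			 * so the page just added will be written to swap
			 * slot (swslot + its index in swpps).  the actual
			 * i/o is deferred until the cluster fills or we run
			 * out of inactive pages (see below).
			 */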

		/* end: if (p) ["if we have new page to consider"] */
		} else {

			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {

			if (p) {	/* if we just added a page to cluster */
				if (anon)
					simple_unlock(&anon->an_lock);
				else
					simple_unlock(&uobj->vmobjlock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}

		} else {

			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;

		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: uobj (if !swap_backed), page queues
		 * OUT: locked: uobj (if !swap_backed && result != VM_PAGER_PEND)
		 *     !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND)
		 *
		 * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
		 */

		/* locked: uobj (if !swap_backed), page queues */
		uvmexp.pdpageouts++;
		result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
		/* locked: uobj (if !swap_backed && result != PEND) */
		/* unlocked: pageqs, object (if swap_backed || result == PEND) */

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.   in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.   if that happens, our page "p"
		 * may no longer be on the inactive queue.   so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();		/* relock page queues */
			uvmexp.pdpending++;
			if (p) {
				if (p->pqflags & PQ_INACTIVE)
					/* reload! */
					nextpg = p->pageq.tqe_next;
				else
					/* reload! */
					nextpg = pglst->tqh_first;
			} else {
				nextpg = NULL;		/* done list */
			}
			continue;
		}

		/*
		 * clean up "p" if we have one
		 */

		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				if (anon)
					simple_lock(&anon->an_lock);
				else
					simple_lock(&uobj->vmobjlock);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->flags & PG_WANTED)
				/* still holding object lock */
				thread_wakeup(p);

			p->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? */
			if (p->flags & PG_RELEASED) {
				if (anon) {
					/* remove page so we can get nextpg */
					anon->u.an_page = NULL;

					simple_unlock(&anon->an_lock);
					uvm_anfree(anon);	/* kills anon */
					pmap_page_protect(PMAP_PGARG(p),
					    VM_PROT_NONE);
					anon = NULL;
					uvm_lock_pageq();
					nextpg = p->pageq.tqe_next;
					/* free released page */
					uvm_pagefree(p);

				} else {

#ifdef DIAGNOSTIC
					if (uobj->pgops->pgo_releasepg == NULL)
						panic("pagedaemon: no "
						    "pgo_releasepg function");
#endif

					/*
					 * pgo_releasepg nukes the page and
					 * gets "nextpg" for us.  it returns
					 * with the page queues locked (when
					 * given nextpg ptr).
					 */
					if (!uobj->pgops->pgo_releasepg(p,
					    &nextpg))
						/* uobj died after release */
						uobj = NULL;

					/*
					 * lock page queues here so that they're
					 * always locked at the end of the loop.
					 */
					uvm_lock_pageq();
				}

			} else {	/* page was not released during I/O */

				uvm_lock_pageq();
				nextpg = p->pageq.tqe_next;

				if (result != VM_PAGER_OK) {

					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(PMAP_PGARG(p));
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */

				} else {

					/* pageout was a success... */
					pmap_clear_reference(PMAP_PGARG(p));
					pmap_clear_modify(PMAP_PGARG(p));
					p->flags |= PG_CLEAN;
					/* XXX: could free page here, but old
					 * pagedaemon does not */

				}
			}

			/*
			 * drop object lock (if there is an object left).   do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */

			if (anon)
				simple_unlock(&anon->an_lock);
			else if (uobj)
				simple_unlock(&uobj->vmobjlock);

		} /* if (p) */ else {

			/* if p is null in this loop, make sure it stays null
			 * in next loop */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}

		if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
			printf("pagedaemon: invalid nextpg!  reverting to "
			    "queue head\n");
			nextpg = pglst->tqh_first;	/* reload! */
		}

	}	/* end of "inactive" 'for' loop */
	return (retval);
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan()
{
	int s, free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	boolean_t got_it;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;		/* counter */

#ifdef __GNUC__
	uobj = NULL;	/* XXX gcc */
#endif
	/*
	 * get current "free" page count
	 */
	s = splimp();
	uvm_lock_fpageq();
	free = uvmexp.free;
	uvm_unlock_fpageq();
	splx(s);

#ifndef __SWAP_BROKEN
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {

		uvmexp.pdswout++;
		UVMHIST_LOG(pdhist,"  free %d < target %d: swapout", free,
		    uvmexp.freetarg, 0, 0);
		uvm_unlock_pageq();
		uvm_swapout_threads();
		pmap_update();		/* update so we can scan inactive q */
		uvm_lock_pageq();

	}
#endif

	/*
	 * now we want to work on meeting our targets.   first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	/*
	 * do loop #1!   alternate starting queue between swap and object based
	 * on the low bit of uvmexp.pdrevs (which we bump by one each call).
	 */

	got_it = FALSE;
	pages_freed = uvmexp.pdfreed;
	if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
	if (!got_it)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
	if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
		(void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
	pages_freed = uvmexp.pdfreed - pages_freed;
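
	/*
	 * note: uvmexp.pdfreed is a running counter bumped each time the
	 * inactive scan frees a page, so the difference taken here is the
	 * number of pages freed by this pass (used for the swap-shortage
	 * check below).
	 */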

	/*
	 * we have done the scan to get free pages.   now we work on meeting
	 * our inactive target.
	 */

	inactive_shortage = uvmexp.inactarg - uvmexp.inactive;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse == uvmexp.swpages &&
	    uvmexp.swpgonly < uvmexp.swpages &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}
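
	/*
	 * note: the conditions above mean we are still short of free pages,
	 * swap is completely allocated, some of that swap is backing pages
	 * that are still in core (swpgonly < swpages), and the inactive scan
	 * freed nothing -- so the loop below tries to reclaim swap slots from
	 * active pages to make pageout possible again.
	 */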

	UVMHIST_LOG(pdhist, "  loop 2: inactive_shortage=%d swap_shortage=%d",
	    inactive_shortage, swap_shortage,0,0);
	for (p = TAILQ_FIRST(&uvm.page_active);
	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	     p = nextpg) {
		nextpg = p->pageq.tqe_next;
		if (p->flags & PG_BUSY)
			continue;	/* quick check before trying to lock */

		/*
		 * lock the page's owner.
		 */
		/* is page anon owned or ownerless? */
		if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {

#ifdef DIAGNOSTIC
			if (p->uanon == NULL)
				panic("pagedaemon: page with no anon or "
				    "object detected - loop 2");
#endif
			if (!simple_lock_try(&p->uanon->an_lock))
				continue;

			/* take over the page? */
			if ((p->pqflags & PQ_ANON) == 0) {
#ifdef DIAGNOSTIC
				if (p->loan_count < 1)
					panic("pagedaemon: non-loaned "
					    "ownerless page detected - loop 2");
#endif
				p->loan_count--;
				p->pqflags |= PQ_ANON;
			}
		} else {
			if (!simple_lock_try(&p->uobject->vmobjlock))
				continue;
		}
		/*
		 * skip this page if it's busy.
		 */
		if ((p->flags & PG_BUSY) != 0) {
			if (p->pqflags & PQ_ANON)
				simple_unlock(&p->uanon->an_lock);
			else
				simple_unlock(&p->uobject->vmobjlock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				p->flags &= ~PG_CLEAN;
				swap_shortage--;
			}
			if (p->pqflags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					p->flags &= ~PG_CLEAN;
					swap_shortage--;
				}
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		if (p->pqflags & PQ_ANON)
			simple_unlock(&p->uanon->an_lock);
		else
			simple_unlock(&p->uobject->vmobjlock);
	}
}