1 /* $NetBSD: uvm_pdaemon.c,v 1.20 2000/06/26 14:21:18 mrg Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
42 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 #include "opt_uvmhist.h"
70
71 /*
72 * uvm_pdaemon.c: the page daemon
73 */
74
75 #include <sys/param.h>
76 #include <sys/proc.h>
77 #include <sys/systm.h>
78 #include <sys/kernel.h>
79 #include <sys/pool.h>
80
81 #include <vm/vm.h>
82
83 #include <uvm/uvm.h>
84
85 /*
86 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
87 * in a pass thru the inactive list when swap is full. the value should be
88 * "small"... if it's too large we'll cycle the active pages thru the inactive
89 * queue too quickly for them to be referenced and avoid being freed.
90 */
91
92 #define UVMPD_NUMDIRTYREACTS 16
93
94
95 /*
96 * local prototypes
97 */
98
99 static void uvmpd_scan __P((void));
100 static boolean_t uvmpd_scan_inactive __P((struct pglist *));
101 static void uvmpd_tune __P((void));
102
103
104 /*
105 * uvm_wait: wait (sleep) for the page daemon to free some pages
106 *
107 * => should be called with all locks released
108 * => should _not_ be called by the page daemon (to avoid deadlock)
109 */
110
111 void
112 uvm_wait(wmsg)
113 const char *wmsg;
114 {
115 int timo = 0;
116 int s = splbio();
117
118 /*
119 * check for page daemon going to sleep (waiting for itself)
120 */
121
122 if (curproc == uvm.pagedaemon_proc) {
123 /*
124 * now we have a problem: the pagedaemon wants to go to
125 * sleep until it frees more memory. but how can it
126 * free more memory if it is asleep? that is a deadlock.
127 * we have two options:
128 * [1] panic now
129 * [2] put a timeout on the sleep, thus causing the
130 * pagedaemon to only pause (rather than sleep forever)
131 *
132 * note that option [2] will only help us if we get lucky
133 * and some other process on the system breaks the deadlock
134 * by exiting or freeing memory (thus allowing the pagedaemon
135 * to continue). for now we panic if DEBUG is defined,
136 * otherwise we hope for the best with option [2] (better
137 * yet, this should never happen in the first place!).
138 */
139
140 printf("pagedaemon: deadlock detected!\n");
141 timo = hz >> 3; /* set timeout */
142 #if defined(DEBUG)
143 /* DEBUG: panic so we can debug it */
144 panic("pagedaemon deadlock");
145 #endif
146 }
147
148 simple_lock(&uvm.pagedaemon_lock);
149 wakeup(&uvm.pagedaemon); /* wake the daemon! */
150 UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
151 timo);
152
153 splx(s);
154 }
155
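/*
 * Illustrative use of uvm_wait() (a sketch, not compiled): the retry loop
 * a page allocator might run when it comes up empty.  The helper names
 * below are hypothetical; only uvm_wait() itself is from this file.  Per
 * the rules above, the caller holds no locks and must not be the
 * pagedaemon itself.
 */
#if 0
static void
example_alloc_retry(void)
{
        struct vm_page *pg;

        while ((pg = example_try_alloc()) == NULL) {    /* hypothetical */
                /*
                 * no free pages: wake the pagedaemon and sleep until it
                 * has freed some memory, then retry the allocation.
                 */
                uvm_wait("examplewt");
        }
        example_use_page(pg);                           /* hypothetical */
}
#endif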
156
157 /*
158 * uvmpd_tune: tune paging parameters
159 *
160 * => called whenever memory is added to (or removed from?) the system
161 * => caller must call with page queues locked
162 */
163
164 static void
165 uvmpd_tune()
166 {
167 UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
168
169 uvmexp.freemin = uvmexp.npages / 20;
170
171 /* between 16k and 256k */
172 /* XXX: what are these values good for? */
173 uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
174 uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT);
175
176 uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
177 if (uvmexp.freetarg <= uvmexp.freemin)
178 uvmexp.freetarg = uvmexp.freemin + 1;
179
180 /* uvmexp.inactarg: computed in main daemon loop */
181
182 uvmexp.wiredmax = uvmexp.npages / 3;
183 UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
184 uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
185 }
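
/*
 * worked example of the tuning above (a sketch assuming 4 KB pages, i.e.
 * PAGE_SHIFT == 12, on a hypothetical machine with 64 MB of managed RAM):
 *
 *   npages   = 64 MB / 4 KB = 16384
 *   freemin  = 16384 / 20 = 819, clamped to [(16*1024)>>12, (256*1024)>>12]
 *              = [4, 64] pages                          => freemin  = 64
 *   freetarg = (64 * 4) / 3 = 85  (already > freemin)   => freetarg = 85
 *   wiredmax = 16384 / 3                                 => wiredmax = 5461
 */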
186
187 /*
188 * uvm_pageout: the main loop for the pagedaemon
189 */
190
191 void
192 uvm_pageout()
193 {
194 int npages = 0;
195 int s;
196 struct uvm_aiodesc *aio, *nextaio;
197 UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
198
199 UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
200
201 /*
202 * ensure correct priority and set paging parameters...
203 */
204
205 uvm.pagedaemon_proc = curproc;
206 (void) spl0();
207 uvm_lock_pageq();
208 npages = uvmexp.npages;
209 uvmpd_tune();
210 uvm_unlock_pageq();
211
212 /*
213 * main loop
214 */
215 while (TRUE) {
216
217 /*
218 * carefully attempt to go to sleep (without losing "wakeups"!).
219 * we need splbio because we want to make sure the aio_done list
220 * is totally empty before we go to sleep.
221 */
222
223 s = splbio();
224 simple_lock(&uvm.pagedaemon_lock);
225
226 /*
227 * if we've got done aio's, then bypass the sleep
228 */
229
230 if (uvm.aio_done.tqh_first == NULL) {
231 UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
232 UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
233 &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
234 uvmexp.pdwoke++;
235 UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
236
237 /* relock pagedaemon_lock, still at splbio */
238 simple_lock(&uvm.pagedaemon_lock);
239 }
240
241 /*
242 * check for done aio structures
243 */
244
245 aio = uvm.aio_done.tqh_first; /* save current list (if any)*/
246 if (aio) {
247 TAILQ_INIT(&uvm.aio_done); /* zero global list */
248 }
249
250 simple_unlock(&uvm.pagedaemon_lock); /* unlock */
251 splx(s); /* drop splbio */
252
253 /*
254 * first clear out any pending aios (to free space in case we
255 * want to pageout more stuff).
256 */
257
258 for (/*null*/; aio != NULL ; aio = nextaio) {
259
260 uvmexp.paging -= aio->npages;
261 nextaio = aio->aioq.tqe_next;
262 aio->aiodone(aio);
263
264 }
265
266 /* Next, drain pool resources */
267 pool_drain(0);
268
269 /*
270 * now lock page queues and recompute inactive count
271 */
272 uvm_lock_pageq();
273
274 if (npages != uvmexp.npages) { /* check for new pages? */
275 npages = uvmexp.npages;
276 uvmpd_tune();
277 }
278
279 uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
280 if (uvmexp.inactarg <= uvmexp.freetarg)
281 uvmexp.inactarg = uvmexp.freetarg + 1;
282
283 UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d",
284 uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
285 uvmexp.inactarg);
286
287 /*
288 * scan if needed
289 * [XXX: note we are reading uvmexp.free without locking]
290 */
291 if (uvmexp.free < uvmexp.freetarg ||
292 uvmexp.inactive < uvmexp.inactarg)
293 uvmpd_scan();
294
295 /*
296 * done scan. unlock page queues (the only lock we are holding)
297 */
298 uvm_unlock_pageq();
299
300 /*
301 * done! restart loop.
302 */
303 if (uvmexp.free > uvmexp.reserve_kernel ||
304 uvmexp.paging == 0)
305 wakeup(&uvmexp.free);
306 }
307 /*NOTREACHED*/
308 }
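
/*
 * Illustrative sketch (not compiled) of the producer side of the
 * uvm.aio_done list drained in the main loop above: roughly what an async
 * pageout completion path might do to hand a finished uvm_aiodesc back to
 * the pagedaemon.  The splbio() context and the descriptor setup are
 * assumptions; the real code lives in the pager modules.
 */
#if 0
static void
example_aio_complete(struct uvm_aiodesc *aio)
{
        int s;

        s = splbio();
        simple_lock(&uvm.pagedaemon_lock);
        /* queue it; the daemon will call aio->aiodone(aio) and subtract
         * aio->npages from uvmexp.paging when it wakes up */
        TAILQ_INSERT_TAIL(&uvm.aio_done, aio, aioq);
        wakeup(&uvm.pagedaemon);
        simple_unlock(&uvm.pagedaemon_lock);
        splx(s);
}
#endif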
309
310 /*
311 * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
312 * its own function for ease of reading.
313 *
314 * => called with page queues locked
315 * => we work on meeting our free target by converting inactive pages
316 * into free pages.
317 * => we handle the building of swap-backed clusters
318 * => we return TRUE if we are exiting because we met our target
319 */
320
321 static boolean_t
322 uvmpd_scan_inactive(pglst)
323 struct pglist *pglst;
324 {
325 boolean_t retval = FALSE; /* assume we haven't hit target */
326 int s, free, result;
327 struct vm_page *p, *nextpg;
328 struct uvm_object *uobj;
329 struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
330 int npages;
331 struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */
332 int swnpages, swcpages; /* XXX: see below */
333 int swslot;
334 struct vm_anon *anon;
335 boolean_t swap_backed;
336 vaddr_t start;
337 int dirtyreacts;
338 UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
339
340 /*
341 * note: we currently keep swap-backed pages on a separate inactive
342 * list from object-backed pages. however, merging the two lists
343 * back together again hasn't been ruled out. thus, we keep our
344 * swap cluster in "swpps" rather than in pps (allows us to mix
345 * clustering types in the event of a mixed inactive queue).
346 */
347
348 /*
349 * swslot is non-zero if we are building a swap cluster. we want
350 * to stay in the loop while we have a page to scan or we have
351 * a swap-cluster to build.
352 */
353 swslot = 0;
354 swnpages = swcpages = 0;
355 free = 0;
356 dirtyreacts = 0;
357
358 for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {
359
360 /*
361 * note that p can be NULL iff we have traversed the whole
362 * list and need to do one final swap-backed clustered pageout.
363 */
364 if (p) {
365 /*
366 * update our copy of "free" and see if we've met
367 * our target
368 */
369 s = uvm_lock_fpageq();
370 free = uvmexp.free;
371 uvm_unlock_fpageq(s);
372
373 if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
374 dirtyreacts == UVMPD_NUMDIRTYREACTS) {
375 UVMHIST_LOG(pdhist," met free target: "
376 "exit loop", 0, 0, 0, 0);
377 retval = TRUE; /* hit the target! */
378
379 if (swslot == 0)
380 /* exit now if no swap-i/o pending */
381 break;
382
383 /* set p to null to signal final swap i/o */
384 p = NULL;
385 }
386 }
387
388 uobj = NULL; /* be safe and shut gcc up */
389 anon = NULL; /* be safe and shut gcc up */
390
391 if (p) { /* if (we have a new page to consider) */
392 /*
393 * we are below target and have a new page to consider.
394 */
395 uvmexp.pdscans++;
396 nextpg = p->pageq.tqe_next;
397
398 /*
399 * move referenced pages back to active queue and
400 * skip to next page (unlikely to happen since
401 * inactive pages shouldn't have any valid mappings
402 * and we cleared reference before deactivating).
403 */
404 if (pmap_is_referenced(p)) {
405 uvm_pageactivate(p);
406 uvmexp.pdreact++;
407 continue;
408 }
409
410 /*
411 * first we attempt to lock the object that this page
412 * belongs to. if our attempt fails we skip on to
413 * the next page (no harm done). it is important to
414 * "try" locking the object as we are locking in the
415 * wrong order (pageq -> object) and we don't want to
416 * get deadlocked.
417 *
418 * the only time we expect to see an ownerless page
419 * (i.e. a page with no uobject and !PQ_ANON) is if an
420 * anon has loaned a page from a uvm_object and the
421 * uvm_object has dropped the ownership. in that
422 * case, the anon can "take over" the loaned page
423 * and make it its own.
424 */
425
426 /* is page part of an anon or ownerless ? */
427 if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
428
429 anon = p->uanon;
430
431 #ifdef DIAGNOSTIC
432 /* to be on inactive q, page must be part
433 * of _something_ */
434 if (anon == NULL)
435 panic("pagedaemon: page with no anon "
436 "or object detected - loop 1");
437 #endif
438
439 if (!simple_lock_try(&anon->an_lock))
440 /* lock failed, skip this page */
441 continue;
442
443 /*
444 * if the page is ownerless, claim it in the
445 * name of "anon"!
446 */
447 if ((p->pqflags & PQ_ANON) == 0) {
448 #ifdef DIAGNOSTIC
449 if (p->loan_count < 1)
450 panic("pagedaemon: non-loaned "
451 "ownerless page detected -"
452 " loop 1");
453 #endif
454 p->loan_count--;
455 p->pqflags |= PQ_ANON; /* anon now owns it */
456 }
457
458 if (p->flags & PG_BUSY) {
459 simple_unlock(&anon->an_lock);
460 uvmexp.pdbusy++;
461 /* someone else owns page, skip it */
462 continue;
463 }
464
465 uvmexp.pdanscan++;
466
467 } else {
468
469 uobj = p->uobject;
470
471 if (!simple_lock_try(&uobj->vmobjlock))
472 /* lock failed, skip this page */
473 continue;
474
475 if (p->flags & PG_BUSY) {
476 simple_unlock(&uobj->vmobjlock);
477 uvmexp.pdbusy++;
478 /* someone else owns page, skip it */
479 continue;
480 }
481
482 uvmexp.pdobscan++;
483 }
484
485 /*
486 * we now have the object and the page queues locked.
487 * the page is not busy. if the page is clean we
488 * can free it now and continue.
489 */
490
491 if (p->flags & PG_CLEAN) {
492 if (p->pqflags & PQ_SWAPBACKED) {
493 /* this page now lives only in swap */
494 simple_lock(&uvm.swap_data_lock);
495 uvmexp.swpgonly++;
496 simple_unlock(&uvm.swap_data_lock);
497 }
498
499 /* zap all mappings with pmap_page_protect... */
500 pmap_page_protect(p, VM_PROT_NONE);
501 uvm_pagefree(p);
502 uvmexp.pdfreed++;
503
504 if (anon) {
505 #ifdef DIAGNOSTIC
506 /*
507 * an anonymous page can only be clean
508 * if it has valid backing store.
509 */
510 if (anon->an_swslot == 0)
511 panic("pagedaemon: clean anon "
512 "page without backing store?");
513 #endif
514 /* remove from object */
515 anon->u.an_page = NULL;
516 simple_unlock(&anon->an_lock);
517 } else {
518 /* pagefree has already removed the
519 * page from the object */
520 simple_unlock(&uobj->vmobjlock);
521 }
522 continue;
523 }
524
525 /*
526 * this page is dirty, skip it if we'll have met our
527 * free target when all the current pageouts complete.
528 */
529 if (free + uvmexp.paging > uvmexp.freetarg << 2) {
530 if (anon) {
531 simple_unlock(&anon->an_lock);
532 } else {
533 simple_unlock(&uobj->vmobjlock);
534 }
535 continue;
536 }
537
538 /*
539 * this page is swap-backed and dirty, but swap is full of
540 * swap-only pages, so no swap space can be freed for it.
541 * reactivate it so that we eventually cycle
542 * all pages thru the inactive queue.
543 */
544 #ifdef DIAGNOSTIC
545 if (uvmexp.swpgonly > uvmexp.swpages) {
546 panic("uvmexp.swpgonly botch");
547 }
548 #endif
549 if ((p->pqflags & PQ_SWAPBACKED) &&
550 uvmexp.swpgonly == uvmexp.swpages) {
551 dirtyreacts++;
552 uvm_pageactivate(p);
553 if (anon) {
554 simple_unlock(&anon->an_lock);
555 } else {
556 simple_unlock(&uobj->vmobjlock);
557 }
558 continue;
559 }
560
561 /*
562 * if the page is swap-backed and dirty and swap space
563 * is full, free any swap allocated to the page
564 * so that other pages can be paged out.
565 */
566 #ifdef DIAGNOSTIC
567 if (uvmexp.swpginuse > uvmexp.swpages) {
568 panic("uvmexp.swpginuse botch");
569 }
570 #endif
571 if ((p->pqflags & PQ_SWAPBACKED) &&
572 uvmexp.swpginuse == uvmexp.swpages) {
573
574 if ((p->pqflags & PQ_ANON) &&
575 p->uanon->an_swslot) {
576 uvm_swap_free(p->uanon->an_swslot, 1);
577 p->uanon->an_swslot = 0;
578 }
579 if (p->pqflags & PQ_AOBJ) {
580 uao_dropswap(p->uobject,
581 p->offset >> PAGE_SHIFT);
582 }
583 }
584
585 /*
586 * the page we are looking at is dirty. we must
587 * clean it before it can be freed. to do this we
588 * first mark the page busy so that no one else will
589 * touch the page. we write protect all the mappings
590 * of the page so that no one touches it while it is
591 * in I/O.
592 */
593
594 swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
595 p->flags |= PG_BUSY; /* now we own it */
596 UVM_PAGE_OWN(p, "scan_inactive");
597 pmap_page_protect(p, VM_PROT_READ);
598 uvmexp.pgswapout++;
599
600 /*
601 * for swap-backed pages we need to (re)allocate
602 * swap space.
603 */
604 if (swap_backed) {
605
606 /*
607 * free old swap slot (if any)
608 */
609 if (anon) {
610 if (anon->an_swslot) {
611 uvm_swap_free(anon->an_swslot,
612 1);
613 anon->an_swslot = 0;
614 }
615 } else {
616 uao_dropswap(uobj,
617 p->offset >> PAGE_SHIFT);
618 }
619
620 /*
621 * start new cluster (if necessary)
622 */
623 if (swslot == 0) {
624 /* want this much */
625 swnpages = MAXBSIZE >> PAGE_SHIFT;
626
627 swslot = uvm_swap_alloc(&swnpages,
628 TRUE);
629
630 if (swslot == 0) {
631 /* no swap? give up! */
632 p->flags &= ~PG_BUSY;
633 UVM_PAGE_OWN(p, NULL);
634 if (anon)
635 simple_unlock(
636 &anon->an_lock);
637 else
638 simple_unlock(
639 &uobj->vmobjlock);
640 continue;
641 }
642 swcpages = 0; /* cluster is empty */
643 }
644
645 /*
646 * add block to cluster
647 */
648 swpps[swcpages] = p;
649 if (anon)
650 anon->an_swslot = swslot + swcpages;
651 else
652 uao_set_swslot(uobj,
653 p->offset >> PAGE_SHIFT,
654 swslot + swcpages);
655 swcpages++;
656
657 /* done (swap-backed) */
658 }
659
660 /* end: if (p) ["if we have new page to consider"] */
661 } else {
662
663 /* if p == NULL we must be doing a last swap i/o */
664 swap_backed = TRUE;
665 }
666
667 /*
668 * now consider doing the pageout.
669 *
670 * for swap-backed pages, we do the pageout if we have either
671 * filled the cluster (in which case swnpages == swcpages) or
672 * run out of pages (p == NULL).
673 *
674 * for object pages, we always do the pageout.
675 */
676 if (swap_backed) {
677
678 if (p) { /* if we just added a page to cluster */
679 if (anon)
680 simple_unlock(&anon->an_lock);
681 else
682 simple_unlock(&uobj->vmobjlock);
683
684 /* cluster not full yet? */
685 if (swcpages < swnpages)
686 continue;
687 }
688
689 /* starting I/O now... set up for it */
690 npages = swcpages;
691 ppsp = swpps;
692 /* for swap-backed pages only */
693 start = (vaddr_t) swslot;
694
695 /* if this is final pageout we could have a few
696 * extra swap blocks */
697 if (swcpages < swnpages) {
698 uvm_swap_free(swslot + swcpages,
699 (swnpages - swcpages));
700 }
701
702 } else {
703
704 /* normal object pageout */
705 ppsp = pps;
706 npages = sizeof(pps) / sizeof(struct vm_page *);
707 /* not looked at because PGO_ALLPAGES is set */
708 start = 0;
709
710 }
711
712 /*
713 * now do the pageout.
714 *
715 * for swap_backed pages we have already built the cluster.
716 * for !swap_backed pages, uvm_pager_put will call the object's
717 * "make put cluster" function to build a cluster on our behalf.
718 *
719 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
720 * it to free the cluster pages for us on a successful I/O (it
721 * always does this for un-successful I/O requests). this
722 * allows us to do clustered pageout without having to deal
723 * with cluster pages at this level.
724 *
725 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
726 * IN: locked: uobj (if !swap_backed), page queues
727 * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND)
728 * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND)
729 *
730 * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
731 */
732
733 /* locked: uobj (if !swap_backed), page queues */
734 uvmexp.pdpageouts++;
735 result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
736 &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
737 /* locked: uobj (if !swap_backed && result != PEND) */
738 /* unlocked: pageqs, object (if swap_backed ||result == PEND) */
739
740 /*
741 * if we did i/o to swap, zero swslot to indicate that we are
742 * no longer building a swap-backed cluster.
743 */
744
745 if (swap_backed)
746 swslot = 0; /* done with this cluster */
747
748 /*
749 * first, we check for VM_PAGER_PEND which means that the
750 * async I/O is in progress and the async I/O done routine
751 * will clean up after us. in this case we move on to the
752 * next page.
753 *
754 * there is a very remote chance that the pending async i/o can
755 * finish _before_ we get here. if that happens, our page "p"
756 * may no longer be on the inactive queue. so we verify this
757 * when determining the next page (starting over at the head if
758 * we've lost our inactive page).
759 */
760
761 if (result == VM_PAGER_PEND) {
762 uvmexp.paging += npages;
763 uvm_lock_pageq(); /* relock page queues */
764 uvmexp.pdpending++;
765 if (p) {
766 if (p->pqflags & PQ_INACTIVE)
767 /* reload! */
768 nextpg = p->pageq.tqe_next;
769 else
770 /* reload! */
771 nextpg = pglst->tqh_first;
772 } else {
773 nextpg = NULL; /* done list */
774 }
775 continue;
776 }
777
778 /*
779 * clean up "p" if we have one
780 */
781
782 if (p) {
783 /*
784 * the I/O request to "p" is done and uvm_pager_put
785 * has freed any cluster pages it may have allocated
786 * during I/O. all that is left for us to do is
787 * clean up page "p" (which is still PG_BUSY).
788 *
789 * our result could be one of the following:
790 * VM_PAGER_OK: successful pageout
791 *
792 * VM_PAGER_AGAIN: tmp resource shortage, we skip
793 * to next page
794 * VM_PAGER_{FAIL,ERROR,BAD}: an error. we
795 * "reactivate" page to get it out of the way (it
796 * will eventually drift back into the inactive
797 * queue for a retry).
798 * VM_PAGER_UNLOCK: should never see this as it is
799 * only valid for "get" operations
800 */
801
802 /* relock p's object: page queues not locked yet, so
803 * no need for "try" */
804
805 /* !swap_backed case: already locked... */
806 if (swap_backed) {
807 if (anon)
808 simple_lock(&anon->an_lock);
809 else
810 simple_lock(&uobj->vmobjlock);
811 }
812
813 #ifdef DIAGNOSTIC
814 if (result == VM_PAGER_UNLOCK)
815 panic("pagedaemon: pageout returned "
816 "invalid 'unlock' code");
817 #endif
818
819 /* handle PG_WANTED now */
820 if (p->flags & PG_WANTED)
821 /* still holding object lock */
822 wakeup(p);
823
824 p->flags &= ~(PG_BUSY|PG_WANTED);
825 UVM_PAGE_OWN(p, NULL);
826
827 /* released during I/O? */
828 if (p->flags & PG_RELEASED) {
829 if (anon) {
830 /* remove page so we can get nextpg */
831 anon->u.an_page = NULL;
832
833 simple_unlock(&anon->an_lock);
834 uvm_anfree(anon); /* kills anon */
835 pmap_page_protect(p, VM_PROT_NONE);
836 anon = NULL;
837 uvm_lock_pageq();
838 nextpg = p->pageq.tqe_next;
839 /* free released page */
840 uvm_pagefree(p);
841
842 } else {
843
844 #ifdef DIAGNOSTIC
845 if (uobj->pgops->pgo_releasepg == NULL)
846 panic("pagedaemon: no "
847 "pgo_releasepg function");
848 #endif
849
850 /*
851 * pgo_releasepg nukes the page and
852 * gets "nextpg" for us. it returns
853 * with the page queues locked (when
854 * given nextpg ptr).
855 */
856 if (!uobj->pgops->pgo_releasepg(p,
857 &nextpg))
858 /* uobj died after release */
859 uobj = NULL;
860
861 /*
862 * lock page queues here so that they're
863 * always locked at the end of the loop.
864 */
865 uvm_lock_pageq();
866 }
867
868 } else { /* page was not released during I/O */
869
870 uvm_lock_pageq();
871 nextpg = p->pageq.tqe_next;
872
873 if (result != VM_PAGER_OK) {
874
875 /* pageout was a failure... */
876 if (result != VM_PAGER_AGAIN)
877 uvm_pageactivate(p);
878 pmap_clear_reference(p);
879 /* XXXCDC: if (swap_backed) FREE p's
880 * swap block? */
881
882 } else {
883
884 /* pageout was a success... */
885 pmap_clear_reference(p);
886 pmap_clear_modify(p);
887 p->flags |= PG_CLEAN;
888 /* XXX: could free page here, but old
889 * pagedaemon does not */
890
891 }
892 }
893
894 /*
895 * drop object lock (if there is an object left). do
896 * a safety check of nextpg to make sure it is on the
897 * inactive queue (it should be since PG_BUSY pages on
898 * the inactive queue can't be re-queued [note: not
899 * true for active queue]).
900 */
901
902 if (anon)
903 simple_unlock(&anon->an_lock);
904 else if (uobj)
905 simple_unlock(&uobj->vmobjlock);
906
907 } /* if (p) */ else {
908
909 /* if p is null in this loop, make sure it stays null
910 * in next loop */
911 nextpg = NULL;
912
913 /*
914 * lock page queues here just so they're always locked
915 * at the end of the loop.
916 */
917 uvm_lock_pageq();
918 }
919
920 if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
921 printf("pagedaemon: invalid nextpg! reverting to "
922 "queue head\n");
923 nextpg = pglst->tqh_first; /* reload! */
924 }
925
926 } /* end of "inactive" 'for' loop */
927 return (retval);
928 }
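
/*
 * worked example of the swap clustering above (a sketch assuming 4 KB pages
 * and a 64 KB MAXBSIZE, so a cluster holds at most 16 pages):
 *
 *   - a new cluster asks uvm_swap_alloc() for swnpages = 16 contiguous
 *     slots; suppose it returns swslot = 1000.
 *   - each dirty swap-backed page appended to swpps[] is assigned slot
 *     swslot + swcpages, i.e. 1000, 1001, ... as swcpages grows.
 *   - when the cluster is pushed out, the "start" passed to uvm_pager_put()
 *     is the slot number (1000) cast to vaddr_t; if this was the final
 *     pageout and only 10 pages were gathered, the unused tail is returned
 *     with uvm_swap_free(1000 + 10, 16 - 10).
 */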
929
930 /*
931 * uvmpd_scan: scan the page queues and attempt to meet our targets.
932 *
933 * => called with pageq's locked
934 */
935
936 void
937 uvmpd_scan()
938 {
939 int s, free, inactive_shortage, swap_shortage, pages_freed;
940 struct vm_page *p, *nextpg;
941 struct uvm_object *uobj;
942 boolean_t got_it;
943 UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
944
945 uvmexp.pdrevs++; /* counter */
946
947 #ifdef __GNUC__
948 uobj = NULL; /* XXX gcc */
949 #endif
950 /*
951 * get current "free" page count
952 */
953 s = uvm_lock_fpageq();
954 free = uvmexp.free;
955 uvm_unlock_fpageq(s);
956
957 #ifndef __SWAP_BROKEN
958 /*
959 * swap out some processes if we are below our free target.
960 * we need to unlock the page queues for this.
961 */
962 if (free < uvmexp.freetarg) {
963
964 uvmexp.pdswout++;
965 UVMHIST_LOG(pdhist," free %d < target %d: swapout", free,
966 uvmexp.freetarg, 0, 0);
967 uvm_unlock_pageq();
968 uvm_swapout_threads();
969 pmap_update(); /* update so we can scan inactive q */
970 uvm_lock_pageq();
971
972 }
973 #endif
974
975 /*
976 * now we want to work on meeting our targets. first we work on our
977 * free target by converting inactive pages into free pages. then
978 * we work on meeting our inactive target by converting active pages
979 * to inactive ones.
980 */
981
982 UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
983
984 /*
985 * do loop #1! alternate starting queue between swap and object based
986 * on the low bit of uvmexp.pdrevs (which we bump by one each call).
987 */
988
989 got_it = FALSE;
990 pages_freed = uvmexp.pdfreed;
991 if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
992 got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
993 if (!got_it)
994 got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
995 if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
996 (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
997 pages_freed = uvmexp.pdfreed - pages_freed;
998
999 /*
1000 * we have done the scan to get free pages. now we work on meeting
1001 * our inactive target.
1002 */
1003
1004 inactive_shortage = uvmexp.inactarg - uvmexp.inactive;
1005
1006 /*
1007 * detect if we're not going to be able to page anything out
1008 * until we free some swap resources from active pages.
1009 */
1010 swap_shortage = 0;
1011 if (uvmexp.free < uvmexp.freetarg &&
1012 uvmexp.swpginuse == uvmexp.swpages &&
1013 uvmexp.swpgonly < uvmexp.swpages &&
1014 pages_freed == 0) {
1015 swap_shortage = uvmexp.freetarg - uvmexp.free;
1016 }
1017
1018 UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%d swap_shortage=%d",
1019 inactive_shortage, swap_shortage,0,0);
1020 for (p = TAILQ_FIRST(&uvm.page_active);
1021 p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
1022 p = nextpg) {
1023 nextpg = p->pageq.tqe_next;
1024 if (p->flags & PG_BUSY)
1025 continue; /* quick check before trying to lock */
1026
1027 /*
1028 * lock the page's owner.
1029 */
1030 /* is page anon owned or ownerless? */
1031 if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
1032
1033 #ifdef DIAGNOSTIC
1034 if (p->uanon == NULL)
1035 panic("pagedaemon: page with no anon or "
1036 "object detected - loop 2");
1037 #endif
1038 if (!simple_lock_try(&p->uanon->an_lock))
1039 continue;
1040
1041 /* take over the page? */
1042 if ((p->pqflags & PQ_ANON) == 0) {
1043 #ifdef DIAGNOSTIC
1044 if (p->loan_count < 1)
1045 panic("pagedaemon: non-loaned "
1046 "ownerless page detected - loop 2");
1047 #endif
1048 p->loan_count--;
1049 p->pqflags |= PQ_ANON;
1050 }
1051 } else {
1052 if (!simple_lock_try(&p->uobject->vmobjlock))
1053 continue;
1054 }
1055 /*
1056 * skip this page if it's busy.
1057 */
1058 if ((p->flags & PG_BUSY) != 0) {
1059 if (p->pqflags & PQ_ANON)
1060 simple_unlock(&p->uanon->an_lock);
1061 else
1062 simple_unlock(&p->uobject->vmobjlock);
1063 continue;
1064 }
1065
1066 /*
1067 * if there's a shortage of swap, free any swap allocated
1068 * to this page so that other pages can be paged out.
1069 */
1070 if (swap_shortage > 0) {
1071 if ((p->pqflags & PQ_ANON) && p->uanon->an_swslot) {
1072 uvm_swap_free(p->uanon->an_swslot, 1);
1073 p->uanon->an_swslot = 0;
1074 p->flags &= ~PG_CLEAN;
1075 swap_shortage--;
1076 }
1077 if (p->pqflags & PQ_AOBJ) {
1078 int slot = uao_set_swslot(p->uobject,
1079 p->offset >> PAGE_SHIFT, 0);
1080 if (slot) {
1081 uvm_swap_free(slot, 1);
1082 p->flags &= ~PG_CLEAN;
1083 swap_shortage--;
1084 }
1085 }
1086 }
1087
1088 /*
1089 * deactivate this page if there's a shortage of
1090 * inactive pages.
1091 */
1092 if (inactive_shortage > 0) {
1093 pmap_page_protect(p, VM_PROT_NONE);
1094 /* no need to check wire_count as pg is "active" */
1095 uvm_pagedeactivate(p);
1096 uvmexp.pddeact++;
1097 inactive_shortage--;
1098 }
1099
1100 if (p->pqflags & PQ_ANON)
1101 simple_unlock(&p->uanon->an_lock);
1102 else
1103 simple_unlock(&p->uobject->vmobjlock);
1104 }
1105 }
1106
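/*
 * worked example of the loop 2 targets above (a sketch with made-up
 * numbers): suppose freetarg = 85 but free = 40, the inactive scan freed
 * nothing (pages_freed == 0), and swpginuse == swpages while swpgonly <
 * swpages (swap is full, but some slots still back resident pages).  then
 * swap_shortage = 85 - 40 = 45, and loop 2 walks the active queue freeing
 * swap slots from up to 45 resident pages (clearing PG_CLEAN so they are
 * written out again later) to give future passes swap space to page out to.
 * independently, if inactarg = 300 and inactive = 250, inactive_shortage =
 * 50, so up to 50 active pages are deactivated on the same walk.
 */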