/*	$NetBSD: uvm_pdaemon.c,v 1.101 2010/06/02 15:48:49 pooka Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.101 2010/06/02 15:48:49 pooka Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

#define	UVMPD_NUMTRYLOCKOWNER	16

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);

static unsigned int uvm_pagedaemon_waiters;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

static kmutex_t uvm_reclaim_lock;

SLIST_HEAD(uvm_reclaim_hooks, uvm_reclaim_hook) uvm_reclaim_list;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
}
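
/*
 * Illustrative sketch (not part of this file): a typical caller pattern for
 * uvm_wait().  An allocator that fails to get a page and cannot simply give
 * up drops its locks, waits for the pagedaemon, and retries.  The function
 * name example_alloc_retry() and the wait message are hypothetical.
 *
 *	struct vm_page *
 *	example_alloc_retry(struct uvm_object *uobj, voff_t off)
 *	{
 *		struct vm_page *pg;
 *
 *		for (;;) {
 *			mutex_enter(&uobj->vmobjlock);
 *			pg = uvm_pagealloc(uobj, off, NULL, 0);
 *			mutex_exit(&uobj->vmobjlock);
 *			if (pg != NULL)
 *				return pg;
 *			// no locks held here, as uvm_wait() requires
 *			uvm_wait("exalloc");
 *		}
 *	}
 */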

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 *
 * => called with uvm_fpageqlock held.
 */

void
uvm_kick_pdaemon(void)
{

	KASSERT(mutex_owned(&uvm_fpageqlock));

	if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p())) {
		wakeup(&uvm.pagedaemon);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}
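
/*
 * Worked example of the tuning above (illustrative only, assuming 4 KB
 * pages, 1 GB of managed RAM, 2 CPUs, uvm_extrapages == 0 and freemin
 * above uvmexp.reserve_kernel):
 *
 *	npages   = 262144
 *	val      = 262144 / 200 = 1310			(0.5% of RAM)
 *	val      = MIN(MAX(1310, 32), 256) = 256	(clamped to 1024k/CPU)
 *	freemin  = 256 * 2 = 512 pages			(2 MB)
 *	freetarg = (512 * 4) / 3 = 682 pages		(~2.7 MB)
 */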

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int bufcnt, npages = 0;
	int extrapages = 0;
	struct pool *pp;
	uint64_t where;
	struct uvm_reclaim_hook *hook;

	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	mutex_enter(&uvm_pageqlock);
	npages = uvmexp.npages;
	uvmpd_tune();
	mutex_exit(&uvm_pageqlock);

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree;

		mutex_spin_enter(&uvm_fpageqlock);
		if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm_fpageqlock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvm_fpageqlock);
		}

		/*
		 * now lock the page queues and retune the paging
		 * parameters if the amount of managed memory has changed.
		 */

		mutex_enter(&uvm_pageqlock);
		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			mutex_spin_enter(&uvm_fpageqlock);
			uvmpd_tune();
			mutex_spin_exit(&uvm_fpageqlock);
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint for how much buffer memory to release.
		 * Note that buffer memory is returned to the system only
		 * when an entire pool page is empty.
		 */
		mutex_spin_enter(&uvm_fpageqlock);
		bufcnt = uvmexp.freetarg - uvmexp.free;
		if (bufcnt < 0)
			bufcnt = 0;

		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, 0,0);

		needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			mutex_spin_exit(&uvm_fpageqlock);
			uvmpd_scan();
			mutex_spin_enter(&uvm_fpageqlock);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
		}
		mutex_spin_exit(&uvm_fpageqlock);

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		mutex_exit(&uvm_pageqlock);

		/*
		 * if we don't need free memory, we're done.
		 */

		if (!needsfree)
			continue;

		/*
		 * start draining pool resources now that we're not
		 * holding any locks.
		 */
		pool_drain_start(&pp, &where);

		/*
		 * kill unused metadata buffers.
		 */
		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		mutex_enter(&uvm_reclaim_lock);
		SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
			(*hook->uvm_reclaim_hook)();
		}
		mutex_exit(&uvm_reclaim_lock);

		/*
		 * complete draining the pools.
		 */
		pool_drain_end(pp, where);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
 */

void
uvm_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (void *)wk;

	KASSERT(&bp->b_work == wk);

	/*
	 * process an i/o that's done.
	 */

	(*bp->b_iodone)(bp);
}

void
uvm_pageout_start(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	uvmexp.paging += npages;
	mutex_spin_exit(&uvm_fpageqlock);
}

void
uvm_pageout_done(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	KASSERT(uvmexp.paging >= npages);
	uvmexp.paging -= npages;

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvm_fpageqlock);
}
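
/*
 * Illustrative sketch (not part of this file): how a pager might bracket an
 * asynchronous pageout with uvm_pageout_start()/uvm_pageout_done() so that
 * the uvmexp.paging accounting stays balanced.  The function names
 * example_pager_putpages() and example_io_start() are hypothetical.
 *
 *	int
 *	example_pager_putpages(struct vm_page **pgs, int npages)
 *	{
 *		int error;
 *
 *		uvm_pageout_start(npages);	// pages are now "in flight"
 *		error = example_io_start(pgs, npages);
 *		if (error) {
 *			// i/o never started: undo the accounting
 *			uvm_pageout_done(npages);
 *		}
 *		// on success, the i/o completion path (e.g. an aiodone
 *		// handler) is expected to call uvm_pageout_done(npages).
 *		return error;
 *	}
 */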

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with pageq locked.
 * => resolve orphaned O->A loaned page.
 * => return the locked mutex on success.  otherwise, return NULL.
 */

kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	kmutex_t *slock;

	KASSERT(mutex_owned(&uvm_pageqlock));

	if (uobj != NULL) {
		slock = &uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = &anon->an_lock;
	}

	if (!mutex_tryenter(slock)) {
		return NULL;
	}

	if (uobj == NULL) {

		/*
		 * set PQ_ANON if it isn't set already.
		 */

		if ((pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
			/* anon now owns it */
		}
	}

	return slock;
}
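
/*
 * Illustrative sketch (not part of this file): the caller-side pattern for
 * uvmpd_trylockowner().  Because the pagedaemon acquires locks in the
 * "wrong" order (pageq -> owner), it must only try-lock the owner and skip
 * the page on failure rather than block.  example_scan_one() is a
 * hypothetical name.
 *
 *	static void
 *	example_scan_one(struct vm_page *pg)
 *	{
 *		kmutex_t *slock;
 *
 *		KASSERT(mutex_owned(&uvm_pageqlock));
 *		slock = uvmpd_trylockowner(pg);
 *		if (slock == NULL) {
 *			// owner is busy; skip this page for now
 *			return;
 *		}
 *		if ((pg->flags & PG_BUSY) == 0) {
 *			// ... work on the page with its owner locked ...
 *		}
 *		mutex_exit(slock);
 *	}
 */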

#if defined(VMSWAP)
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/*
	 * Even with strange MAXPHYS, the shift
	 * implicitly rounds down to a page.
	 */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(mutex_owned(&pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(mutex_owned(&uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}
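
/*
 * Illustrative sketch (not part of this file): the intended life cycle of a
 * swapcluster as used by uvmpd_scan_queue() below.  Slots are allocated
 * lazily, dirty swap-backed pages are appended one at a time, and the
 * cluster is flushed either when it fills up or at the end of the scan.
 * The locking and page-busying done by the real scan loop is omitted here;
 * example_pageout_dirty() is a hypothetical name.
 *
 *	static void
 *	example_pageout_dirty(struct vm_page **pgs, int npages)
 *	{
 *		struct swapcluster swc;
 *		int i;
 *
 *		swapcluster_init(&swc);
 *		for (i = 0; i < npages; i++) {
 *			if (swapcluster_allocslots(&swc) != 0)
 *				break;		// swap is full
 *			if (swapcluster_add(&swc, pgs[i]) != 0)
 *				continue;	// couldn't record the slot
 *			// opportunistic flush; only writes if the
 *			// cluster is completely full
 *			swapcluster_flush(&swc, false);
 *		}
 *		swapcluster_flush(&swc, true);	// final flush
 *	}
 */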

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

static bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		pg->flags &= ~PG_CLEAN;
		result = true;
	} else if (pg->pqflags & PQ_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			pg->flags &= ~PG_CLEAN;
			result = true;
		}
	}

	return result;
}

/*
 * uvmpd_trydropswap: try to free any swap allocated to this page.
 *
 * => return true if a slot is successfully freed.
 */

bool
uvmpd_trydropswap(struct vm_page *pg)
{
	kmutex_t *slock;
	bool result;

	if ((pg->flags & PG_BUSY) != 0) {
		return false;
	}

	/*
	 * lock the page's owner.
	 */

	slock = uvmpd_trylockowner(pg);
	if (slock == NULL) {
		return false;
	}

	/*
	 * skip this page if it's busy.
	 */

	if ((pg->flags & PG_BUSY) != 0) {
		mutex_exit(slock);
		return false;
	}

	result = uvmpd_dropswap(pg);

	mutex_exit(slock);

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan the replace-candidate list for pages
 * to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	int lockownerfail;
	kmutex_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swc.swc_slot is non-zero while we are building a swap cluster.
	 * we want to stay in the loop while we have a page to scan or we
	 * have a swap cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	lockownerfail = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvmexp.free + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		p = uvmpdpol_selectvictim();
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		/*
		 * first we attempt to lock the object that this page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PQ_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			/*
			 * yield the cpu to give the LWP holding the lock a
			 * chance to run.  otherwise we can busy-loop for too
			 * long if the page queue is filled with many pages
			 * from only a few objects.
			 */
			lockownerfail++;
			if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
				mutex_exit(&uvm_pageqlock);
				/* XXX Better than yielding but inadequate. */
				kpause("livelock", false, 1, NULL);
				mutex_enter(&uvm_pageqlock);
				lockownerfail = 0;
			}
			continue;
		}
		if (p->flags & PG_BUSY) {
			mutex_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}


		/*
		 * we now have the object and the page queues locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->pqflags & PQ_READAHEAD) != 0) {
			p->pqflags &= ~PQ_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->pqflags & PQ_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			mutex_exit(&uvm_pageqlock);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			mutex_enter(&uvm_pageqlock);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) {
			p->flags &= ~(PG_CLEAN);
		}
		if (p->flags & PG_CLEAN) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			uvmexp.pdfreed++;

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			mutex_exit(slock);

			if (slot > 0) {
				/* this page is now only in swap. */
				mutex_enter(&uvm_swap_data_lock);
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				uvmexp.swpgonly++;
				mutex_exit(&uvm_swap_data_lock);
			}
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) {
			mutex_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");

		p->flags |= PG_PAGEOUT;
		uvm_pagedequeue(p);

		uvmexp.pgswapout++;
		mutex_exit(&uvm_pageqlock);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			mutex_enter(&uvm_pageqlock);
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}
		mutex_exit(slock);

		swapcluster_flush(&swc, false);
		mutex_enter(&uvm_pageqlock);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		uvmexp.pdpending++;

#else /* defined(VMSWAP) */
		uvm_pageactivate(p);
		mutex_exit(slock);
#endif /* defined(VMSWAP) */
	}

#if defined(VMSWAP)
	mutex_exit(&uvm_pageqlock);
	swapcluster_flush(&swc, true);
	mutex_enter(&uvm_pageqlock);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvmexp.free < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for the pagedaemon.
 *
 * => return true if it seems worthwhile to do uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more of them than the smaller of 1/16 of pageable
	 * memory and 5MB, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should also consider other reclaimable memory,
	 * XXX ie. pools, traditional buffer cache.
	 */

	filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}
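
/*
 * Worked example of the threshold above (illustrative only, assuming 4 KB
 * pages): 5MB corresponds to 1280 pages.  With, say, 100000 pageable pages,
 * (active + inactive) >> 4 is 6250, so MIN(6250, 1280) = 1280 and the
 * system is still considered reclaimable as long as at least 1280 unwired
 * file-backed pages remain.
 */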

void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}

void
uvm_reclaim_init(void)
{

	/* Initialize UVM reclaim hooks. */
	mutex_init(&uvm_reclaim_lock, MUTEX_DEFAULT, IPL_NONE);
	SLIST_INIT(&uvm_reclaim_list);
}

void
uvm_reclaim_hook_add(struct uvm_reclaim_hook *hook)
{

	KASSERT(hook != NULL);

	mutex_enter(&uvm_reclaim_lock);
	SLIST_INSERT_HEAD(&uvm_reclaim_list, hook, uvm_reclaim_next);
	mutex_exit(&uvm_reclaim_lock);
}

void
uvm_reclaim_hook_del(struct uvm_reclaim_hook *hook_entry)
{
	struct uvm_reclaim_hook *hook;

	KASSERT(hook_entry != NULL);

	mutex_enter(&uvm_reclaim_lock);
	SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
		if (hook != hook_entry) {
			continue;
		}

		SLIST_REMOVE(&uvm_reclaim_list, hook, uvm_reclaim_hook,
		    uvm_reclaim_next);
		break;
	}

	mutex_exit(&uvm_reclaim_lock);
}
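
/*
 * Illustrative sketch (not part of this file): how a subsystem with its own
 * reclaimable caches might register a reclaim hook so that the pagedaemon
 * calls it when memory is short.  The struct layout assumed here is only
 * what this file uses: a function pointer member named uvm_reclaim_hook and
 * the uvm_reclaim_next SLIST linkage; example_cache_reclaim() and
 * example_cache_attach() are hypothetical names.
 *
 *	static void
 *	example_cache_reclaim(void)
 *	{
 *		// release whatever the subsystem can spare; called from
 *		// the pagedaemon main loop with no UVM locks held.
 *	}
 *
 *	static struct uvm_reclaim_hook example_hook = {
 *		.uvm_reclaim_hook = example_cache_reclaim,
 *	};
 *
 *	void
 *	example_cache_attach(void)
 *	{
 *		uvm_reclaim_hook_add(&example_hook);
 *	}
 */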