/*	$NetBSD: uvm_pdaemon.c,v 1.123 2020/01/15 17:55:45 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.123 2020/01/15 17:55:45 ad Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef UVMHIST
UVMHIST_DEFINE(pdhist);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full. the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

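/*
 * UVMPD_NUMTRYLOCKOWNER is the maximum number of times the pagedaemon
 * will retry mutex_tryenter() on a page owner's lock in
 * uvmpd_trylockowner() before giving up on that page.
 */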
#define	UVMPD_NUMTRYLOCKOWNER	128

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);
static void	uvmpd_pool_drain_thread(void *);
static void	uvmpd_pool_drain_wakeup(void);

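/*
 * Number of LWPs currently sleeping in uvm_wait() waiting for the
 * pagedaemon to free pages; modified under uvmpd_lock.
 */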
static unsigned int uvm_pagedaemon_waiters;

/* State for the pool drainer thread */
static kmutex_t uvmpd_lock __cacheline_aligned;
static kcondvar_t uvmpd_pool_drain_cv;
static bool uvmpd_pool_drain_run = false;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */
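/*
 * An illustrative caller pattern (a hypothetical sketch, not a specific
 * call site): an allocator that cannot get a page might loop like
 *
 *	while ((pg = uvm_pagealloc(...)) == NULL) {
 *		if (!uvm_reclaimable())
 *			return ENOMEM;
 *		uvm_wait("somewhere");
 *	}
 */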

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	if (uvm.pagedaemon_lwp == NULL)
		panic("out of memory before the pagedaemon thread exists");

	mutex_spin_enter(&uvmpd_lock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory. but how can it
		 * free more memory if it is asleep? that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue). for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvmpd_lock, false, wmsg, timo);
}

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 */

void
uvm_kick_pdaemon(void)
{
	int fpages = uvm_availmem();

	if (fpages + uvmexp.paging < uvmexp.freemin ||
	    (fpages + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p()) ||
	    uvm_km_va_starved_p()) {
		mutex_spin_enter(&uvmpd_lock);
		wakeup(&uvm.pagedaemon);
		mutex_spin_exit(&uvmpd_lock);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU. XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%jd, freetarg=%jd, wiredmax=%jd",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int npages = 0;
	int extrapages = 0;
	int fpages;

	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	mutex_init(&uvmpd_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&uvmpd_pool_drain_cv, "pooldrain");

	/* Create the pool drainer kernel thread. */
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL,
	    uvmpd_pool_drain_thread, NULL, NULL, "pooldrain"))
		panic("fork pooldrain");

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	npages = uvmexp.npages;
	uvmpd_tune();

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree, kmem_va_starved;

		kmem_va_starved = uvm_km_va_starved_p();

		mutex_spin_enter(&uvmpd_lock);
		if ((uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) &&
		    !kmem_va_starved) {
			UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvmpd_lock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * now recompute inactive count
		 */

		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			uvmpd_tune();
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint. Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
		 */
		fpages = uvm_availmem();
		UVMHIST_LOG(pdhist," free/ftarg=%jd/%jd",
		    fpages, uvmexp.freetarg, 0,0);

		needsfree = fpages + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			uvmpd_scan();
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvm_availmem() > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			mutex_spin_enter(&uvmpd_lock);
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * scan done. if we don't need free memory, we're done.
		 */

		if (!needsfree && !kmem_va_starved)
			continue;

		/*
		 * kick the pool drainer thread.
		 */

		uvmpd_pool_drain_wakeup();
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
 */

void
uvm_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (void *)wk;

	KASSERT(&bp->b_work == wk);

	/*
	 * process an i/o that's done.
	 */

	(*bp->b_iodone)(bp);
}

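/*
 * uvm_pageout_start: a pageout of npages pages is about to start; count
 * them as being "in flight" in uvmexp.paging.
 */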
void
uvm_pageout_start(int npages)
{

	atomic_add_int(&uvmexp.paging, npages);
}

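/*
 * uvm_pageout_done: npages pages of a previously started pageout have
 * completed; drop the in-flight count and wake up the pagedaemon or the
 * LWPs waiting for free memory, as appropriate.
 */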
void
uvm_pageout_done(int npages)
{

	KASSERT(uvmexp.paging >= npages);
	atomic_add_int(&uvmexp.paging, -npages);

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	mutex_spin_enter(&uvmpd_lock);
	if (uvm_availmem() <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else if (uvm_pagedaemon_waiters != 0) {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvmpd_lock);
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with page interlock held.
 * => resolve orphaned O->A loaned page.
 * => return the locked mutex on success. otherwise, return NULL.
 */
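/*
 * Illustrative caller pattern (a sketch based on the contract above,
 * not a specific call site):
 *
 *	mutex_enter(&pg->interlock);
 *	slock = uvmpd_trylockowner(pg);	(drops pg->interlock either way)
 *	if (slock == NULL)
 *		skip this page;
 *	else {
 *		work on pg;
 *		mutex_exit(slock);
 *	}
 */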

kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct vm_anon *anon = pg->uanon;
	int tries, count;
	bool running;
	kmutex_t *slock;

	KASSERT(mutex_owned(&pg->interlock));

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
		KASSERTMSG(slock != NULL, "pg %p uobj %p, NULL lock", pg, uobj);
	} else if (anon != NULL) {
		slock = anon->an_lock;
		KASSERTMSG(slock != NULL, "pg %p anon %p, NULL lock", pg, anon);
	} else {
		/* Page may be in state of flux - ignore. */
		mutex_exit(&pg->interlock);
		return NULL;
	}

	/*
	 * Now try to lock the objects. We'll try hard, but don't really
	 * plan on spending more than a millisecond or so here.
	 */
	tries = (curlwp == uvm.pagedaemon_lwp ? UVMPD_NUMTRYLOCKOWNER : 1);
	for (;;) {
		if (mutex_tryenter(slock)) {
			if (uobj == NULL) {
				/*
				 * set PG_ANON if it isn't set already.
				 */
				if ((pg->flags & PG_ANON) == 0) {
					KASSERT(pg->loan_count > 0);
					pg->loan_count--;
					pg->flags |= PG_ANON;
					/* anon now owns it */
				}
			}
			mutex_exit(&pg->interlock);
			return slock;
		}
		running = mutex_owner_running(slock);
		if (!running || --tries <= 0) {
			break;
		}
		count = SPINLOCK_BACKOFF_MAX;
		SPINLOCK_BACKOFF(count);
	}

	/*
	 * We didn't get the lock; chances are the very next page on the
	 * queue also has the same lock, so if the lock owner is not running
	 * take a breather and allow them to make progress. There could be
	 * only 1 CPU in the system, or the pagedaemon could have preempted
	 * the owner in kernel, or any number of other things could be going
	 * on.
	 */
	mutex_exit(&pg->interlock);
	if (curlwp == uvm.pagedaemon_lwp) {
		if (!running) {
			(void)kpause("pdpglock", false, 1, NULL);
		}
		uvmexp.pdbusy++;
	}
	return NULL;
}

#if defined(VMSWAP)
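/*
 * swapcluster: collects dirty swap-backed pages so that up to MAXPHYS
 * worth of them can be written to a contiguous range of swap slots with
 * a single uvm_swap_put(). swc_slot is the base slot of the range (0 if
 * no range is currently allocated), swc_nallocated is the number of
 * slots in the range and swc_nused is how many of them have pages
 * assigned so far.
 */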
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

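/*
 * swapcluster_allocslots: reserve a contiguous range of swap slots for
 * the cluster if it doesn't already have one. returns 0 on success,
 * ENOMEM if no swap space could be allocated.
 */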
static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/* Even with strange MAXPHYS, the shift
	   implicitly rounds down to a page. */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

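/*
 * swapcluster_add: assign the next free slot in the cluster to a
 * swap-backed page and record that slot in the page's anon or aobj.
 * returns 0 on success, ENOMEM if the aobj could not record the slot.
 */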
static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->flags & PG_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(mutex_owned(pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(mutex_owned(uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

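/*
 * swapcluster_flush: start the pageout of the pages collected so far.
 * if "now" is false and the cluster still has unused slots, do nothing
 * yet and keep accumulating; otherwise free any unused slots and hand
 * the pages to uvm_swap_put().
 */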
static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error __diagused;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks. if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->flags & PG_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
		result = true;
	} else if (pg->flags & PG_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
			result = true;
		}
	}

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan a replacement candidate list for pages
 * to clean or free.
 *
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	kmutex_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swslot is non-zero if we are building a swap cluster. we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvm_availmem() + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist," met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		/*
		 * first we have the pdpolicy select a victim page
		 * and attempt to lock the object that the page
		 * belongs to. if our attempt fails we skip on to
		 * the next page (no harm done). it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PG_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership. in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		p = uvmpdpol_selectvictim(&slock);
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(uvm_page_owner_locked_p(p));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		if (p->flags & PG_BUSY) {
			mutex_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}


		/*
		 * we now have the object locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->flags & PG_READAHEAD) != 0) {
			p->flags &= ~PG_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->flags & PG_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			continue;
		}

		/*
		 * the page is swap-backed. remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions. if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if (uvm_pagegetdirty(p) == UVM_PAGE_STATUS_UNKNOWN) {
			if (pmap_clear_modify(p)) {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_DIRTY);
			} else {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_CLEAN);
			}
		}
		if (uvm_pagegetdirty(p) != UVM_PAGE_STATUS_DIRTY) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			atomic_inc_uint(&uvmexp.pdfreed);

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves. for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			if (slot > 0) {
				/* this page is now only in swap. */
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				atomic_inc_uint(&uvmexp.swpgonly);
			}
			mutex_exit(slock);
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvm_availmem() + uvmexp.paging > uvmexp.freetarg << 2) {
			mutex_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			mutex_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page. mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");
		p->flags |= PG_PAGEOUT;
		uvmexp.pgswapout++;

		uvm_pagelock(p);
		uvm_pagedequeue(p);
		uvm_pageunlock(p);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			mutex_exit(slock);
			continue;
		}
		mutex_exit(slock);

		swapcluster_flush(&swc, false);

		/*
		 * the pageout is in progress. bump counters and set up
		 * for the next loop.
		 */

		atomic_inc_uint(&uvmexp.pdpending);

#else /* defined(VMSWAP) */
		uvm_pagelock(p);
		uvm_pageactivate(p);
		uvm_pageunlock(p);
		mutex_exit(slock);
#endif /* defined(VMSWAP) */
	}

	uvmpdpol_scanfini();

#if defined(VMSWAP)
	swapcluster_flush(&swc, true);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed, fpages;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets. first we work on our free target
	 * by converting inactive pages into free pages. then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	fpages = uvm_availmem();
	if (fpages < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - fpages;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvm_availmem() < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => return true if it seems to be worthwhile to do uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory.
	 * XXX ie. pools, traditional buffer cache.
	 */

	cpu_count_sync_all();
	filepages = (int)cpu_count_get(CPU_COUNT_FILEPAGES) +
	    (int)cpu_count_get(CPU_COUNT_EXECPAGES) - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}

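/*
 * uvm_estimatepageable: ask the page replacement policy for its current
 * estimate of the number of active and inactive pageable pages.
 */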
void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}


/*
 * Use a separate thread for draining pools.
 * This work can't be done from the main pagedaemon thread because
 * some pool allocators need to take vm_map locks.
 */

static void
uvmpd_pool_drain_thread(void *arg)
{
	struct pool *firstpool, *curpool;
	int bufcnt, lastslept;
	bool cycled;

	firstpool = NULL;
	cycled = true;
	for (;;) {
		/*
		 * sleep until awoken by the pagedaemon.
		 */
		mutex_enter(&uvmpd_lock);
		if (!uvmpd_pool_drain_run) {
			lastslept = hardclock_ticks;
			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
			if (hardclock_ticks != lastslept) {
				cycled = false;
				firstpool = NULL;
			}
		}
		uvmpd_pool_drain_run = false;
		mutex_exit(&uvmpd_lock);

		/*
		 * rate limit draining, otherwise in desperate circumstances
		 * this can totally saturate the system with xcall activity.
		 */
		if (cycled) {
			kpause("uvmpdlmt", false, 1, NULL);
			cycled = false;
			firstpool = NULL;
		}

		/*
		 * drain and temporarily disable the freelist cache.
		 */
		uvm_pgflcache_pause();

		/*
		 * kill unused metadata buffers.
		 */
		bufcnt = uvmexp.freetarg - uvm_availmem();
		if (bufcnt < 0)
			bufcnt = 0;

		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		/*
		 * drain a pool, and then re-enable the freelist cache.
		 */
		(void)pool_drain(&curpool);
		KASSERT(curpool != NULL);
		if (firstpool == NULL) {
			firstpool = curpool;
		} else if (firstpool == curpool) {
			cycled = true;
		}
		uvm_pgflcache_resume();
	}
	/*NOTREACHED*/
}

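/*
 * uvmpd_pool_drain_wakeup: request a drain pass from the pool drainer
 * thread.
 */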
static void
uvmpd_pool_drain_wakeup(void)
{

	mutex_enter(&uvmpd_lock);
	uvmpd_pool_drain_run = true;
	cv_signal(&uvmpd_pool_drain_cv);
	mutex_exit(&uvmpd_lock);
}