/*	$NetBSD: uvm_pdaemon.c,v 1.126 2020/04/13 15:54:45 maxv Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.126 2020/04/13 15:54:45 maxv Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef UVMHIST
UVMHIST_DEFINE(pdhist);
#endif
/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

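/*
 * UVMPD_NUMTRYLOCKOWNER is the maximum number of attempts the pagedaemon
 * will make in uvmpd_trylockowner() to take a page owner's lock before
 * giving up on that page.
 */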
#define	UVMPD_NUMTRYLOCKOWNER	128

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);
static void	uvmpd_pool_drain_thread(void *);
static void	uvmpd_pool_drain_wakeup(void);

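/* Number of LWPs sleeping in uvm_wait(); protected by uvmpd_lock. */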
static unsigned int uvm_pagedaemon_waiters;

/* State for the pool drainer thread */
static kmutex_t uvmpd_lock __cacheline_aligned;
static kcondvar_t uvmpd_pool_drain_cv;
static bool uvmpd_pool_drain_run = false;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	if (uvm.pagedaemon_lwp == NULL)
		panic("out of memory before the pagedaemon thread exists");

	mutex_spin_enter(&uvmpd_lock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);	/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvmpd_lock, false, wmsg, timo);
}

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 */

void
uvm_kick_pdaemon(void)
{
	int fpages = uvm_availmem();

	if (fpages + uvmexp.paging < uvmexp.freemin ||
	    (fpages + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p()) ||
	    uvm_km_va_starved_p()) {
		mutex_spin_enter(&uvmpd_lock);
		wakeup(&uvm.pagedaemon);
		mutex_spin_exit(&uvmpd_lock);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%jd, freetarg=%jd, wiredmax=%jd",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int npages = 0;
	int extrapages = 0;
	int fpages;

	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	mutex_init(&uvmpd_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&uvmpd_pool_drain_cv, "pooldrain");

	/* Create the pool drainer kernel thread. */
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL,
	    uvmpd_pool_drain_thread, NULL, NULL, "pooldrain"))
		panic("fork pooldrain");

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	npages = uvmexp.npages;
	uvmpd_tune();

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree, kmem_va_starved;

		kmem_va_starved = uvm_km_va_starved_p();

		mutex_spin_enter(&uvmpd_lock);
		if ((uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) &&
		    !kmem_va_starved) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvmpd_lock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * now recompute inactive count
		 */

		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			uvmpd_tune();
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint.  Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
		 */
		fpages = uvm_availmem();
		UVMHIST_LOG(pdhist,"  free/ftarg=%jd/%jd",
		    fpages, uvmexp.freetarg, 0,0);

		needsfree = fpages + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			uvmpd_scan();
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvm_availmem() > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			mutex_spin_enter(&uvmpd_lock);
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * scan done.  if we don't need free memory, we're done.
		 */

		if (!needsfree && !kmem_va_starved)
			continue;

		/*
		 * kick the pool drainer thread.
		 */

		uvmpd_pool_drain_wakeup();
	}
	/*NOTREACHED*/
}

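/*
 * uvm_pageout_start, uvm_pageout_done: bracket a group of in-flight
 * pageouts by adjusting uvmexp.paging.  when pageouts complete, wake
 * either the pagedaemon (if free memory is still at or below the kernel
 * reserve) or any LWPs sleeping in uvm_wait().
 */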
void
uvm_pageout_start(int npages)
{

	atomic_add_int(&uvmexp.paging, npages);
}

void
uvm_pageout_done(int npages)
{

	KASSERT(uvmexp.paging >= npages);
	atomic_add_int(&uvmexp.paging, -npages);

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	mutex_spin_enter(&uvmpd_lock);
	if (uvm_availmem() <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else if (uvm_pagedaemon_waiters != 0) {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvmpd_lock);
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with page interlock held.
 * => resolve orphaned O->A loaned page.
 * => return the locked mutex on success.  otherwise, return NULL.
 */

krwlock_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct vm_anon *anon = pg->uanon;
	int tries, count;
	bool running;
	krwlock_t *slock;

	KASSERT(mutex_owned(&pg->interlock));

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
		KASSERTMSG(slock != NULL, "pg %p uobj %p, NULL lock", pg, uobj);
	} else if (anon != NULL) {
		slock = anon->an_lock;
		KASSERTMSG(slock != NULL, "pg %p anon %p, NULL lock", pg, anon);
	} else {
		/* Page may be in state of flux - ignore. */
		mutex_exit(&pg->interlock);
		return NULL;
	}

	/*
	 * Now try to lock the objects.  We'll try hard, but don't really
	 * plan on spending more than a millisecond or so here.
	 */
	tries = (curlwp == uvm.pagedaemon_lwp ? UVMPD_NUMTRYLOCKOWNER : 1);
	for (;;) {
		if (rw_tryenter(slock, RW_WRITER)) {
			if (uobj == NULL) {
				/*
				 * set PG_ANON if it isn't set already.
				 */
				if ((pg->flags & PG_ANON) == 0) {
					KASSERT(pg->loan_count > 0);
					pg->loan_count--;
					pg->flags |= PG_ANON;
					/* anon now owns it */
				}
			}
			mutex_exit(&pg->interlock);
			return slock;
		}
		running = rw_owner_running(slock);
		if (!running || --tries <= 0) {
			break;
		}
		count = SPINLOCK_BACKOFF_MAX;
		SPINLOCK_BACKOFF(count);
	}

	/*
	 * We didn't get the lock; chances are the very next page on the
	 * queue also has the same lock, so if the lock owner is not running
	 * take a breather and allow them to make progress.  There could be
	 * only 1 CPU in the system, or the pagedaemon could have preempted
	 * the owner in kernel, or any number of other things could be going
	 * on.
	 */
	mutex_exit(&pg->interlock);
	if (curlwp == uvm.pagedaemon_lwp) {
		if (!running) {
			(void)kpause("pdpglock", false, 1, NULL);
		}
		uvmexp.pdbusy++;
	}
	return NULL;
}

#if defined(VMSWAP)
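/*
 * swapcluster: a batch of dirty swap-backed pages being assembled for a
 * single uvm_swap_put().  swc_slot is the base swap slot of the cluster,
 * swc_nallocated is the number of slots reserved, and swc_nused is how
 * many of them have been filled so far.
 */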
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

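/*
 * swapcluster_allocslots: reserve a contiguous range of swap slots for a
 * new cluster.  a no-op if a cluster is already being built; returns
 * ENOMEM if no swap space could be allocated.
 */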
static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/*
	 * Even with a strange MAXPHYS, the shift implicitly rounds
	 * down to a whole number of pages.
	 */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

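/*
 * swapcluster_add: assign the next free slot in the cluster to a page and
 * record the slot in the page's anon or aobj.  the caller must hold the
 * page owner's lock, and the cluster must have space left.
 */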
static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->flags & PG_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(rw_write_held(pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(rw_write_held(uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

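/*
 * swapcluster_flush: start the pageout for the pages collected so far.
 * if "now" is false and the cluster still has unused slots, keep building
 * it instead; otherwise free the unused slots and hand the pages to
 * uvm_swap_put().
 */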
static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error __diagused;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->flags & PG_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
		result = true;
	} else if (pg->flags & PG_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
			result = true;
		}
	}

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan the replace-candidate list for pages
 * to clean or free.
 *
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	krwlock_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvm_availmem() + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		/*
		 * first we have the pdpolicy select a victim page
		 * and attempt to lock the object that the page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PG_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		p = uvmpdpol_selectvictim(&slock);
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(uvm_page_owner_locked_p(p, true));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		if (p->flags & PG_BUSY) {
			rw_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * we now have the object locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->flags & PG_READAHEAD) != 0) {
			p->flags &= ~PG_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->flags & PG_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if (uvm_pagegetdirty(p) == UVM_PAGE_STATUS_UNKNOWN) {
			if (pmap_clear_modify(p)) {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_DIRTY);
			} else {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_CLEAN);
			}
		}
		if (uvm_pagegetdirty(p) != UVM_PAGE_STATUS_DIRTY) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			atomic_inc_uint(&uvmexp.pdfreed);

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			if (slot > 0) {
				/* this page is now only in swap. */
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				atomic_inc_uint(&uvmexp.swpgonly);
			}
			rw_exit(slock);
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvm_availmem() + uvmexp.paging > uvmexp.freetarg << 2) {
			rw_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			rw_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");
		p->flags |= PG_PAGEOUT;
		uvmexp.pgswapout++;

		uvm_pagelock(p);
		uvm_pagedequeue(p);
		uvm_pageunlock(p);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			rw_exit(slock);
			continue;
		}
		rw_exit(slock);

		swapcluster_flush(&swc, false);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		atomic_inc_uint(&uvmexp.pdpending);

#else /* defined(VMSWAP) */
		uvm_pagelock(p);
		uvm_pageactivate(p);
		uvm_pageunlock(p);
		rw_exit(slock);
#endif /* defined(VMSWAP) */
	}

	uvmpdpol_scanfini();

#if defined(VMSWAP)
	swapcluster_flush(&swc, true);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed, fpages;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	fpages = uvm_availmem();
	if (fpages < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - fpages;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvm_availmem() < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => return true if it seems worthwhile to do uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory,
	 * XXX ie. pools, traditional buffer cache.
	 */

	cpu_count_sync_all();
	filepages = (int)cpu_count_get(CPU_COUNT_FILEPAGES) +
	    (int)cpu_count_get(CPU_COUNT_EXECPAGES) - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}

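/*
 * uvm_estimatepageable: ask the pdpolicy code for its current estimate of
 * the number of active and inactive pageable pages.
 */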
void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}

/*
 * Use a separate thread for draining pools.
 * This work can't be done from the main pagedaemon thread because
 * some pool allocators need to take vm_map locks.
 */

static void
uvmpd_pool_drain_thread(void *arg)
{
	struct pool *firstpool, *curpool;
	int bufcnt, lastslept;
	bool cycled;

	firstpool = NULL;
	cycled = true;
	for (;;) {
		/*
		 * sleep until awoken by the pagedaemon.
		 */
		mutex_enter(&uvmpd_lock);
		if (!uvmpd_pool_drain_run) {
			lastslept = getticks();
			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
			if (getticks() != lastslept) {
				cycled = false;
				firstpool = NULL;
			}
		}
		uvmpd_pool_drain_run = false;
		mutex_exit(&uvmpd_lock);

		/*
		 * rate limit draining, otherwise in desperate circumstances
		 * this can totally saturate the system with xcall activity.
		 */
		if (cycled) {
			kpause("uvmpdlmt", false, 1, NULL);
			cycled = false;
			firstpool = NULL;
		}

		/*
		 * drain and temporarily disable the freelist cache.
		 */
		uvm_pgflcache_pause();

		/*
		 * kill unused metadata buffers.
		 */
		bufcnt = uvmexp.freetarg - uvm_availmem();
		if (bufcnt < 0)
			bufcnt = 0;

		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		/*
		 * drain a pool, and then re-enable the freelist cache.
		 */
		(void)pool_drain(&curpool);
		KASSERT(curpool != NULL);
		if (firstpool == NULL) {
			firstpool = curpool;
		} else if (firstpool == curpool) {
			cycled = true;
		}
		uvm_pgflcache_resume();
	}
	/*NOTREACHED*/
}

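/*
 * uvmpd_pool_drain_wakeup: request one drain pass from the pool drain
 * thread.  called by the pagedaemon when it still needs free memory
 * (or kernel VA) after a scan.
 */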
static void
uvmpd_pool_drain_wakeup(void)
{

	mutex_enter(&uvmpd_lock);
	uvmpd_pool_drain_run = true;
	cv_signal(&uvmpd_pool_drain_cv);
	mutex_exit(&uvmpd_lock);
}