/*	$NetBSD: uvm_pdaemon.c,v 1.124 2020/02/18 20:23:17 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.124 2020/02/18 20:23:17 chs Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef UVMHIST
UVMHIST_DEFINE(pdhist);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

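/*
 * UVMPD_NUMTRYLOCKOWNER is the maximum number of times the pagedaemon
 * will try to take a page owner's lock (with backoff) before giving up
 * on that page; other callers of uvmpd_trylockowner() try only once.
 */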
#define	UVMPD_NUMTRYLOCKOWNER	128

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);
static void	uvmpd_pool_drain_thread(void *);
static void	uvmpd_pool_drain_wakeup(void);

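/*
 * uvm_pagedaemon_waiters: the number of LWPs sleeping in uvm_wait(),
 * waiting for the pagedaemon to free pages.  Protected by uvmpd_lock.
 */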
static unsigned int uvm_pagedaemon_waiters;

/* State for the pool drainer thread */
static kmutex_t uvmpd_lock __cacheline_aligned;
static kcondvar_t uvmpd_pool_drain_cv;
static bool uvmpd_pool_drain_run = false;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	if (uvm.pagedaemon_lwp == NULL)
		panic("out of memory before the pagedaemon thread exists");

	mutex_spin_enter(&uvmpd_lock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);	/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvmpd_lock, false, wmsg, timo);
}

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 */

void
uvm_kick_pdaemon(void)
{
	int fpages = uvm_availmem();

	if (fpages + uvmexp.paging < uvmexp.freemin ||
	    (fpages + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p()) ||
	    uvm_km_va_starved_p()) {
		mutex_spin_enter(&uvmpd_lock);
		wakeup(&uvm.pagedaemon);
		mutex_spin_exit(&uvmpd_lock);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%jd, freetarg=%jd, wiredmax=%jd",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int npages = 0;
	int extrapages = 0;
	int fpages;

	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	mutex_init(&uvmpd_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&uvmpd_pool_drain_cv, "pooldrain");

	/* Create the pool drainer kernel thread. */
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL,
	    uvmpd_pool_drain_thread, NULL, NULL, "pooldrain"))
		panic("fork pooldrain");

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	npages = uvmexp.npages;
	uvmpd_tune();

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree, kmem_va_starved;

		kmem_va_starved = uvm_km_va_starved_p();

		mutex_spin_enter(&uvmpd_lock);
		if ((uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) &&
		    !kmem_va_starved) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvmpd_lock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * now recompute inactive count
		 */

		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			uvmpd_tune();
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint.  Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
		 */
		fpages = uvm_availmem();
		UVMHIST_LOG(pdhist,"  free/ftarg=%jd/%jd",
		    fpages, uvmexp.freetarg, 0,0);

		needsfree = fpages + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			uvmpd_scan();
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvm_availmem() > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			mutex_spin_enter(&uvmpd_lock);
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * scan done.  if we don't need free memory, we're done.
		 */

		if (!needsfree && !kmem_va_starved)
			continue;

		/*
		 * kick the pool drainer thread.
		 */

		uvmpd_pool_drain_wakeup();
	}
	/*NOTREACHED*/
}

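/*
 * uvm_pageout_start, uvm_pageout_done: bookkeeping for pages being
 * paged out.  uvmexp.paging counts pages whose pageout I/O has been
 * started but has not yet completed; the pagedaemon adds it to the
 * free page count when checking its targets.
 */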
void
uvm_pageout_start(int npages)
{

	atomic_add_int(&uvmexp.paging, npages);
}

void
uvm_pageout_done(int npages)
{

	KASSERT(uvmexp.paging >= npages);
	atomic_add_int(&uvmexp.paging, -npages);

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	mutex_spin_enter(&uvmpd_lock);
	if (uvm_availmem() <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else if (uvm_pagedaemon_waiters != 0) {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvmpd_lock);
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with page interlock held.
 * => resolve orphaned O->A loaned page.
 * => return the locked mutex on success.  otherwise, return NULL.
 */

kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct vm_anon *anon = pg->uanon;
	int tries, count;
	bool running;
	kmutex_t *slock;

	KASSERT(mutex_owned(&pg->interlock));

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
		KASSERTMSG(slock != NULL, "pg %p uobj %p, NULL lock", pg, uobj);
	} else if (anon != NULL) {
		slock = anon->an_lock;
		KASSERTMSG(slock != NULL, "pg %p anon %p, NULL lock", pg, anon);
	} else {
		/* Page may be in state of flux - ignore. */
		mutex_exit(&pg->interlock);
		return NULL;
	}

	/*
	 * Now try to lock the objects.  We'll try hard, but don't really
	 * plan on spending more than a millisecond or so here.
	 */
	tries = (curlwp == uvm.pagedaemon_lwp ? UVMPD_NUMTRYLOCKOWNER : 1);
	for (;;) {
		if (mutex_tryenter(slock)) {
			if (uobj == NULL) {
				/*
				 * set PG_ANON if it isn't set already.
				 */
				if ((pg->flags & PG_ANON) == 0) {
					KASSERT(pg->loan_count > 0);
					pg->loan_count--;
					pg->flags |= PG_ANON;
					/* anon now owns it */
				}
			}
			mutex_exit(&pg->interlock);
			return slock;
		}
		running = mutex_owner_running(slock);
		if (!running || --tries <= 0) {
			break;
		}
		count = SPINLOCK_BACKOFF_MAX;
		SPINLOCK_BACKOFF(count);
	}

	/*
	 * We didn't get the lock; chances are the very next page on the
	 * queue also has the same lock, so if the lock owner is not running
	 * take a breather and allow them to make progress.  There could be
	 * only 1 CPU in the system, or the pagedaemon could have preempted
	 * the owner in kernel, or any number of other things could be going
	 * on.
	 */
	mutex_exit(&pg->interlock);
	if (curlwp == uvm.pagedaemon_lwp) {
		if (!running) {
			(void)kpause("pdpglock", false, 1, NULL);
		}
		uvmexp.pdbusy++;
	}
	return NULL;
}

#if defined(VMSWAP)
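/*
 * swapcluster: state for batching swap-backed pageouts into a single
 * uvm_swap_put() of up to MAXPHYS bytes.  swc_slot is the base swap
 * slot of the cluster (zero when no cluster is allocated), swc_nallocated
 * is the number of slots reserved, and swc_nused is the number of slots
 * filled so far with pages from swc_pages.
 */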
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/* Even with strange MAXPHYS, the shift
	   implicitly rounds down to a page. */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

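/*
 * swapcluster_add: assign the next slot in the cluster to a page and
 * record that slot in the page's owner (anon or aobj).  Returns ENOMEM
 * if the slot could not be recorded for an aobj page.
 */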
static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->flags & PG_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(mutex_owned(pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(mutex_owned(uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

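/*
 * swapcluster_flush: start pageout of the pages in the cluster with
 * uvm_swap_put().  If "now" is false and the cluster is not yet full,
 * keep accumulating pages instead; on the final flush, free any
 * unused swap slots first.
 */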
static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error __diagused;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swc_slot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->flags & PG_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
		result = true;
	} else if (pg->flags & PG_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
			result = true;
		}
	}

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan the replace candidate list for pages
 * to clean or free.
 *
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	kmutex_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swc.swc_slot is non-zero if we are building a swap cluster.  we
	 * want to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvm_availmem() + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		/*
		 * first we have the pdpolicy select a victim page
		 * and attempt to lock the object that the page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PG_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		p = uvmpdpol_selectvictim(&slock);
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(uvm_page_owner_locked_p(p));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		if (p->flags & PG_BUSY) {
			mutex_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}


		/*
		 * we now have the object locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->flags & PG_READAHEAD) != 0) {
			p->flags &= ~PG_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->flags & PG_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if (uvm_pagegetdirty(p) == UVM_PAGE_STATUS_UNKNOWN) {
			if (pmap_clear_modify(p)) {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_DIRTY);
			} else {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_CLEAN);
			}
		}
		if (uvm_pagegetdirty(p) != UVM_PAGE_STATUS_DIRTY) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			atomic_inc_uint(&uvmexp.pdfreed);

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			if (slot > 0) {
				/* this page is now only in swap. */
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				atomic_inc_uint(&uvmexp.swpgonly);
			}
			mutex_exit(slock);
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvm_availmem() + uvmexp.paging > uvmexp.freetarg << 2) {
			mutex_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			mutex_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");
		p->flags |= PG_PAGEOUT;
		uvmexp.pgswapout++;

		uvm_pagelock(p);
		uvm_pagedequeue(p);
		uvm_pageunlock(p);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			mutex_exit(slock);
			continue;
		}
		mutex_exit(slock);

		swapcluster_flush(&swc, false);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		atomic_inc_uint(&uvmexp.pdpending);

#else /* defined(VMSWAP) */
		uvm_pagelock(p);
		uvm_pageactivate(p);
		uvm_pageunlock(p);
		mutex_exit(slock);
#endif /* defined(VMSWAP) */
	}

	uvmpdpol_scanfini();

#if defined(VMSWAP)
	swapcluster_flush(&swc, true);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed, fpages;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	fpages = uvm_availmem();
	if (fpages < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - fpages;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvm_availmem() < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => return true if it seems worthwhile to do uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory (or 5MB, whichever
	 * is smaller), try to reclaim.
	 *
	 * XXX assume the worst case, i.e. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory.
	 * XXX i.e. pools, traditional buffer cache.
	 */

	cpu_count_sync_all();
	filepages = (int)cpu_count_get(CPU_COUNT_FILEPAGES) +
	    (int)cpu_count_get(CPU_COUNT_EXECPAGES) - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}

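/*
 * uvm_estimatepageable: report the pdpolicy's estimate of how many
 * active and inactive pageable pages there are.
 */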
void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}

/*
 * Use a separate thread for draining pools.
 * This work can't be done from the main pagedaemon thread because
 * some pool allocators need to take vm_map locks.
 */

static void
uvmpd_pool_drain_thread(void *arg)
{
	struct pool *firstpool, *curpool;
	int bufcnt, lastslept;
	bool cycled;

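	/*
	 * firstpool remembers the first pool handed back by pool_drain();
	 * once pool_drain() returns that same pool again we have visited
	 * every pool, so "cycled" is set and the next pass is rate
	 * limited with a short kpause().
	 */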
	firstpool = NULL;
	cycled = true;
	for (;;) {
		/*
		 * sleep until awoken by the pagedaemon.
		 */
		mutex_enter(&uvmpd_lock);
		if (!uvmpd_pool_drain_run) {
			lastslept = hardclock_ticks;
			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
			if (hardclock_ticks != lastslept) {
				cycled = false;
				firstpool = NULL;
			}
		}
		uvmpd_pool_drain_run = false;
		mutex_exit(&uvmpd_lock);

		/*
		 * rate limit draining, otherwise in desperate circumstances
		 * this can totally saturate the system with xcall activity.
		 */
		if (cycled) {
			kpause("uvmpdlmt", false, 1, NULL);
			cycled = false;
			firstpool = NULL;
		}

		/*
		 * drain and temporarily disable the freelist cache.
		 */
		uvm_pgflcache_pause();

		/*
		 * kill unused metadata buffers.
		 */
		bufcnt = uvmexp.freetarg - uvm_availmem();
		if (bufcnt < 0)
			bufcnt = 0;

		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		/*
		 * drain a pool, and then re-enable the freelist cache.
		 */
		(void)pool_drain(&curpool);
		KASSERT(curpool != NULL);
		if (firstpool == NULL) {
			firstpool = curpool;
		} else if (firstpool == curpool) {
			cycled = true;
		}
		uvm_pgflcache_resume();
	}
	/*NOTREACHED*/
}

static void
uvmpd_pool_drain_wakeup(void)
{

	mutex_enter(&uvmpd_lock);
	uvmpd_pool_drain_run = true;
	cv_signal(&uvmpd_pool_drain_cv);
	mutex_exit(&uvmpd_lock);
}